assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1, \2, \3    - that many literal bytes follow in the code stream
  11  * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
  12  *                 (POP is never used for CS) depending on operand 0
  13  * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
  14  *                 on operand 0
  15  * \10..\13      - a literal byte follows in the code stream, to be added
  16  *                 to the register value of operand 0..3
  17  * \14..\17      - a signed byte immediate operand, from operand 0..3
  18  * \20..\23      - a byte immediate operand, from operand 0..3
  19  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  20  * \30..\33      - a word immediate operand, from operand 0..3
  21  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  22  *                 assembly mode or the operand-size override on the operand
  23  * \40..\43      - a long immediate operand, from operand 0..3
  24  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  25  *                 depending on the address size of the instruction.
  26  * \50..\53      - a byte relative operand, from operand 0..3
  27  * \54..\57      - a qword immediate operand, from operand 0..3
  28  * \60..\63      - a word relative operand, from operand 0..3
  29  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  30  *                 assembly mode or the operand-size override on the operand
  31  * \70..\73      - a long relative operand, from operand 0..3
  32  * \74..\77       - a word constant, from the _segment_ part of operand 0..3
  33  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  34  *                 field the register value of operand b.
  35  * \140..\143    - an immediate word or signed byte for operand 0..3
  36  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  37  *                  is a signed byte rather than a word.  Opcode byte follows.
  38  * \150..\153     - an immediate dword or signed byte for operand 0..3
  39  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  40  *                  is a signed byte rather than a dword.  Opcode byte follows.
  41  * \160..\163    - this instruction uses DREX rather than REX, with the
  42  *                 OC0 field set to 0, and the dest field taken from
  43  *                 operand 0..3.
  44  * \164..\167    - this instruction uses DREX rather than REX, with the
  45  *                 OC0 field set to 1, and the dest field taken from
  46  *                 operand 0..3.
  47  * \171          - placement of DREX suffix in the absence of an EA
  48  * \172\ab       - the register number from operand a in bits 7..4, with
  49  *                 the 4-bit immediate from operand b in bits 3..0.
  50  * \173\xab      - the register number from operand a in bits 7..4, with
  51  *                 the value b in bits 3..0.
  52  * \174\a        - the register number from operand a in bits 7..4, and
  53  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  54  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  55  *                 field equal to digit b.
  56  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  57  *                 is not equal to the truncated and sign-extended 32-bit
  58  *                 operand; used for 32-bit immediates in 64-bit mode.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  74  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  75  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  76  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  77  * \314          - (disassembler only) invalid with REX.B
  78  * \315          - (disassembler only) invalid with REX.X
  79  * \316          - (disassembler only) invalid with REX.R
  80  * \317          - (disassembler only) invalid with REX.W
  81  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  82  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  83  * \322          - indicates that this instruction is only valid when the
  84  *                 operand size is the default (instruction to disassembler,
  85  *                 generates no code in the assembler)
  86  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  87  * \324          - indicates 64-bit operand size requiring REX prefix.
  88  * \330          - a literal byte follows in the code stream, to be added
  89  *                 to the condition code value of the instruction.
  90  * \331          - instruction not valid with REP prefix.  Hint for
  91  *                 disassembler only; for SSE instructions.
  92  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  93  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  94  * \334          - LOCK prefix used instead of REX.R
  95  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   96  * \336          - force a REP(E) prefix (0xF3) even if not specified.
   97  * \337          - force a REPNE prefix (0xF2) even if not specified.
  98  *                 \336-\337 are still listed as prefixes in the disassembler.
  99  * \340          - reserve <operand 0> bytes of uninitialized storage.
 100  *                 Operand 0 had better be a segmentless constant.
 101  * \360          - no SSE prefix (== \364\331)
 102  * \361          - 66 SSE prefix (== \366\331)
 103  * \362          - F2 SSE prefix (== \364\332)
 104  * \363          - F3 SSE prefix (== \364\333)
 105  * \364          - operand-size prefix (0x66) not permitted
 106  * \365          - address-size prefix (0x67) not permitted
 107  * \366          - operand-size prefix (0x66) used as opcode extension
 108  * \367          - address-size prefix (0x67) used as opcode extension
 109  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 110  *                 370 is used for Jcc, 371 is used for JMP.
 111  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 112  *                 used for conditional jump over longer jump
 113  */
 114
 115 #include "compiler.h"
 116
 117 #include <stdio.h>
 118 #include <string.h>
 119 #include <inttypes.h>
 120
 121 #include "nasm.h"
 122 #include "nasmlib.h"
 123 #include "assemble.h"
 124 #include "insns.h"
 125 #include "tables.h"
 126
/*
 * Source of zero bytes; assemble() writes from it to pad string data
 * out to a multiple of the DB/DW/... element size.
 */
/* Initialized to zero by the C standard */
static const uint8_t const_zero_buf[256];

/*
 * Result record filled in / returned by process_ea().
 * NOTE(review): presumably the encoded form of an effective address
 * (ModRM/SIB/REX bytes plus displacement size) - confirm against
 * process_ea(), which is outside this excerpt.
 */
typedef struct {
    int sib_present;                 /* is a SIB byte necessary? */
    int bytes;                       /* # of bytes of offset needed */
    int size;                        /* lazy - this is sib+bytes+1 */
    uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
} ea;

/*
 * File-scope state.  assemble() and insn_size() store their `cp' and
 * `error' arguments here on every call; assemble() additionally stores
 * its output format and listing generator, so that the static helpers
 * below can reach them without extra parameters.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error callback of the current call */
static struct ofmt *outfmt;     /* output format, set by assemble() */
static ListGen *list;           /* listing generator, set by assemble() */

/* Forward declarations of helpers defined later in this file. */
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int, int32_t);
static void add_asp(insn *, int);
 153
 154 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 155 {
 156     return ins->prefixes[pos] == prefix;
 157 }
 158
 159 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 160 {
 161     if (ins->prefixes[pos])
 162         errfunc(ERR_NONFATAL, "invalid %s prefix",
 163                 prefix_name(ins->prefixes[pos]));
 164 }
 165
 166 static const char *size_name(int size)
 167 {
 168     switch (size) {
 169     case 1:
 170         return "byte";
 171     case 2:
 172         return "word";
 173     case 4:
 174         return "dword";
 175     case 8:
 176         return "qword";
 177     case 10:
 178         return "tword";
 179     case 16:
 180         return "oword";
 181     case 32:
 182         return "yword";
 183     default:
 184         return "???";
 185     }
 186 }
 187
 188 static void warn_overflow(int size, const struct operand *o)
 189 {
 190     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 191         int64_t lim = ((int64_t)1 << (size*8))-1;
 192         int64_t data = o->offset;
 193
 194         if (data < ~lim || data > lim)
 195             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 196                     "%s data exceeds bounds", size_name(size));
 197     }
 198 }
 199 /*
 200  * This routine wrappers the real output format's output routine,
 201  * in order to pass a copy of the data off to the listing file
 202  * generator at the same time.
 203  */
 204 static void out(int64_t offset, int32_t segto, const void *data,
 205                 enum out_type type, uint64_t size,
 206                 int32_t segment, int32_t wrt)
 207 {
 208     static int32_t lineno = 0;     /* static!!! */
 209     static char *lnfname = NULL;
 210     uint8_t p[8];
 211
 212     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 213         /*
 214          * This is a non-relocated address, and we're going to
 215          * convert it into RAWDATA format.
 216          */
 217         uint8_t *q = p;
 218
 219         if (size > 8) {
 220             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 221             return;
 222         }
 223
 224         WRITEADDR(q, *(int64_t *)data, size);
 225         data = p;
 226         type = OUT_RAWDATA;
 227     }
 228
 229     list->output(offset, data, type, size);
 230
 231     /*
 232      * this call to src_get determines when we call the
 233      * debug-format-specific "linenum" function
 234      * it updates lineno and lnfname to the current values
 235      * returning 0 if "same as last time", -2 if lnfname
 236      * changed, and the amount by which lineno changed,
 237      * if it did. thus, these variables must be static
 238      */
 239
 240     if (src_get(&lineno, &lnfname)) {
 241         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 242     }
 243
 244     outfmt->output(segto, data, type, size, segment, wrt);
 245 }
 246
 247 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 248                      insn * ins, const uint8_t *code)
 249 {
 250     int64_t isize;
 251     uint8_t c = code[0];
 252
 253     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 254         return false;
 255     if (!optimizing)
 256         return false;
 257     if (optimizing < 0 && c == 0371)
 258         return false;
 259
 260     isize = calcsize(segment, offset, bits, ins, code);
 261     if (ins->oprs[0].segment != segment)
 262         return false;
 263
 264     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 265     return (isize >= -128 && isize <= 127); /* is it byte size? */
 266 }
 267
 268 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 269               insn * instruction, struct ofmt *output, efunc error,
 270               ListGen * listgen)
 271 {
 272     const struct itemplate *temp;
 273     int j;
 274     int size_prob;
 275     int64_t insn_end;
 276     int32_t itimes;
 277     int64_t start = offset;
 278     int64_t wsize = 0;             /* size for DB etc. */
 279
 280     errfunc = error;            /* to pass to other functions */
 281     cpu = cp;
 282     outfmt = output;            /* likewise */
 283     list = listgen;             /* and again */
 284
 285     switch (instruction->opcode) {
 286     case -1:
 287         return 0;
 288     case I_DB:
 289         wsize = 1;
 290         break;
 291     case I_DW:
 292         wsize = 2;
 293         break;
 294     case I_DD:
 295         wsize = 4;
 296         break;
 297     case I_DQ:
 298         wsize = 8;
 299         break;
 300     case I_DT:
 301         wsize = 10;
 302         break;
 303     case I_DO:
 304         wsize = 16;
 305         break;
 306     case I_DY:
 307         wsize = 32;
 308         break;
 309     default:
 310         break;
 311     }
 312
 313     if (wsize) {
 314         extop *e;
 315         int32_t t = instruction->times;
 316         if (t < 0)
 317             errfunc(ERR_PANIC,
 318                     "instruction->times < 0 (%ld) in assemble()", t);
 319
 320         while (t--) {           /* repeat TIMES times */
 321             for (e = instruction->eops; e; e = e->next) {
 322                 if (e->type == EOT_DB_NUMBER) {
 323                     if (wsize == 1) {
 324                         if (e->segment != NO_SEG)
 325                             errfunc(ERR_NONFATAL,
 326                                     "one-byte relocation attempted");
 327                         else {
 328                             uint8_t out_byte = e->offset;
 329                             out(offset, segment, &out_byte,
 330                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 331                         }
 332                     } else if (wsize > 8) {
 333                         errfunc(ERR_NONFATAL,
 334                                 "integer supplied to a DT, DO or DY"
 335                                 " instruction");
 336                     } else
 337                         out(offset, segment, &e->offset,
 338                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 339                     offset += wsize;
 340                 } else if (e->type == EOT_DB_STRING ||
 341                            e->type == EOT_DB_STRING_FREE) {
 342                     int align;
 343
 344                     out(offset, segment, e->stringval,
 345                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 346                     align = e->stringlen % wsize;
 347
 348                     if (align) {
 349                         align = wsize - align;
 350                         out(offset, segment, const_zero_buf,
 351                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 352                     }
 353                     offset += e->stringlen + align;
 354                 }
 355             }
 356             if (t > 0 && t == instruction->times - 1) {
 357                 /*
 358                  * Dummy call to list->output to give the offset to the
 359                  * listing module.
 360                  */
 361                 list->output(offset, NULL, OUT_RAWDATA, 0);
 362                 list->uplevel(LIST_TIMES);
 363             }
 364         }
 365         if (instruction->times > 1)
 366             list->downlevel(LIST_TIMES);
 367         return offset - start;
 368     }
 369
 370     if (instruction->opcode == I_INCBIN) {
 371         const char *fname = instruction->eops->stringval;
 372         FILE *fp;
 373
 374         fp = fopen(fname, "rb");
 375         if (!fp) {
 376             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 377                   fname);
 378         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 379             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 380                   fname);
 381         } else {
 382             static char buf[4096];
 383             size_t t = instruction->times;
 384             size_t base = 0;
 385             size_t len;
 386
 387             len = ftell(fp);
 388             if (instruction->eops->next) {
 389                 base = instruction->eops->next->offset;
 390                 len -= base;
 391                 if (instruction->eops->next->next &&
 392                     len > (size_t)instruction->eops->next->next->offset)
 393                     len = (size_t)instruction->eops->next->next->offset;
 394             }
 395             /*
 396              * Dummy call to list->output to give the offset to the
 397              * listing module.
 398              */
 399             list->output(offset, NULL, OUT_RAWDATA, 0);
 400             list->uplevel(LIST_INCBIN);
 401             while (t--) {
 402                 size_t l;
 403
 404                 fseek(fp, base, SEEK_SET);
 405                 l = len;
 406                 while (l > 0) {
 407                     int32_t m =
 408                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 409                               fp);
 410                     if (!m) {
 411                         /*
 412                          * This shouldn't happen unless the file
 413                          * actually changes while we are reading
 414                          * it.
 415                          */
 416                         error(ERR_NONFATAL,
 417                               "`incbin': unexpected EOF while"
 418                               " reading file `%s'", fname);
 419                         t = 0;  /* Try to exit cleanly */
 420                         break;
 421                     }
 422                     out(offset, segment, buf, OUT_RAWDATA, m,
 423                         NO_SEG, NO_SEG);
 424                     l -= m;
 425                 }
 426             }
 427             list->downlevel(LIST_INCBIN);
 428             if (instruction->times > 1) {
 429                 /*
 430                  * Dummy call to list->output to give the offset to the
 431                  * listing module.
 432                  */
 433                 list->output(offset, NULL, OUT_RAWDATA, 0);
 434                 list->uplevel(LIST_TIMES);
 435                 list->downlevel(LIST_TIMES);
 436             }
 437             fclose(fp);
 438             return instruction->times * len;
 439         }
 440         return 0;               /* if we're here, there's an error */
 441     }
 442
 443     /* Check to see if we need an address-size prefix */
 444     add_asp(instruction, bits);
 445
 446     size_prob = false;
 447
 448     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 449         int m = matches(temp, instruction, bits);
 450         if (m == 100 ||
 451             (m == 99 && jmp_match(segment, offset, bits,
 452                                   instruction, temp->code))) {
 453             /* Matches! */
 454             int64_t insn_size = calcsize(segment, offset, bits,
 455                                       instruction, temp->code);
 456             itimes = instruction->times;
 457             if (insn_size < 0)  /* shouldn't be, on pass two */
 458                 error(ERR_PANIC, "errors made it through from pass one");
 459             else
 460                 while (itimes--) {
 461                     for (j = 0; j < MAXPREFIX; j++) {
 462                         uint8_t c = 0;
 463                         switch (instruction->prefixes[j]) {
 464                         case P_LOCK:
 465                             c = 0xF0;
 466                             break;
 467                         case P_REPNE:
 468                         case P_REPNZ:
 469                             c = 0xF2;
 470                             break;
 471                         case P_REPE:
 472                         case P_REPZ:
 473                         case P_REP:
 474                             c = 0xF3;
 475                             break;
 476                         case R_CS:
 477                             if (bits == 64) {
 478                                 error(ERR_WARNING | ERR_PASS2,
 479                                       "cs segment base generated, but will be ignored in 64-bit mode");
 480                             }
 481                             c = 0x2E;
 482                             break;
 483                         case R_DS:
 484                             if (bits == 64) {
 485                                 error(ERR_WARNING | ERR_PASS2,
 486                                       "ds segment base generated, but will be ignored in 64-bit mode");
 487                             }
 488                             c = 0x3E;
 489                             break;
 490                         case R_ES:
 491                            if (bits == 64) {
 492                                 error(ERR_WARNING | ERR_PASS2,
 493                                       "es segment base generated, but will be ignored in 64-bit mode");
 494                            }
 495                             c = 0x26;
 496                             break;
 497                         case R_FS:
 498                             c = 0x64;
 499                             break;
 500                         case R_GS:
 501                             c = 0x65;
 502                             break;
 503                         case R_SS:
 504                             if (bits == 64) {
 505                                 error(ERR_WARNING | ERR_PASS2,
 506                                       "ss segment base generated, but will be ignored in 64-bit mode");
 507                             }
 508                             c = 0x36;
 509                             break;
 510                         case R_SEGR6:
 511                         case R_SEGR7:
 512                             error(ERR_NONFATAL,
 513                                   "segr6 and segr7 cannot be used as prefixes");
 514                             break;
 515                         case P_A16:
 516                             if (bits == 64) {
 517                                 error(ERR_NONFATAL,
 518                                       "16-bit addressing is not supported "
 519                                       "in 64-bit mode");
 520                             } else if (bits != 16)
 521                                 c = 0x67;
 522                             break;
 523                         case P_A32:
 524                             if (bits != 32)
 525                                 c = 0x67;
 526                             break;
 527                         case P_A64:
 528                             if (bits != 64) {
 529                                 error(ERR_NONFATAL,
 530                                       "64-bit addressing is only supported "
 531                                       "in 64-bit mode");
 532                             }
 533                             break;
 534                         case P_ASP:
 535                             c = 0x67;
 536                             break;
 537                         case P_O16:
 538                             if (bits != 16)
 539                                 c = 0x66;
 540                             break;
 541                         case P_O32:
 542                             if (bits == 16)
 543                                 c = 0x66;
 544                             break;
 545                         case P_O64:
 546                             /* REX.W */
 547                             break;
 548                         case P_OSP:
 549                             c = 0x66;
 550                             break;
 551                         case P_none:
 552                             break;
 553                         default:
 554                             error(ERR_PANIC, "invalid instruction prefix");
 555                         }
 556                         if (c != 0) {
 557                             out(offset, segment, &c, OUT_RAWDATA, 1,
 558                                 NO_SEG, NO_SEG);
 559                             offset++;
 560                         }
 561                     }
 562                     insn_end = offset + insn_size;
 563                     gencode(segment, offset, bits, instruction,
 564                             temp, insn_end);
 565                     offset += insn_size;
 566                     if (itimes > 0 && itimes == instruction->times - 1) {
 567                         /*
 568                          * Dummy call to list->output to give the offset to the
 569                          * listing module.
 570                          */
 571                         list->output(offset, NULL, OUT_RAWDATA, 0);
 572                         list->uplevel(LIST_TIMES);
 573                     }
 574                 }
 575             if (instruction->times > 1)
 576                 list->downlevel(LIST_TIMES);
 577             return offset - start;
 578         } else if (m > 0 && m > size_prob) {
 579             size_prob = m;
 580         }
 581     }
 582
 583     if (temp->opcode == -1) {   /* didn't match any instruction */
 584         switch (size_prob) {
 585         case 1:
 586             error(ERR_NONFATAL, "operation size not specified");
 587             break;
 588         case 2:
 589             error(ERR_NONFATAL, "mismatch in operand sizes");
 590             break;
 591         case 3:
 592             error(ERR_NONFATAL, "no instruction for this cpu level");
 593             break;
 594         case 4:
 595             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 596             break;
 597         default:
 598             error(ERR_NONFATAL,
 599                   "invalid combination of opcode and operands");
 600             break;
 601         }
 602     }
 603     return 0;
 604 }
 605
 606 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 607                insn * instruction, efunc error)
 608 {
 609     const struct itemplate *temp;
 610
 611     errfunc = error;            /* to pass to other functions */
 612     cpu = cp;
 613
 614     if (instruction->opcode == -1)
 615         return 0;
 616
 617     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 618         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 619         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 620         instruction->opcode == I_DY) {
 621         extop *e;
 622         int32_t isize, osize, wsize = 0;   /* placate gcc */
 623
 624         isize = 0;
 625         switch (instruction->opcode) {
 626         case I_DB:
 627             wsize = 1;
 628             break;
 629         case I_DW:
 630             wsize = 2;
 631             break;
 632         case I_DD:
 633             wsize = 4;
 634             break;
 635         case I_DQ:
 636             wsize = 8;
 637             break;
 638         case I_DT:
 639             wsize = 10;
 640             break;
 641         case I_DO:
 642             wsize = 16;
 643             break;
 644         case I_DY:
 645             wsize = 32;
 646             break;
 647         default:
 648             break;
 649         }
 650
 651         for (e = instruction->eops; e; e = e->next) {
 652             int32_t align;
 653
 654             osize = 0;
 655             if (e->type == EOT_DB_NUMBER)
 656                 osize = 1;
 657             else if (e->type == EOT_DB_STRING ||
 658                      e->type == EOT_DB_STRING_FREE)
 659                 osize = e->stringlen;
 660
 661             align = (-osize) % wsize;
 662             if (align < 0)
 663                 align += wsize;
 664             isize += osize + align;
 665         }
 666         return isize * instruction->times;
 667     }
 668
 669     if (instruction->opcode == I_INCBIN) {
 670         const char *fname = instruction->eops->stringval;
 671         FILE *fp;
 672         size_t len;
 673
 674         fp = fopen(fname, "rb");
 675         if (!fp)
 676             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 677                   fname);
 678         else if (fseek(fp, 0L, SEEK_END) < 0)
 679             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 680                   fname);
 681         else {
 682             len = ftell(fp);
 683             fclose(fp);
 684             if (instruction->eops->next) {
 685                 len -= instruction->eops->next->offset;
 686                 if (instruction->eops->next->next &&
 687                     len > (size_t)instruction->eops->next->next->offset) {
 688                     len = (size_t)instruction->eops->next->next->offset;
 689                 }
 690             }
 691             return instruction->times * len;
 692         }
 693         return 0;               /* if we're here, there's an error */
 694     }
 695
 696     /* Check to see if we need an address-size prefix */
 697     add_asp(instruction, bits);
 698
 699     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 700         int m = matches(temp, instruction, bits);
 701         if (m == 100 ||
 702             (m == 99 && jmp_match(segment, offset, bits,
 703                                   instruction, temp->code))) {
 704             /* we've matched an instruction. */
 705             int64_t isize;
 706             const uint8_t *codes = temp->code;
 707             int j;
 708
 709             isize = calcsize(segment, offset, bits, instruction, codes);
 710             if (isize < 0)
 711                 return -1;
 712             for (j = 0; j < MAXPREFIX; j++) {
 713                 switch (instruction->prefixes[j]) {
 714                 case P_A16:
 715                     if (bits != 16)
 716                         isize++;
 717                     break;
 718                 case P_A32:
 719                     if (bits != 32)
 720                         isize++;
 721                     break;
 722                 case P_O16:
 723                     if (bits != 16)
 724                         isize++;
 725                     break;
 726                 case P_O32:
 727                     if (bits == 16)
 728                         isize++;
 729                     break;
 730                 case P_A64:
 731                 case P_O64:
 732                 case P_none:
 733                     break;
 734                 default:
 735                     isize++;
 736                     break;
 737                 }
 738             }
 739             return isize * instruction->times;
 740         }
 741     }
 742     return -1;                  /* didn't match any instruction */
 743 }
 744
 745 static bool possible_sbyte(operand *o)
 746 {
 747     return !(o->opflags & OPFLAG_FORWARD) &&
 748         optimizing >= 0 && !(o->type & STRICT);
 749 }
 750
 751 /* check that opn[op]  is a signed byte of size 16 or 32 */
 752 static bool is_sbyte16(operand *o)
 753 {
 754     int16_t v;
 755
 756     if (!possible_sbyte(o))
 757         return false;
 758
 759     v = o->offset;
 760     return v >= -128 && v <= 127;
 761 }
 762
 763 static bool is_sbyte32(operand *o)
 764 {
 765     int32_t v;
 766
 767     if (!possible_sbyte(o))
 768         return false;
 769
 770     v = o->offset;
 771     return v >= -128 && v <= 127;
 772 }
 773
 774 /* check that opn[op] is a signed byte of size 32; warn if this is not
 775    the original value when extended to 64 bits */
 776 static bool is_sbyte64(operand *o)
 777 {
 778     int64_t v64;
 779     int32_t v;
 780
 781     if (!(o->wrt == NO_SEG && o->segment == NO_SEG))
 782         return false;           /* Not a pure immediate */
 783
 784     v64 = o->offset;
 785     v = (int32_t)v64;
 786
 787     if (v64 != v)
 788         errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 789                 "signed dword immediate exceeds bounds");
 790
 791     /* dead in the water on forward reference or External */
 792     if (!possible_sbyte(o))
 793         return false;
 794
 795     v = o->offset;
 796     return v >= -128 && v <= 127;
 797 }
 798 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 799                         insn * ins, const uint8_t *codes)
 800 {
 801     int64_t length = 0;
 802     uint8_t c;
 803     int rex_mask = ~0;
 804     struct operand *opx;
 805
 806     ins->rex = 0;               /* Ensure REX is reset */
 807
 808     if (ins->prefixes[PPS_OSIZE] == P_O64)
 809         ins->rex |= REX_W;
 810
 811     (void)segment;              /* Don't warn that this parameter is unused */
 812     (void)offset;               /* Don't warn that this parameter is unused */
 813
 814     while (*codes) {
 815         c = *codes++;
 816         opx = &ins->oprs[c & 3];
 817         switch (c) {
 818         case 01:
 819         case 02:
 820         case 03:
 821             codes += c, length += c;
 822             break;
 823         case 04:
 824         case 05:
 825         case 06:
 826         case 07:
 827             length++;
 828             break;
 829         case 010:
 830         case 011:
 831         case 012:
 832         case 013:
 833             ins->rex |=
 834                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 835             codes++, length++;
 836             break;
 837         case 014:
 838         case 015:
 839         case 016:
 840         case 017:
 841             length++;
 842             break;
 843         case 020:
 844         case 021:
 845         case 022:
 846         case 023:
 847             length++;
 848             break;
 849         case 024:
 850         case 025:
 851         case 026:
 852         case 027:
 853             length++;
 854             break;
 855         case 030:
 856         case 031:
 857         case 032:
 858         case 033:
 859             length += 2;
 860             break;
 861         case 034:
 862         case 035:
 863         case 036:
 864         case 037:
 865             if (opx->type & (BITS16 | BITS32 | BITS64))
 866                 length += (opx->type & BITS16) ? 2 : 4;
 867             else
 868                 length += (bits == 16) ? 2 : 4;
 869             break;
 870         case 040:
 871         case 041:
 872         case 042:
 873         case 043:
 874             length += 4;
 875             break;
 876         case 044:
 877         case 045:
 878         case 046:
 879         case 047:
 880             length += ins->addr_size >> 3;
 881             break;
 882         case 050:
 883         case 051:
 884         case 052:
 885         case 053:
 886             length++;
 887             break;
 888         case 054:
 889         case 055:
 890         case 056:
 891         case 057:
 892             length += 8; /* MOV reg64/imm */
 893             break;
 894         case 060:
 895         case 061:
 896         case 062:
 897         case 063:
 898             length += 2;
 899             break;
 900         case 064:
 901         case 065:
 902         case 066:
 903         case 067:
 904             if (opx->type & (BITS16 | BITS32 | BITS64))
 905                 length += (opx->type & BITS16) ? 2 : 4;
 906             else
 907                 length += (bits == 16) ? 2 : 4;
 908             break;
 909         case 070:
 910         case 071:
 911         case 072:
 912         case 073:
 913             length += 4;
 914             break;
 915         case 074:
 916         case 075:
 917         case 076:
 918         case 077:
 919             length += 2;
 920             break;
 921         case 0140:
 922         case 0141:
 923         case 0142:
 924         case 0143:
 925             length += is_sbyte16(opx) ? 1 : 2;
 926             break;
 927         case 0144:
 928         case 0145:
 929         case 0146:
 930         case 0147:
 931             codes++;
 932             length++;
 933             break;
 934         case 0150:
 935         case 0151:
 936         case 0152:
 937         case 0153:
 938             length += is_sbyte32(opx) ? 1 : 4;
 939             break;
 940         case 0154:
 941         case 0155:
 942         case 0156:
 943         case 0157:
 944             codes++;
 945             length++;
 946             break;
 947         case 0160:
 948         case 0161:
 949         case 0162:
 950         case 0163:
 951             length++;
 952             ins->rex |= REX_D;
 953             ins->drexdst = regval(opx);
 954             break;
 955         case 0164:
 956         case 0165:
 957         case 0166:
 958         case 0167:
 959             length++;
 960             ins->rex |= REX_D|REX_OC;
 961             ins->drexdst = regval(opx);
 962             break;
 963         case 0171:
 964             break;
 965         case 0172:
 966         case 0173:
 967         case 0174:
 968             codes++;
 969             length++;
 970             break;
 971         case 0250:
 972         case 0251:
 973         case 0252:
 974         case 0253:
 975             length += is_sbyte64(opx) ? 1 : 4;
 976             break;
 977         case 0260:
 978         case 0261:
 979         case 0262:
 980         case 0263:
 981             ins->rex |= REX_V;
 982             ins->drexdst = regval(opx);
 983             ins->vex_m = *codes++;
 984             ins->vex_wlp = *codes++;
 985             break;
 986         case 0270:
 987             ins->rex |= REX_V;
 988             ins->drexdst = 0;
 989             ins->vex_m = *codes++;
 990             ins->vex_wlp = *codes++;
 991             break;
 992         case 0300:
 993         case 0301:
 994         case 0302:
 995         case 0303:
 996             break;
 997         case 0310:
 998             if (bits == 64)
 999                 return -1;
1000             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1001             break;
1002         case 0311:
1003             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1004             break;
1005         case 0312:
1006             break;
1007         case 0313:
1008             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1009                 has_prefix(ins, PPS_ASIZE, P_A32))
1010                 return -1;
1011             break;
1012         case 0314:
1013         case 0315:
1014         case 0316:
1015         case 0317:
1016             break;
1017         case 0320:
1018             length += (bits != 16);
1019             break;
1020         case 0321:
1021             length += (bits == 16);
1022             break;
1023         case 0322:
1024             break;
1025         case 0323:
1026             rex_mask &= ~REX_W;
1027             break;
1028         case 0324:
1029             ins->rex |= REX_W;
1030             break;
1031         case 0330:
1032             codes++, length++;
1033             break;
1034         case 0331:
1035             break;
1036         case 0332:
1037         case 0333:
1038             length++;
1039             break;
1040         case 0334:
1041             ins->rex |= REX_L;
1042             break;
1043         case 0335:
1044             break;
1045         case 0336:
1046             if (!ins->prefixes[PPS_LREP])
1047                 ins->prefixes[PPS_LREP] = P_REP;
1048             break;
1049         case 0337:
1050             if (!ins->prefixes[PPS_LREP])
1051                 ins->prefixes[PPS_LREP] = P_REPNE;
1052             break;
1053         case 0340:
1054             if (ins->oprs[0].segment != NO_SEG)
1055                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1056                         " quantity of BSS space");
1057             else
1058                 length += ins->oprs[0].offset;
1059             break;
1060         case 0360:
1061             break;
1062         case 0361:
1063         case 0362:
1064         case 0363:
1065             length++;
1066             break;
1067         case 0364:
1068         case 0365:
1069             break;
1070         case 0366:
1071         case 0367:
1072             length++;
1073             break;
1074         case 0370:
1075         case 0371:
1076         case 0372:
1077             break;
1078         case 0373:
1079             length++;
1080             break;
1081         default:               /* can't do it by 'case' statements */
1082             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1083                 ea ea_data;
1084                 int rfield;
1085                 int32_t rflags;
1086                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1087
1088                 if (c <= 0177) {
1089                     /* pick rfield from operand b */
1090                     rflags = regflag(&ins->oprs[c & 7]);
1091                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1092                 } else {
1093                     rflags = 0;
1094                     rfield = c & 7;
1095                 }
1096
1097                 if (!process_ea
1098                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1099                      ins->addr_size, rfield, rflags)) {
1100                     errfunc(ERR_NONFATAL, "invalid effective address");
1101                     return -1;
1102                 } else {
1103                     ins->rex |= ea_data.rex;
1104                     length += ea_data.size;
1105                 }
1106             } else {
1107                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1108                         ": instruction code 0x%02X given", c);
1109             }
1110         }
1111     }
1112
1113     ins->rex &= rex_mask;
1114
1115     if (ins->rex & REX_V) {
1116         int bad32 = REX_R|REX_W|REX_X|REX_B;
1117
1118         if (ins->rex & REX_H) {
1119             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1120             return -1;
1121         }
1122         switch (ins->vex_wlp & 030) {
1123         case 000:
1124         case 020:
1125             ins->rex &= ~REX_W;
1126             break;
1127         case 010:
1128             ins->rex |= REX_W;
1129             bad32 &= ~REX_W;
1130             break;
1131         case 030:
1132             /* Follow REX_W */
1133             break;
1134         }
1135
1136         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1137             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1138             return -1;
1139         }
1140         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1141             length += 3;
1142         else
1143             length += 2;
1144     } else if (ins->rex & REX_D) {
1145         if (ins->rex & REX_H) {
1146             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1147             return -1;
1148         }
1149         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1150                            ins->drexdst > 7)) {
1151             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1152             return -1;
1153         }
1154         length++;
1155     } else if (ins->rex & REX_REAL) {
1156         if (ins->rex & REX_H) {
1157             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1158             return -1;
1159         } else if (bits == 64) {
1160             length++;
1161         } else if ((ins->rex & REX_L) &&
1162                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1163                    cpu >= IF_X86_64) {
1164             /* LOCK-as-REX.R */
1165             assert_no_prefix(ins, PPS_LREP);
1166             length++;
1167         } else {
1168             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1169             return -1;
1170         }
1171     }
1172
1173     return length;
1174 }
1175
/*
 * Emit a pending REX prefix byte, if one is needed.
 *
 * Expands in a scope that provides "ins", "bits", "offset", "segment"
 * and out().  When the instruction is neither DREX nor VEX encoded,
 * has at least one real REX bit set and the mode is 64-bit, the REX
 * byte (real bits plus REX_P) is written, ins->rex is cleared so the
 * prefix is not emitted twice, and "offset" advances by one.
 *
 * Wrapped in do { } while (0) so that "EMIT_REX();" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1183
1184 static void gencode(int32_t segment, int64_t offset, int bits,
1185                     insn * ins, const struct itemplate *temp,
1186                     int64_t insn_end)
1187 {
1188     static char condval[] = {   /* conditional opcodes */
1189         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1190         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1191         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1192     };
1193     uint8_t c;
1194     uint8_t bytes[4];
1195     int64_t size;
1196     int64_t data;
1197     struct operand *opx;
1198     const uint8_t *codes = temp->code;
1199
1200     while (*codes) {
1201         c = *codes++;
1202         opx = &ins->oprs[c & 3];
1203         switch (c) {
1204         case 01:
1205         case 02:
1206         case 03:
1207             EMIT_REX();
1208             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1209             codes += c;
1210             offset += c;
1211             break;
1212
1213         case 04:
1214         case 06:
1215             switch (ins->oprs[0].basereg) {
1216             case R_CS:
1217                 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0);
1218                 break;
1219             case R_DS:
1220                 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0);
1221                 break;
1222             case R_ES:
1223                 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0);
1224                 break;
1225             case R_SS:
1226                 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0);
1227                 break;
1228             default:
1229                 errfunc(ERR_PANIC,
1230                         "bizarre 8086 segment register received");
1231             }
1232             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1233             offset++;
1234             break;
1235
1236         case 05:
1237         case 07:
1238             switch (ins->oprs[0].basereg) {
1239             case R_FS:
1240                 bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0);
1241                 break;
1242             case R_GS:
1243                 bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0);
1244                 break;
1245             default:
1246                 errfunc(ERR_PANIC,
1247                         "bizarre 386 segment register received");
1248             }
1249             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1250             offset++;
1251             break;
1252
1253         case 010:
1254         case 011:
1255         case 012:
1256         case 013:
1257             EMIT_REX();
1258             bytes[0] = *codes++ + ((regval(opx)) & 7);
1259             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1260             offset += 1;
1261             break;
1262
1263         case 014:
1264         case 015:
1265         case 016:
1266         case 017:
1267             /* The test for BITS8 and SBYTE here is intended to avoid
1268                warning on optimizer actions due to SBYTE, while still
1269                warn on explicit BYTE directives.  Also warn, obviously,
1270                if the optimizer isn't enabled. */
1271             if (((opx->type & BITS8) ||
1272                  !(opx->type & temp->opd[c & 3] & BYTENESS)) &&
1273                 (opx->offset < -128 || opx->offset > 127)) {
1274                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1275                         "signed byte value exceeds bounds");
1276             }
1277             if (opx->segment != NO_SEG) {
1278                 data = opx->offset;
1279                 out(offset, segment, &data, OUT_ADDRESS, 1,
1280                     opx->segment, opx->wrt);
1281             } else {
1282                 bytes[0] = opx->offset;
1283                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1284                     NO_SEG);
1285             }
1286             offset += 1;
1287             break;
1288
1289         case 020:
1290         case 021:
1291         case 022:
1292         case 023:
1293             if (opx->offset < -256 || opx->offset > 255) {
1294                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1295                         "byte value exceeds bounds");
1296             }
1297             if (opx->segment != NO_SEG) {
1298                 data = opx->offset;
1299                 out(offset, segment, &data, OUT_ADDRESS, 1,
1300                     opx->segment, opx->wrt);
1301             } else {
1302                 bytes[0] = opx->offset;
1303                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1304                     NO_SEG);
1305             }
1306             offset += 1;
1307             break;
1308
1309         case 024:
1310         case 025:
1311         case 026:
1312         case 027:
1313             if (opx->offset < 0 || opx->offset > 255)
1314                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1315                         "unsigned byte value exceeds bounds");
1316             if (opx->segment != NO_SEG) {
1317                 data = opx->offset;
1318                 out(offset, segment, &data, OUT_ADDRESS, 1,
1319                     opx->segment, opx->wrt);
1320             } else {
1321                 bytes[0] = opx->offset;
1322                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1323                     NO_SEG);
1324             }
1325             offset += 1;
1326             break;
1327
1328         case 030:
1329         case 031:
1330         case 032:
1331         case 033:
1332             warn_overflow(2, opx);
1333             data = opx->offset;
1334             out(offset, segment, &data, OUT_ADDRESS, 2,
1335                 opx->segment, opx->wrt);
1336             offset += 2;
1337             break;
1338
1339         case 034:
1340         case 035:
1341         case 036:
1342         case 037:
1343             if (opx->type & (BITS16 | BITS32))
1344                 size = (opx->type & BITS16) ? 2 : 4;
1345             else
1346                 size = (bits == 16) ? 2 : 4;
1347             warn_overflow(size, opx);
1348             data = opx->offset;
1349             out(offset, segment, &data, OUT_ADDRESS, size,
1350                 opx->segment, opx->wrt);
1351             offset += size;
1352             break;
1353
1354         case 040:
1355         case 041:
1356         case 042:
1357         case 043:
1358             warn_overflow(4, opx);
1359             data = opx->offset;
1360             out(offset, segment, &data, OUT_ADDRESS, 4,
1361                 opx->segment, opx->wrt);
1362             offset += 4;
1363             break;
1364
1365         case 044:
1366         case 045:
1367         case 046:
1368         case 047:
1369             data = opx->offset;
1370             size = ins->addr_size >> 3;
1371             warn_overflow(size, opx);
1372             out(offset, segment, &data, OUT_ADDRESS, size,
1373                 opx->segment, opx->wrt);
1374             offset += size;
1375             break;
1376
1377         case 050:
1378         case 051:
1379         case 052:
1380         case 053:
1381             if (opx->segment != segment)
1382                 errfunc(ERR_NONFATAL,
1383                         "short relative jump outside segment");
1384             data = opx->offset - insn_end;
1385             if (data > 127 || data < -128)
1386                 errfunc(ERR_NONFATAL, "short jump is out of range");
1387             bytes[0] = data;
1388             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1389             offset += 1;
1390             break;
1391
1392         case 054:
1393         case 055:
1394         case 056:
1395         case 057:
1396             data = (int64_t)opx->offset;
1397             out(offset, segment, &data, OUT_ADDRESS, 8,
1398                 opx->segment, opx->wrt);
1399             offset += 8;
1400             break;
1401
1402         case 060:
1403         case 061:
1404         case 062:
1405         case 063:
1406             if (opx->segment != segment) {
1407                 data = opx->offset;
1408                 out(offset, segment, &data,
1409                     OUT_REL2ADR, insn_end - offset,
1410                     opx->segment, opx->wrt);
1411             } else {
1412                 data = opx->offset - insn_end;
1413                 out(offset, segment, &data,
1414                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1415             }
1416             offset += 2;
1417             break;
1418
1419         case 064:
1420         case 065:
1421         case 066:
1422         case 067:
1423             if (opx->type & (BITS16 | BITS32 | BITS64))
1424                 size = (opx->type & BITS16) ? 2 : 4;
1425             else
1426                 size = (bits == 16) ? 2 : 4;
1427             if (opx->segment != segment) {
1428                 data = opx->offset;
1429                 out(offset, segment, &data,
1430                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1431                     insn_end - offset, opx->segment, opx->wrt);
1432             } else {
1433                 data = opx->offset - insn_end;
1434                 out(offset, segment, &data,
1435                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1436             }
1437             offset += size;
1438             break;
1439
1440         case 070:
1441         case 071:
1442         case 072:
1443         case 073:
1444             if (opx->segment != segment) {
1445                 data = opx->offset;
1446                 out(offset, segment, &data,
1447                     OUT_REL4ADR, insn_end - offset,
1448                     opx->segment, opx->wrt);
1449             } else {
1450                 data = opx->offset - insn_end;
1451                 out(offset, segment, &data,
1452                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1453             }
1454             offset += 4;
1455             break;
1456
1457         case 074:
1458         case 075:
1459         case 076:
1460         case 077:
1461             if (opx->segment == NO_SEG)
1462                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1463                         " relocatable");
1464             data = 0;
1465             out(offset, segment, &data, OUT_ADDRESS, 2,
1466                 outfmt->segbase(1 + opx->segment),
1467                 opx->wrt);
1468             offset += 2;
1469             break;
1470
1471         case 0140:
1472         case 0141:
1473         case 0142:
1474         case 0143:
1475             data = opx->offset;
1476             warn_overflow(2, opx);
1477             if (is_sbyte16(opx)) {
1478                 bytes[0] = data;
1479                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1480                     NO_SEG);
1481                 offset++;
1482             } else {
1483                 out(offset, segment, &data, OUT_ADDRESS, 2,
1484                     opx->segment, opx->wrt);
1485                 offset += 2;
1486             }
1487             break;
1488
1489         case 0144:
1490         case 0145:
1491         case 0146:
1492         case 0147:
1493             EMIT_REX();
1494             bytes[0] = *codes++;
1495             if (is_sbyte16(opx))
1496                 bytes[0] |= 2;  /* s-bit */
1497             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1498             offset++;
1499             break;
1500
1501         case 0150:
1502         case 0151:
1503         case 0152:
1504         case 0153:
1505             data = opx->offset;
1506             warn_overflow(4, opx);
1507             if (is_sbyte32(opx)) {
1508                 bytes[0] = data;
1509                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1510                     NO_SEG);
1511                 offset++;
1512             } else {
1513                 out(offset, segment, &data, OUT_ADDRESS, 4,
1514                     opx->segment, opx->wrt);
1515                 offset += 4;
1516             }
1517             break;
1518
1519         case 0154:
1520         case 0155:
1521         case 0156:
1522         case 0157:
1523             EMIT_REX();
1524             bytes[0] = *codes++;
1525             if (is_sbyte32(opx))
1526                 bytes[0] |= 2;  /* s-bit */
1527             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1528             offset++;
1529             break;
1530
1531         case 0160:
1532         case 0161:
1533         case 0162:
1534         case 0163:
1535         case 0164:
1536         case 0165:
1537         case 0166:
1538         case 0167:
1539             break;
1540
1541         case 0171:
1542             bytes[0] =
1543                 (ins->drexdst << 4) |
1544                 (ins->rex & REX_OC ? 0x08 : 0) |
1545                 (ins->rex & (REX_R|REX_X|REX_B));
1546             ins->rex = 0;
1547             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1548             offset++;
1549             break;
1550
1551         case 0172:
1552             c = *codes++;
1553             opx = &ins->oprs[c >> 3];
1554             bytes[0] = nasm_regvals[opx->basereg] << 4;
1555             opx = &ins->oprs[c & 7];
1556             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1557                 errfunc(ERR_NONFATAL,
1558                         "non-absolute expression not permitted as argument %d",
1559                         c & 7);
1560             } else {
1561                 if (opx->offset & ~15) {
1562                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1563                             "four-bit argument exceeds bounds");
1564                 }
1565                 bytes[0] |= opx->offset & 15;
1566             }
1567             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1568             offset++;
1569             break;
1570
1571         case 0173:
1572             c = *codes++;
1573             opx = &ins->oprs[c >> 4];
1574             bytes[0] = nasm_regvals[opx->basereg] << 4;
1575             bytes[0] |= c & 15;
1576             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1577             offset++;
1578             break;
1579
1580         case 0174:
1581             c = *codes++;
1582             opx = &ins->oprs[c];
1583             bytes[0] = nasm_regvals[opx->basereg] << 4;
1584             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1585             offset++;
1586             break;
1587
1588         case 0250:
1589         case 0251:
1590         case 0252:
1591         case 0253:
1592             data = opx->offset;
1593             warn_overflow(4, opx);
1594             if (is_sbyte64(opx)) {
1595                 bytes[0] = data;
1596                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1597                     NO_SEG);
1598                 offset++;
1599             } else {
1600                 out(offset, segment, &data, OUT_ADDRESS, 4,
1601                     opx->segment, opx->wrt);
1602                 offset += 4;
1603             }
1604             break;
1605
1606         case 0260:
1607         case 0261:
1608         case 0262:
1609         case 0263:
1610         case 0270:
1611             codes += 2;
1612             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1613                 bytes[0] = 0xc4;
1614                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1615                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1616                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1617                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1618                 offset += 3;
1619             } else {
1620                 bytes[0] = 0xc5;
1621                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1622                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1623                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1624                 offset += 2;
1625             }
1626             break;
1627
1628         case 0300:
1629         case 0301:
1630         case 0302:
1631         case 0303:
1632             break;
1633
1634         case 0310:
1635             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1636                 *bytes = 0x67;
1637                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1638                 offset += 1;
1639             } else
1640                 offset += 0;
1641             break;
1642
1643         case 0311:
1644             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1645                 *bytes = 0x67;
1646                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1647                 offset += 1;
1648             } else
1649                 offset += 0;
1650             break;
1651
1652         case 0312:
1653             break;
1654
1655         case 0313:
1656             ins->rex = 0;
1657             break;
1658
1659         case 0314:
1660         case 0315:
1661         case 0316:
1662         case 0317:
1663             break;
1664
1665         case 0320:
1666             if (bits != 16) {
1667                 *bytes = 0x66;
1668                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1669                 offset += 1;
1670             } else
1671                 offset += 0;
1672             break;
1673
1674         case 0321:
1675             if (bits == 16) {
1676                 *bytes = 0x66;
1677                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1678                 offset += 1;
1679             } else
1680                 offset += 0;
1681             break;
1682
1683         case 0322:
1684         case 0323:
1685             break;
1686
1687         case 0324:
1688             ins->rex |= REX_W;
1689             break;
1690
1691         case 0330:
1692             *bytes = *codes++ ^ condval[ins->condition];
1693             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1694             offset += 1;
1695             break;
1696
1697         case 0331:
1698             break;
1699
1700         case 0332:
1701         case 0333:
1702             *bytes = c - 0332 + 0xF2;
1703             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1704             offset += 1;
1705             break;
1706
1707         case 0334:
1708             if (ins->rex & REX_R) {
1709                 *bytes = 0xF0;
1710                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1711                 offset += 1;
1712             }
1713             ins->rex &= ~(REX_L|REX_R);
1714             break;
1715
1716         case 0335:
1717             break;
1718
1719         case 0336:
1720         case 0337:
1721             break;
1722
1723         case 0340:
1724             if (ins->oprs[0].segment != NO_SEG)
1725                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1726             else {
1727                 int64_t size = ins->oprs[0].offset;
1728                 if (size > 0)
1729                     out(offset, segment, NULL,
1730                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1731                 offset += size;
1732             }
1733             break;
1734
1735         case 0360:
1736             break;
1737
1738         case 0361:
1739             bytes[0] = 0x66;
1740             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1741             offset += 1;
1742             break;
1743
1744         case 0362:
1745         case 0363:
1746             bytes[0] = c - 0362 + 0xf2;
1747             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1748             offset += 1;
1749             break;
1750
1751         case 0364:
1752         case 0365:
1753             break;
1754
1755         case 0366:
1756         case 0367:
1757             *bytes = c - 0366 + 0x66;
1758             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1759             offset += 1;
1760             break;
1761
1762         case 0370:
1763         case 0371:
1764         case 0372:
1765             break;
1766
1767         case 0373:
1768             *bytes = bits == 16 ? 3 : 5;
1769             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1770             offset += 1;
1771             break;
1772
1773         default:               /* can't do it by 'case' statements */
1774             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1775                 ea ea_data;
1776                 int rfield;
1777                 int32_t rflags;
1778                 uint8_t *p;
1779                 int32_t s;
1780                 enum out_type type;
1781
1782                 if (c <= 0177) {
1783                     /* pick rfield from operand b */
1784                     rflags = regflag(&ins->oprs[c & 7]);
1785                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1786                 } else {
1787                     /* rfield is constant */
1788                     rflags = 0;
1789                     rfield = c & 7;
1790                 }
1791
1792                 if (!process_ea
1793                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1794                      ins->addr_size, rfield, rflags)) {
1795                     errfunc(ERR_NONFATAL, "invalid effective address");
1796                 }
1797
1798
1799                 p = bytes;
1800                 *p++ = ea_data.modrm;
1801                 if (ea_data.sib_present)
1802                     *p++ = ea_data.sib;
1803
1804                 /* DREX suffixes come between the SIB and the displacement */
1805                 if (ins->rex & REX_D) {
1806                     *p++ =
1807                         (ins->drexdst << 4) |
1808                         (ins->rex & REX_OC ? 0x08 : 0) |
1809                         (ins->rex & (REX_R|REX_X|REX_B));
1810                     ins->rex = 0;
1811                 }
1812
1813                 s = p - bytes;
1814                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1815
1816                 /*
1817                  * Make sure the address gets the right offset in case
1818                  * the line breaks in the .lst file (BR 1197827)
1819                  */
1820                 offset += s;
1821                 s = 0;
1822
1823                 switch (ea_data.bytes) {
1824                 case 0:
1825                     break;
1826                 case 1:
1827                     if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1828                         data = ins->oprs[(c >> 3) & 7].offset;
1829                         out(offset, segment, &data, OUT_ADDRESS, 1,
1830                             ins->oprs[(c >> 3) & 7].segment,
1831                             ins->oprs[(c >> 3) & 7].wrt);
1832                     } else {
1833                         *bytes = ins->oprs[(c >> 3) & 7].offset;
1834                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1835                             NO_SEG, NO_SEG);
1836                     }
1837                     s++;
1838                     break;
1839                 case 8:
1840                 case 2:
1841                 case 4:
1842                     data = ins->oprs[(c >> 3) & 7].offset;
1843                     warn_overflow(ea_data.bytes, opx);
1844                     s += ea_data.bytes;
1845                     if (ea_data.rip) {
1846                         data -= insn_end - (offset+ea_data.bytes);
1847                         type = OUT_REL4ADR;
1848                     } else {
1849                         type = OUT_ADDRESS;
1850                     }
1851                     out(offset, segment, &data, type, ea_data.bytes,
1852                         ins->oprs[(c >> 3) & 7].segment,
1853                         ins->oprs[(c >> 3) & 7].wrt);
1854                     break;
1855                 }
1856                 offset += s;
1857             } else {
1858                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1859                         ": instruction code 0x%02X given", c);
1860             }
1861         }
1862     }
1863 }
1864
1865 static int32_t regflag(const operand * o)
1866 {
1867     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1868         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1869     }
1870     return nasm_reg_flags[o->basereg];
1871 }
1872
1873 static int32_t regval(const operand * o)
1874 {
1875     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1876         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1877     }
1878     return nasm_regvals[o->basereg];
1879 }
1880
1881 static int op_rexflags(const operand * o, int mask)
1882 {
1883     int32_t flags;
1884     int val;
1885
1886     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1887         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1888     }
1889
1890     flags = nasm_reg_flags[o->basereg];
1891     val = nasm_regvals[o->basereg];
1892
1893     return rexflags(val, flags, mask);
1894 }
1895
1896 static int rexflags(int val, int32_t flags, int mask)
1897 {
1898     int rex = 0;
1899
1900     if (val >= 8)
1901         rex |= REX_B|REX_X|REX_R;
1902     if (flags & BITS64)
1903         rex |= REX_W;
1904     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1905         rex |= REX_H;
1906     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1907         rex |= REX_P;
1908
1909     return rex & mask;
1910 }
1911
1912 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1913 {
1914     int i, size[MAX_OPERANDS], asize, oprs, ret;
1915
1916     ret = 100;
1917
1918     /*
1919      * Check the opcode
1920      */
1921     if (itemp->opcode != instruction->opcode)
1922         return 0;
1923
1924     /*
1925      * Count the operands
1926      */
1927     if (itemp->operands != instruction->operands)
1928         return 0;
1929
1930     /*
1931      * Check that no spurious colons or TOs are present
1932      */
1933     for (i = 0; i < itemp->operands; i++)
1934         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1935             return 0;
1936
1937     /*
1938      * Process size flags
1939      */
1940     if (itemp->flags & IF_ARMASK) {
1941         memset(size, 0, sizeof size);
1942
1943         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1944
1945         switch (itemp->flags & IF_SMASK) {
1946         case IF_SB:
1947             size[i] = BITS8;
1948             break;
1949         case IF_SW:
1950             size[i] = BITS16;
1951             break;
1952         case IF_SD:
1953             size[i] = BITS32;
1954             break;
1955         case IF_SQ:
1956             size[i] = BITS64;
1957             break;
1958         case IF_SO:
1959             size[i] = BITS128;
1960             break;
1961         case IF_SY:
1962             size[i] = BITS256;
1963             break;
1964         case IF_SZ:
1965             switch (bits) {
1966             case 16:
1967                 size[i] = BITS16;
1968                 break;
1969             case 32:
1970                 size[i] = BITS32;
1971                 break;
1972             case 64:
1973                 size[i] = BITS64;
1974                 break;
1975             }
1976             break;
1977         default:
1978             break;
1979         }
1980     } else {
1981         asize = 0;
1982         switch (itemp->flags & IF_SMASK) {
1983         case IF_SB:
1984             asize = BITS8;
1985             break;
1986         case IF_SW:
1987             asize = BITS16;
1988             break;
1989         case IF_SD:
1990             asize = BITS32;
1991             break;
1992         case IF_SQ:
1993             asize = BITS64;
1994             break;
1995         case IF_SO:
1996             asize = BITS128;
1997             break;
1998         case IF_SY:
1999             asize = BITS256;
2000             break;
2001         case IF_SZ:
2002             switch (bits) {
2003             case 16:
2004                 asize = BITS16;
2005                 break;
2006             case 32:
2007                 asize = BITS32;
2008                 break;
2009             case 64:
2010                 asize = BITS64;
2011                 break;
2012             }
2013             break;
2014         default:
2015             break;
2016         }
2017         for (i = 0; i < MAX_OPERANDS; i++)
2018             size[i] = asize;
2019     }
2020
2021     /*
2022      * Check that the operand flags all match up
2023      */
2024     for (i = 0; i < itemp->operands; i++) {
2025         int32_t type = instruction->oprs[i].type;
2026         if (!(type & SIZE_MASK))
2027             type |= size[i];
2028
2029         if (itemp->opd[i] & SAME_AS) {
2030             int j = itemp->opd[i] & ~SAME_AS;
2031             if (type != instruction->oprs[j].type ||
2032                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2033                 return 0;
2034         } else if (itemp->opd[i] & ~type ||
2035             ((itemp->opd[i] & SIZE_MASK) &&
2036              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2037             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2038                 (type & SIZE_MASK))
2039                 return 0;
2040             else
2041                 return 1;
2042         }
2043     }
2044
2045     /*
2046      * Check operand sizes
2047      */
2048     if (itemp->flags & (IF_SM | IF_SM2)) {
2049         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2050         asize = 0;
2051         for (i = 0; i < oprs; i++) {
2052             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2053                 int j;
2054                 for (j = 0; j < oprs; j++)
2055                     size[j] = asize;
2056                 break;
2057             }
2058         }
2059     } else {
2060         oprs = itemp->operands;
2061     }
2062
2063     for (i = 0; i < itemp->operands; i++) {
2064         if (!(itemp->opd[i] & SIZE_MASK) &&
2065             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2066             return 2;
2067     }
2068
2069     /*
2070      * Check template is okay at the set cpu level
2071      */
2072     if (((itemp->flags & IF_PLEVEL) > cpu))
2073         return 3;
2074
2075     /*
2076      * Check if instruction is available in long mode
2077      */
2078     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2079         return 4;
2080
2081     /*
2082      * Check if special handling needed for Jumps
2083      */
2084     if ((uint8_t)(itemp->code[0]) >= 0370)
2085         return 99;
2086
2087     return ret;
2088 }
2089
2090 static ea *process_ea(operand * input, ea * output, int bits,
2091                       int addrbits, int rfield, int32_t rflags)
2092 {
2093     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2094
2095     output->rip = false;
2096
2097     /* REX flags for the rfield operand */
2098     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2099
2100     if (!(REGISTER & ~input->type)) {   /* register direct */
2101         int i;
2102         int32_t f;
2103
2104         if (input->basereg < EXPR_REG_START /* Verify as Register */
2105             || input->basereg >= REG_ENUM_LIMIT)
2106             return NULL;
2107         f = regflag(input);
2108         i = nasm_regvals[input->basereg];
2109
2110         if (REG_EA & ~f)
2111             return NULL;        /* Invalid EA register */
2112
2113         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2114
2115         output->sib_present = false;             /* no SIB necessary */
2116         output->bytes = 0;  /* no offset necessary either */
2117         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2118     } else {                    /* it's a memory reference */
2119         if (input->basereg == -1
2120             && (input->indexreg == -1 || input->scale == 0)) {
2121             /* it's a pure offset */
2122             if (bits == 64 && (~input->type & IP_REL)) {
2123               int scale, index, base;
2124               output->sib_present = true;
2125               scale = 0;
2126               index = 4;
2127               base = 5;
2128               output->sib = (scale << 6) | (index << 3) | base;
2129               output->bytes = 4;
2130               output->modrm = 4 | ((rfield & 7) << 3);
2131               output->rip = false;
2132             } else {
2133               output->sib_present = false;
2134               output->bytes = (addrbits != 16 ? 4 : 2);
2135               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2136               output->rip = bits == 64;
2137             }
2138         } else {                /* it's an indirection */
2139             int i = input->indexreg, b = input->basereg, s = input->scale;
2140             int32_t o = input->offset, seg = input->segment;
2141             int hb = input->hintbase, ht = input->hinttype;
2142             int t;
2143             int it, bt;
2144             int32_t ix, bx;     /* register flags */
2145
2146             if (s == 0)
2147                 i = -1;         /* make this easy, at least */
2148
2149             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2150                 it = nasm_regvals[i];
2151                 ix = nasm_reg_flags[i];
2152             } else {
2153                 it = -1;
2154                 ix = 0;
2155             }
2156
2157             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2158                 bt = nasm_regvals[b];
2159                 bx = nasm_reg_flags[b];
2160             } else {
2161                 bt = -1;
2162                 bx = 0;
2163             }
2164
2165             /* check for a 32/64-bit memory reference... */
2166             if ((ix|bx) & (BITS32|BITS64)) {
2167                 /* it must be a 32/64-bit memory reference. Firstly we have
2168                  * to check that all registers involved are type E/Rxx. */
2169                 int32_t sok = BITS32|BITS64;
2170
2171                 if (it != -1) {
2172                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2173                         sok &= ix;
2174                     else
2175                         return NULL;
2176                 }
2177
2178                 if (bt != -1) {
2179                     if (REG_GPR & ~bx)
2180                         return NULL; /* Invalid register */
2181                     if (~sok & bx & SIZE_MASK)
2182                         return NULL; /* Invalid size */
2183                     sok &= bx;
2184                 }
2185
2186                 /* While we're here, ensure the user didn't specify
2187                    WORD or QWORD. */
2188                 if (input->disp_size == 16 || input->disp_size == 64)
2189                     return NULL;
2190
2191                 if (addrbits == 16 ||
2192                     (addrbits == 32 && !(sok & BITS32)) ||
2193                     (addrbits == 64 && !(sok & BITS64)))
2194                     return NULL;
2195
2196                 /* now reorganize base/index */
2197                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2198                     ((hb == b && ht == EAH_NOTBASE)
2199                      || (hb == i && ht == EAH_MAKEBASE))) {
2200                     /* swap if hints say so */
2201                     t = bt, bt = it, it = t;
2202                     t = bx, bx = ix, ix = t;
2203                 }
2204                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2205                     bt = -1, bx = 0, s++;
2206                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2207                     /* make single reg base, unless hint */
2208                     bt = it, bx = ix, it = -1, ix = 0;
2209                 }
2210                 if (((s == 2 && it != REG_NUM_ESP
2211                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2212                      || s == 5 || s == 9) && bt == -1)
2213                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2214                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2215                     && (input->eaflags & EAF_TIMESTWO))
2216                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2217                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2218                 if (s == 1 && it == REG_NUM_ESP) {
2219                     /* swap ESP into base if scale is 1 */
2220                     t = it, it = bt, bt = t;
2221                     t = ix, ix = bx, bx = t;
2222                 }
2223                 if (it == REG_NUM_ESP
2224                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2225                     return NULL;        /* wrong, for various reasons */
2226
2227                 output->rex |= rexflags(it, ix, REX_X);
2228                 output->rex |= rexflags(bt, bx, REX_B);
2229
2230                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2231                     /* no SIB needed */
2232                     int mod, rm;
2233
2234                     if (bt == -1) {
2235                         rm = 5;
2236                         mod = 0;
2237                     } else {
2238                         rm = (bt & 7);
2239                         if (rm != REG_NUM_EBP && o == 0 &&
2240                                 seg == NO_SEG && !forw_ref &&
2241                                 !(input->eaflags &
2242                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2243                             mod = 0;
2244                         else if (input->eaflags & EAF_BYTEOFFS ||
2245                                  (o >= -128 && o <= 127 && seg == NO_SEG
2246                                   && !forw_ref
2247                                   && !(input->eaflags & EAF_WORDOFFS)))
2248                             mod = 1;
2249                         else
2250                             mod = 2;
2251                     }
2252
2253                     output->sib_present = false;
2254                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2255                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2256                 } else {
2257                     /* we need a SIB */
2258                     int mod, scale, index, base;
2259
2260                     if (it == -1)
2261                         index = 4, s = 1;
2262                     else
2263                         index = (it & 7);
2264
2265                     switch (s) {
2266                     case 1:
2267                         scale = 0;
2268                         break;
2269                     case 2:
2270                         scale = 1;
2271                         break;
2272                     case 4:
2273                         scale = 2;
2274                         break;
2275                     case 8:
2276                         scale = 3;
2277                         break;
2278                     default:   /* then what the smeg is it? */
2279                         return NULL;    /* panic */
2280                     }
2281
2282                     if (bt == -1) {
2283                         base = 5;
2284                         mod = 0;
2285                     } else {
2286                         base = (bt & 7);
2287                         if (base != REG_NUM_EBP && o == 0 &&
2288                                     seg == NO_SEG && !forw_ref &&
2289                                     !(input->eaflags &
2290                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2291                             mod = 0;
2292                         else if (input->eaflags & EAF_BYTEOFFS ||
2293                                  (o >= -128 && o <= 127 && seg == NO_SEG
2294                                   && !forw_ref
2295                                   && !(input->eaflags & EAF_WORDOFFS)))
2296                             mod = 1;
2297                         else
2298                             mod = 2;
2299                     }
2300
2301                     output->sib_present = true;
2302                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2303                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2304                     output->sib = (scale << 6) | (index << 3) | base;
2305                 }
2306             } else {            /* it's 16-bit */
2307                 int mod, rm;
2308
2309                 /* check for 64-bit long mode */
2310                 if (addrbits == 64)
2311                     return NULL;
2312
2313                 /* check all registers are BX, BP, SI or DI */
2314                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2315                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2316                                        && i != R_SI && i != R_DI))
2317                     return NULL;
2318
2319                 /* ensure the user didn't specify DWORD/QWORD */
2320                 if (input->disp_size == 32 || input->disp_size == 64)
2321                     return NULL;
2322
2323                 if (s != 1 && i != -1)
2324                     return NULL;        /* no can do, in 16-bit EA */
2325                 if (b == -1 && i != -1) {
2326                     int tmp = b;
2327                     b = i;
2328                     i = tmp;
2329                 }               /* swap */
2330                 if ((b == R_SI || b == R_DI) && i != -1) {
2331                     int tmp = b;
2332                     b = i;
2333                     i = tmp;
2334                 }
2335                 /* have BX/BP as base, SI/DI index */
2336                 if (b == i)
2337                     return NULL;        /* shouldn't ever happen, in theory */
2338                 if (i != -1 && b != -1 &&
2339                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2340                     return NULL;        /* invalid combinations */
2341                 if (b == -1)    /* pure offset: handled above */
2342                     return NULL;        /* so if it gets to here, panic! */
2343
2344                 rm = -1;
2345                 if (i != -1)
2346                     switch (i * 256 + b) {
2347                     case R_SI * 256 + R_BX:
2348                         rm = 0;
2349                         break;
2350                     case R_DI * 256 + R_BX:
2351                         rm = 1;
2352                         break;
2353                     case R_SI * 256 + R_BP:
2354                         rm = 2;
2355                         break;
2356                     case R_DI * 256 + R_BP:
2357                         rm = 3;
2358                         break;
2359                 } else
2360                     switch (b) {
2361                     case R_SI:
2362                         rm = 4;
2363                         break;
2364                     case R_DI:
2365                         rm = 5;
2366                         break;
2367                     case R_BP:
2368                         rm = 6;
2369                         break;
2370                     case R_BX:
2371                         rm = 7;
2372                         break;
2373                     }
2374                 if (rm == -1)   /* can't happen, in theory */
2375                     return NULL;        /* so panic if it does */
2376
2377                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2378                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2379                     mod = 0;
2380                 else if (input->eaflags & EAF_BYTEOFFS ||
2381                          (o >= -128 && o <= 127 && seg == NO_SEG
2382                           && !forw_ref
2383                           && !(input->eaflags & EAF_WORDOFFS)))
2384                     mod = 1;
2385                 else
2386                     mod = 2;
2387
2388                 output->sib_present = false;    /* no SIB - it's 16-bit */
2389                 output->bytes = mod;    /* bytes of offset needed */
2390                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2391             }
2392         }
2393     }
2394
2395     output->size = 1 + output->sib_present + output->bytes;
2396     return output;
2397 }
2398
2399 static void add_asp(insn *ins, int addrbits)
2400 {
2401     int j, valid;
2402     int defdisp;
2403
2404     valid = (addrbits == 64) ? 64|32 : 32|16;
2405
2406     switch (ins->prefixes[PPS_ASIZE]) {
2407     case P_A16:
2408         valid &= 16;
2409         break;
2410     case P_A32:
2411         valid &= 32;
2412         break;
2413     case P_A64:
2414         valid &= 64;
2415         break;
2416     case P_ASP:
2417         valid &= (addrbits == 32) ? 16 : 32;
2418         break;
2419     default:
2420         break;
2421     }
2422
2423     for (j = 0; j < ins->operands; j++) {
2424         if (!(MEMORY & ~ins->oprs[j].type)) {
2425             int32_t i, b;
2426
2427             /* Verify as Register */
2428             if (ins->oprs[j].indexreg < EXPR_REG_START
2429                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2430                 i = 0;
2431             else
2432                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2433
2434             /* Verify as Register */
2435             if (ins->oprs[j].basereg < EXPR_REG_START
2436                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2437                 b = 0;
2438             else
2439                 b = nasm_reg_flags[ins->oprs[j].basereg];
2440
2441             if (ins->oprs[j].scale == 0)
2442                 i = 0;
2443
2444             if (!i && !b) {
2445                 int ds = ins->oprs[j].disp_size;
2446                 if ((addrbits != 64 && ds > 8) ||
2447                     (addrbits == 64 && ds == 16))
2448                     valid &= ds;
2449             } else {
2450                 if (!(REG16 & ~b))
2451                     valid &= 16;
2452                 if (!(REG32 & ~b))
2453                     valid &= 32;
2454                 if (!(REG64 & ~b))
2455                     valid &= 64;
2456
2457                 if (!(REG16 & ~i))
2458                     valid &= 16;
2459                 if (!(REG32 & ~i))
2460                     valid &= 32;
2461                 if (!(REG64 & ~i))
2462                     valid &= 64;
2463             }
2464         }
2465     }
2466
2467     if (valid & addrbits) {
2468         ins->addr_size = addrbits;
2469     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2470         /* Add an address size prefix */
2471         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2472         ins->prefixes[PPS_ASIZE] = pref;
2473         ins->addr_size = (addrbits == 32) ? 16 : 32;
2474     } else {
2475         /* Impossible... */
2476         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2477         ins->addr_size = addrbits; /* Error recovery */
2478     }
2479
2480     defdisp = ins->addr_size == 16 ? 16 : 32;
2481
2482     for (j = 0; j < ins->operands; j++) {
2483         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2484             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2485             != ins->addr_size) {
2486             /* mem_offs sizes must match the address size; if not,
2487                strip the MEM_OFFS bit and match only EA instructions */
2488             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2489         }
2490     }
2491 }