assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 104  *                 (POP is never used for CS) depending on operand 0
 105  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 106  *                 on operand 0
 107  * \360          - no SSE prefix (== \364\331)
 108  * \361          - 66 SSE prefix (== \366\331)
 109  * \362          - F2 SSE prefix (== \364\332)
 110  * \363          - F3 SSE prefix (== \364\333)
 111  * \364          - operand-size prefix (0x66) not permitted
 112  * \365          - address-size prefix (0x67) not permitted
 113  * \366          - operand-size prefix (0x66) used as opcode extension
 114  * \367          - address-size prefix (0x67) used as opcode extension
 115  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 116  *                 370 is used for Jcc, 371 is used for JMP.
 117  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 118  *                 used for conditional jump over longer jump
 119  */
 120
 121 #include "compiler.h"
 122
 123 #include <stdio.h>
 124 #include <string.h>
 125 #include <inttypes.h>
 126
 127 #include "nasm.h"
 128 #include "nasmlib.h"
 129 #include "assemble.h"
 130 #include "insns.h"
 131 #include "tables.h"
 132
 133 typedef struct {
 134     int sib_present;                 /* is a SIB byte necessary? */
 135     int bytes;                       /* # of bytes of offset needed */
 136     int size;                        /* lazy - this is sib+bytes+1 */
 137     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 138 } ea;
 139
 140 static uint32_t cpu;            /* cpu level received from nasm.c */
 141 static efunc errfunc;           /* error callback; set on entry to assemble()/insn_size() */
 142 static struct ofmt *outfmt;     /* output format driver; set on entry to assemble() */
 143 static ListGen *list;           /* listing generator; set on entry to assemble() */
 144
 145 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 146 static void gencode(int32_t segment, int64_t offset, int bits,
 147                     insn * ins, const struct itemplate *temp,
 148                     int64_t insn_end);
 149 static int matches(const struct itemplate *, insn *, int bits);
 150 static int32_t regflag(const operand *);
 151 static int32_t regval(const operand *);
 152 static int rexflags(int, int32_t, int);
 153 static int op_rexflags(const operand *, int);
 154 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 155 static void add_asp(insn *, int);
 156
 157 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 158 {
 159     return ins->prefixes[pos] == prefix;
 160 }
 161
 162 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 163 {
 164     if (ins->prefixes[pos])
 165         errfunc(ERR_NONFATAL, "invalid %s prefix",
 166                 prefix_name(ins->prefixes[pos]));
 167 }
 168
 169 static const char *size_name(int size)
 170 {
 171     switch (size) {
 172     case 1:
 173         return "byte";
 174     case 2:
 175         return "word";
 176     case 4:
 177         return "dword";
 178     case 8:
 179         return "qword";
 180     case 10:
 181         return "tword";
 182     case 16:
 183         return "oword";
 184     case 32:
 185         return "yword";
 186     default:
 187         return "???";
 188     }
 189 }
 190
 191 static void warn_overflow(int size, const struct operand *o)
 192 {
 193     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 194         int64_t lim = ((int64_t)1 << (size*8))-1;
 195         int64_t data = o->offset;
 196
 197         if (data < ~lim || data > lim)
 198             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 199                     "%s data exceeds bounds", size_name(size));
 200     }
 201 }
 202 /*
 203  * This routine wrappers the real output format's output routine,
 204  * in order to pass a copy of the data off to the listing file
 205  * generator at the same time.
 206  */
 207 static void out(int64_t offset, int32_t segto, const void *data,
 208                 enum out_type type, uint64_t size,
 209                 int32_t segment, int32_t wrt)
 210 {
 211     static int32_t lineno = 0;     /* static!!! */
 212     static char *lnfname = NULL;
 213     uint8_t p[8];
 214
 215     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 216         /*
 217          * This is a non-relocated address, and we're going to
 218          * convert it into RAWDATA format.
 219          */
 220         uint8_t *q = p;
 221
 222         if (size > 8) {
 223             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 224             return;
 225         }
 226
 227         WRITEADDR(q, *(int64_t *)data, size);
 228         data = p;
 229         type = OUT_RAWDATA;
 230     }
 231
 232     list->output(offset, data, type, size);
 233
 234     /*
 235      * this call to src_get determines when we call the
 236      * debug-format-specific "linenum" function
 237      * it updates lineno and lnfname to the current values
 238      * returning 0 if "same as last time", -2 if lnfname
 239      * changed, and the amount by which lineno changed,
 240      * if it did. thus, these variables must be static
 241      */
 242
 243     if (src_get(&lineno, &lnfname)) {
 244         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 245     }
 246
 247     outfmt->output(segto, data, type, size, segment, wrt);
 248 }
 249
 250 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 251                      insn * ins, const uint8_t *code)
 252 {
 253     int64_t isize;
 254     uint8_t c = code[0];
 255
 256     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 257         return false;
 258     if (!optimizing)
 259         return false;
 260     if (optimizing < 0 && c == 0371)
 261         return false;
 262
 263     isize = calcsize(segment, offset, bits, ins, code);
 264     if (ins->oprs[0].segment != segment)
 265         return false;
 266
 267     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 268     return (isize >= -128 && isize <= 127); /* is it byte size? */
 269 }
 270
 271 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 272               insn * instruction, struct ofmt *output, efunc error,
 273               ListGen * listgen)
 274 {
 275     const struct itemplate *temp;
 276     int j;
 277     int size_prob;
 278     int64_t insn_end;
 279     int32_t itimes;
 280     int64_t start = offset;
 281     int64_t wsize = 0;             /* size for DB etc. */
 282
 283     errfunc = error;            /* to pass to other functions */
 284     cpu = cp;
 285     outfmt = output;            /* likewise */
 286     list = listgen;             /* and again */
 287
 288     switch (instruction->opcode) {
 289     case -1:
 290         return 0;
 291     case I_DB:
 292         wsize = 1;
 293         break;
 294     case I_DW:
 295         wsize = 2;
 296         break;
 297     case I_DD:
 298         wsize = 4;
 299         break;
 300     case I_DQ:
 301         wsize = 8;
 302         break;
 303     case I_DT:
 304         wsize = 10;
 305         break;
 306     case I_DO:
 307         wsize = 16;
 308         break;
 309     case I_DY:
 310         wsize = 32;
 311         break;
 312     default:
 313         break;
 314     }
 315
 316     if (wsize) {
 317         extop *e;
 318         int32_t t = instruction->times;
 319         if (t < 0)
 320             errfunc(ERR_PANIC,
 321                     "instruction->times < 0 (%ld) in assemble()", t);
 322
 323         while (t--) {           /* repeat TIMES times */
 324             for (e = instruction->eops; e; e = e->next) {
 325                 if (e->type == EOT_DB_NUMBER) {
 326                     if (wsize == 1) {
 327                         if (e->segment != NO_SEG)
 328                             errfunc(ERR_NONFATAL,
 329                                     "one-byte relocation attempted");
 330                         else {
 331                             uint8_t out_byte = e->offset;
 332                             out(offset, segment, &out_byte,
 333                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 334                         }
 335                     } else if (wsize > 8) {
 336                         errfunc(ERR_NONFATAL,
 337                                 "integer supplied to a DT, DO or DY"
 338                                 " instruction");
 339                     } else
 340                         out(offset, segment, &e->offset,
 341                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 342                     offset += wsize;
 343                 } else if (e->type == EOT_DB_STRING ||
 344                            e->type == EOT_DB_STRING_FREE) {
 345                     int align;
 346
 347                     out(offset, segment, e->stringval,
 348                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 349                     align = e->stringlen % wsize;
 350
 351                     if (align) {
 352                         align = wsize - align;
 353                         out(offset, segment, zero_buffer,
 354                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 355                     }
 356                     offset += e->stringlen + align;
 357                 }
 358             }
 359             if (t > 0 && t == instruction->times - 1) {
 360                 /*
 361                  * Dummy call to list->output to give the offset to the
 362                  * listing module.
 363                  */
 364                 list->output(offset, NULL, OUT_RAWDATA, 0);
 365                 list->uplevel(LIST_TIMES);
 366             }
 367         }
 368         if (instruction->times > 1)
 369             list->downlevel(LIST_TIMES);
 370         return offset - start;
 371     }
 372
 373     if (instruction->opcode == I_INCBIN) {
 374         const char *fname = instruction->eops->stringval;
 375         FILE *fp;
 376
 377         fp = fopen(fname, "rb");
 378         if (!fp) {
 379             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 380                   fname);
 381         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 382             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 383                   fname);
 384         } else {
 385             static char buf[4096];
 386             size_t t = instruction->times;
 387             size_t base = 0;
 388             size_t len;
 389
 390             len = ftell(fp);
 391             if (instruction->eops->next) {
 392                 base = instruction->eops->next->offset;
 393                 len -= base;
 394                 if (instruction->eops->next->next &&
 395                     len > (size_t)instruction->eops->next->next->offset)
 396                     len = (size_t)instruction->eops->next->next->offset;
 397             }
 398             /*
 399              * Dummy call to list->output to give the offset to the
 400              * listing module.
 401              */
 402             list->output(offset, NULL, OUT_RAWDATA, 0);
 403             list->uplevel(LIST_INCBIN);
 404             while (t--) {
 405                 size_t l;
 406
 407                 fseek(fp, base, SEEK_SET);
 408                 l = len;
 409                 while (l > 0) {
 410                     int32_t m =
 411                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 412                               fp);
 413                     if (!m) {
 414                         /*
 415                          * This shouldn't happen unless the file
 416                          * actually changes while we are reading
 417                          * it.
 418                          */
 419                         error(ERR_NONFATAL,
 420                               "`incbin': unexpected EOF while"
 421                               " reading file `%s'", fname);
 422                         t = 0;  /* Try to exit cleanly */
 423                         break;
 424                     }
 425                     out(offset, segment, buf, OUT_RAWDATA, m,
 426                         NO_SEG, NO_SEG);
 427                     l -= m;
 428                 }
 429             }
 430             list->downlevel(LIST_INCBIN);
 431             if (instruction->times > 1) {
 432                 /*
 433                  * Dummy call to list->output to give the offset to the
 434                  * listing module.
 435                  */
 436                 list->output(offset, NULL, OUT_RAWDATA, 0);
 437                 list->uplevel(LIST_TIMES);
 438                 list->downlevel(LIST_TIMES);
 439             }
 440             fclose(fp);
 441             return instruction->times * len;
 442         }
 443         return 0;               /* if we're here, there's an error */
 444     }
 445
 446     /* Check to see if we need an address-size prefix */
 447     add_asp(instruction, bits);
 448
 449     size_prob = 0;
 450
 451     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 452         int m = matches(temp, instruction, bits);
 453         if (m == 100 ||
 454             (m == 99 && jmp_match(segment, offset, bits,
 455                                   instruction, temp->code))) {
 456             /* Matches! */
 457             int64_t insn_size = calcsize(segment, offset, bits,
 458                                       instruction, temp->code);
 459             itimes = instruction->times;
 460             if (insn_size < 0)  /* shouldn't be, on pass two */
 461                 error(ERR_PANIC, "errors made it through from pass one");
 462             else
 463                 while (itimes--) {
 464                     for (j = 0; j < MAXPREFIX; j++) {
 465                         uint8_t c = 0;
 466                         switch (instruction->prefixes[j]) {
 467                         case P_LOCK:
 468                             c = 0xF0;
 469                             break;
 470                         case P_REPNE:
 471                         case P_REPNZ:
 472                             c = 0xF2;
 473                             break;
 474                         case P_REPE:
 475                         case P_REPZ:
 476                         case P_REP:
 477                             c = 0xF3;
 478                             break;
 479                         case R_CS:
 480                             if (bits == 64) {
 481                                 error(ERR_WARNING | ERR_PASS2,
 482                                       "cs segment base generated, but will be ignored in 64-bit mode");
 483                             }
 484                             c = 0x2E;
 485                             break;
 486                         case R_DS:
 487                             if (bits == 64) {
 488                                 error(ERR_WARNING | ERR_PASS2,
 489                                       "ds segment base generated, but will be ignored in 64-bit mode");
 490                             }
 491                             c = 0x3E;
 492                             break;
 493                         case R_ES:
 494                            if (bits == 64) {
 495                                 error(ERR_WARNING | ERR_PASS2,
 496                                       "es segment base generated, but will be ignored in 64-bit mode");
 497                            }
 498                             c = 0x26;
 499                             break;
 500                         case R_FS:
 501                             c = 0x64;
 502                             break;
 503                         case R_GS:
 504                             c = 0x65;
 505                             break;
 506                         case R_SS:
 507                             if (bits == 64) {
 508                                 error(ERR_WARNING | ERR_PASS2,
 509                                       "ss segment base generated, but will be ignored in 64-bit mode");
 510                             }
 511                             c = 0x36;
 512                             break;
 513                         case R_SEGR6:
 514                         case R_SEGR7:
 515                             error(ERR_NONFATAL,
 516                                   "segr6 and segr7 cannot be used as prefixes");
 517                             break;
 518                         case P_A16:
 519                             if (bits == 64) {
 520                                 error(ERR_NONFATAL,
 521                                       "16-bit addressing is not supported "
 522                                       "in 64-bit mode");
 523                             } else if (bits != 16)
 524                                 c = 0x67;
 525                             break;
 526                         case P_A32:
 527                             if (bits != 32)
 528                                 c = 0x67;
 529                             break;
 530                         case P_A64:
 531                             if (bits != 64) {
 532                                 error(ERR_NONFATAL,
 533                                       "64-bit addressing is only supported "
 534                                       "in 64-bit mode");
 535                             }
 536                             break;
 537                         case P_ASP:
 538                             c = 0x67;
 539                             break;
 540                         case P_O16:
 541                             if (bits != 16)
 542                                 c = 0x66;
 543                             break;
 544                         case P_O32:
 545                             if (bits == 16)
 546                                 c = 0x66;
 547                             break;
 548                         case P_O64:
 549                             /* REX.W */
 550                             break;
 551                         case P_OSP:
 552                             c = 0x66;
 553                             break;
 554                         case P_none:
 555                             break;
 556                         default:
 557                             error(ERR_PANIC, "invalid instruction prefix");
 558                         }
 559                         if (c != 0) {
 560                             out(offset, segment, &c, OUT_RAWDATA, 1,
 561                                 NO_SEG, NO_SEG);
 562                             offset++;
 563                         }
 564                     }
 565                     insn_end = offset + insn_size;
 566                     gencode(segment, offset, bits, instruction,
 567                             temp, insn_end);
 568                     offset += insn_size;
 569                     if (itimes > 0 && itimes == instruction->times - 1) {
 570                         /*
 571                          * Dummy call to list->output to give the offset to the
 572                          * listing module.
 573                          */
 574                         list->output(offset, NULL, OUT_RAWDATA, 0);
 575                         list->uplevel(LIST_TIMES);
 576                     }
 577                 }
 578             if (instruction->times > 1)
 579                 list->downlevel(LIST_TIMES);
 580             return offset - start;
 581         } else if (m > 0 && m > size_prob) {
 582             size_prob = m;
 583         }
 584     }
 585
 586     if (temp->opcode == -1) {   /* didn't match any instruction */
 587         switch (size_prob) {
 588         case 1:
 589             error(ERR_NONFATAL, "operation size not specified");
 590             break;
 591         case 2:
 592             error(ERR_NONFATAL, "mismatch in operand sizes");
 593             break;
 594         case 3:
 595             error(ERR_NONFATAL, "no instruction for this cpu level");
 596             break;
 597         case 4:
 598             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 599                   bits);
 600             break;
 601         default:
 602             error(ERR_NONFATAL,
 603                   "invalid combination of opcode and operands");
 604             break;
 605         }
 606     }
 607     return 0;
 608 }
 609
 610 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 611                insn * instruction, efunc error)
 612 {
 613     const struct itemplate *temp;
 614
 615     errfunc = error;            /* to pass to other functions */
 616     cpu = cp;
 617
 618     if (instruction->opcode == -1)
 619         return 0;
 620
 621     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 622         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 623         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 624         instruction->opcode == I_DY) {
 625         extop *e;
 626         int32_t isize, osize, wsize = 0;   /* placate gcc */
 627
 628         isize = 0;
 629         switch (instruction->opcode) {
 630         case I_DB:
 631             wsize = 1;
 632             break;
 633         case I_DW:
 634             wsize = 2;
 635             break;
 636         case I_DD:
 637             wsize = 4;
 638             break;
 639         case I_DQ:
 640             wsize = 8;
 641             break;
 642         case I_DT:
 643             wsize = 10;
 644             break;
 645         case I_DO:
 646             wsize = 16;
 647             break;
 648         case I_DY:
 649             wsize = 32;
 650             break;
 651         default:
 652             break;
 653         }
 654
 655         for (e = instruction->eops; e; e = e->next) {
 656             int32_t align;
 657
 658             osize = 0;
 659             if (e->type == EOT_DB_NUMBER)
 660                 osize = 1;
 661             else if (e->type == EOT_DB_STRING ||
 662                      e->type == EOT_DB_STRING_FREE)
 663                 osize = e->stringlen;
 664
 665             align = (-osize) % wsize;
 666             if (align < 0)
 667                 align += wsize;
 668             isize += osize + align;
 669         }
 670         return isize * instruction->times;
 671     }
 672
 673     if (instruction->opcode == I_INCBIN) {
 674         const char *fname = instruction->eops->stringval;
 675         FILE *fp;
 676         size_t len;
 677
 678         fp = fopen(fname, "rb");
 679         if (!fp)
 680             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 681                   fname);
 682         else if (fseek(fp, 0L, SEEK_END) < 0)
 683             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 684                   fname);
 685         else {
 686             len = ftell(fp);
 687             fclose(fp);
 688             if (instruction->eops->next) {
 689                 len -= instruction->eops->next->offset;
 690                 if (instruction->eops->next->next &&
 691                     len > (size_t)instruction->eops->next->next->offset) {
 692                     len = (size_t)instruction->eops->next->next->offset;
 693                 }
 694             }
 695             return instruction->times * len;
 696         }
 697         return 0;               /* if we're here, there's an error */
 698     }
 699
 700     /* Check to see if we need an address-size prefix */
 701     add_asp(instruction, bits);
 702
 703     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 704         int m = matches(temp, instruction, bits);
 705         if (m == 100 ||
 706             (m == 99 && jmp_match(segment, offset, bits,
 707                                   instruction, temp->code))) {
 708             /* we've matched an instruction. */
 709             int64_t isize;
 710             const uint8_t *codes = temp->code;
 711             int j;
 712
 713             isize = calcsize(segment, offset, bits, instruction, codes);
 714             if (isize < 0)
 715                 return -1;
 716             for (j = 0; j < MAXPREFIX; j++) {
 717                 switch (instruction->prefixes[j]) {
 718                 case P_A16:
 719                     if (bits != 16)
 720                         isize++;
 721                     break;
 722                 case P_A32:
 723                     if (bits != 32)
 724                         isize++;
 725                     break;
 726                 case P_O16:
 727                     if (bits != 16)
 728                         isize++;
 729                     break;
 730                 case P_O32:
 731                     if (bits == 16)
 732                         isize++;
 733                     break;
 734                 case P_A64:
 735                 case P_O64:
 736                 case P_none:
 737                     break;
 738                 default:
 739                     isize++;
 740                     break;
 741                 }
 742             }
 743             return isize * instruction->times;
 744         }
 745     }
 746     return -1;                  /* didn't match any instruction */
 747 }
 748
 749 static bool possible_sbyte(operand *o)
 750 {
 751     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 752         !(o->opflags & OPFLAG_FORWARD) &&
 753         optimizing >= 0 && !(o->type & STRICT);
 754 }
 755
 756 /* check that opn[op]  is a signed byte of size 16 or 32 */
 757 static bool is_sbyte16(operand *o)
 758 {
 759     int16_t v;
 760
 761     if (!possible_sbyte(o))
 762         return false;
 763
 764     v = o->offset;
 765     return v >= -128 && v <= 127;
 766 }
 767
 768 static bool is_sbyte32(operand *o)
 769 {
 770     int32_t v;
 771
 772     if (!possible_sbyte(o))
 773         return false;
 774
 775     v = o->offset;
 776     return v >= -128 && v <= 127;
 777 }
 778
 779 /* Common construct */
 780 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 781
 782 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 783                         insn * ins, const uint8_t *codes)
 784 {
 785     int64_t length = 0;
 786     uint8_t c;
 787     int rex_mask = ~0;
 788     int op1, op2;
 789     struct operand *opx;
 790     uint8_t opex = 0;
 791
 792     ins->rex = 0;               /* Ensure REX is reset */
 793
 794     if (ins->prefixes[PPS_OSIZE] == P_O64)
 795         ins->rex |= REX_W;
 796
 797     (void)segment;              /* Don't warn that this parameter is unused */
 798     (void)offset;               /* Don't warn that this parameter is unused */
 799
 800     while (*codes) {
 801         c = *codes++;
 802         op1 = (c & 3) + ((opex & 1) << 2);
 803         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 804         opx = &ins->oprs[op1];
 805         opex = 0;               /* For the next iteration */
 806
 807         switch (c) {
 808         case 01:
 809         case 02:
 810         case 03:
 811         case 04:
 812             codes += c, length += c;
 813             break;
 814
 815         case 05:
 816         case 06:
 817         case 07:
 818             opex = c;
 819             break;
 820
 821         case4(010):
 822             ins->rex |=
 823                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 824             codes++, length++;
 825             break;
 826
 827         case4(014):
 828         case4(020):
 829         case4(024):
 830             length++;
 831             break;
 832
 833         case4(030):
 834             length += 2;
 835             break;
 836
 837         case4(034):
 838             if (opx->type & (BITS16 | BITS32 | BITS64))
 839                 length += (opx->type & BITS16) ? 2 : 4;
 840             else
 841                 length += (bits == 16) ? 2 : 4;
 842             break;
 843
 844         case4(040):
 845             length += 4;
 846             break;
 847
 848         case4(044):
 849             length += ins->addr_size >> 3;
 850             break;
 851
 852         case4(050):
 853             length++;
 854             break;
 855
 856         case4(054):
 857             length += 8; /* MOV reg64/imm */
 858             break;
 859
 860         case4(060):
 861             length += 2;
 862             break;
 863
 864         case4(064):
 865             if (opx->type & (BITS16 | BITS32 | BITS64))
 866                 length += (opx->type & BITS16) ? 2 : 4;
 867             else
 868                 length += (bits == 16) ? 2 : 4;
 869             break;
 870
 871         case4(070):
 872             length += 4;
 873             break;
 874
 875         case4(074):
 876             length += 2;
 877             break;
 878
 879         case4(0140):
 880             length += is_sbyte16(opx) ? 1 : 2;
 881             break;
 882
 883         case4(0144):
 884             codes++;
 885             length++;
 886             break;
 887
 888         case4(0150):
 889             length += is_sbyte32(opx) ? 1 : 4;
 890             break;
 891
 892         case4(0154):
 893             codes++;
 894             length++;
 895             break;
 896
 897         case4(0160):
 898             length++;
 899             ins->rex |= REX_D;
 900             ins->drexdst = regval(opx);
 901             break;
 902
 903         case4(0164):
 904             length++;
 905             ins->rex |= REX_D|REX_OC;
 906             ins->drexdst = regval(opx);
 907             break;
 908
 909         case 0171:
 910             break;
 911
 912         case 0172:
 913         case 0173:
 914         case 0174:
 915             codes++;
 916             length++;
 917             break;
 918
 919         case4(0250):
 920             length += is_sbyte32(opx) ? 1 : 4;
 921             break;
 922
 923         case4(0254):
 924             length += 4;
 925             break;
 926
 927         case4(0260):
 928             ins->rex |= REX_V;
 929             ins->drexdst = regval(opx);
 930             ins->vex_m = *codes++;
 931             ins->vex_wlp = *codes++;
 932             break;
 933
 934         case 0270:
 935             ins->rex |= REX_V;
 936             ins->drexdst = 0;
 937             ins->vex_m = *codes++;
 938             ins->vex_wlp = *codes++;
 939             break;
 940
 941         case4(0274):
 942             length++;
 943             break;
 944
 945         case4(0300):
 946             break;
 947
 948         case 0310:
 949             if (bits == 64)
 950                 return -1;
 951             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 952             break;
 953
 954         case 0311:
 955             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 956             break;
 957
 958         case 0312:
 959             break;
 960
 961         case 0313:
 962             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 963                 has_prefix(ins, PPS_ASIZE, P_A32))
 964                 return -1;
 965             break;
 966
 967         case4(0314):
 968             break;
 969
 970         case 0320:
 971             length += (bits != 16);
 972             break;
 973
 974         case 0321:
 975             length += (bits == 16);
 976             break;
 977
 978         case 0322:
 979             break;
 980
 981         case 0323:
 982             rex_mask &= ~REX_W;
 983             break;
 984
 985         case 0324:
 986             ins->rex |= REX_W;
 987             break;
 988
 989         case 0330:
 990             codes++, length++;
 991             break;
 992
 993         case 0331:
 994             break;
 995
 996         case 0332:
 997         case 0333:
 998             length++;
 999             break;
1000
1001         case 0334:
1002             ins->rex |= REX_L;
1003             break;
1004
1005         case 0335:
1006             break;
1007
1008         case 0336:
1009             if (!ins->prefixes[PPS_LREP])
1010                 ins->prefixes[PPS_LREP] = P_REP;
1011             break;
1012
1013         case 0337:
1014             if (!ins->prefixes[PPS_LREP])
1015                 ins->prefixes[PPS_LREP] = P_REPNE;
1016             break;
1017
1018         case 0340:
1019             if (ins->oprs[0].segment != NO_SEG)
1020                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1021                         " quantity of BSS space");
1022             else
1023                 length += ins->oprs[0].offset;
1024             break;
1025
1026         case4(0344):
1027             length++;
1028             break;
1029
1030         case 0360:
1031             break;
1032
1033         case 0361:
1034         case 0362:
1035         case 0363:
1036             length++;
1037             break;
1038
1039         case 0364:
1040         case 0365:
1041             break;
1042
1043         case 0366:
1044         case 0367:
1045             length++;
1046             break;
1047
1048         case 0370:
1049         case 0371:
1050         case 0372:
1051             break;
1052
1053         case 0373:
1054             length++;
1055             break;
1056
1057         case4(0100):
1058         case4(0110):
1059         case4(0120):
1060         case4(0130):
1061         case4(0200):
1062         case4(0204):
1063         case4(0210):
1064         case4(0214):
1065         case4(0220):
1066         case4(0224):
1067         case4(0230):
1068         case4(0234):
1069             {
1070                 ea ea_data;
1071                 int rfield;
1072                 int32_t rflags;
1073                 struct operand *opy = &ins->oprs[op2];
1074
1075                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1076
1077                 if (c <= 0177) {
1078                     /* pick rfield from operand b (opx) */
1079                     rflags = regflag(opx);
1080                     rfield = nasm_regvals[opx->basereg];
1081                 } else {
1082                     rflags = 0;
1083                     rfield = c & 7;
1084                 }
1085                 if (!process_ea(opy, &ea_data, bits,
1086                                 ins->addr_size, rfield, rflags)) {
1087                     errfunc(ERR_NONFATAL, "invalid effective address");
1088                     return -1;
1089                 } else {
1090                     ins->rex |= ea_data.rex;
1091                     length += ea_data.size;
1092                 }
1093             }
1094             break;
1095
1096         default:
1097             errfunc(ERR_PANIC, "internal instruction table corrupt"
1098                     ": instruction code 0x%02X given", c);
1099             break;
1100         }
1101     }
1102
1103     ins->rex &= rex_mask;
1104
1105     if (ins->rex & REX_V) {
1106         int bad32 = REX_R|REX_W|REX_X|REX_B;
1107
1108         if (ins->rex & REX_H) {
1109             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1110             return -1;
1111         }
1112         switch (ins->vex_wlp & 030) {
1113         case 000:
1114         case 020:
1115             ins->rex &= ~REX_W;
1116             break;
1117         case 010:
1118             ins->rex |= REX_W;
1119             bad32 &= ~REX_W;
1120             break;
1121         case 030:
1122             /* Follow REX_W */
1123             break;
1124         }
1125
1126         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1127             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1128             return -1;
1129         }
1130         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1131             length += 3;
1132         else
1133             length += 2;
1134     } else if (ins->rex & REX_D) {
1135         if (ins->rex & REX_H) {
1136             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1137             return -1;
1138         }
1139         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1140                            ins->drexdst > 7)) {
1141             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1142             return -1;
1143         }
1144         length++;
1145     } else if (ins->rex & REX_REAL) {
1146         if (ins->rex & REX_H) {
1147             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1148             return -1;
1149         } else if (bits == 64) {
1150             length++;
1151         } else if ((ins->rex & REX_L) &&
1152                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1153                    cpu >= IF_X86_64) {
1154             /* LOCK-as-REX.R */
1155             assert_no_prefix(ins, PPS_LREP);
1156             length++;
1157         } else {
1158             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1159             return -1;
1160         }
1161     }
1162
1163     return length;
1164 }
1165
/*
 * Emit the pending REX prefix byte, if there is one.
 *
 * Expects `ins', `bits', `offset' and `segment' to be in scope at the
 * point of use.  Nothing is emitted unless we are in 64-bit mode, the
 * instruction is neither DREX nor VEX encoded, and at least one
 * REX_REAL bit is set.  After the byte is written ins->rex is cleared,
 * so any later invocation for the same instruction is a no-op, and
 * `offset' is advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();' behaves as a
 * single statement (safe next to an unbraced if/else).
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1173
1174 static void gencode(int32_t segment, int64_t offset, int bits,
1175                     insn * ins, const struct itemplate *temp,
1176                     int64_t insn_end)
1177 {
1178     static char condval[] = {   /* conditional opcodes */
1179         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1180         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1181         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1182     };
1183     uint8_t c;
1184     uint8_t bytes[4];
1185     int64_t size;
1186     int64_t data;
1187     int op1, op2;
1188     struct operand *opx;
1189     const uint8_t *codes = temp->code;
1190     uint8_t opex = 0;
1191
1192     while (*codes) {
1193         c = *codes++;
1194         op1 = (c & 3) + ((opex & 1) << 2);
1195         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1196         opx = &ins->oprs[op1];
1197         opex = 0;               /* For the next iteration */
1198
1199         switch (c) {
1200         case 01:
1201         case 02:
1202         case 03:
1203         case 04:
1204             EMIT_REX();
1205             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1206             codes += c;
1207             offset += c;
1208             break;
1209
1210         case 05:
1211         case 06:
1212         case 07:
1213             opex = c;
1214             break;
1215
1216         case4(010):
1217             EMIT_REX();
1218             bytes[0] = *codes++ + (regval(opx) & 7);
1219             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1220             offset += 1;
1221             break;
1222
1223         case4(014):
1224             /* The test for BITS8 and SBYTE here is intended to avoid
1225                warning on optimizer actions due to SBYTE, while still
1226                warn on explicit BYTE directives.  Also warn, obviously,
1227                if the optimizer isn't enabled. */
1228             if (((opx->type & BITS8) ||
1229                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1230                 (opx->offset < -128 || opx->offset > 127)) {
1231                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1232                         "signed byte value exceeds bounds");
1233             }
1234             if (opx->segment != NO_SEG) {
1235                 data = opx->offset;
1236                 out(offset, segment, &data, OUT_ADDRESS, 1,
1237                     opx->segment, opx->wrt);
1238             } else {
1239                 bytes[0] = opx->offset;
1240                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1241                     NO_SEG);
1242             }
1243             offset += 1;
1244             break;
1245
1246         case4(020):
1247             if (opx->offset < -256 || opx->offset > 255) {
1248                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1249                         "byte value exceeds bounds");
1250             }
1251             if (opx->segment != NO_SEG) {
1252                 data = opx->offset;
1253                 out(offset, segment, &data, OUT_ADDRESS, 1,
1254                     opx->segment, opx->wrt);
1255             } else {
1256                 bytes[0] = opx->offset;
1257                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1258                     NO_SEG);
1259             }
1260             offset += 1;
1261             break;
1262
1263         case4(024):
1264             if (opx->offset < 0 || opx->offset > 255)
1265                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1266                         "unsigned byte value exceeds bounds");
1267             if (opx->segment != NO_SEG) {
1268                 data = opx->offset;
1269                 out(offset, segment, &data, OUT_ADDRESS, 1,
1270                     opx->segment, opx->wrt);
1271             } else {
1272                 bytes[0] = opx->offset;
1273                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1274                     NO_SEG);
1275             }
1276             offset += 1;
1277             break;
1278
1279         case4(030):
1280             warn_overflow(2, opx);
1281             data = opx->offset;
1282             out(offset, segment, &data, OUT_ADDRESS, 2,
1283                 opx->segment, opx->wrt);
1284             offset += 2;
1285             break;
1286
1287         case4(034):
1288             if (opx->type & (BITS16 | BITS32))
1289                 size = (opx->type & BITS16) ? 2 : 4;
1290             else
1291                 size = (bits == 16) ? 2 : 4;
1292             warn_overflow(size, opx);
1293             data = opx->offset;
1294             out(offset, segment, &data, OUT_ADDRESS, size,
1295                 opx->segment, opx->wrt);
1296             offset += size;
1297             break;
1298
1299         case4(040):
1300             warn_overflow(4, opx);
1301             data = opx->offset;
1302             out(offset, segment, &data, OUT_ADDRESS, 4,
1303                 opx->segment, opx->wrt);
1304             offset += 4;
1305             break;
1306
1307         case4(044):
1308             data = opx->offset;
1309             size = ins->addr_size >> 3;
1310             warn_overflow(size, opx);
1311             out(offset, segment, &data, OUT_ADDRESS, size,
1312                 opx->segment, opx->wrt);
1313             offset += size;
1314             break;
1315
1316         case4(050):
1317             if (opx->segment != segment)
1318                 errfunc(ERR_NONFATAL,
1319                         "short relative jump outside segment");
1320             data = opx->offset - insn_end;
1321             if (data > 127 || data < -128)
1322                 errfunc(ERR_NONFATAL, "short jump is out of range");
1323             bytes[0] = data;
1324             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1325             offset += 1;
1326             break;
1327
1328         case4(054):
1329             data = (int64_t)opx->offset;
1330             out(offset, segment, &data, OUT_ADDRESS, 8,
1331                 opx->segment, opx->wrt);
1332             offset += 8;
1333             break;
1334
1335         case4(060):
1336             if (opx->segment != segment) {
1337                 data = opx->offset;
1338                 out(offset, segment, &data,
1339                     OUT_REL2ADR, insn_end - offset,
1340                     opx->segment, opx->wrt);
1341             } else {
1342                 data = opx->offset - insn_end;
1343                 out(offset, segment, &data,
1344                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1345             }
1346             offset += 2;
1347             break;
1348
1349         case4(064):
1350             if (opx->type & (BITS16 | BITS32 | BITS64))
1351                 size = (opx->type & BITS16) ? 2 : 4;
1352             else
1353                 size = (bits == 16) ? 2 : 4;
1354             if (opx->segment != segment) {
1355                 data = opx->offset;
1356                 out(offset, segment, &data,
1357                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1358                     insn_end - offset, opx->segment, opx->wrt);
1359             } else {
1360                 data = opx->offset - insn_end;
1361                 out(offset, segment, &data,
1362                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1363             }
1364             offset += size;
1365             break;
1366
1367         case4(070):
1368             if (opx->segment != segment) {
1369                 data = opx->offset;
1370                 out(offset, segment, &data,
1371                     OUT_REL4ADR, insn_end - offset,
1372                     opx->segment, opx->wrt);
1373             } else {
1374                 data = opx->offset - insn_end;
1375                 out(offset, segment, &data,
1376                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1377             }
1378             offset += 4;
1379             break;
1380
1381         case4(074):
1382             if (opx->segment == NO_SEG)
1383                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1384                         " relocatable");
1385             data = 0;
1386             out(offset, segment, &data, OUT_ADDRESS, 2,
1387                 outfmt->segbase(1 + opx->segment),
1388                 opx->wrt);
1389             offset += 2;
1390             break;
1391
1392         case4(0140):
1393             data = opx->offset;
1394             warn_overflow(2, opx);
1395             if (is_sbyte16(opx)) {
1396                 bytes[0] = data;
1397                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1398                     NO_SEG);
1399                 offset++;
1400             } else {
1401                 out(offset, segment, &data, OUT_ADDRESS, 2,
1402                     opx->segment, opx->wrt);
1403                 offset += 2;
1404             }
1405             break;
1406
1407         case4(0144):
1408             EMIT_REX();
1409             bytes[0] = *codes++;
1410             if (is_sbyte16(opx))
1411                 bytes[0] |= 2;  /* s-bit */
1412             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1413             offset++;
1414             break;
1415
1416         case4(0150):
1417             data = opx->offset;
1418             warn_overflow(4, opx);
1419             if (is_sbyte32(opx)) {
1420                 bytes[0] = data;
1421                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1422                     NO_SEG);
1423                 offset++;
1424             } else {
1425                 out(offset, segment, &data, OUT_ADDRESS, 4,
1426                     opx->segment, opx->wrt);
1427                 offset += 4;
1428             }
1429             break;
1430
1431         case4(0154):
1432             EMIT_REX();
1433             bytes[0] = *codes++;
1434             if (is_sbyte32(opx))
1435                 bytes[0] |= 2;  /* s-bit */
1436             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1437             offset++;
1438             break;
1439
1440         case4(0160):
1441         case4(0164):
1442             break;
1443
1444         case 0171:
1445             bytes[0] =
1446                 (ins->drexdst << 4) |
1447                 (ins->rex & REX_OC ? 0x08 : 0) |
1448                 (ins->rex & (REX_R|REX_X|REX_B));
1449             ins->rex = 0;
1450             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1451             offset++;
1452             break;
1453
1454         case 0172:
1455             c = *codes++;
1456             opx = &ins->oprs[c >> 3];
1457             bytes[0] = nasm_regvals[opx->basereg] << 4;
1458             opx = &ins->oprs[c & 7];
1459             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1460                 errfunc(ERR_NONFATAL,
1461                         "non-absolute expression not permitted as argument %d",
1462                         c & 7);
1463             } else {
1464                 if (opx->offset & ~15) {
1465                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1466                             "four-bit argument exceeds bounds");
1467                 }
1468                 bytes[0] |= opx->offset & 15;
1469             }
1470             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1471             offset++;
1472             break;
1473
1474         case 0173:
1475             c = *codes++;
1476             opx = &ins->oprs[c >> 4];
1477             bytes[0] = nasm_regvals[opx->basereg] << 4;
1478             bytes[0] |= c & 15;
1479             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1480             offset++;
1481             break;
1482
1483         case 0174:
1484             c = *codes++;
1485             opx = &ins->oprs[c];
1486             bytes[0] = nasm_regvals[opx->basereg] << 4;
1487             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1488             offset++;
1489             break;
1490
1491         case4(0250):
1492             data = opx->offset;
1493             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1494                 (int32_t)data != (int64_t)data) {
1495                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1496                         "signed dword immediate exceeds bounds");
1497             }
1498             if (is_sbyte32(opx)) {
1499                 bytes[0] = data;
1500                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1501                     NO_SEG);
1502                 offset++;
1503             } else {
1504                 out(offset, segment, &data, OUT_ADDRESS, 4,
1505                     opx->segment, opx->wrt);
1506                 offset += 4;
1507             }
1508             break;
1509
1510         case4(0254):
1511             data = opx->offset;
1512             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1513                 (int32_t)data != (int64_t)data) {
1514                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1515                         "signed dword immediate exceeds bounds");
1516             }
1517             out(offset, segment, &data, OUT_ADDRESS, 4,
1518                 opx->segment, opx->wrt);
1519             offset += 4;
1520             break;
1521
1522         case4(0260):
1523         case 0270:
1524             codes += 2;
1525             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1526                 bytes[0] = 0xc4;
1527                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1528                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1529                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1530                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1531                 offset += 3;
1532             } else {
1533                 bytes[0] = 0xc5;
1534                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1535                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1536                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1537                 offset += 2;
1538             }
1539             break;
1540
1541         case4(0274):
1542         {
1543             uint64_t uv, um;
1544             int s;
1545
1546             if (ins->rex & REX_W)
1547                 s = 64;
1548             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1549                 s = 16;
1550             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1551                 s = 32;
1552             else
1553                 s = bits;
1554
1555             um = (uint64_t)2 << (s-1);
1556             uv = opx->offset;
1557
1558             if (uv > 127 && uv < (uint64_t)-128 &&
1559                 (uv < um-128 || uv > um-1)) {
1560                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1561                         "signed byte value exceeds bounds");
1562             }
1563             if (opx->segment != NO_SEG) {
1564                 data = uv;
1565                 out(offset, segment, &data, OUT_ADDRESS, 1,
1566                     opx->segment, opx->wrt);
1567             } else {
1568                 bytes[0] = uv;
1569                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1570                     NO_SEG);
1571             }
1572             offset += 1;
1573             break;
1574         }
1575
1576         case4(0300):
1577             break;
1578
1579         case 0310:
1580             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1581                 *bytes = 0x67;
1582                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1583                 offset += 1;
1584             } else
1585                 offset += 0;
1586             break;
1587
1588         case 0311:
1589             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1590                 *bytes = 0x67;
1591                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1592                 offset += 1;
1593             } else
1594                 offset += 0;
1595             break;
1596
1597         case 0312:
1598             break;
1599
1600         case 0313:
1601             ins->rex = 0;
1602             break;
1603
1604         case4(0314):
1605             break;
1606
1607         case 0320:
1608             if (bits != 16) {
1609                 *bytes = 0x66;
1610                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1611                 offset += 1;
1612             } else
1613                 offset += 0;
1614             break;
1615
1616         case 0321:
1617             if (bits == 16) {
1618                 *bytes = 0x66;
1619                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1620                 offset += 1;
1621             } else
1622                 offset += 0;
1623             break;
1624
1625         case 0322:
1626         case 0323:
1627             break;
1628
1629         case 0324:
1630             ins->rex |= REX_W;
1631             break;
1632
1633         case 0330:
1634             *bytes = *codes++ ^ condval[ins->condition];
1635             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1636             offset += 1;
1637             break;
1638
1639         case 0331:
1640             break;
1641
1642         case 0332:
1643         case 0333:
1644             *bytes = c - 0332 + 0xF2;
1645             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1646             offset += 1;
1647             break;
1648
1649         case 0334:
1650             if (ins->rex & REX_R) {
1651                 *bytes = 0xF0;
1652                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1653                 offset += 1;
1654             }
1655             ins->rex &= ~(REX_L|REX_R);
1656             break;
1657
1658         case 0335:
1659             break;
1660
1661         case 0336:
1662         case 0337:
1663             break;
1664
1665         case 0340:
1666             if (ins->oprs[0].segment != NO_SEG)
1667                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1668             else {
1669                 int64_t size = ins->oprs[0].offset;
1670                 if (size > 0)
1671                     out(offset, segment, NULL,
1672                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1673                 offset += size;
1674             }
1675             break;
1676
1677         case 0344:
1678         case 0345:
1679             bytes[0] = c & 1;
1680             switch (ins->oprs[0].basereg) {
1681             case R_CS:
1682                 bytes[0] += 0x0E;
1683                 break;
1684             case R_DS:
1685                 bytes[0] += 0x1E;
1686                 break;
1687             case R_ES:
1688                 bytes[0] += 0x06;
1689                 break;
1690             case R_SS:
1691                 bytes[0] += 0x16;
1692                 break;
1693             default:
1694                 errfunc(ERR_PANIC,
1695                         "bizarre 8086 segment register received");
1696             }
1697             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1698             offset++;
1699             break;
1700
1701         case 0346:
1702         case 0347:
1703             bytes[0] = c & 1;
1704             switch (ins->oprs[0].basereg) {
1705             case R_FS:
1706                 bytes[0] += 0xA0;
1707                 break;
1708             case R_GS:
1709                 bytes[0] += 0xA8;
1710                 break;
1711             default:
1712                 errfunc(ERR_PANIC,
1713                         "bizarre 386 segment register received");
1714             }
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset++;
1717             break;
1718
1719         case 0360:
1720             break;
1721
1722         case 0361:
1723             bytes[0] = 0x66;
1724             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1725             offset += 1;
1726             break;
1727
1728         case 0362:
1729         case 0363:
1730             bytes[0] = c - 0362 + 0xf2;
1731             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732             offset += 1;
1733             break;
1734
1735         case 0364:
1736         case 0365:
1737             break;
1738
1739         case 0366:
1740         case 0367:
1741             *bytes = c - 0366 + 0x66;
1742             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1743             offset += 1;
1744             break;
1745
1746         case 0370:
1747         case 0371:
1748         case 0372:
1749             break;
1750
1751         case 0373:
1752             *bytes = bits == 16 ? 3 : 5;
1753             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1754             offset += 1;
1755             break;
1756
1757         case4(0100):
1758         case4(0110):
1759         case4(0120):
1760         case4(0130):
1761         case4(0200):
1762         case4(0204):
1763         case4(0210):
1764         case4(0214):
1765         case4(0220):
1766         case4(0224):
1767         case4(0230):
1768         case4(0234):
1769             {
1770                 ea ea_data;
1771                 int rfield;
1772                 int32_t rflags;
1773                 uint8_t *p;
1774                 int32_t s;
1775                 enum out_type type;
1776                 struct operand *opy = &ins->oprs[op2];
1777
1778                 if (c <= 0177) {
1779                     /* pick rfield from operand b (opx) */
1780                     rflags = regflag(opx);
1781                     rfield = nasm_regvals[opx->basereg];
1782                 } else {
1783                     /* rfield is constant */
1784                     rflags = 0;
1785                     rfield = c & 7;
1786                 }
1787
1788                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1789                                 rfield, rflags)) {
1790                     errfunc(ERR_NONFATAL, "invalid effective address");
1791                 }
1792
1793
1794                 p = bytes;
1795                 *p++ = ea_data.modrm;
1796                 if (ea_data.sib_present)
1797                     *p++ = ea_data.sib;
1798
1799                 /* DREX suffixes come between the SIB and the displacement */
1800                 if (ins->rex & REX_D) {
1801                     *p++ = (ins->drexdst << 4) |
1802                            (ins->rex & REX_OC ? 0x08 : 0) |
1803                            (ins->rex & (REX_R|REX_X|REX_B));
1804                     ins->rex = 0;
1805                 }
1806
1807                 s = p - bytes;
1808                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1809
1810                 /*
1811                  * Make sure the address gets the right offset in case
1812                  * the line breaks in the .lst file (BR 1197827)
1813                  */
1814                 offset += s;
1815                 s = 0;
1816
1817                 switch (ea_data.bytes) {
1818                 case 0:
1819                     break;
1820                 case 1:
1821                 case 2:
1822                 case 4:
1823                 case 8:
1824                     data = opy->offset;
1825                     warn_overflow(ea_data.bytes, opy);
1826                     s += ea_data.bytes;
1827                     if (ea_data.rip) {
1828                         if (opy->segment == segment) {
1829                             data -= insn_end;
1830                             out(offset, segment, &data, OUT_ADDRESS,
1831                                 ea_data.bytes, NO_SEG, NO_SEG);
1832                         } else {
1833                             out(offset, segment, &data, OUT_REL4ADR,
1834                                 insn_end - offset, opy->segment, opy->wrt);
1835                         }
1836                     } else {
1837                         type = OUT_ADDRESS;
1838                         out(offset, segment, &data, OUT_ADDRESS,
1839                             ea_data.bytes, opy->segment, opy->wrt);
1840                     }
1841                     break;
1842                 default:
1843                     /* Impossible! */
1844                     errfunc(ERR_PANIC,
1845                             "Invalid amount of bytes (%d) for offset?!",
1846                             ea_data.bytes);
1847                     break;
1848                 }
1849                 offset += s;
1850             }
1851             break;
1852
1853         default:
1854             errfunc(ERR_PANIC, "internal instruction table corrupt"
1855                     ": instruction code 0x%02X given", c);
1856             break;
1857         }
1858     }
1859 }
1860
1861 static int32_t regflag(const operand * o)
1862 {
1863     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1864         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1865     }
1866     return nasm_reg_flags[o->basereg];
1867 }
1868
1869 static int32_t regval(const operand * o)
1870 {
1871     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1872         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1873     }
1874     return nasm_regvals[o->basereg];
1875 }
1876
1877 static int op_rexflags(const operand * o, int mask)
1878 {
1879     int32_t flags;
1880     int val;
1881
1882     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1883         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1884     }
1885
1886     flags = nasm_reg_flags[o->basereg];
1887     val = nasm_regvals[o->basereg];
1888
1889     return rexflags(val, flags, mask);
1890 }
1891
1892 static int rexflags(int val, int32_t flags, int mask)
1893 {
1894     int rex = 0;
1895
1896     if (val >= 8)
1897         rex |= REX_B|REX_X|REX_R;
1898     if (flags & BITS64)
1899         rex |= REX_W;
1900     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1901         rex |= REX_H;
1902     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1903         rex |= REX_P;
1904
1905     return rex & mask;
1906 }
1907
1908 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1909 {
1910     int i, size[MAX_OPERANDS], asize, oprs, ret;
1911
1912     ret = 100;
1913
1914     /*
1915      * Check the opcode
1916      */
1917     if (itemp->opcode != instruction->opcode)
1918         return 0;
1919
1920     /*
1921      * Count the operands
1922      */
1923     if (itemp->operands != instruction->operands)
1924         return 0;
1925
1926     /*
1927      * Check that no spurious colons or TOs are present
1928      */
1929     for (i = 0; i < itemp->operands; i++)
1930         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1931             return 0;
1932
1933     /*
1934      * Process size flags
1935      */
1936     if (itemp->flags & IF_ARMASK) {
1937         memset(size, 0, sizeof size);
1938
1939         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1940
1941         switch (itemp->flags & IF_SMASK) {
1942         case IF_SB:
1943             size[i] = BITS8;
1944             break;
1945         case IF_SW:
1946             size[i] = BITS16;
1947             break;
1948         case IF_SD:
1949             size[i] = BITS32;
1950             break;
1951         case IF_SQ:
1952             size[i] = BITS64;
1953             break;
1954         case IF_SO:
1955             size[i] = BITS128;
1956             break;
1957         case IF_SY:
1958             size[i] = BITS256;
1959             break;
1960         case IF_SZ:
1961             switch (bits) {
1962             case 16:
1963                 size[i] = BITS16;
1964                 break;
1965             case 32:
1966                 size[i] = BITS32;
1967                 break;
1968             case 64:
1969                 size[i] = BITS64;
1970                 break;
1971             }
1972             break;
1973         default:
1974             break;
1975         }
1976     } else {
1977         asize = 0;
1978         switch (itemp->flags & IF_SMASK) {
1979         case IF_SB:
1980             asize = BITS8;
1981             break;
1982         case IF_SW:
1983             asize = BITS16;
1984             break;
1985         case IF_SD:
1986             asize = BITS32;
1987             break;
1988         case IF_SQ:
1989             asize = BITS64;
1990             break;
1991         case IF_SO:
1992             asize = BITS128;
1993             break;
1994         case IF_SY:
1995             asize = BITS256;
1996             break;
1997         case IF_SZ:
1998             switch (bits) {
1999             case 16:
2000                 asize = BITS16;
2001                 break;
2002             case 32:
2003                 asize = BITS32;
2004                 break;
2005             case 64:
2006                 asize = BITS64;
2007                 break;
2008             }
2009             break;
2010         default:
2011             break;
2012         }
2013         for (i = 0; i < MAX_OPERANDS; i++)
2014             size[i] = asize;
2015     }
2016
2017     /*
2018      * Check that the operand flags all match up
2019      */
2020     for (i = 0; i < itemp->operands; i++) {
2021         int32_t type = instruction->oprs[i].type;
2022         if (!(type & SIZE_MASK))
2023             type |= size[i];
2024
2025         if (itemp->opd[i] & SAME_AS) {
2026             int j = itemp->opd[i] & ~SAME_AS;
2027             if (type != instruction->oprs[j].type ||
2028                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2029                 return 0;
2030         } else if (itemp->opd[i] & ~type ||
2031             ((itemp->opd[i] & SIZE_MASK) &&
2032              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2033             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2034                 (type & SIZE_MASK))
2035                 return 0;
2036             else
2037                 return 1;
2038         }
2039     }
2040
2041     /*
2042      * Check operand sizes
2043      */
2044     if (itemp->flags & (IF_SM | IF_SM2)) {
2045         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2046         asize = 0;
2047         for (i = 0; i < oprs; i++) {
2048             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2049                 int j;
2050                 for (j = 0; j < oprs; j++)
2051                     size[j] = asize;
2052                 break;
2053             }
2054         }
2055     } else {
2056         oprs = itemp->operands;
2057     }
2058
2059     for (i = 0; i < itemp->operands; i++) {
2060         if (!(itemp->opd[i] & SIZE_MASK) &&
2061             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2062             return 2;
2063     }
2064
2065     /*
2066      * Check template is okay at the set cpu level
2067      */
2068     if (((itemp->flags & IF_PLEVEL) > cpu))
2069         return 3;
2070
2071     /*
2072      * Verify the appropriate long mode flag.
2073      */
2074     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2075         return 4;
2076
2077     /*
2078      * Check if special handling needed for Jumps
2079      */
2080     if ((uint8_t)(itemp->code[0]) >= 0370)
2081         return 99;
2082
2083     return ret;
2084 }
2085
/*
 * process_ea: work out the ModRM/SIB encoding of one operand.
 *
 *   input    - operand to encode (register direct or memory reference)
 *   output   - receives modrm, sib, sib_present, bytes (displacement
 *              byte count), rip and size
 *   bits     - current assembly mode
 *   addrbits - address size in effect for the instruction
 *   rfield   - value for the reg field of ModRM (only its low 3 bits
 *              are stored in the byte)
 *   rflags   - register flags belonging to rfield, used for REX bits
 *
 * Returns `output` on success, or NULL when the operand cannot be
 * encoded as an effective address.
 *
 * NOTE(review): output->rex is only ever OR-ed into here and is never
 * cleared, so callers presumably initialise it first - confirm.
 */
2086 static ea *process_ea(operand * input, ea * output, int bits,
2087                       int addrbits, int rfield, int32_t rflags)
2088 {
     /* forward references never get the no-displacement or 8-bit
        displacement forms chosen below */
2089     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2090
2091     output->rip = false;
2092
2093     /* REX flags for the rfield operand */
2094     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2095
2096     if (!(REGISTER & ~input->type)) {   /* register direct */
2097         int i;
2098         int32_t f;
2099
2100         if (input->basereg < EXPR_REG_START /* Verify as Register */
2101             || input->basereg >= REG_ENUM_LIMIT)
2102             return NULL;
2103         f = regflag(input);
2104         i = nasm_regvals[input->basereg];
2105
2106         if (REG_EA & ~f)
2107             return NULL;        /* Invalid EA register */
2108
2109         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2110
2111         output->sib_present = false;             /* no SIB necessary */
2112         output->bytes = 0;  /* no offset necessary either */
2113         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2114     } else {                    /* it's a memory reference */
2115         if (input->basereg == -1
2116             && (input->indexreg == -1 || input->scale == 0)) {
2117             /* it's a pure offset */
2118             if (bits == 64 && (~input->type & IP_REL)) {
               /* 64-bit mode without IP_REL: 4-byte displacement
                  encoded through a SIB byte (index=4, base=5);
                  rip stays false */
2119               int scale, index, base;
2120               output->sib_present = true;
2121               scale = 0;
2122               index = 4;
2123               base = 5;
2124               output->sib = (scale << 6) | (index << 3) | base;
2125               output->bytes = 4;
2126               output->modrm = 4 | ((rfield & 7) << 3);
2127               output->rip = false;
2128             } else {
               /* plain displacement form: 2 bytes with rm=6 for 16-bit
                  addressing, otherwise 4 bytes with rm=5; flagged as
                  RIP-relative when assembling in 64-bit mode */
2129               output->sib_present = false;
2130               output->bytes = (addrbits != 16 ? 4 : 2);
2131               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2132               output->rip = bits == 64;
2133             }
2134         } else {                /* it's an indirection */
2135             int i = input->indexreg, b = input->basereg, s = input->scale;
             /* NOTE(review): `o` is 32 bits wide; if the operand's
                offset field is wider, the value is truncated before the
                zero / 8-bit range tests below - confirm intended */
2136             int32_t o = input->offset, seg = input->segment;
2137             int hb = input->hintbase, ht = input->hinttype;
2138             int t;
2139             int it, bt;
2140             int32_t ix, bx;     /* register flags */
2141
2142             if (s == 0)
2143                 i = -1;         /* make this easy, at least */
2144
             /* it/bt: register values of index/base (-1 if none);
                ix/bx: the matching register flag words (0 if none) */
2145             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2146                 it = nasm_regvals[i];
2147                 ix = nasm_reg_flags[i];
2148             } else {
2149                 it = -1;
2150                 ix = 0;
2151             }
2152
2153             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2154                 bt = nasm_regvals[b];
2155                 bx = nasm_reg_flags[b];
2156             } else {
2157                 bt = -1;
2158                 bx = 0;
2159             }
2160
2161             /* check for a 32/64-bit memory reference... */
2162             if ((ix|bx) & (BITS32|BITS64)) {
2163                 /* it must be a 32/64-bit memory reference. Firstly we have
2164                  * to check that all registers involved are type E/Rxx. */
                 /* sok: the address sizes still compatible with the
                    registers seen so far */
2165                 int32_t sok = BITS32|BITS64;
2166
2167                 if (it != -1) {
2168                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2169                         sok &= ix;
2170                     else
2171                         return NULL;
2172                 }
2173
2174                 if (bt != -1) {
2175                     if (REG_GPR & ~bx)
2176                         return NULL; /* Invalid register */
2177                     if (~sok & bx & SIZE_MASK)
2178                         return NULL; /* Invalid size */
2179                     sok &= bx;
2180                 }
2181
2182                 /* While we're here, ensure the user didn't specify
2183                    WORD or QWORD. */
2184                 if (input->disp_size == 16 || input->disp_size == 64)
2185                     return NULL;
2186
                 /* the registers must agree with the address size in
                    effect */
2187                 if (addrbits == 16 ||
2188                     (addrbits == 32 && !(sok & BITS32)) ||
2189                     (addrbits == 64 && !(sok & BITS64)))
2190                     return NULL;
2191
2192                 /* now reorganize base/index */
2193                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2194                     ((hb == b && ht == EAH_NOTBASE)
2195                      || (hb == i && ht == EAH_MAKEBASE))) {
2196                     /* swap if hints say so */
2197                     t = bt, bt = it, it = t;
2198                     t = bx, bx = ix, ix = t;
2199                 }
2200                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2201                     bt = -1, bx = 0, s++;
2202                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2203                     /* make single reg base, unless hint */
2204                     bt = it, bx = ix, it = -1, ix = 0;
2205                 }
2206                 if (((s == 2 && it != REG_NUM_ESP
2207                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2208                      || s == 5 || s == 9) && bt == -1)
2209                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                 /* with EAF_TIMESTWO set, a lone base register is moved
                    into the index slot with scale 1 */
2210                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2211                     && (input->eaflags & EAF_TIMESTWO))
2212                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2213                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2214                 if (s == 1 && it == REG_NUM_ESP) {
2215                     /* swap ESP into base if scale is 1 */
2216                     t = it, it = bt, bt = t;
2217                     t = ix, ix = bx, bx = t;
2218                 }
2219                 if (it == REG_NUM_ESP
2220                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2221                     return NULL;        /* wrong, for various reasons */
2222
2223                 output->rex |= rexflags(it, ix, REX_X);
2224                 output->rex |= rexflags(bt, bx, REX_B);
2225
2226                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2227                     /* no SIB needed */
2228                     int mod, rm;
2229
2230                     if (bt == -1) {
2231                         rm = 5;
2232                         mod = 0;
2233                     } else {
                         /* mod 0 = no displacement, mod 1 = one byte,
                            mod 2 = four bytes (see output->bytes) */
2234                         rm = (bt & 7);
2235                         if (rm != REG_NUM_EBP && o == 0 &&
2236                                 seg == NO_SEG && !forw_ref &&
2237                                 !(input->eaflags &
2238                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2239                             mod = 0;
2240                         else if (input->eaflags & EAF_BYTEOFFS ||
2241                                  (o >= -128 && o <= 127 && seg == NO_SEG
2242                                   && !forw_ref
2243                                   && !(input->eaflags & EAF_WORDOFFS)))
2244                             mod = 1;
2245                         else
2246                             mod = 2;
2247                     }
2248
2249                     output->sib_present = false;
2250                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2251                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2252                 } else {
2253                     /* we need a SIB */
2254                     int mod, scale, index, base;
2255
2256                     if (it == -1)
2257                         index = 4, s = 1;
2258                     else
2259                         index = (it & 7);
2260
                     /* translate the scale factor into the 2-bit SIB
                        scale field */
2261                     switch (s) {
2262                     case 1:
2263                         scale = 0;
2264                         break;
2265                     case 2:
2266                         scale = 1;
2267                         break;
2268                     case 4:
2269                         scale = 2;
2270                         break;
2271                     case 8:
2272                         scale = 3;
2273                         break;
2274                     default:   /* then what the smeg is it? */
2275                         return NULL;    /* panic */
2276                     }
2277
2278                     if (bt == -1) {
2279                         base = 5;
2280                         mod = 0;
2281                     } else {
2282                         base = (bt & 7);
2283                         if (base != REG_NUM_EBP && o == 0 &&
2284                                     seg == NO_SEG && !forw_ref &&
2285                                     !(input->eaflags &
2286                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2287                             mod = 0;
2288                         else if (input->eaflags & EAF_BYTEOFFS ||
2289                                  (o >= -128 && o <= 127 && seg == NO_SEG
2290                                   && !forw_ref
2291                                   && !(input->eaflags & EAF_WORDOFFS)))
2292                             mod = 1;
2293                         else
2294                             mod = 2;
2295                     }
2296
2297                     output->sib_present = true;
2298                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2299                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2300                     output->sib = (scale << 6) | (index << 3) | base;
2301                 }
2302             } else {            /* it's 16-bit */
2303                 int mod, rm;
2304
2305                 /* check for 64-bit long mode */
2306                 if (addrbits == 64)
2307                     return NULL;
2308
2309                 /* check all registers are BX, BP, SI or DI */
2310                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2311                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2312                                        && i != R_SI && i != R_DI))
2313                     return NULL;
2314
2315                 /* ensure the user didn't specify DWORD/QWORD */
2316                 if (input->disp_size == 32 || input->disp_size == 64)
2317                     return NULL;
2318
2319                 if (s != 1 && i != -1)
2320                     return NULL;        /* no can do, in 16-bit EA */
2321                 if (b == -1 && i != -1) {
2322                     int tmp = b;
2323                     b = i;
2324                     i = tmp;
2325                 }               /* swap */
2326                 if ((b == R_SI || b == R_DI) && i != -1) {
2327                     int tmp = b;
2328                     b = i;
2329                     i = tmp;
2330                 }
2331                 /* have BX/BP as base, SI/DI index */
2332                 if (b == i)
2333                     return NULL;        /* shouldn't ever happen, in theory */
2334                 if (i != -1 && b != -1 &&
2335                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2336                     return NULL;        /* invalid combinations */
2337                 if (b == -1)    /* pure offset: handled above */
2338                     return NULL;        /* so if it gets to here, panic! */
2339
                 /* pick the rm value for the base/index pair, or for
                    the single remaining register */
2340                 rm = -1;
2341                 if (i != -1)
2342                     switch (i * 256 + b) {
2343                     case R_SI * 256 + R_BX:
2344                         rm = 0;
2345                         break;
2346                     case R_DI * 256 + R_BX:
2347                         rm = 1;
2348                         break;
2349                     case R_SI * 256 + R_BP:
2350                         rm = 2;
2351                         break;
2352                     case R_DI * 256 + R_BP:
2353                         rm = 3;
2354                         break;
2355                 } else
2356                     switch (b) {
2357                     case R_SI:
2358                         rm = 4;
2359                         break;
2360                     case R_DI:
2361                         rm = 5;
2362                         break;
2363                     case R_BP:
2364                         rm = 6;
2365                         break;
2366                     case R_BX:
2367                         rm = 7;
2368                         break;
2369                     }
2370                 if (rm == -1)   /* can't happen, in theory */
2371                     return NULL;        /* so panic if it does */
2372
                 /* rm == 6 (BP alone) never uses mod 0, so it always
                    carries a displacement */
2373                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2374                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2375                     mod = 0;
2376                 else if (input->eaflags & EAF_BYTEOFFS ||
2377                          (o >= -128 && o <= 127 && seg == NO_SEG
2378                           && !forw_ref
2379                           && !(input->eaflags & EAF_WORDOFFS)))
2380                     mod = 1;
2381                 else
2382                     mod = 2;
2383
2384                 output->sib_present = false;    /* no SIB - it's 16-bit */
2385                 output->bytes = mod;    /* bytes of offset needed */
2386                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2387             }
2388         }
2389     }
2390
     /* total encoded length: ModRM byte + optional SIB + displacement */
2391     output->size = 1 + output->sib_present + output->bytes;
2392     return output;
2393 }
2394
2395 static void add_asp(insn *ins, int addrbits)
2396 {
2397     int j, valid;
2398     int defdisp;
2399
2400     valid = (addrbits == 64) ? 64|32 : 32|16;
2401
2402     switch (ins->prefixes[PPS_ASIZE]) {
2403     case P_A16:
2404         valid &= 16;
2405         break;
2406     case P_A32:
2407         valid &= 32;
2408         break;
2409     case P_A64:
2410         valid &= 64;
2411         break;
2412     case P_ASP:
2413         valid &= (addrbits == 32) ? 16 : 32;
2414         break;
2415     default:
2416         break;
2417     }
2418
2419     for (j = 0; j < ins->operands; j++) {
2420         if (!(MEMORY & ~ins->oprs[j].type)) {
2421             int32_t i, b;
2422
2423             /* Verify as Register */
2424             if (ins->oprs[j].indexreg < EXPR_REG_START
2425                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2426                 i = 0;
2427             else
2428                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2429
2430             /* Verify as Register */
2431             if (ins->oprs[j].basereg < EXPR_REG_START
2432                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2433                 b = 0;
2434             else
2435                 b = nasm_reg_flags[ins->oprs[j].basereg];
2436
2437             if (ins->oprs[j].scale == 0)
2438                 i = 0;
2439
2440             if (!i && !b) {
2441                 int ds = ins->oprs[j].disp_size;
2442                 if ((addrbits != 64 && ds > 8) ||
2443                     (addrbits == 64 && ds == 16))
2444                     valid &= ds;
2445             } else {
2446                 if (!(REG16 & ~b))
2447                     valid &= 16;
2448                 if (!(REG32 & ~b))
2449                     valid &= 32;
2450                 if (!(REG64 & ~b))
2451                     valid &= 64;
2452
2453                 if (!(REG16 & ~i))
2454                     valid &= 16;
2455                 if (!(REG32 & ~i))
2456                     valid &= 32;
2457                 if (!(REG64 & ~i))
2458                     valid &= 64;
2459             }
2460         }
2461     }
2462
2463     if (valid & addrbits) {
2464         ins->addr_size = addrbits;
2465     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2466         /* Add an address size prefix */
2467         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2468         ins->prefixes[PPS_ASIZE] = pref;
2469         ins->addr_size = (addrbits == 32) ? 16 : 32;
2470     } else {
2471         /* Impossible... */
2472         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2473         ins->addr_size = addrbits; /* Error recovery */
2474     }
2475
2476     defdisp = ins->addr_size == 16 ? 16 : 32;
2477
2478     for (j = 0; j < ins->operands; j++) {
2479         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2480             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2481             != ins->addr_size) {
2482             /* mem_offs sizes must match the address size; if not,
2483                strip the MEM_OFFS bit and match only EA instructions */
2484             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2485         }
2486     }
2487 }