assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   This program is free software; you can redistribute it and/or modify
   8  *   it under the terms of the GNU Lesser General Public License as
   9  *   published by the Free Software Foundation, Inc.,
  10  *   51 Franklin St, Fifth Floor, Boston MA 02110-1301, USA; version 2.1,
  11  *   or, at your option, any later version, incorporated herein by
  12  *   reference.
  13  *
  14  *   Patches submitted to this file are required to be dual licensed
  15  *   under the LGPL 2.1+ and the 2-clause BSD license:
  16  *
  17  *   Copyright 1996-2009 the NASM Authors - All rights reserved.
  18  *
  19  *   Redistribution and use in source and binary forms, with or without
  20  *   modification, are permitted provided that the following
  21  *   conditions are met:
  22  *
  23  *   * Redistributions of source code must retain the above copyright
  24  *     notice, this list of conditions and the following disclaimer.
  25  *   * Redistributions in binary form must reproduce the above
  26  *     copyright notice, this list of conditions and the following
  27  *     disclaimer in the documentation and/or other materials provided
  28  *     with the distribution.
  29  *
  30  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  33  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  34  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  39  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  40  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  41  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  42  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43  *
  44  * ----------------------------------------------------------------------- */
  45
  46 /*
  47  * assemble.c   code generation for the Netwide Assembler
  48  *
  49  * the actual codes (C syntax, i.e. octal):
  50  * \0            - terminates the code. (Unless it's a literal of course.)
  51  * \1..\4        - that many literal bytes follow in the code stream
  52  * \5            - add 4 to the primary operand number (b, low octdigit)
  53  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  54  * \7            - add 4 to both the primary and the secondary operand number
  55  * \10..\13      - a literal byte follows in the code stream, to be added
  56  *                 to the register value of operand 0..3
  57  * \14..\17      - a signed byte immediate operand, from operand 0..3
  58  * \20..\23      - a byte immediate operand, from operand 0..3
  59  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  60  * \30..\33      - a word immediate operand, from operand 0..3
  61  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  62  *                 assembly mode or the operand-size override on the operand
  63  * \40..\43      - a long immediate operand, from operand 0..3
  64  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  65  *                 depending on the address size of the instruction.
  66  * \50..\53      - a byte relative operand, from operand 0..3
  67  * \54..\57      - a qword immediate operand, from operand 0..3
  68  * \60..\63      - a word relative operand, from operand 0..3
  69  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  70  *                 assembly mode or the operand-size override on the operand
  71  * \70..\73      - a long relative operand, from operand 0..3
  72  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  73  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  74  *                 field the register value of operand b.
  75  * \140..\143    - an immediate word or signed byte for operand 0..3
  76  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  77  *                  is a signed byte rather than a word.  Opcode byte follows.
  78  * \150..\153    - an immediate dword or signed byte for operand 0..3
  79  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  80  *                  is a signed byte rather than a dword.  Opcode byte follows.
  81  * \160..\163    - this instruction uses DREX rather than REX, with the
  82  *                 OC0 field set to 0, and the dest field taken from
  83  *                 operand 0..3.
  84  * \164..\167    - this instruction uses DREX rather than REX, with the
  85  *                 OC0 field set to 1, and the dest field taken from
  86  *                 operand 0..3.
  87  * \171          - placement of DREX suffix in the absence of an EA
  88  * \172\ab       - the register number from operand a in bits 7..4, with
  89  *                 the 4-bit immediate from operand b in bits 3..0.
  90  * \173\xab      - the register number from operand a in bits 7..4, with
  91  *                 the value b in bits 3..0.
  92  * \174\a        - the register number from operand a in bits 7..4, and
  93  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  94  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  95  *                 field equal to digit b.
  96  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  97  *                 is not equal to the truncated and sign-extended 32-bit
  98  *                 operand; used for 32-bit immediates in 64-bit mode.
  99  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
 100  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
 101  *                 V field taken from operand 0..3.
 102  * \270          - this instruction uses VEX/XOP rather than REX, with the
 103  *                 V field set to 1111b.
 104  *
 105  * VEX/XOP prefixes are followed by the sequence:
 106  * \tmm\wlp        where mm is the M field; and wlp is:
 107  *                 00 0ww lpp
 108  *                 [w0] ww = 0 for W = 0
 109  *                 [w1] ww = 1 for W = 1
 110  *                 [wx] ww = 2 for W don't care (always assembled as 0)
 111  *                 [ww] ww = 3 for W used as REX.W
 112  *
 113  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 114  *
 115  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 116  *                 which is to be extended to the operand size.
 117  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 118  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 119  * \312          - (disassembler only) invalid with non-default address size.
 120  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 121  * \314          - (disassembler only) invalid with REX.B
 122  * \315          - (disassembler only) invalid with REX.X
 123  * \316          - (disassembler only) invalid with REX.R
 124  * \317          - (disassembler only) invalid with REX.W
 125  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 126  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 127  * \322          - indicates that this instruction is only valid when the
 128  *                 operand size is the default (instruction to disassembler,
 129  *                 generates no code in the assembler)
 130  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 131  * \324          - indicates 64-bit operand size requiring REX prefix.
 132  * \325          - instruction which always uses spl/bpl/sil/dil
 133  * \330          - a literal byte follows in the code stream, to be added
 134  *                 to the condition code value of the instruction.
 135  * \331          - instruction not valid with REP prefix.  Hint for
 136  *                 disassembler only; for SSE instructions.
 137  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 138  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 139  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 140  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  141  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  142  * \337          - force a REPNE prefix (0xF2) even if not specified.
 143  *                 \336-\337 are still listed as prefixes in the disassembler.
 144  * \340          - reserve <operand 0> bytes of uninitialized storage.
 145  *                 Operand 0 had better be a segmentless constant.
 146  * \341          - this instruction needs a WAIT "prefix"
 147  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 148  *                 (POP is never used for CS) depending on operand 0
 149  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 150  *                 on operand 0
 151  * \360          - no SSE prefix (== \364\331)
 152  * \361          - 66 SSE prefix (== \366\331)
 153  * \362          - F2 SSE prefix (== \364\332)
 154  * \363          - F3 SSE prefix (== \364\333)
 155  * \364          - operand-size prefix (0x66) not permitted
 156  * \365          - address-size prefix (0x67) not permitted
 157  * \366          - operand-size prefix (0x66) used as opcode extension
 158  * \367          - address-size prefix (0x67) used as opcode extension
 159  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 160  *                 370 is used for Jcc, 371 is used for JMP.
 161  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 162  *                 used for conditional jump over longer jump
 163  */
 164
 165 #include "compiler.h"
 166
 167 #include <stdio.h>
 168 #include <string.h>
 169 #include <inttypes.h>
 170
 171 #include "nasm.h"
 172 #include "nasmlib.h"
 173 #include "assemble.h"
 174 #include "insns.h"
 175 #include "tables.h"
 176
 177 typedef struct {
 178     int sib_present;                 /* is a SIB byte necessary? */
 179     int bytes;                       /* # of bytes of offset needed */
 180     int size;                        /* lazy - this is sib+bytes+1 */
 181     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 182 } ea;
 183
 184 static uint32_t cpu;            /* cpu level received from nasm.c */
 185 static efunc errfunc;
 186 static struct ofmt *outfmt;
 187 static ListGen *list;
 188
 189 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 190 static void gencode(int32_t segment, int64_t offset, int bits,
 191                     insn * ins, const struct itemplate *temp,
 192                     int64_t insn_end);
 193 static int matches(const struct itemplate *, insn *, int bits);
 194 static int32_t regflag(const operand *);
 195 static int32_t regval(const operand *);
 196 static int rexflags(int, int32_t, int);
 197 static int op_rexflags(const operand *, int);
 198 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 199 static void add_asp(insn *, int);
 200
 201 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 202 {
 203     return ins->prefixes[pos] == prefix;
 204 }
 205
 206 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 207 {
 208     if (ins->prefixes[pos])
 209         errfunc(ERR_NONFATAL, "invalid %s prefix",
 210                 prefix_name(ins->prefixes[pos]));
 211 }
 212
 213 static const char *size_name(int size)
 214 {
 215     switch (size) {
 216     case 1:
 217         return "byte";
 218     case 2:
 219         return "word";
 220     case 4:
 221         return "dword";
 222     case 8:
 223         return "qword";
 224     case 10:
 225         return "tword";
 226     case 16:
 227         return "oword";
 228     case 32:
 229         return "yword";
 230     default:
 231         return "???";
 232     }
 233 }
 234
 235 static void warn_overflow(int size, const struct operand *o)
 236 {
 237     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 238         int64_t lim = ((int64_t)1 << (size*8))-1;
 239         int64_t data = o->offset;
 240
 241         if (data < ~lim || data > lim)
 242             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 243                     "%s data exceeds bounds", size_name(size));
 244     }
 245 }
 246 /*
 247  * This routine wrappers the real output format's output routine,
 248  * in order to pass a copy of the data off to the listing file
 249  * generator at the same time.
 250  */
 251 static void out(int64_t offset, int32_t segto, const void *data,
 252                 enum out_type type, uint64_t size,
 253                 int32_t segment, int32_t wrt)
 254 {
 255     static int32_t lineno = 0;     /* static!!! */
 256     static char *lnfname = NULL;
 257     uint8_t p[8];
 258
 259     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 260         /*
 261          * This is a non-relocated address, and we're going to
 262          * convert it into RAWDATA format.
 263          */
 264         uint8_t *q = p;
 265
 266         if (size > 8) {
 267             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 268             return;
 269         }
 270
 271         WRITEADDR(q, *(int64_t *)data, size);
 272         data = p;
 273         type = OUT_RAWDATA;
 274     }
 275
 276     list->output(offset, data, type, size);
 277
 278     /*
 279      * this call to src_get determines when we call the
 280      * debug-format-specific "linenum" function
 281      * it updates lineno and lnfname to the current values
 282      * returning 0 if "same as last time", -2 if lnfname
 283      * changed, and the amount by which lineno changed,
 284      * if it did. thus, these variables must be static
 285      */
 286
 287     if (src_get(&lineno, &lnfname)) {
 288         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 289     }
 290
 291     outfmt->output(segto, data, type, size, segment, wrt);
 292 }
 293
 294 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 295                      insn * ins, const uint8_t *code)
 296 {
 297     int64_t isize;
 298     uint8_t c = code[0];
 299
 300     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 301         return false;
 302     if (!optimizing)
 303         return false;
 304     if (optimizing < 0 && c == 0371)
 305         return false;
 306
 307     isize = calcsize(segment, offset, bits, ins, code);
 308
 309     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 310         /* Be optimistic in pass 1 */
 311         return true;
 312
 313     if (ins->oprs[0].segment != segment)
 314         return false;
 315
 316     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 317     return (isize >= -128 && isize <= 127); /* is it byte size? */
 318 }
 319
 320 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 321               insn * instruction, struct ofmt *output, efunc error,
 322               ListGen * listgen)
 323 {
 324     const struct itemplate *temp;
 325     int j;
 326     int size_prob;
 327     int64_t insn_end;
 328     int32_t itimes;
 329     int64_t start = offset;
 330     int64_t wsize = 0;             /* size for DB etc. */
 331
 332     errfunc = error;            /* to pass to other functions */
 333     cpu = cp;
 334     outfmt = output;            /* likewise */
 335     list = listgen;             /* and again */
 336
 337     switch (instruction->opcode) {
 338     case -1:
 339         return 0;
 340     case I_DB:
 341         wsize = 1;
 342         break;
 343     case I_DW:
 344         wsize = 2;
 345         break;
 346     case I_DD:
 347         wsize = 4;
 348         break;
 349     case I_DQ:
 350         wsize = 8;
 351         break;
 352     case I_DT:
 353         wsize = 10;
 354         break;
 355     case I_DO:
 356         wsize = 16;
 357         break;
 358     case I_DY:
 359         wsize = 32;
 360         break;
 361     default:
 362         break;
 363     }
 364
 365     if (wsize) {
 366         extop *e;
 367         int32_t t = instruction->times;
 368         if (t < 0)
 369             errfunc(ERR_PANIC,
 370                     "instruction->times < 0 (%ld) in assemble()", t);
 371
 372         while (t--) {           /* repeat TIMES times */
 373             for (e = instruction->eops; e; e = e->next) {
 374                 if (e->type == EOT_DB_NUMBER) {
 375                     if (wsize == 1) {
 376                         if (e->segment != NO_SEG)
 377                             errfunc(ERR_NONFATAL,
 378                                     "one-byte relocation attempted");
 379                         else {
 380                             uint8_t out_byte = e->offset;
 381                             out(offset, segment, &out_byte,
 382                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 383                         }
 384                     } else if (wsize > 8) {
 385                         errfunc(ERR_NONFATAL,
 386                                 "integer supplied to a DT, DO or DY"
 387                                 " instruction");
 388                     } else
 389                         out(offset, segment, &e->offset,
 390                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 391                     offset += wsize;
 392                 } else if (e->type == EOT_DB_STRING ||
 393                            e->type == EOT_DB_STRING_FREE) {
 394                     int align;
 395
 396                     out(offset, segment, e->stringval,
 397                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 398                     align = e->stringlen % wsize;
 399
 400                     if (align) {
 401                         align = wsize - align;
 402                         out(offset, segment, zero_buffer,
 403                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 404                     }
 405                     offset += e->stringlen + align;
 406                 }
 407             }
 408             if (t > 0 && t == instruction->times - 1) {
 409                 /*
 410                  * Dummy call to list->output to give the offset to the
 411                  * listing module.
 412                  */
 413                 list->output(offset, NULL, OUT_RAWDATA, 0);
 414                 list->uplevel(LIST_TIMES);
 415             }
 416         }
 417         if (instruction->times > 1)
 418             list->downlevel(LIST_TIMES);
 419         return offset - start;
 420     }
 421
 422     if (instruction->opcode == I_INCBIN) {
 423         const char *fname = instruction->eops->stringval;
 424         FILE *fp;
 425
 426         fp = fopen(fname, "rb");
 427         if (!fp) {
 428             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 429                   fname);
 430         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 431             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 432                   fname);
 433         } else {
 434             static char buf[4096];
 435             size_t t = instruction->times;
 436             size_t base = 0;
 437             size_t len;
 438
 439             len = ftell(fp);
 440             if (instruction->eops->next) {
 441                 base = instruction->eops->next->offset;
 442                 len -= base;
 443                 if (instruction->eops->next->next &&
 444                     len > (size_t)instruction->eops->next->next->offset)
 445                     len = (size_t)instruction->eops->next->next->offset;
 446             }
 447             /*
 448              * Dummy call to list->output to give the offset to the
 449              * listing module.
 450              */
 451             list->output(offset, NULL, OUT_RAWDATA, 0);
 452             list->uplevel(LIST_INCBIN);
 453             while (t--) {
 454                 size_t l;
 455
 456                 fseek(fp, base, SEEK_SET);
 457                 l = len;
 458                 while (l > 0) {
 459                     int32_t m;
 460                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 461                     if (!m) {
 462                         /*
 463                          * This shouldn't happen unless the file
 464                          * actually changes while we are reading
 465                          * it.
 466                          */
 467                         error(ERR_NONFATAL,
 468                               "`incbin': unexpected EOF while"
 469                               " reading file `%s'", fname);
 470                         t = 0;  /* Try to exit cleanly */
 471                         break;
 472                     }
 473                     out(offset, segment, buf, OUT_RAWDATA, m,
 474                         NO_SEG, NO_SEG);
 475                     l -= m;
 476                 }
 477             }
 478             list->downlevel(LIST_INCBIN);
 479             if (instruction->times > 1) {
 480                 /*
 481                  * Dummy call to list->output to give the offset to the
 482                  * listing module.
 483                  */
 484                 list->output(offset, NULL, OUT_RAWDATA, 0);
 485                 list->uplevel(LIST_TIMES);
 486                 list->downlevel(LIST_TIMES);
 487             }
 488             fclose(fp);
 489             return instruction->times * len;
 490         }
 491         return 0;               /* if we're here, there's an error */
 492     }
 493
 494     /* Check to see if we need an address-size prefix */
 495     add_asp(instruction, bits);
 496
 497     size_prob = 0;
 498
 499     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 500         int m = matches(temp, instruction, bits);
 501         if (m == 100 ||
 502             (m == 99 && jmp_match(segment, offset, bits,
 503                                   instruction, temp->code))) {
 504             /* Matches! */
 505             int64_t insn_size = calcsize(segment, offset, bits,
 506                                       instruction, temp->code);
 507             itimes = instruction->times;
 508             if (insn_size < 0)  /* shouldn't be, on pass two */
 509                 error(ERR_PANIC, "errors made it through from pass one");
 510             else
 511                 while (itimes--) {
 512                     for (j = 0; j < MAXPREFIX; j++) {
 513                         uint8_t c = 0;
 514                         switch (instruction->prefixes[j]) {
 515                         case P_WAIT:
 516                             c = 0x9B;
 517                             break;
 518                         case P_LOCK:
 519                             c = 0xF0;
 520                             break;
 521                         case P_REPNE:
 522                         case P_REPNZ:
 523                             c = 0xF2;
 524                             break;
 525                         case P_REPE:
 526                         case P_REPZ:
 527                         case P_REP:
 528                             c = 0xF3;
 529                             break;
 530                         case R_CS:
 531                             if (bits == 64) {
 532                                 error(ERR_WARNING | ERR_PASS2,
 533                                       "cs segment base generated, but will be ignored in 64-bit mode");
 534                             }
 535                             c = 0x2E;
 536                             break;
 537                         case R_DS:
 538                             if (bits == 64) {
 539                                 error(ERR_WARNING | ERR_PASS2,
 540                                       "ds segment base generated, but will be ignored in 64-bit mode");
 541                             }
 542                             c = 0x3E;
 543                             break;
 544                         case R_ES:
 545                            if (bits == 64) {
 546                                 error(ERR_WARNING | ERR_PASS2,
 547                                       "es segment base generated, but will be ignored in 64-bit mode");
 548                            }
 549                             c = 0x26;
 550                             break;
 551                         case R_FS:
 552                             c = 0x64;
 553                             break;
 554                         case R_GS:
 555                             c = 0x65;
 556                             break;
 557                         case R_SS:
 558                             if (bits == 64) {
 559                                 error(ERR_WARNING | ERR_PASS2,
 560                                       "ss segment base generated, but will be ignored in 64-bit mode");
 561                             }
 562                             c = 0x36;
 563                             break;
 564                         case R_SEGR6:
 565                         case R_SEGR7:
 566                             error(ERR_NONFATAL,
 567                                   "segr6 and segr7 cannot be used as prefixes");
 568                             break;
 569                         case P_A16:
 570                             if (bits == 64) {
 571                                 error(ERR_NONFATAL,
 572                                       "16-bit addressing is not supported "
 573                                       "in 64-bit mode");
 574                             } else if (bits != 16)
 575                                 c = 0x67;
 576                             break;
 577                         case P_A32:
 578                             if (bits != 32)
 579                                 c = 0x67;
 580                             break;
 581                         case P_A64:
 582                             if (bits != 64) {
 583                                 error(ERR_NONFATAL,
 584                                       "64-bit addressing is only supported "
 585                                       "in 64-bit mode");
 586                             }
 587                             break;
 588                         case P_ASP:
 589                             c = 0x67;
 590                             break;
 591                         case P_O16:
 592                             if (bits != 16)
 593                                 c = 0x66;
 594                             break;
 595                         case P_O32:
 596                             if (bits == 16)
 597                                 c = 0x66;
 598                             break;
 599                         case P_O64:
 600                             /* REX.W */
 601                             break;
 602                         case P_OSP:
 603                             c = 0x66;
 604                             break;
 605                         case P_none:
 606                             break;
 607                         default:
 608                             error(ERR_PANIC, "invalid instruction prefix");
 609                         }
 610                         if (c != 0) {
 611                             out(offset, segment, &c, OUT_RAWDATA, 1,
 612                                 NO_SEG, NO_SEG);
 613                             offset++;
 614                         }
 615                     }
 616                     insn_end = offset + insn_size;
 617                     gencode(segment, offset, bits, instruction,
 618                             temp, insn_end);
 619                     offset += insn_size;
 620                     if (itimes > 0 && itimes == instruction->times - 1) {
 621                         /*
 622                          * Dummy call to list->output to give the offset to the
 623                          * listing module.
 624                          */
 625                         list->output(offset, NULL, OUT_RAWDATA, 0);
 626                         list->uplevel(LIST_TIMES);
 627                     }
 628                 }
 629             if (instruction->times > 1)
 630                 list->downlevel(LIST_TIMES);
 631             return offset - start;
 632         } else if (m > 0 && m > size_prob) {
 633             size_prob = m;
 634         }
 635     }
 636
 637     if (temp->opcode == -1) {   /* didn't match any instruction */
 638         switch (size_prob) {
 639         case 1:
 640             error(ERR_NONFATAL, "operation size not specified");
 641             break;
 642         case 2:
 643             error(ERR_NONFATAL, "mismatch in operand sizes");
 644             break;
 645         case 3:
 646             error(ERR_NONFATAL, "no instruction for this cpu level");
 647             break;
 648         case 4:
 649             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 650                   bits);
 651             break;
 652         default:
 653             error(ERR_NONFATAL,
 654                   "invalid combination of opcode and operands");
 655             break;
 656         }
 657     }
 658     return 0;
 659 }
 660
 661 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 662                insn * instruction, efunc error)
 663 {
 664     const struct itemplate *temp;
 665
 666     errfunc = error;            /* to pass to other functions */
 667     cpu = cp;
 668
 669     if (instruction->opcode == -1)
 670         return 0;
 671
 672     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 673         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 674         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 675         instruction->opcode == I_DY) {
 676         extop *e;
 677         int32_t isize, osize, wsize = 0;   /* placate gcc */
 678
 679         isize = 0;
 680         switch (instruction->opcode) {
 681         case I_DB:
 682             wsize = 1;
 683             break;
 684         case I_DW:
 685             wsize = 2;
 686             break;
 687         case I_DD:
 688             wsize = 4;
 689             break;
 690         case I_DQ:
 691             wsize = 8;
 692             break;
 693         case I_DT:
 694             wsize = 10;
 695             break;
 696         case I_DO:
 697             wsize = 16;
 698             break;
 699         case I_DY:
 700             wsize = 32;
 701             break;
 702         default:
 703             break;
 704         }
 705
 706         for (e = instruction->eops; e; e = e->next) {
 707             int32_t align;
 708
 709             osize = 0;
 710             if (e->type == EOT_DB_NUMBER)
 711                 osize = 1;
 712             else if (e->type == EOT_DB_STRING ||
 713                      e->type == EOT_DB_STRING_FREE)
 714                 osize = e->stringlen;
 715
 716             align = (-osize) % wsize;
 717             if (align < 0)
 718                 align += wsize;
 719             isize += osize + align;
 720         }
 721         return isize * instruction->times;
 722     }
 723
 724     if (instruction->opcode == I_INCBIN) {
 725         const char *fname = instruction->eops->stringval;
 726         FILE *fp;
 727         size_t len;
 728
 729         fp = fopen(fname, "rb");
 730         if (!fp)
 731             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 732                   fname);
 733         else if (fseek(fp, 0L, SEEK_END) < 0)
 734             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 735                   fname);
 736         else {
 737             len = ftell(fp);
 738             fclose(fp);
 739             if (instruction->eops->next) {
 740                 len -= instruction->eops->next->offset;
 741                 if (instruction->eops->next->next &&
 742                     len > (size_t)instruction->eops->next->next->offset) {
 743                     len = (size_t)instruction->eops->next->next->offset;
 744                 }
 745             }
 746             return instruction->times * len;
 747         }
 748         return 0;               /* if we're here, there's an error */
 749     }
 750
 751     /* Check to see if we need an address-size prefix */
 752     add_asp(instruction, bits);
 753
 754     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 755         int m = matches(temp, instruction, bits);
 756         if (m == 100 ||
 757             (m == 99 && jmp_match(segment, offset, bits,
 758                                   instruction, temp->code))) {
 759             /* we've matched an instruction. */
 760             int64_t isize;
 761             const uint8_t *codes = temp->code;
 762             int j;
 763
 764             isize = calcsize(segment, offset, bits, instruction, codes);
 765             if (isize < 0)
 766                 return -1;
 767             for (j = 0; j < MAXPREFIX; j++) {
 768                 switch (instruction->prefixes[j]) {
 769                 case P_A16:
 770                     if (bits != 16)
 771                         isize++;
 772                     break;
 773                 case P_A32:
 774                     if (bits != 32)
 775                         isize++;
 776                     break;
 777                 case P_O16:
 778                     if (bits != 16)
 779                         isize++;
 780                     break;
 781                 case P_O32:
 782                     if (bits == 16)
 783                         isize++;
 784                     break;
 785                 case P_A64:
 786                 case P_O64:
 787                 case P_none:
 788                     break;
 789                 default:
 790                     isize++;
 791                     break;
 792                 }
 793             }
 794             return isize * instruction->times;
 795         }
 796     }
 797     return -1;                  /* didn't match any instruction */
 798 }
 799
 800 static bool possible_sbyte(operand *o)
 801 {
 802     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 803         !(o->opflags & OPFLAG_UNKNOWN) &&
 804         optimizing >= 0 && !(o->type & STRICT);
 805 }
 806
 807 /* check that opn[op]  is a signed byte of size 16 or 32 */
 808 static bool is_sbyte16(operand *o)
 809 {
 810     int16_t v;
 811
 812     if (!possible_sbyte(o))
 813         return false;
 814
 815     v = o->offset;
 816     return v >= -128 && v <= 127;
 817 }
 818
 819 static bool is_sbyte32(operand *o)
 820 {
 821     int32_t v;
 822
 823     if (!possible_sbyte(o))
 824         return false;
 825
 826     v = o->offset;
 827     return v >= -128 && v <= 127;
 828 }
 829
 830 /* Common construct */
 831 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 832
 833 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 834                         insn * ins, const uint8_t *codes)
 835 {
 836     int64_t length = 0;
 837     uint8_t c;
 838     int rex_mask = ~0;
 839     int op1, op2;
 840     struct operand *opx;
 841     uint8_t opex = 0;
 842
 843     ins->rex = 0;               /* Ensure REX is reset */
 844
 845     if (ins->prefixes[PPS_OSIZE] == P_O64)
 846         ins->rex |= REX_W;
 847
 848     (void)segment;              /* Don't warn that this parameter is unused */
 849     (void)offset;               /* Don't warn that this parameter is unused */
 850
 851     while (*codes) {
 852         c = *codes++;
 853         op1 = (c & 3) + ((opex & 1) << 2);
 854         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 855         opx = &ins->oprs[op1];
 856         opex = 0;               /* For the next iteration */
 857
 858         switch (c) {
 859         case 01:
 860         case 02:
 861         case 03:
 862         case 04:
 863             codes += c, length += c;
 864             break;
 865
 866         case 05:
 867         case 06:
 868         case 07:
 869             opex = c;
 870             break;
 871
 872         case4(010):
 873             ins->rex |=
 874                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 875             codes++, length++;
 876             break;
 877
 878         case4(014):
 879         case4(020):
 880         case4(024):
 881             length++;
 882             break;
 883
 884         case4(030):
 885             length += 2;
 886             break;
 887
 888         case4(034):
 889             if (opx->type & (BITS16 | BITS32 | BITS64))
 890                 length += (opx->type & BITS16) ? 2 : 4;
 891             else
 892                 length += (bits == 16) ? 2 : 4;
 893             break;
 894
 895         case4(040):
 896             length += 4;
 897             break;
 898
 899         case4(044):
 900             length += ins->addr_size >> 3;
 901             break;
 902
 903         case4(050):
 904             length++;
 905             break;
 906
 907         case4(054):
 908             length += 8; /* MOV reg64/imm */
 909             break;
 910
 911         case4(060):
 912             length += 2;
 913             break;
 914
 915         case4(064):
 916             if (opx->type & (BITS16 | BITS32 | BITS64))
 917                 length += (opx->type & BITS16) ? 2 : 4;
 918             else
 919                 length += (bits == 16) ? 2 : 4;
 920             break;
 921
 922         case4(070):
 923             length += 4;
 924             break;
 925
 926         case4(074):
 927             length += 2;
 928             break;
 929
 930         case4(0140):
 931             length += is_sbyte16(opx) ? 1 : 2;
 932             break;
 933
 934         case4(0144):
 935             codes++;
 936             length++;
 937             break;
 938
 939         case4(0150):
 940             length += is_sbyte32(opx) ? 1 : 4;
 941             break;
 942
 943         case4(0154):
 944             codes++;
 945             length++;
 946             break;
 947
 948         case4(0160):
 949             length++;
 950             ins->rex |= REX_D;
 951             ins->drexdst = regval(opx);
 952             break;
 953
 954         case4(0164):
 955             length++;
 956             ins->rex |= REX_D|REX_OC;
 957             ins->drexdst = regval(opx);
 958             break;
 959
 960         case 0171:
 961             break;
 962
 963         case 0172:
 964         case 0173:
 965         case 0174:
 966             codes++;
 967             length++;
 968             break;
 969
 970         case4(0250):
 971             length += is_sbyte32(opx) ? 1 : 4;
 972             break;
 973
 974         case4(0254):
 975             length += 4;
 976             break;
 977
 978         case4(0260):
 979             ins->rex |= REX_V;
 980             ins->drexdst = regval(opx);
 981             ins->vex_cm = *codes++;
 982             ins->vex_wlp = *codes++;
 983             break;
 984
 985         case 0270:
 986             ins->rex |= REX_V;
 987             ins->drexdst = 0;
 988             ins->vex_cm = *codes++;
 989             ins->vex_wlp = *codes++;
 990             break;
 991
 992         case4(0274):
 993             length++;
 994             break;
 995
 996         case4(0300):
 997             break;
 998
 999         case 0310:
1000             if (bits == 64)
1001                 return -1;
1002             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1003             break;
1004
1005         case 0311:
1006             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1007             break;
1008
1009         case 0312:
1010             break;
1011
1012         case 0313:
1013             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1014                 has_prefix(ins, PPS_ASIZE, P_A32))
1015                 return -1;
1016             break;
1017
1018         case4(0314):
1019             break;
1020
1021         case 0320:
1022             length += (bits != 16);
1023             break;
1024
1025         case 0321:
1026             length += (bits == 16);
1027             break;
1028
1029         case 0322:
1030             break;
1031
1032         case 0323:
1033             rex_mask &= ~REX_W;
1034             break;
1035
1036         case 0324:
1037             ins->rex |= REX_W;
1038             break;
1039
1040         case 0325:
1041             ins->rex |= REX_NH;
1042             break;
1043
1044         case 0330:
1045             codes++, length++;
1046             break;
1047
1048         case 0331:
1049             break;
1050
1051         case 0332:
1052         case 0333:
1053             length++;
1054             break;
1055
1056         case 0334:
1057             ins->rex |= REX_L;
1058             break;
1059
1060         case 0335:
1061             break;
1062
1063         case 0336:
1064             if (!ins->prefixes[PPS_LREP])
1065                 ins->prefixes[PPS_LREP] = P_REP;
1066             break;
1067
1068         case 0337:
1069             if (!ins->prefixes[PPS_LREP])
1070                 ins->prefixes[PPS_LREP] = P_REPNE;
1071             break;
1072
1073         case 0340:
1074             if (ins->oprs[0].segment != NO_SEG)
1075                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1076                         " quantity of BSS space");
1077             else
1078                 length += ins->oprs[0].offset;
1079             break;
1080
1081         case 0341:
1082             if (!ins->prefixes[PPS_WAIT])
1083                 ins->prefixes[PPS_WAIT] = P_WAIT;
1084             break;
1085
1086         case4(0344):
1087             length++;
1088             break;
1089
1090         case 0360:
1091             break;
1092
1093         case 0361:
1094         case 0362:
1095         case 0363:
1096             length++;
1097             break;
1098
1099         case 0364:
1100         case 0365:
1101             break;
1102
1103         case 0366:
1104         case 0367:
1105             length++;
1106             break;
1107
1108         case 0370:
1109         case 0371:
1110         case 0372:
1111             break;
1112
1113         case 0373:
1114             length++;
1115             break;
1116
1117         case4(0100):
1118         case4(0110):
1119         case4(0120):
1120         case4(0130):
1121         case4(0200):
1122         case4(0204):
1123         case4(0210):
1124         case4(0214):
1125         case4(0220):
1126         case4(0224):
1127         case4(0230):
1128         case4(0234):
1129             {
1130                 ea ea_data;
1131                 int rfield;
1132                 int32_t rflags;
1133                 struct operand *opy = &ins->oprs[op2];
1134
1135                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1136
1137                 if (c <= 0177) {
1138                     /* pick rfield from operand b (opx) */
1139                     rflags = regflag(opx);
1140                     rfield = nasm_regvals[opx->basereg];
1141                 } else {
1142                     rflags = 0;
1143                     rfield = c & 7;
1144                 }
1145                 if (!process_ea(opy, &ea_data, bits,
1146                                 ins->addr_size, rfield, rflags)) {
1147                     errfunc(ERR_NONFATAL, "invalid effective address");
1148                     return -1;
1149                 } else {
1150                     ins->rex |= ea_data.rex;
1151                     length += ea_data.size;
1152                 }
1153             }
1154             break;
1155
1156         default:
1157             errfunc(ERR_PANIC, "internal instruction table corrupt"
1158                     ": instruction code \\%o (0x%02X) given", c, c);
1159             break;
1160         }
1161     }
1162
1163     ins->rex &= rex_mask;
1164
1165     if (ins->rex & REX_NH) {
1166         if (ins->rex & REX_H) {
1167             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1168             return -1;
1169         }
1170         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1171     }
1172
1173     if (ins->rex & REX_V) {
1174         int bad32 = REX_R|REX_W|REX_X|REX_B;
1175
1176         if (ins->rex & REX_H) {
1177             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1178             return -1;
1179         }
1180         switch (ins->vex_wlp & 030) {
1181         case 000:
1182         case 020:
1183             ins->rex &= ~REX_W;
1184             break;
1185         case 010:
1186             ins->rex |= REX_W;
1187             bad32 &= ~REX_W;
1188             break;
1189         case 030:
1190             /* Follow REX_W */
1191             break;
1192         }
1193
1194         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1195             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1196             return -1;
1197         }
1198         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1199             length += 3;
1200         else
1201             length += 2;
1202     } else if (ins->rex & REX_D) {
1203         if (ins->rex & REX_H) {
1204             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1205             return -1;
1206         }
1207         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1208                            ins->drexdst > 7)) {
1209             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1210             return -1;
1211         }
1212         length++;
1213     } else if (ins->rex & REX_REAL) {
1214         if (ins->rex & REX_H) {
1215             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1216             return -1;
1217         } else if (bits == 64) {
1218             length++;
1219         } else if ((ins->rex & REX_L) &&
1220                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1221                    cpu >= IF_X86_64) {
1222             /* LOCK-as-REX.R */
1223             assert_no_prefix(ins, PPS_LREP);
1224             length++;
1225         } else {
1226             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1227             return -1;
1228         }
1229     }
1230
1231     return length;
1232 }
1233
/*
 * Emit a REX prefix byte if one is pending.  Nothing is emitted when
 * the instruction uses DREX or VEX (REX_D/REX_V set), when no real REX
 * bits are set, or when not assembling for 64-bit mode.  After emitting,
 * ins->rex is cleared so that the prefix is written only once, and
 * `offset` is advanced by one byte.
 *
 * Relies on `ins`, `bits`, `offset` and `segment` being in scope at the
 * point of use.  Wrapped in do { } while (0) so that `EMIT_REX();`
 * behaves as a single statement, including inside an if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1241
1242 static void gencode(int32_t segment, int64_t offset, int bits,
1243                     insn * ins, const struct itemplate *temp,
1244                     int64_t insn_end)
1245 {
1246     static char condval[] = {   /* conditional opcodes */
1247         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1248         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1249         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1250     };
1251     uint8_t c;
1252     uint8_t bytes[4];
1253     int64_t size;
1254     int64_t data;
1255     int op1, op2;
1256     struct operand *opx;
1257     const uint8_t *codes = temp->code;
1258     uint8_t opex = 0;
1259
1260     while (*codes) {
1261         c = *codes++;
1262         op1 = (c & 3) + ((opex & 1) << 2);
1263         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1264         opx = &ins->oprs[op1];
1265         opex = 0;               /* For the next iteration */
1266
1267         switch (c) {
1268         case 01:
1269         case 02:
1270         case 03:
1271         case 04:
1272             EMIT_REX();
1273             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1274             codes += c;
1275             offset += c;
1276             break;
1277
1278         case 05:
1279         case 06:
1280         case 07:
1281             opex = c;
1282             break;
1283
1284         case4(010):
1285             EMIT_REX();
1286             bytes[0] = *codes++ + (regval(opx) & 7);
1287             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1288             offset += 1;
1289             break;
1290
1291         case4(014):
1292             /* The test for BITS8 and SBYTE here is intended to avoid
1293                warning on optimizer actions due to SBYTE, while still
1294                warn on explicit BYTE directives.  Also warn, obviously,
1295                if the optimizer isn't enabled. */
1296             if (((opx->type & BITS8) ||
1297                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1298                 (opx->offset < -128 || opx->offset > 127)) {
1299                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1300                         "signed byte value exceeds bounds");
1301             }
1302             if (opx->segment != NO_SEG) {
1303                 data = opx->offset;
1304                 out(offset, segment, &data, OUT_ADDRESS, 1,
1305                     opx->segment, opx->wrt);
1306             } else {
1307                 bytes[0] = opx->offset;
1308                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1309                     NO_SEG);
1310             }
1311             offset += 1;
1312             break;
1313
1314         case4(020):
1315             if (opx->offset < -256 || opx->offset > 255) {
1316                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1317                         "byte value exceeds bounds");
1318             }
1319             if (opx->segment != NO_SEG) {
1320                 data = opx->offset;
1321                 out(offset, segment, &data, OUT_ADDRESS, 1,
1322                     opx->segment, opx->wrt);
1323             } else {
1324                 bytes[0] = opx->offset;
1325                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1326                     NO_SEG);
1327             }
1328             offset += 1;
1329             break;
1330
1331         case4(024):
1332             if (opx->offset < 0 || opx->offset > 255)
1333                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1334                         "unsigned byte value exceeds bounds");
1335             if (opx->segment != NO_SEG) {
1336                 data = opx->offset;
1337                 out(offset, segment, &data, OUT_ADDRESS, 1,
1338                     opx->segment, opx->wrt);
1339             } else {
1340                 bytes[0] = opx->offset;
1341                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1342                     NO_SEG);
1343             }
1344             offset += 1;
1345             break;
1346
1347         case4(030):
1348             warn_overflow(2, opx);
1349             data = opx->offset;
1350             out(offset, segment, &data, OUT_ADDRESS, 2,
1351                 opx->segment, opx->wrt);
1352             offset += 2;
1353             break;
1354
1355         case4(034):
1356             if (opx->type & (BITS16 | BITS32))
1357                 size = (opx->type & BITS16) ? 2 : 4;
1358             else
1359                 size = (bits == 16) ? 2 : 4;
1360             warn_overflow(size, opx);
1361             data = opx->offset;
1362             out(offset, segment, &data, OUT_ADDRESS, size,
1363                 opx->segment, opx->wrt);
1364             offset += size;
1365             break;
1366
1367         case4(040):
1368             warn_overflow(4, opx);
1369             data = opx->offset;
1370             out(offset, segment, &data, OUT_ADDRESS, 4,
1371                 opx->segment, opx->wrt);
1372             offset += 4;
1373             break;
1374
1375         case4(044):
1376             data = opx->offset;
1377             size = ins->addr_size >> 3;
1378             warn_overflow(size, opx);
1379             out(offset, segment, &data, OUT_ADDRESS, size,
1380                 opx->segment, opx->wrt);
1381             offset += size;
1382             break;
1383
1384         case4(050):
1385             if (opx->segment != segment)
1386                 errfunc(ERR_NONFATAL,
1387                         "short relative jump outside segment");
1388             data = opx->offset - insn_end;
1389             if (data > 127 || data < -128)
1390                 errfunc(ERR_NONFATAL, "short jump is out of range");
1391             bytes[0] = data;
1392             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1393             offset += 1;
1394             break;
1395
1396         case4(054):
1397             data = (int64_t)opx->offset;
1398             out(offset, segment, &data, OUT_ADDRESS, 8,
1399                 opx->segment, opx->wrt);
1400             offset += 8;
1401             break;
1402
1403         case4(060):
1404             if (opx->segment != segment) {
1405                 data = opx->offset;
1406                 out(offset, segment, &data,
1407                     OUT_REL2ADR, insn_end - offset,
1408                     opx->segment, opx->wrt);
1409             } else {
1410                 data = opx->offset - insn_end;
1411                 out(offset, segment, &data,
1412                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1413             }
1414             offset += 2;
1415             break;
1416
1417         case4(064):
1418             if (opx->type & (BITS16 | BITS32 | BITS64))
1419                 size = (opx->type & BITS16) ? 2 : 4;
1420             else
1421                 size = (bits == 16) ? 2 : 4;
1422             if (opx->segment != segment) {
1423                 data = opx->offset;
1424                 out(offset, segment, &data,
1425                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1426                     insn_end - offset, opx->segment, opx->wrt);
1427             } else {
1428                 data = opx->offset - insn_end;
1429                 out(offset, segment, &data,
1430                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1431             }
1432             offset += size;
1433             break;
1434
1435         case4(070):
1436             if (opx->segment != segment) {
1437                 data = opx->offset;
1438                 out(offset, segment, &data,
1439                     OUT_REL4ADR, insn_end - offset,
1440                     opx->segment, opx->wrt);
1441             } else {
1442                 data = opx->offset - insn_end;
1443                 out(offset, segment, &data,
1444                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1445             }
1446             offset += 4;
1447             break;
1448
1449         case4(074):
1450             if (opx->segment == NO_SEG)
1451                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1452                         " relocatable");
1453             data = 0;
1454             out(offset, segment, &data, OUT_ADDRESS, 2,
1455                 outfmt->segbase(1 + opx->segment),
1456                 opx->wrt);
1457             offset += 2;
1458             break;
1459
1460         case4(0140):
1461             data = opx->offset;
1462             warn_overflow(2, opx);
1463             if (is_sbyte16(opx)) {
1464                 bytes[0] = data;
1465                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1466                     NO_SEG);
1467                 offset++;
1468             } else {
1469                 out(offset, segment, &data, OUT_ADDRESS, 2,
1470                     opx->segment, opx->wrt);
1471                 offset += 2;
1472             }
1473             break;
1474
1475         case4(0144):
1476             EMIT_REX();
1477             bytes[0] = *codes++;
1478             if (is_sbyte16(opx))
1479                 bytes[0] |= 2;  /* s-bit */
1480             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1481             offset++;
1482             break;
1483
1484         case4(0150):
1485             data = opx->offset;
1486             warn_overflow(4, opx);
1487             if (is_sbyte32(opx)) {
1488                 bytes[0] = data;
1489                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1490                     NO_SEG);
1491                 offset++;
1492             } else {
1493                 out(offset, segment, &data, OUT_ADDRESS, 4,
1494                     opx->segment, opx->wrt);
1495                 offset += 4;
1496             }
1497             break;
1498
1499         case4(0154):
1500             EMIT_REX();
1501             bytes[0] = *codes++;
1502             if (is_sbyte32(opx))
1503                 bytes[0] |= 2;  /* s-bit */
1504             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1505             offset++;
1506             break;
1507
1508         case4(0160):
1509         case4(0164):
1510             break;
1511
1512         case 0171:
1513             bytes[0] =
1514                 (ins->drexdst << 4) |
1515                 (ins->rex & REX_OC ? 0x08 : 0) |
1516                 (ins->rex & (REX_R|REX_X|REX_B));
1517             ins->rex = 0;
1518             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1519             offset++;
1520             break;
1521
1522         case 0172:
1523             c = *codes++;
1524             opx = &ins->oprs[c >> 3];
1525             bytes[0] = nasm_regvals[opx->basereg] << 4;
1526             opx = &ins->oprs[c & 7];
1527             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1528                 errfunc(ERR_NONFATAL,
1529                         "non-absolute expression not permitted as argument %d",
1530                         c & 7);
1531             } else {
1532                 if (opx->offset & ~15) {
1533                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1534                             "four-bit argument exceeds bounds");
1535                 }
1536                 bytes[0] |= opx->offset & 15;
1537             }
1538             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1539             offset++;
1540             break;
1541
1542         case 0173:
1543             c = *codes++;
1544             opx = &ins->oprs[c >> 4];
1545             bytes[0] = nasm_regvals[opx->basereg] << 4;
1546             bytes[0] |= c & 15;
1547             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1548             offset++;
1549             break;
1550
1551         case 0174:
1552             c = *codes++;
1553             opx = &ins->oprs[c];
1554             bytes[0] = nasm_regvals[opx->basereg] << 4;
1555             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1556             offset++;
1557             break;
1558
1559         case4(0250):
1560             data = opx->offset;
1561             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1562                 (int32_t)data != (int64_t)data) {
1563                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1564                         "signed dword immediate exceeds bounds");
1565             }
1566             if (is_sbyte32(opx)) {
1567                 bytes[0] = data;
1568                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1569                     NO_SEG);
1570                 offset++;
1571             } else {
1572                 out(offset, segment, &data, OUT_ADDRESS, 4,
1573                     opx->segment, opx->wrt);
1574                 offset += 4;
1575             }
1576             break;
1577
1578         case4(0254):
1579             data = opx->offset;
1580             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1581                 (int32_t)data != (int64_t)data) {
1582                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1583                         "signed dword immediate exceeds bounds");
1584             }
1585             out(offset, segment, &data, OUT_ADDRESS, 4,
1586                 opx->segment, opx->wrt);
1587             offset += 4;
1588             break;
1589
1590         case4(0260):
1591         case 0270:
1592             codes += 2;
1593             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1594                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1595                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1596                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1597                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1598                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1599                 offset += 3;
1600             } else {
1601                 bytes[0] = 0xc5;
1602                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1603                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1604                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1605                 offset += 2;
1606             }
1607             break;
1608
1609         case4(0274):
1610         {
1611             uint64_t uv, um;
1612             int s;
1613
1614             if (ins->rex & REX_W)
1615                 s = 64;
1616             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1617                 s = 16;
1618             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1619                 s = 32;
1620             else
1621                 s = bits;
1622
1623             um = (uint64_t)2 << (s-1);
1624             uv = opx->offset;
1625
1626             if (uv > 127 && uv < (uint64_t)-128 &&
1627                 (uv < um-128 || uv > um-1)) {
1628                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1629                         "signed byte value exceeds bounds");
1630             }
1631             if (opx->segment != NO_SEG) {
1632                 data = uv;
1633                 out(offset, segment, &data, OUT_ADDRESS, 1,
1634                     opx->segment, opx->wrt);
1635             } else {
1636                 bytes[0] = uv;
1637                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1638                     NO_SEG);
1639             }
1640             offset += 1;
1641             break;
1642         }
1643
1644         case4(0300):
1645             break;
1646
1647         case 0310:
1648             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1649                 *bytes = 0x67;
1650                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1651                 offset += 1;
1652             } else
1653                 offset += 0;
1654             break;
1655
1656         case 0311:
1657             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1658                 *bytes = 0x67;
1659                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1660                 offset += 1;
1661             } else
1662                 offset += 0;
1663             break;
1664
1665         case 0312:
1666             break;
1667
1668         case 0313:
1669             ins->rex = 0;
1670             break;
1671
1672         case4(0314):
1673             break;
1674
1675         case 0320:
1676             if (bits != 16) {
1677                 *bytes = 0x66;
1678                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1679                 offset += 1;
1680             } else
1681                 offset += 0;
1682             break;
1683
1684         case 0321:
1685             if (bits == 16) {
1686                 *bytes = 0x66;
1687                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1688                 offset += 1;
1689             } else
1690                 offset += 0;
1691             break;
1692
1693         case 0322:
1694         case 0323:
1695             break;
1696
1697         case 0324:
1698             ins->rex |= REX_W;
1699             break;
1700
1701         case 0325:
1702             break;
1703
1704         case 0330:
1705             *bytes = *codes++ ^ condval[ins->condition];
1706             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1707             offset += 1;
1708             break;
1709
1710         case 0331:
1711             break;
1712
1713         case 0332:
1714         case 0333:
1715             *bytes = c - 0332 + 0xF2;
1716             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1717             offset += 1;
1718             break;
1719
1720         case 0334:
1721             if (ins->rex & REX_R) {
1722                 *bytes = 0xF0;
1723                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1724                 offset += 1;
1725             }
1726             ins->rex &= ~(REX_L|REX_R);
1727             break;
1728
1729         case 0335:
1730             break;
1731
1732         case 0336:
1733         case 0337:
1734             break;
1735
1736         case 0340:
1737             if (ins->oprs[0].segment != NO_SEG)
1738                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1739             else {
1740                 int64_t size = ins->oprs[0].offset;
1741                 if (size > 0)
1742                     out(offset, segment, NULL,
1743                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1744                 offset += size;
1745             }
1746             break;
1747
1748         case 0341:
1749             break;
1750
1751         case 0344:
1752         case 0345:
1753             bytes[0] = c & 1;
1754             switch (ins->oprs[0].basereg) {
1755             case R_CS:
1756                 bytes[0] += 0x0E;
1757                 break;
1758             case R_DS:
1759                 bytes[0] += 0x1E;
1760                 break;
1761             case R_ES:
1762                 bytes[0] += 0x06;
1763                 break;
1764             case R_SS:
1765                 bytes[0] += 0x16;
1766                 break;
1767             default:
1768                 errfunc(ERR_PANIC,
1769                         "bizarre 8086 segment register received");
1770             }
1771             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1772             offset++;
1773             break;
1774
1775         case 0346:
1776         case 0347:
1777             bytes[0] = c & 1;
1778             switch (ins->oprs[0].basereg) {
1779             case R_FS:
1780                 bytes[0] += 0xA0;
1781                 break;
1782             case R_GS:
1783                 bytes[0] += 0xA8;
1784                 break;
1785             default:
1786                 errfunc(ERR_PANIC,
1787                         "bizarre 386 segment register received");
1788             }
1789             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1790             offset++;
1791             break;
1792
1793         case 0360:
1794             break;
1795
1796         case 0361:
1797             bytes[0] = 0x66;
1798             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1799             offset += 1;
1800             break;
1801
1802         case 0362:
1803         case 0363:
1804             bytes[0] = c - 0362 + 0xf2;
1805             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1806             offset += 1;
1807             break;
1808
1809         case 0364:
1810         case 0365:
1811             break;
1812
1813         case 0366:
1814         case 0367:
1815             *bytes = c - 0366 + 0x66;
1816             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1817             offset += 1;
1818             break;
1819
1820         case 0370:
1821         case 0371:
1822         case 0372:
1823             break;
1824
1825         case 0373:
1826             *bytes = bits == 16 ? 3 : 5;
1827             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1828             offset += 1;
1829             break;
1830
1831         case4(0100):
1832         case4(0110):
1833         case4(0120):
1834         case4(0130):
1835         case4(0200):
1836         case4(0204):
1837         case4(0210):
1838         case4(0214):
1839         case4(0220):
1840         case4(0224):
1841         case4(0230):
1842         case4(0234):
1843             {
1844                 ea ea_data;
1845                 int rfield;
1846                 int32_t rflags;
1847                 uint8_t *p;
1848                 int32_t s;
1849                 enum out_type type;
1850                 struct operand *opy = &ins->oprs[op2];
1851
1852                 if (c <= 0177) {
1853                     /* pick rfield from operand b (opx) */
1854                     rflags = regflag(opx);
1855                     rfield = nasm_regvals[opx->basereg];
1856                 } else {
1857                     /* rfield is constant */
1858                     rflags = 0;
1859                     rfield = c & 7;
1860                 }
1861
1862                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1863                                 rfield, rflags)) {
1864                     errfunc(ERR_NONFATAL, "invalid effective address");
1865                 }
1866
1867
1868                 p = bytes;
1869                 *p++ = ea_data.modrm;
1870                 if (ea_data.sib_present)
1871                     *p++ = ea_data.sib;
1872
1873                 /* DREX suffixes come between the SIB and the displacement */
1874                 if (ins->rex & REX_D) {
1875                     *p++ = (ins->drexdst << 4) |
1876                            (ins->rex & REX_OC ? 0x08 : 0) |
1877                            (ins->rex & (REX_R|REX_X|REX_B));
1878                     ins->rex = 0;
1879                 }
1880
1881                 s = p - bytes;
1882                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1883
1884                 /*
1885                  * Make sure the address gets the right offset in case
1886                  * the line breaks in the .lst file (BR 1197827)
1887                  */
1888                 offset += s;
1889                 s = 0;
1890
1891                 switch (ea_data.bytes) {
1892                 case 0:
1893                     break;
1894                 case 1:
1895                 case 2:
1896                 case 4:
1897                 case 8:
1898                     data = opy->offset;
1899                     warn_overflow(ea_data.bytes, opy);
1900                     s += ea_data.bytes;
1901                     if (ea_data.rip) {
1902                         if (opy->segment == segment) {
1903                             data -= insn_end;
1904                             out(offset, segment, &data, OUT_ADDRESS,
1905                                 ea_data.bytes, NO_SEG, NO_SEG);
1906                         } else {
1907                             out(offset, segment, &data, OUT_REL4ADR,
1908                                 insn_end - offset, opy->segment, opy->wrt);
1909                         }
1910                     } else {
1911                         type = OUT_ADDRESS;
1912                         out(offset, segment, &data, OUT_ADDRESS,
1913                             ea_data.bytes, opy->segment, opy->wrt);
1914                     }
1915                     break;
1916                 default:
1917                     /* Impossible! */
1918                     errfunc(ERR_PANIC,
1919                             "Invalid amount of bytes (%d) for offset?!",
1920                             ea_data.bytes);
1921                     break;
1922                 }
1923                 offset += s;
1924             }
1925             break;
1926
1927         default:
1928             errfunc(ERR_PANIC, "internal instruction table corrupt"
1929                     ": instruction code \\%o (0x%02X) given", c, c);
1930             break;
1931         }
1932     }
1933 }
1934
1935 static int32_t regflag(const operand * o)
1936 {
1937     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1938         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1939     }
1940     return nasm_reg_flags[o->basereg];
1941 }
1942
1943 static int32_t regval(const operand * o)
1944 {
1945     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1946         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1947     }
1948     return nasm_regvals[o->basereg];
1949 }
1950
1951 static int op_rexflags(const operand * o, int mask)
1952 {
1953     int32_t flags;
1954     int val;
1955
1956     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1957         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1958     }
1959
1960     flags = nasm_reg_flags[o->basereg];
1961     val = nasm_regvals[o->basereg];
1962
1963     return rexflags(val, flags, mask);
1964 }
1965
1966 static int rexflags(int val, int32_t flags, int mask)
1967 {
1968     int rex = 0;
1969
1970     if (val >= 8)
1971         rex |= REX_B|REX_X|REX_R;
1972     if (flags & BITS64)
1973         rex |= REX_W;
1974     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1975         rex |= REX_H;
1976     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1977         rex |= REX_P;
1978
1979     return rex & mask;
1980 }
1981
1982 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1983 {
1984     int i, size[MAX_OPERANDS], asize, oprs, ret;
1985
1986     ret = 100;
1987
1988     /*
1989      * Check the opcode
1990      */
1991     if (itemp->opcode != instruction->opcode)
1992         return 0;
1993
1994     /*
1995      * Count the operands
1996      */
1997     if (itemp->operands != instruction->operands)
1998         return 0;
1999
2000     /*
2001      * Check that no spurious colons or TOs are present
2002      */
2003     for (i = 0; i < itemp->operands; i++)
2004         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2005             return 0;
2006
2007     /*
2008      * Process size flags
2009      */
2010     if (itemp->flags & IF_ARMASK) {
2011         memset(size, 0, sizeof size);
2012
2013         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2014
2015         switch (itemp->flags & IF_SMASK) {
2016         case IF_SB:
2017             size[i] = BITS8;
2018             break;
2019         case IF_SW:
2020             size[i] = BITS16;
2021             break;
2022         case IF_SD:
2023             size[i] = BITS32;
2024             break;
2025         case IF_SQ:
2026             size[i] = BITS64;
2027             break;
2028         case IF_SO:
2029             size[i] = BITS128;
2030             break;
2031         case IF_SY:
2032             size[i] = BITS256;
2033             break;
2034         case IF_SZ:
2035             switch (bits) {
2036             case 16:
2037                 size[i] = BITS16;
2038                 break;
2039             case 32:
2040                 size[i] = BITS32;
2041                 break;
2042             case 64:
2043                 size[i] = BITS64;
2044                 break;
2045             }
2046             break;
2047         default:
2048             break;
2049         }
2050     } else {
2051         asize = 0;
2052         switch (itemp->flags & IF_SMASK) {
2053         case IF_SB:
2054             asize = BITS8;
2055             break;
2056         case IF_SW:
2057             asize = BITS16;
2058             break;
2059         case IF_SD:
2060             asize = BITS32;
2061             break;
2062         case IF_SQ:
2063             asize = BITS64;
2064             break;
2065         case IF_SO:
2066             asize = BITS128;
2067             break;
2068         case IF_SY:
2069             asize = BITS256;
2070             break;
2071         case IF_SZ:
2072             switch (bits) {
2073             case 16:
2074                 asize = BITS16;
2075                 break;
2076             case 32:
2077                 asize = BITS32;
2078                 break;
2079             case 64:
2080                 asize = BITS64;
2081                 break;
2082             }
2083             break;
2084         default:
2085             break;
2086         }
2087         for (i = 0; i < MAX_OPERANDS; i++)
2088             size[i] = asize;
2089     }
2090
2091     /*
2092      * Check that the operand flags all match up
2093      */
2094     for (i = 0; i < itemp->operands; i++) {
2095         int32_t type = instruction->oprs[i].type;
2096         if (!(type & SIZE_MASK))
2097             type |= size[i];
2098
2099         if (itemp->opd[i] & SAME_AS) {
2100             int j = itemp->opd[i] & ~SAME_AS;
2101             if (type != instruction->oprs[j].type ||
2102                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2103                 return 0;
2104         } else if (itemp->opd[i] & ~type ||
2105             ((itemp->opd[i] & SIZE_MASK) &&
2106              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2107             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2108                 (type & SIZE_MASK))
2109                 return 0;
2110             else
2111                 return 1;
2112         }
2113     }
2114
2115     /*
2116      * Check operand sizes
2117      */
2118     if (itemp->flags & (IF_SM | IF_SM2)) {
2119         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2120         asize = 0;
2121         for (i = 0; i < oprs; i++) {
2122             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2123                 int j;
2124                 for (j = 0; j < oprs; j++)
2125                     size[j] = asize;
2126                 break;
2127             }
2128         }
2129     } else {
2130         oprs = itemp->operands;
2131     }
2132
2133     for (i = 0; i < itemp->operands; i++) {
2134         if (!(itemp->opd[i] & SIZE_MASK) &&
2135             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2136             return 2;
2137     }
2138
2139     /*
2140      * Check template is okay at the set cpu level
2141      */
2142     if (((itemp->flags & IF_PLEVEL) > cpu))
2143         return 3;
2144
2145     /*
2146      * Verify the appropriate long mode flag.
2147      */
2148     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2149         return 4;
2150
2151     /*
2152      * Check if special handling needed for Jumps
2153      */
2154     if ((uint8_t)(itemp->code[0]) >= 0370)
2155         return 99;
2156
2157     return ret;
2158 }
2159
2160 static ea *process_ea(operand * input, ea * output, int bits,
2161                       int addrbits, int rfield, int32_t rflags)
2162 {
2163     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2164
2165     output->rip = false;
2166
2167     /* REX flags for the rfield operand */
2168     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2169
2170     if (!(REGISTER & ~input->type)) {   /* register direct */
2171         int i;
2172         int32_t f;
2173
2174         if (input->basereg < EXPR_REG_START /* Verify as Register */
2175             || input->basereg >= REG_ENUM_LIMIT)
2176             return NULL;
2177         f = regflag(input);
2178         i = nasm_regvals[input->basereg];
2179
2180         if (REG_EA & ~f)
2181             return NULL;        /* Invalid EA register */
2182
2183         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2184
2185         output->sib_present = false;             /* no SIB necessary */
2186         output->bytes = 0;  /* no offset necessary either */
2187         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2188     } else {                    /* it's a memory reference */
2189         if (input->basereg == -1
2190             && (input->indexreg == -1 || input->scale == 0)) {
2191             /* it's a pure offset */
2192             if (bits == 64 && (~input->type & IP_REL)) {
2193               int scale, index, base;
2194               output->sib_present = true;
2195               scale = 0;
2196               index = 4;
2197               base = 5;
2198               output->sib = (scale << 6) | (index << 3) | base;
2199               output->bytes = 4;
2200               output->modrm = 4 | ((rfield & 7) << 3);
2201               output->rip = false;
2202             } else {
2203               output->sib_present = false;
2204               output->bytes = (addrbits != 16 ? 4 : 2);
2205               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2206               output->rip = bits == 64;
2207             }
2208         } else {                /* it's an indirection */
2209             int i = input->indexreg, b = input->basereg, s = input->scale;
2210             int32_t o = input->offset, seg = input->segment;
2211             int hb = input->hintbase, ht = input->hinttype;
2212             int t;
2213             int it, bt;
2214             int32_t ix, bx;     /* register flags */
2215
2216             if (s == 0)
2217                 i = -1;         /* make this easy, at least */
2218
2219             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2220                 it = nasm_regvals[i];
2221                 ix = nasm_reg_flags[i];
2222             } else {
2223                 it = -1;
2224                 ix = 0;
2225             }
2226
2227             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2228                 bt = nasm_regvals[b];
2229                 bx = nasm_reg_flags[b];
2230             } else {
2231                 bt = -1;
2232                 bx = 0;
2233             }
2234
2235             /* check for a 32/64-bit memory reference... */
2236             if ((ix|bx) & (BITS32|BITS64)) {
2237                 /* it must be a 32/64-bit memory reference. Firstly we have
2238                  * to check that all registers involved are type E/Rxx. */
2239                 int32_t sok = BITS32|BITS64;
2240
2241                 if (it != -1) {
2242                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2243                         sok &= ix;
2244                     else
2245                         return NULL;
2246                 }
2247
2248                 if (bt != -1) {
2249                     if (REG_GPR & ~bx)
2250                         return NULL; /* Invalid register */
2251                     if (~sok & bx & SIZE_MASK)
2252                         return NULL; /* Invalid size */
2253                     sok &= bx;
2254                 }
2255
2256                 /* While we're here, ensure the user didn't specify
2257                    WORD or QWORD. */
2258                 if (input->disp_size == 16 || input->disp_size == 64)
2259                     return NULL;
2260
2261                 if (addrbits == 16 ||
2262                     (addrbits == 32 && !(sok & BITS32)) ||
2263                     (addrbits == 64 && !(sok & BITS64)))
2264                     return NULL;
2265
2266                 /* now reorganize base/index */
2267                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2268                     ((hb == b && ht == EAH_NOTBASE)
2269                      || (hb == i && ht == EAH_MAKEBASE))) {
2270                     /* swap if hints say so */
2271                     t = bt, bt = it, it = t;
2272                     t = bx, bx = ix, ix = t;
2273                 }
2274                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2275                     bt = -1, bx = 0, s++;
2276                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2277                     /* make single reg base, unless hint */
2278                     bt = it, bx = ix, it = -1, ix = 0;
2279                 }
2280                 if (((s == 2 && it != REG_NUM_ESP
2281                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2282                      || s == 5 || s == 9) && bt == -1)
2283                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2284                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2285                     && (input->eaflags & EAF_TIMESTWO))
2286                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2287                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2288                 if (s == 1 && it == REG_NUM_ESP) {
2289                     /* swap ESP into base if scale is 1 */
2290                     t = it, it = bt, bt = t;
2291                     t = ix, ix = bx, bx = t;
2292                 }
2293                 if (it == REG_NUM_ESP
2294                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2295                     return NULL;        /* wrong, for various reasons */
2296
2297                 output->rex |= rexflags(it, ix, REX_X);
2298                 output->rex |= rexflags(bt, bx, REX_B);
2299
2300                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2301                     /* no SIB needed */
2302                     int mod, rm;
2303
2304                     if (bt == -1) {
2305                         rm = 5;
2306                         mod = 0;
2307                     } else {
2308                         rm = (bt & 7);
2309                         if (rm != REG_NUM_EBP && o == 0 &&
2310                                 seg == NO_SEG && !forw_ref &&
2311                                 !(input->eaflags &
2312                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2313                             mod = 0;
2314                         else if (input->eaflags & EAF_BYTEOFFS ||
2315                                  (o >= -128 && o <= 127 && seg == NO_SEG
2316                                   && !forw_ref
2317                                   && !(input->eaflags & EAF_WORDOFFS)))
2318                             mod = 1;
2319                         else
2320                             mod = 2;
2321                     }
2322
2323                     output->sib_present = false;
2324                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2325                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2326                 } else {
2327                     /* we need a SIB */
2328                     int mod, scale, index, base;
2329
2330                     if (it == -1)
2331                         index = 4, s = 1;
2332                     else
2333                         index = (it & 7);
2334
2335                     switch (s) {
2336                     case 1:
2337                         scale = 0;
2338                         break;
2339                     case 2:
2340                         scale = 1;
2341                         break;
2342                     case 4:
2343                         scale = 2;
2344                         break;
2345                     case 8:
2346                         scale = 3;
2347                         break;
2348                     default:   /* then what the smeg is it? */
2349                         return NULL;    /* panic */
2350                     }
2351
2352                     if (bt == -1) {
2353                         base = 5;
2354                         mod = 0;
2355                     } else {
2356                         base = (bt & 7);
2357                         if (base != REG_NUM_EBP && o == 0 &&
2358                                     seg == NO_SEG && !forw_ref &&
2359                                     !(input->eaflags &
2360                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2361                             mod = 0;
2362                         else if (input->eaflags & EAF_BYTEOFFS ||
2363                                  (o >= -128 && o <= 127 && seg == NO_SEG
2364                                   && !forw_ref
2365                                   && !(input->eaflags & EAF_WORDOFFS)))
2366                             mod = 1;
2367                         else
2368                             mod = 2;
2369                     }
2370
2371                     output->sib_present = true;
2372                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2373                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2374                     output->sib = (scale << 6) | (index << 3) | base;
2375                 }
2376             } else {            /* it's 16-bit */
2377                 int mod, rm;
2378
2379                 /* check for 64-bit long mode */
2380                 if (addrbits == 64)
2381                     return NULL;
2382
2383                 /* check all registers are BX, BP, SI or DI */
2384                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2385                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2386                                        && i != R_SI && i != R_DI))
2387                     return NULL;
2388
2389                 /* ensure the user didn't specify DWORD/QWORD */
2390                 if (input->disp_size == 32 || input->disp_size == 64)
2391                     return NULL;
2392
2393                 if (s != 1 && i != -1)
2394                     return NULL;        /* no can do, in 16-bit EA */
2395                 if (b == -1 && i != -1) {
2396                     int tmp = b;
2397                     b = i;
2398                     i = tmp;
2399                 }               /* swap */
2400                 if ((b == R_SI || b == R_DI) && i != -1) {
2401                     int tmp = b;
2402                     b = i;
2403                     i = tmp;
2404                 }
2405                 /* have BX/BP as base, SI/DI index */
2406                 if (b == i)
2407                     return NULL;        /* shouldn't ever happen, in theory */
2408                 if (i != -1 && b != -1 &&
2409                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2410                     return NULL;        /* invalid combinations */
2411                 if (b == -1)    /* pure offset: handled above */
2412                     return NULL;        /* so if it gets to here, panic! */
2413
2414                 rm = -1;
2415                 if (i != -1)
2416                     switch (i * 256 + b) {
2417                     case R_SI * 256 + R_BX:
2418                         rm = 0;
2419                         break;
2420                     case R_DI * 256 + R_BX:
2421                         rm = 1;
2422                         break;
2423                     case R_SI * 256 + R_BP:
2424                         rm = 2;
2425                         break;
2426                     case R_DI * 256 + R_BP:
2427                         rm = 3;
2428                         break;
2429                 } else
2430                     switch (b) {
2431                     case R_SI:
2432                         rm = 4;
2433                         break;
2434                     case R_DI:
2435                         rm = 5;
2436                         break;
2437                     case R_BP:
2438                         rm = 6;
2439                         break;
2440                     case R_BX:
2441                         rm = 7;
2442                         break;
2443                     }
2444                 if (rm == -1)   /* can't happen, in theory */
2445                     return NULL;        /* so panic if it does */
2446
2447                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2448                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2449                     mod = 0;
2450                 else if (input->eaflags & EAF_BYTEOFFS ||
2451                          (o >= -128 && o <= 127 && seg == NO_SEG
2452                           && !forw_ref
2453                           && !(input->eaflags & EAF_WORDOFFS)))
2454                     mod = 1;
2455                 else
2456                     mod = 2;
2457
2458                 output->sib_present = false;    /* no SIB - it's 16-bit */
2459                 output->bytes = mod;    /* bytes of offset needed */
2460                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2461             }
2462         }
2463     }
2464
2465     output->size = 1 + output->sib_present + output->bytes;
2466     return output;
2467 }
2468
2469 static void add_asp(insn *ins, int addrbits)
2470 {
2471     int j, valid;
2472     int defdisp;
2473
2474     valid = (addrbits == 64) ? 64|32 : 32|16;
2475
2476     switch (ins->prefixes[PPS_ASIZE]) {
2477     case P_A16:
2478         valid &= 16;
2479         break;
2480     case P_A32:
2481         valid &= 32;
2482         break;
2483     case P_A64:
2484         valid &= 64;
2485         break;
2486     case P_ASP:
2487         valid &= (addrbits == 32) ? 16 : 32;
2488         break;
2489     default:
2490         break;
2491     }
2492
2493     for (j = 0; j < ins->operands; j++) {
2494         if (!(MEMORY & ~ins->oprs[j].type)) {
2495             int32_t i, b;
2496
2497             /* Verify as Register */
2498             if (ins->oprs[j].indexreg < EXPR_REG_START
2499                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2500                 i = 0;
2501             else
2502                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2503
2504             /* Verify as Register */
2505             if (ins->oprs[j].basereg < EXPR_REG_START
2506                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2507                 b = 0;
2508             else
2509                 b = nasm_reg_flags[ins->oprs[j].basereg];
2510
2511             if (ins->oprs[j].scale == 0)
2512                 i = 0;
2513
2514             if (!i && !b) {
2515                 int ds = ins->oprs[j].disp_size;
2516                 if ((addrbits != 64 && ds > 8) ||
2517                     (addrbits == 64 && ds == 16))
2518                     valid &= ds;
2519             } else {
2520                 if (!(REG16 & ~b))
2521                     valid &= 16;
2522                 if (!(REG32 & ~b))
2523                     valid &= 32;
2524                 if (!(REG64 & ~b))
2525                     valid &= 64;
2526
2527                 if (!(REG16 & ~i))
2528                     valid &= 16;
2529                 if (!(REG32 & ~i))
2530                     valid &= 32;
2531                 if (!(REG64 & ~i))
2532                     valid &= 64;
2533             }
2534         }
2535     }
2536
2537     if (valid & addrbits) {
2538         ins->addr_size = addrbits;
2539     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2540         /* Add an address size prefix */
2541         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2542         ins->prefixes[PPS_ASIZE] = pref;
2543         ins->addr_size = (addrbits == 32) ? 16 : 32;
2544     } else {
2545         /* Impossible... */
2546         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2547         ins->addr_size = addrbits; /* Error recovery */
2548     }
2549
2550     defdisp = ins->addr_size == 16 ? 16 : 32;
2551
2552     for (j = 0; j < ins->operands; j++) {
2553         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2554             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2555             != ins->addr_size) {
2556             /* mem_offs sizes must match the address size; if not,
2557                strip the MEM_OFFS bit and match only EA instructions */
2558             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2559         }
2560     }
2561 }