assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 wwl lpp
  96  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  97  *                 [l1]  ll = 1 for L = 1 (.256)
  98  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  99  *
 100  *                 [w0]  ww = 0 for W = 0
 101  *                 [w1 ] ww = 1 for W = 1
 102  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 103  *                 [ww]  ww = 3 for W used as REX.W
 104  *
 105  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 106  *
 107  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 108  *                 which is to be extended to the operand size.
 109  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 110  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 111  * \312          - (disassembler only) invalid with non-default address size.
 112  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 113  * \314          - (disassembler only) invalid with REX.B
 114  * \315          - (disassembler only) invalid with REX.X
 115  * \316          - (disassembler only) invalid with REX.R
 116  * \317          - (disassembler only) invalid with REX.W
 117  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 118  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 119  * \322          - indicates that this instruction is only valid when the
 120  *                 operand size is the default (instruction to disassembler,
 121  *                 generates no code in the assembler)
 122  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 123  * \324          - indicates 64-bit operand size requiring REX prefix.
 124  * \325          - instruction which always uses spl/bpl/sil/dil
 125  * \330          - a literal byte follows in the code stream, to be added
 126  *                 to the condition code value of the instruction.
 127  * \331          - instruction not valid with REP prefix.  Hint for
 128  *                 disassembler only; for SSE instructions.
 129  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 130  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 131  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 132  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  133  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  134  * \337          - force a REPNE prefix (0xF2) even if not specified.
 135  *                 \336-\337 are still listed as prefixes in the disassembler.
 136  * \340          - reserve <operand 0> bytes of uninitialized storage.
 137  *                 Operand 0 had better be a segmentless constant.
 138  * \341          - this instruction needs a WAIT "prefix"
 139  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 140  *                 (POP is never used for CS) depending on operand 0
 141  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 142  *                 on operand 0
 143  * \360          - no SSE prefix (== \364\331)
 144  * \361          - 66 SSE prefix (== \366\331)
 145  * \362          - F2 SSE prefix (== \364\332)
 146  * \363          - F3 SSE prefix (== \364\333)
 147  * \364          - operand-size prefix (0x66) not permitted
 148  * \365          - address-size prefix (0x67) not permitted
 149  * \366          - operand-size prefix (0x66) used as opcode extension
 150  * \367          - address-size prefix (0x67) used as opcode extension
 151  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 152  *                 370 is used for Jcc, 371 is used for JMP.
 153  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 154  *                 used for conditional jump over longer jump
 155  */
 156
 157 #include "compiler.h"
 158
 159 #include <stdio.h>
 160 #include <string.h>
 161 #include <inttypes.h>
 162
 163 #include "nasm.h"
 164 #include "nasmlib.h"
 165 #include "assemble.h"
 166 #include "insns.h"
 167 #include "tables.h"
 168
 169 enum match_result {
 170     /*
 171      * Matching errors.  These should be sorted so that more specific
 172      * errors come later in the sequence.
 173      */
 174     MERR_INVALOP,
 175     MERR_OPSIZEMISSING,
 176     MERR_OPSIZEMISMATCH,
 177     MERR_BADCPU,
 178     MERR_BADMODE,
 179     /*
 180      * Matching success; the conditional ones first
 181      */
 182     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 183     MOK_GOOD    /* Matching unconditionally OK */
 184 };
 185
 186 typedef struct {
 187     int sib_present;                 /* is a SIB byte necessary? */
 188     int bytes;                       /* # of bytes of offset needed */
 189     int size;                        /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 191 } ea;
 192
 193 static uint32_t cpu;            /* cpu level received from nasm.c */
 194 static efunc errfunc;
 195 static struct ofmt *outfmt;
 196 static ListGen *list;
 197
 198 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 199 static void gencode(int32_t segment, int64_t offset, int bits,
 200                     insn * ins, const struct itemplate *temp,
 201                     int64_t insn_end);
 202 static enum match_result find_match(const struct itemplate **tempp,
 203                                     insn *instruction,
 204                                     int32_t segment, int64_t offset, int bits);
 205 static enum match_result matches(const struct itemplate *, insn *, int bits);
 206 static opflags_t regflag(const operand *);
 207 static int32_t regval(const operand *);
 208 static int rexflags(int, opflags_t, int);
 209 static int op_rexflags(const operand *, int);
 210 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 211 static void add_asp(insn *, int);
 212
 213 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 214 {
 215     return ins->prefixes[pos] == prefix;
 216 }
 217
 218 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 219 {
 220     if (ins->prefixes[pos])
 221         errfunc(ERR_NONFATAL, "invalid %s prefix",
 222                 prefix_name(ins->prefixes[pos]));
 223 }
 224
 225 static const char *size_name(int size)
 226 {
 227     switch (size) {
 228     case 1:
 229         return "byte";
 230     case 2:
 231         return "word";
 232     case 4:
 233         return "dword";
 234     case 8:
 235         return "qword";
 236     case 10:
 237         return "tword";
 238     case 16:
 239         return "oword";
 240     case 32:
 241         return "yword";
 242     default:
 243         return "???";
 244     }
 245 }
 246
 247 static void warn_overflow(int pass, int size)
 248 {
 249     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 250             "%s data exceeds bounds", size_name(size));
 251 }
 252
 253 static void warn_overflow_const(int64_t data, int size)
 254 {
 255     if (overflow_general(data, size))
 256         warn_overflow(ERR_PASS1, size);
 257 }
 258
 259 static void warn_overflow_opd(const struct operand *o, int size)
 260 {
 261     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 262         if (overflow_general(o->offset, size))
 263             warn_overflow(ERR_PASS2, size);
 264     }
 265 }
 266
 267 /*
 268  * This routine wrappers the real output format's output routine,
 269  * in order to pass a copy of the data off to the listing file
 270  * generator at the same time.
 271  */
 272 static void out(int64_t offset, int32_t segto, const void *data,
 273                 enum out_type type, uint64_t size,
 274                 int32_t segment, int32_t wrt)
 275 {
 276     static int32_t lineno = 0;     /* static!!! */
 277     static char *lnfname = NULL;
 278     uint8_t p[8];
 279
 280     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 281         /*
 282          * This is a non-relocated address, and we're going to
 283          * convert it into RAWDATA format.
 284          */
 285         uint8_t *q = p;
 286
 287         if (size > 8) {
 288             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 289             return;
 290         }
 291
 292         WRITEADDR(q, *(int64_t *)data, size);
 293         data = p;
 294         type = OUT_RAWDATA;
 295     }
 296
 297     list->output(offset, data, type, size);
 298
 299     /*
 300      * this call to src_get determines when we call the
 301      * debug-format-specific "linenum" function
 302      * it updates lineno and lnfname to the current values
 303      * returning 0 if "same as last time", -2 if lnfname
 304      * changed, and the amount by which lineno changed,
 305      * if it did. thus, these variables must be static
 306      */
 307
 308     if (src_get(&lineno, &lnfname))
 309         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 310
 311     outfmt->output(segto, data, type, size, segment, wrt);
 312 }
 313
 314 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 315                      insn * ins, const uint8_t *code)
 316 {
 317     int64_t isize;
 318     uint8_t c = code[0];
 319
 320     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 321         return false;
 322     if (!optimizing)
 323         return false;
 324     if (optimizing < 0 && c == 0371)
 325         return false;
 326
 327     isize = calcsize(segment, offset, bits, ins, code);
 328
 329     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 330         /* Be optimistic in pass 1 */
 331         return true;
 332
 333     if (ins->oprs[0].segment != segment)
 334         return false;
 335
 336     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 337     return (isize >= -128 && isize <= 127); /* is it byte size? */
 338 }
 339
 340 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 341                  insn * instruction, struct ofmt *output, efunc error,
 342                  ListGen * listgen)
 343 {
 344     const struct itemplate *temp;
 345     int j;
 346     enum match_result m;
 347     int64_t insn_end;
 348     int32_t itimes;
 349     int64_t start = offset;
 350     int64_t wsize;              /* size for DB etc. */
 351
 352     errfunc = error;            /* to pass to other functions */
 353     cpu = cp;
 354     outfmt = output;            /* likewise */
 355     list = listgen;             /* and again */
 356
 357     wsize = idata_bytes(instruction->opcode);
 358     if (wsize == -1)
 359         return 0;
 360
 361     if (wsize) {
 362         extop *e;
 363         int32_t t = instruction->times;
 364         if (t < 0)
 365             errfunc(ERR_PANIC,
 366                     "instruction->times < 0 (%ld) in assemble()", t);
 367
 368         while (t--) {           /* repeat TIMES times */
 369             list_for_each(e, instruction->eops) {
 370                 if (e->type == EOT_DB_NUMBER) {
 371                     if (wsize > 8) {
 372                         errfunc(ERR_NONFATAL,
 373                                 "integer supplied to a DT, DO or DY"
 374                                 " instruction");
 375                     } else {
 376                         out(offset, segment, &e->offset,
 377                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 378                         offset += wsize;
 379                     }
 380                 } else if (e->type == EOT_DB_STRING ||
 381                            e->type == EOT_DB_STRING_FREE) {
 382                     int align;
 383
 384                     out(offset, segment, e->stringval,
 385                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 386                     align = e->stringlen % wsize;
 387
 388                     if (align) {
 389                         align = wsize - align;
 390                         out(offset, segment, zero_buffer,
 391                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 392                     }
 393                     offset += e->stringlen + align;
 394                 }
 395             }
 396             if (t > 0 && t == instruction->times - 1) {
 397                 /*
 398                  * Dummy call to list->output to give the offset to the
 399                  * listing module.
 400                  */
 401                 list->output(offset, NULL, OUT_RAWDATA, 0);
 402                 list->uplevel(LIST_TIMES);
 403             }
 404         }
 405         if (instruction->times > 1)
 406             list->downlevel(LIST_TIMES);
 407         return offset - start;
 408     }
 409
 410     if (instruction->opcode == I_INCBIN) {
 411         const char *fname = instruction->eops->stringval;
 412         FILE *fp;
 413
 414         fp = fopen(fname, "rb");
 415         if (!fp) {
 416             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 417                   fname);
 418         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 419             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 420                   fname);
 421         } else {
 422             static char buf[4096];
 423             size_t t = instruction->times;
 424             size_t base = 0;
 425             size_t len;
 426
 427             len = ftell(fp);
 428             if (instruction->eops->next) {
 429                 base = instruction->eops->next->offset;
 430                 len -= base;
 431                 if (instruction->eops->next->next &&
 432                     len > (size_t)instruction->eops->next->next->offset)
 433                     len = (size_t)instruction->eops->next->next->offset;
 434             }
 435             /*
 436              * Dummy call to list->output to give the offset to the
 437              * listing module.
 438              */
 439             list->output(offset, NULL, OUT_RAWDATA, 0);
 440             list->uplevel(LIST_INCBIN);
 441             while (t--) {
 442                 size_t l;
 443
 444                 fseek(fp, base, SEEK_SET);
 445                 l = len;
 446                 while (l > 0) {
 447                     int32_t m;
 448                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 449                     if (!m) {
 450                         /*
 451                          * This shouldn't happen unless the file
 452                          * actually changes while we are reading
 453                          * it.
 454                          */
 455                         error(ERR_NONFATAL,
 456                               "`incbin': unexpected EOF while"
 457                               " reading file `%s'", fname);
 458                         t = 0;  /* Try to exit cleanly */
 459                         break;
 460                     }
 461                     out(offset, segment, buf, OUT_RAWDATA, m,
 462                         NO_SEG, NO_SEG);
 463                     l -= m;
 464                 }
 465             }
 466             list->downlevel(LIST_INCBIN);
 467             if (instruction->times > 1) {
 468                 /*
 469                  * Dummy call to list->output to give the offset to the
 470                  * listing module.
 471                  */
 472                 list->output(offset, NULL, OUT_RAWDATA, 0);
 473                 list->uplevel(LIST_TIMES);
 474                 list->downlevel(LIST_TIMES);
 475             }
 476             fclose(fp);
 477             return instruction->times * len;
 478         }
 479         return 0;               /* if we're here, there's an error */
 480     }
 481
 482     /* Check to see if we need an address-size prefix */
 483     add_asp(instruction, bits);
 484
 485     m = find_match(&temp, instruction, segment, offset, bits);
 486
 487     if (m == MOK_GOOD) {
 488         /* Matches! */
 489         int64_t insn_size = calcsize(segment, offset, bits,
 490                                      instruction, temp->code);
 491         itimes = instruction->times;
 492         if (insn_size < 0)  /* shouldn't be, on pass two */
 493             error(ERR_PANIC, "errors made it through from pass one");
 494         else
 495             while (itimes--) {
 496                 for (j = 0; j < MAXPREFIX; j++) {
 497                     uint8_t c = 0;
 498                     switch (instruction->prefixes[j]) {
 499                     case P_WAIT:
 500                         c = 0x9B;
 501                         break;
 502                     case P_LOCK:
 503                         c = 0xF0;
 504                         break;
 505                     case P_REPNE:
 506                     case P_REPNZ:
 507                         c = 0xF2;
 508                         break;
 509                     case P_REPE:
 510                     case P_REPZ:
 511                     case P_REP:
 512                         c = 0xF3;
 513                         break;
 514                     case R_CS:
 515                         if (bits == 64) {
 516                             error(ERR_WARNING | ERR_PASS2,
 517                                   "cs segment base generated, but will be ignored in 64-bit mode");
 518                         }
 519                         c = 0x2E;
 520                         break;
 521                     case R_DS:
 522                         if (bits == 64) {
 523                             error(ERR_WARNING | ERR_PASS2,
 524                                   "ds segment base generated, but will be ignored in 64-bit mode");
 525                         }
 526                         c = 0x3E;
 527                         break;
 528                     case R_ES:
 529                         if (bits == 64) {
 530                             error(ERR_WARNING | ERR_PASS2,
 531                                   "es segment base generated, but will be ignored in 64-bit mode");
 532                         }
 533                         c = 0x26;
 534                         break;
 535                     case R_FS:
 536                         c = 0x64;
 537                         break;
 538                     case R_GS:
 539                         c = 0x65;
 540                         break;
 541                     case R_SS:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "ss segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x36;
 547                         break;
 548                     case R_SEGR6:
 549                     case R_SEGR7:
 550                         error(ERR_NONFATAL,
 551                               "segr6 and segr7 cannot be used as prefixes");
 552                         break;
 553                     case P_A16:
 554                         if (bits == 64) {
 555                             error(ERR_NONFATAL,
 556                                   "16-bit addressing is not supported "
 557                                   "in 64-bit mode");
 558                         } else if (bits != 16)
 559                             c = 0x67;
 560                         break;
 561                     case P_A32:
 562                         if (bits != 32)
 563                             c = 0x67;
 564                         break;
 565                     case P_A64:
 566                         if (bits != 64) {
 567                             error(ERR_NONFATAL,
 568                                   "64-bit addressing is only supported "
 569                                   "in 64-bit mode");
 570                         }
 571                         break;
 572                     case P_ASP:
 573                         c = 0x67;
 574                         break;
 575                     case P_O16:
 576                         if (bits != 16)
 577                             c = 0x66;
 578                         break;
 579                     case P_O32:
 580                         if (bits == 16)
 581                             c = 0x66;
 582                         break;
 583                     case P_O64:
 584                         /* REX.W */
 585                         break;
 586                     case P_OSP:
 587                         c = 0x66;
 588                         break;
 589                     case P_none:
 590                         break;
 591                     default:
 592                         error(ERR_PANIC, "invalid instruction prefix");
 593                     }
 594                     if (c != 0) {
 595                         out(offset, segment, &c, OUT_RAWDATA, 1,
 596                             NO_SEG, NO_SEG);
 597                         offset++;
 598                     }
 599                 }
 600                 insn_end = offset + insn_size;
 601                 gencode(segment, offset, bits, instruction,
 602                         temp, insn_end);
 603                 offset += insn_size;
 604                 if (itimes > 0 && itimes == instruction->times - 1) {
 605                     /*
 606                      * Dummy call to list->output to give the offset to the
 607                      * listing module.
 608                      */
 609                     list->output(offset, NULL, OUT_RAWDATA, 0);
 610                     list->uplevel(LIST_TIMES);
 611                 }
 612             }
 613         if (instruction->times > 1)
 614             list->downlevel(LIST_TIMES);
 615         return offset - start;
 616     } else {
 617         /* No match */
 618         switch (m) {
 619         case MERR_OPSIZEMISSING:
 620             error(ERR_NONFATAL, "operation size not specified");
 621             break;
 622         case MERR_OPSIZEMISMATCH:
 623             error(ERR_NONFATAL, "mismatch in operand sizes");
 624             break;
 625         case MERR_BADCPU:
 626             error(ERR_NONFATAL, "no instruction for this cpu level");
 627             break;
 628         case MERR_BADMODE:
 629             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 630                   bits);
 631             break;
 632         default:
 633             error(ERR_NONFATAL,
 634                   "invalid combination of opcode and operands");
 635             break;
 636         }
 637     }
 638     return 0;
 639 }
 640
 641 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 642                   insn * instruction, efunc error)
 643 {
 644     const struct itemplate *temp;
 645     enum match_result m;
 646
 647     errfunc = error;            /* to pass to other functions */
 648     cpu = cp;
 649
 650     if (instruction->opcode == I_none)
 651         return 0;
 652
 653     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 654         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 655         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 656         instruction->opcode == I_DY) {
 657         extop *e;
 658         int32_t isize, osize, wsize;
 659
 660         isize = 0;
 661         wsize = idata_bytes(instruction->opcode);
 662
 663         list_for_each(e, instruction->eops) {
 664             int32_t align;
 665
 666             osize = 0;
 667             if (e->type == EOT_DB_NUMBER) {
 668                 osize = 1;
 669                 warn_overflow_const(e->offset, wsize);
 670             } else if (e->type == EOT_DB_STRING ||
 671                        e->type == EOT_DB_STRING_FREE)
 672                 osize = e->stringlen;
 673
 674             align = (-osize) % wsize;
 675             if (align < 0)
 676                 align += wsize;
 677             isize += osize + align;
 678         }
 679         return isize * instruction->times;
 680     }
 681
 682     if (instruction->opcode == I_INCBIN) {
 683         const char *fname = instruction->eops->stringval;
 684         FILE *fp;
 685         int64_t val = 0;
 686         size_t len;
 687
 688         fp = fopen(fname, "rb");
 689         if (!fp)
 690             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 691                   fname);
 692         else if (fseek(fp, 0L, SEEK_END) < 0)
 693             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 694                   fname);
 695         else {
 696             len = ftell(fp);
 697             if (instruction->eops->next) {
 698                 len -= instruction->eops->next->offset;
 699                 if (instruction->eops->next->next &&
 700                     len > (size_t)instruction->eops->next->next->offset) {
 701                     len = (size_t)instruction->eops->next->next->offset;
 702                 }
 703             }
 704             val = instruction->times * len;
 705         }
 706         if (fp)
 707             fclose(fp);
 708         return val;
 709     }
 710
 711     /* Check to see if we need an address-size prefix */
 712     add_asp(instruction, bits);
 713
 714     m = find_match(&temp, instruction, segment, offset, bits);
 715     if (m == MOK_GOOD) {
 716         /* we've matched an instruction. */
 717         int64_t isize;
 718         const uint8_t *codes = temp->code;
 719         int j;
 720
 721         isize = calcsize(segment, offset, bits, instruction, codes);
 722         if (isize < 0)
 723             return -1;
 724         for (j = 0; j < MAXPREFIX; j++) {
 725             switch (instruction->prefixes[j]) {
 726             case P_A16:
 727                 if (bits != 16)
 728                     isize++;
 729                 break;
 730             case P_A32:
 731                 if (bits != 32)
 732                     isize++;
 733                 break;
 734             case P_O16:
 735                 if (bits != 16)
 736                     isize++;
 737                 break;
 738             case P_O32:
 739                 if (bits == 16)
 740                     isize++;
 741                 break;
 742             case P_A64:
 743             case P_O64:
 744             case P_none:
 745                 break;
 746             default:
 747                 isize++;
 748                 break;
 749             }
 750         }
 751         return isize * instruction->times;
 752     } else {
 753         return -1;                  /* didn't match any instruction */
 754     }
 755 }
 756
 757 static bool possible_sbyte(operand *o)
 758 {
 759     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 760         !(o->opflags & OPFLAG_UNKNOWN) &&
 761         optimizing >= 0 && !(o->type & STRICT);
 762 }
 763
 764 /* check that opn[op]  is a signed byte of size 16 or 32 */
 765 static bool is_sbyte16(operand *o)
 766 {
 767     int16_t v;
 768
 769     if (!possible_sbyte(o))
 770         return false;
 771
 772     v = o->offset;
 773     return v >= -128 && v <= 127;
 774 }
 775
 776 static bool is_sbyte32(operand *o)
 777 {
 778     int32_t v;
 779
 780     if (!possible_sbyte(o))
 781         return false;
 782
 783     v = o->offset;
 784     return v >= -128 && v <= 127;
 785 }
 786
 787 /* Common construct */
 788 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 789
 790 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 791                         insn * ins, const uint8_t *codes)
 792 {
         /*
          * Walk the zero-terminated template byte string `codes' and add up
          * the number of bytes the instruction `ins' will occupy when
          * assembled for a `bits'-bit target.
          *
          * Besides computing the size, this records encoding state in `ins':
          * rex, drexdst, vex_cm, vex_wlp and default entries in prefixes[].
          * `segment' and `offset' are accepted but not used.
          *
          * Returns the byte count, or -1 when the instruction cannot be
          * encoded.  Some failure paths report an error through errfunc()
          * first; others return -1 silently.
          */
 793     int64_t length = 0;
 794     uint8_t c;
         /* Bits kept in ins->rex once the loop is done; code 0323 clears REX_W */
 795     int rex_mask = ~0;
 796     int op1, op2;
 797     struct operand *opx;
         /* Operand-index extension: set by codes 05..07, used by the next code */
 798     uint8_t opex = 0;
 799
 800     ins->rex = 0;               /* Ensure REX is reset */
 801
         /* An explicit O64 operand-size prefix requests REX.W */
 802     if (ins->prefixes[PPS_OSIZE] == P_O64)
 803         ins->rex |= REX_W;
 804
 805     (void)segment;              /* Don't warn that this parameter is unused */
 806     (void)offset;               /* Don't warn that this parameter is unused */
 807
 808     while (*codes) {
 809         c = *codes++;
             /*
              * The two low bits of the code select operand `op1'; bits 3-4
              * select `op2'.  A preceding 05..07 code (opex) adds 4 to
              * either index.
              */
 810         op1 = (c & 3) + ((opex & 1) << 2);
 811         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 812         opx = &ins->oprs[op1];
 813         opex = 0;               /* For the next iteration */
 814
 815         switch (c) {
             /* `c' literal bytes follow in the template: skip and count them */
 816         case 01:
 817         case 02:
 818         case 03:
 819         case 04:
 820             codes += c, length += c;
 821             break;
 822
             /* Operand-index extension for the next code byte */
 823         case 05:
 824         case 06:
 825         case 07:
 826             opex = c;
 827             break;
 828
             /* One template byte follows; the operand may contribute REX flags */
 829         case4(010):
 830             ins->rex |=
 831                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 832             codes++, length++;
 833             break;
 834
             /* One byte each */
 835         case4(014):
 836         case4(020):
 837         case4(024):
 838             length++;
 839             break;
 840
 841         case4(030):
 842             length += 2;
 843             break;
 844
             /*
              * 2 or 4 bytes: taken from the operand's size flags when any
              * are set, otherwise from `bits'.
              */
 845         case4(034):
 846             if (opx->type & (BITS16 | BITS32 | BITS64))
 847                 length += (opx->type & BITS16) ? 2 : 4;
 848             else
 849                 length += (bits == 16) ? 2 : 4;
 850             break;
 851
 852         case4(040):
 853             length += 4;
 854             break;
 855
             /* Width follows the instruction's address size (bits -> bytes) */
 856         case4(044):
 857             length += ins->addr_size >> 3;
 858             break;
 859
 860         case4(050):
 861             length++;
 862             break;
 863
 864         case4(054):
 865             length += 8; /* MOV reg64/imm */
 866             break;
 867
 868         case4(060):
 869             length += 2;
 870             break;
 871
             /* Same 2-or-4 rule as code 034 */
 872         case4(064):
 873             if (opx->type & (BITS16 | BITS32 | BITS64))
 874                 length += (opx->type & BITS16) ? 2 : 4;
 875             else
 876                 length += (bits == 16) ? 2 : 4;
 877             break;
 878
 879         case4(070):
 880             length += 4;
 881             break;
 882
 883         case4(074):
 884             length += 2;
 885             break;
 886
             /* One byte if the value qualifies as a signed byte, else two */
 887         case4(0140):
 888             length += is_sbyte16(opx) ? 1 : 2;
 889             break;
 890
             /* One template byte follows and is counted */
 891         case4(0144):
 892             codes++;
 893             length++;
 894             break;
 895
             /* One byte if the value qualifies as a signed byte, else four */
 896         case4(0150):
 897             length += is_sbyte32(opx) ? 1 : 4;
 898             break;
 899
             /* One template byte follows and is counted */
 900         case4(0154):
 901             codes++;
 902             length++;
 903             break;
 904
             /* Counts one byte, flags REX_D and records the operand's register */
 905         case4(0160):
 906             length++;
 907             ins->rex |= REX_D;
 908             ins->drexdst = regval(opx);
 909             break;
 910
             /* As 0160, additionally setting REX_OC */
 911         case4(0164):
 912             length++;
 913             ins->rex |= REX_D|REX_OC;
 914             ins->drexdst = regval(opx);
 915             break;
 916
 917         case 0171:
 918             break;
 919
             /* One template byte follows; one output byte is counted */
 920         case 0172:
 921         case 0173:
 922         case 0174:
 923             codes++;
 924             length++;
 925             break;
 926
 927         case4(0250):
 928             length += is_sbyte32(opx) ? 1 : 4;
 929             break;
 930
 931         case4(0254):
 932             length += 4;
 933             break;
 934
             /*
              * Flags REX_V, records the operand's register and the two
              * template bytes that follow.  Nothing is added to `length'
              * here; the prefix size is added after the loop.
              */
 935         case4(0260):
 936             ins->rex |= REX_V;
 937             ins->drexdst = regval(opx);
 938             ins->vex_cm = *codes++;
 939             ins->vex_wlp = *codes++;
 940             break;
 941
             /* As 0260, but with no register operand (drexdst = 0) */
 942         case 0270:
 943             ins->rex |= REX_V;
 944             ins->drexdst = 0;
 945             ins->vex_cm = *codes++;
 946             ins->vex_wlp = *codes++;
 947             break;
 948
 949         case4(0274):
 950             length++;
 951             break;
 952
 953         case4(0300):
 954             break;
 955
             /*
              * Rejected in 64-bit mode.  Otherwise costs one byte unless
              * bits == 16 or an A16 prefix is already present.
              */
 956         case 0310:
 957             if (bits == 64)
 958                 return -1;
 959             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 960             break;
 961
             /* Costs one byte unless bits == 32 or an A32 prefix is present */
 962         case 0311:
 963             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 964             break;
 965
 966         case 0312:
 967             break;
 968
             /* Only accepted in 64-bit mode without an A16 or A32 prefix */
 969         case 0313:
 970             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 971                 has_prefix(ins, PPS_ASIZE, P_A32))
 972                 return -1;
 973             break;
 974
 975         case4(0314):
 976             break;
 977
             /* One byte when not assembling for 16 bits */
 978         case 0320:
 979             length += (bits != 16);
 980             break;
 981
             /* One byte when assembling for 16 bits */
 982         case 0321:
 983             length += (bits == 16);
 984             break;
 985
 986         case 0322:
 987             break;
 988
             /* REX.W will be masked off after the loop */
 989         case 0323:
 990             rex_mask &= ~REX_W;
 991             break;
 992
 993         case 0324:
 994             ins->rex |= REX_W;
 995             break;
 996
             /* Checked after the loop: REX_NH together with REX_H is an error */
 997         case 0325:
 998             ins->rex |= REX_NH;
 999             break;
1000
             /* One template byte follows and is counted */
1001         case 0330:
1002             codes++, length++;
1003             break;
1004
1005         case 0331:
1006             break;
1007
1008         case 0332:
1009         case 0333:
1010             length++;
1011             break;
1012
1013         case 0334:
1014             ins->rex |= REX_L;
1015             break;
1016
1017         case 0335:
1018             break;
1019
             /* Supply a default REP prefix if the slot is empty */
1020         case 0336:
1021             if (!ins->prefixes[PPS_LREP])
1022                 ins->prefixes[PPS_LREP] = P_REP;
1023             break;
1024
             /* Supply a default REPNE prefix if the slot is empty */
1025         case 0337:
1026             if (!ins->prefixes[PPS_LREP])
1027                 ins->prefixes[PPS_LREP] = P_REPNE;
1028             break;
1029
             /* Space reservation: operand 0 must be a constant byte count */
1030         case 0340:
1031             if (ins->oprs[0].segment != NO_SEG)
1032                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1033                         " quantity of BSS space");
1034             else
1035                 length += ins->oprs[0].offset;
1036             break;
1037
             /* Supply a default WAIT prefix if the slot is empty */
1038         case 0341:
1039             if (!ins->prefixes[PPS_WAIT])
1040                 ins->prefixes[PPS_WAIT] = P_WAIT;
1041             break;
1042
1043         case4(0344):
1044             length++;
1045             break;
1046
1047         case 0360:
1048             break;
1049
1050         case 0361:
1051         case 0362:
1052         case 0363:
1053             length++;
1054             break;
1055
1056         case 0364:
1057         case 0365:
1058             break;
1059
1060         case 0366:
1061         case 0367:
1062             length++;
1063             break;
1064
1065         case 0370:
1066         case 0371:
1067         case 0372:
1068             break;
1069
1070         case 0373:
1071             length++;
1072             break;
1073
             /*
              * Codes that carry an effective address.  The address operand
              * is `op2'; process_ea() reports its size and REX needs.
              */
1074         case4(0100):
1075         case4(0110):
1076         case4(0120):
1077         case4(0130):
1078         case4(0200):
1079         case4(0204):
1080         case4(0210):
1081         case4(0214):
1082         case4(0220):
1083         case4(0224):
1084         case4(0230):
1085         case4(0234):
1086             {
1087                 ea ea_data;
1088                 int rfield;
1089                 opflags_t rflags;
1090                 struct operand *opy = &ins->oprs[op2];
1091
1092                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1093
1094                 if (c <= 0177) {
1095                     /* pick rfield from operand b (opx) */
1096                     rflags = regflag(opx);
1097                     rfield = nasm_regvals[opx->basereg];
1098                 } else {
                         /* the code itself carries the field in its low 3 bits */
1099                     rflags = 0;
1100                     rfield = c & 7;
1101                 }
1102                 if (!process_ea(opy, &ea_data, bits,
1103                                 ins->addr_size, rfield, rflags)) {
1104                     errfunc(ERR_NONFATAL, "invalid effective address");
1105                     return -1;
1106                 } else {
1107                     ins->rex |= ea_data.rex;
1108                     length += ea_data.size;
1109                 }
1110             }
1111             break;
1112
1113         default:
1114             errfunc(ERR_PANIC, "internal instruction table corrupt"
1115                     ": instruction code \\%o (0x%02X) given", c, c);
1116             break;
1117         }
1118     }
1119
         /* Drop any REX bits that the template asked to suppress (code 0323) */
1120     ins->rex &= rex_mask;
1121
1122     if (ins->rex & REX_NH) {
1123         if (ins->rex & REX_H) {
1124             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1125             return -1;
1126         }
1127         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1128     }
1129
         /*
          * Account for the prefix selected by the accumulated flags.  The
          * three branches are mutually exclusive: REX_V first, then REX_D,
          * then a plain REX.
          */
1130     if (ins->rex & REX_V) {
             /* REX bits that may not be set outside 64-bit mode */
1131         int bad32 = REX_R|REX_W|REX_X|REX_B;
1132
1133         if (ins->rex & REX_H) {
1134             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1135             return -1;
1136         }
             /* Bits 4-5 of vex_wlp decide how REX_W is treated */
1137         switch (ins->vex_wlp & 060) {
1138         case 000:
1139         case 040:
1140             ins->rex &= ~REX_W;
1141             break;
1142         case 020:
1143             ins->rex |= REX_W;
1144             bad32 &= ~REX_W;
1145             break;
1146         case 060:
1147             /* Follow REX_W */
1148             break;
1149         }
1150
1151         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1152             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1153             return -1;
1154         }
             /* Three prefix bytes unless vex_cm == 1 and none of W/X/B is set */
1155         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1156             length += 3;
1157         else
1158             length += 2;
1159     } else if (ins->rex & REX_D) {
1160         if (ins->rex & REX_H) {
1161             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1162             return -1;
1163         }
1164         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1165                            ins->drexdst > 7)) {
1166             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1167             return -1;
1168         }
1169         length++;
1170     } else if (ins->rex & REX_REAL) {
1171         if (ins->rex & REX_H) {
1172             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1173             return -1;
1174         } else if (bits == 64) {
                 /* One byte for the REX prefix itself */
1175             length++;
1176         } else if ((ins->rex & REX_L) &&
1177                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1178                    cpu >= IF_X86_64) {
1179             /* LOCK-as-REX.R */
1180             assert_no_prefix(ins, PPS_LREP);
1181             length++;
1182         } else {
1183             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1184             return -1;
1185         }
1186     }
1187
1188     return length;
1189 }
1190
/*
 * Emit a pending REX prefix byte, if there is one.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and `segment'.
 * A byte is written only when neither REX_D nor REX_V is set in ins->rex,
 * at least one REX_REAL bit is set, and bits == 64.  The byte written is
 * the REX_REAL bits of ins->rex combined with REX_P.  Afterwards ins->rex
 * is cleared, so a later expansion for the same instruction emits nothing,
 * and `offset' is advanced by one.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();' is exactly
 * one statement and stays correct as the body of an if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                                     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1198
1199 static void gencode(int32_t segment, int64_t offset, int bits,
1200                     insn * ins, const struct itemplate *temp,
1201                     int64_t insn_end)
1202 {
1203     static char condval[] = {   /* conditional opcodes */
1204         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1205         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1206         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1207     };
1208     uint8_t c;
1209     uint8_t bytes[4];
1210     int64_t size;
1211     int64_t data;
1212     int op1, op2;
1213     struct operand *opx;
1214     const uint8_t *codes = temp->code;
1215     uint8_t opex = 0;
1216
1217     while (*codes) {
1218         c = *codes++;
1219         op1 = (c & 3) + ((opex & 1) << 2);
1220         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1221         opx = &ins->oprs[op1];
1222         opex = 0;                /* For the next iteration */
1223
1224         switch (c) {
1225         case 01:
1226         case 02:
1227         case 03:
1228         case 04:
1229             EMIT_REX();
1230             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1231             codes += c;
1232             offset += c;
1233             break;
1234
1235         case 05:
1236         case 06:
1237         case 07:
1238             opex = c;
1239             break;
1240
1241         case4(010):
1242             EMIT_REX();
1243             bytes[0] = *codes++ + (regval(opx) & 7);
1244             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1245             offset += 1;
1246             break;
1247
1248         case4(014):
1249             /*
1250              * The test for BITS8 and SBYTE here is intended to avoid
1251              * warning on optimizer actions due to SBYTE, while still
1252              * warn on explicit BYTE directives.  Also warn, obviously,
1253              * if the optimizer isn't enabled.
1254              */
1255             if (((opx->type & BITS8) ||
1256                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1257                 (opx->offset < -128 || opx->offset > 127)) {
1258                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1259                         "signed byte value exceeds bounds");
1260             }
1261             if (opx->segment != NO_SEG) {
1262                 data = opx->offset;
1263                 out(offset, segment, &data, OUT_ADDRESS, 1,
1264                     opx->segment, opx->wrt);
1265             } else {
1266                 bytes[0] = opx->offset;
1267                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1268                     NO_SEG);
1269             }
1270             offset += 1;
1271             break;
1272
1273         case4(020):
1274             if (opx->offset < -256 || opx->offset > 255) {
1275                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1276                         "byte value exceeds bounds");
1277             }
1278             if (opx->segment != NO_SEG) {
1279                 data = opx->offset;
1280                 out(offset, segment, &data, OUT_ADDRESS, 1,
1281                     opx->segment, opx->wrt);
1282             } else {
1283                 bytes[0] = opx->offset;
1284                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1285                     NO_SEG);
1286             }
1287             offset += 1;
1288             break;
1289
1290         case4(024):
1291             if (opx->offset < 0 || opx->offset > 255)
1292                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1293                         "unsigned byte value exceeds bounds");
1294             if (opx->segment != NO_SEG) {
1295                 data = opx->offset;
1296                 out(offset, segment, &data, OUT_ADDRESS, 1,
1297                     opx->segment, opx->wrt);
1298             } else {
1299                 bytes[0] = opx->offset;
1300                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1301                     NO_SEG);
1302             }
1303             offset += 1;
1304             break;
1305
1306         case4(030):
1307             warn_overflow_opd(opx, 2);
1308             data = opx->offset;
1309             out(offset, segment, &data, OUT_ADDRESS, 2,
1310                 opx->segment, opx->wrt);
1311             offset += 2;
1312             break;
1313
1314         case4(034):
1315             if (opx->type & (BITS16 | BITS32))
1316                 size = (opx->type & BITS16) ? 2 : 4;
1317             else
1318                 size = (bits == 16) ? 2 : 4;
1319             warn_overflow_opd(opx, size);
1320             data = opx->offset;
1321             out(offset, segment, &data, OUT_ADDRESS, size,
1322                 opx->segment, opx->wrt);
1323             offset += size;
1324             break;
1325
1326         case4(040):
1327             warn_overflow_opd(opx, 4);
1328             data = opx->offset;
1329             out(offset, segment, &data, OUT_ADDRESS, 4,
1330                 opx->segment, opx->wrt);
1331             offset += 4;
1332             break;
1333
1334         case4(044):
1335             data = opx->offset;
1336             size = ins->addr_size >> 3;
1337             warn_overflow_opd(opx, size);
1338             out(offset, segment, &data, OUT_ADDRESS, size,
1339                 opx->segment, opx->wrt);
1340             offset += size;
1341             break;
1342
1343         case4(050):
1344             if (opx->segment != segment) {
1345                 data = opx->offset;
1346                 out(offset, segment, &data,
1347                     OUT_REL1ADR, insn_end - offset,
1348                     opx->segment, opx->wrt);
1349             } else {
1350                 data = opx->offset - insn_end;
1351                 if (data > 127 || data < -128)
1352                     errfunc(ERR_NONFATAL, "short jump is out of range");
1353                 out(offset, segment, &data,
1354                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1355             }
1356             offset += 1;
1357             break;
1358
1359         case4(054):
1360             data = (int64_t)opx->offset;
1361             out(offset, segment, &data, OUT_ADDRESS, 8,
1362                 opx->segment, opx->wrt);
1363             offset += 8;
1364             break;
1365
1366         case4(060):
1367             if (opx->segment != segment) {
1368                 data = opx->offset;
1369                 out(offset, segment, &data,
1370                     OUT_REL2ADR, insn_end - offset,
1371                     opx->segment, opx->wrt);
1372             } else {
1373                 data = opx->offset - insn_end;
1374                 out(offset, segment, &data,
1375                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1376             }
1377             offset += 2;
1378             break;
1379
1380         case4(064):
1381             if (opx->type & (BITS16 | BITS32 | BITS64))
1382                 size = (opx->type & BITS16) ? 2 : 4;
1383             else
1384                 size = (bits == 16) ? 2 : 4;
1385             if (opx->segment != segment) {
1386                 data = opx->offset;
1387                 out(offset, segment, &data,
1388                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1389                     insn_end - offset, opx->segment, opx->wrt);
1390             } else {
1391                 data = opx->offset - insn_end;
1392                 out(offset, segment, &data,
1393                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1394             }
1395             offset += size;
1396             break;
1397
1398         case4(070):
1399             if (opx->segment != segment) {
1400                 data = opx->offset;
1401                 out(offset, segment, &data,
1402                     OUT_REL4ADR, insn_end - offset,
1403                     opx->segment, opx->wrt);
1404             } else {
1405                 data = opx->offset - insn_end;
1406                 out(offset, segment, &data,
1407                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1408             }
1409             offset += 4;
1410             break;
1411
1412         case4(074):
1413             if (opx->segment == NO_SEG)
1414                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1415                         " relocatable");
1416             data = 0;
1417             out(offset, segment, &data, OUT_ADDRESS, 2,
1418                 outfmt->segbase(1 + opx->segment),
1419                 opx->wrt);
1420             offset += 2;
1421             break;
1422
1423         case4(0140):
1424             data = opx->offset;
1425             warn_overflow_opd(opx, 2);
1426             if (is_sbyte16(opx)) {
1427                 bytes[0] = data;
1428                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1429                     NO_SEG);
1430                 offset++;
1431             } else {
1432                 out(offset, segment, &data, OUT_ADDRESS, 2,
1433                     opx->segment, opx->wrt);
1434                 offset += 2;
1435             }
1436             break;
1437
1438         case4(0144):
1439             EMIT_REX();
1440             bytes[0] = *codes++;
1441             if (is_sbyte16(opx))
1442                 bytes[0] |= 2;  /* s-bit */
1443             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1444             offset++;
1445             break;
1446
1447         case4(0150):
1448             data = opx->offset;
1449             warn_overflow_opd(opx, 4);
1450             if (is_sbyte32(opx)) {
1451                 bytes[0] = data;
1452                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1453                     NO_SEG);
1454                 offset++;
1455             } else {
1456                 out(offset, segment, &data, OUT_ADDRESS, 4,
1457                     opx->segment, opx->wrt);
1458                 offset += 4;
1459             }
1460             break;
1461
1462         case4(0154):
1463             EMIT_REX();
1464             bytes[0] = *codes++;
1465             if (is_sbyte32(opx))
1466                 bytes[0] |= 2;  /* s-bit */
1467             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1468             offset++;
1469             break;
1470
1471         case4(0160):
1472         case4(0164):
1473             break;
1474
1475         case 0171:
1476             bytes[0] =
1477                 (ins->drexdst << 4) |
1478                 (ins->rex & REX_OC ? 0x08 : 0) |
1479                 (ins->rex & (REX_R|REX_X|REX_B));
1480             ins->rex = 0;
1481             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1482             offset++;
1483             break;
1484
1485         case 0172:
1486             c = *codes++;
1487             opx = &ins->oprs[c >> 3];
1488             bytes[0] = nasm_regvals[opx->basereg] << 4;
1489             opx = &ins->oprs[c & 7];
1490             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1491                 errfunc(ERR_NONFATAL,
1492                         "non-absolute expression not permitted as argument %d",
1493                         c & 7);
1494             } else {
1495                 if (opx->offset & ~15) {
1496                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1497                             "four-bit argument exceeds bounds");
1498                 }
1499                 bytes[0] |= opx->offset & 15;
1500             }
1501             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1502             offset++;
1503             break;
1504
1505         case 0173:
1506             c = *codes++;
1507             opx = &ins->oprs[c >> 4];
1508             bytes[0] = nasm_regvals[opx->basereg] << 4;
1509             bytes[0] |= c & 15;
1510             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1511             offset++;
1512             break;
1513
1514         case 0174:
1515             c = *codes++;
1516             opx = &ins->oprs[c];
1517             bytes[0] = nasm_regvals[opx->basereg] << 4;
1518             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1519             offset++;
1520             break;
1521
1522         case4(0250):
1523             data = opx->offset;
1524             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1525                 (int32_t)data != (int64_t)data) {
1526                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1527                         "signed dword immediate exceeds bounds");
1528             }
1529             if (is_sbyte32(opx)) {
1530                 bytes[0] = data;
1531                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1532                     NO_SEG);
1533                 offset++;
1534             } else {
1535                 out(offset, segment, &data, OUT_ADDRESS, 4,
1536                     opx->segment, opx->wrt);
1537                 offset += 4;
1538             }
1539             break;
1540
1541         case4(0254):
1542             data = opx->offset;
1543             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1544                 (int32_t)data != (int64_t)data) {
1545                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1546                         "signed dword immediate exceeds bounds");
1547             }
1548             out(offset, segment, &data, OUT_ADDRESS, 4,
1549                 opx->segment, opx->wrt);
1550             offset += 4;
1551             break;
1552
1553         case4(0260):
1554         case 0270:
1555             codes += 2;
1556             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1557                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1558                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1559                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1560                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1561                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1562                 offset += 3;
1563             } else {
1564                 bytes[0] = 0xc5;
1565                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1566                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1567                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1568                 offset += 2;
1569             }
1570             break;
1571
1572         case4(0274):
1573         {
1574             uint64_t uv, um;
1575             int s;
1576
1577             if (ins->rex & REX_W)
1578                 s = 64;
1579             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1580                 s = 16;
1581             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1582                 s = 32;
1583             else
1584                 s = bits;
1585
1586             um = (uint64_t)2 << (s-1);
1587             uv = opx->offset;
1588
1589             if (uv > 127 && uv < (uint64_t)-128 &&
1590                 (uv < um-128 || uv > um-1)) {
1591                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1592                         "signed byte value exceeds bounds");
1593             }
1594             if (opx->segment != NO_SEG) {
1595                 data = uv;
1596                 out(offset, segment, &data, OUT_ADDRESS, 1,
1597                     opx->segment, opx->wrt);
1598             } else {
1599                 bytes[0] = uv;
1600                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1601                     NO_SEG);
1602             }
1603             offset += 1;
1604             break;
1605         }
1606
1607         case4(0300):
1608             break;
1609
1610         case 0310:
1611             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1612                 *bytes = 0x67;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0311:
1620             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1621                 *bytes = 0x67;
1622                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1623                 offset += 1;
1624             } else
1625                 offset += 0;
1626             break;
1627
1628         case 0312:
1629             break;
1630
1631         case 0313:
1632             ins->rex = 0;
1633             break;
1634
1635         case4(0314):
1636             break;
1637
1638         case 0320:
1639         {
1640             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1641             if (pfx != P_O16 && pfx != P_none)
1642                 nasm_error(ERR_WARNING, "Invalid operand size prefix");
1643             if (pfx != P_O16 && bits != 16) {
1644                 ins->prefixes[PPS_OSIZE] = P_O16;
1645                 *bytes = 0x66;
1646                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1647                 offset += 1;
1648             }
1649             break;
1650         }
1651
1652         case 0321:
1653         {
1654             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1655             if (pfx != P_O32 && pfx != P_none)
1656                 nasm_error(ERR_WARNING, "Invalid operand size prefix");
1657             if (pfx != P_O32 && bits == 16) {
1658                 ins->prefixes[PPS_OSIZE] = P_O32;
1659                 *bytes = 0x66;
1660                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1661                 offset += 1;
1662             }
1663             break;
1664         }
1665
1666         case 0322:
1667         case 0323:
1668             break;
1669
1670         case 0324:
1671             ins->rex |= REX_W;
1672             break;
1673
1674         case 0325:
1675             break;
1676
1677         case 0330:
1678             *bytes = *codes++ ^ condval[ins->condition];
1679             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1680             offset += 1;
1681             break;
1682
1683         case 0331:
1684             break;
1685
1686         case 0332:
1687         case 0333:
1688             *bytes = c - 0332 + 0xF2;
1689             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1690             offset += 1;
1691             break;
1692
1693         case 0334:
1694             if (ins->rex & REX_R) {
1695                 *bytes = 0xF0;
1696                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1697                 offset += 1;
1698             }
1699             ins->rex &= ~(REX_L|REX_R);
1700             break;
1701
1702         case 0335:
1703             break;
1704
1705         case 0336:
1706         case 0337:
1707             break;
1708
1709         case 0340:
1710             if (ins->oprs[0].segment != NO_SEG)
1711                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1712             else {
1713                 int64_t size = ins->oprs[0].offset;
1714                 if (size > 0)
1715                     out(offset, segment, NULL,
1716                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1717                 offset += size;
1718             }
1719             break;
1720
1721         case 0341:
1722             break;
1723
1724         case 0344:
1725         case 0345:
1726             bytes[0] = c & 1;
1727             switch (ins->oprs[0].basereg) {
1728             case R_CS:
1729                 bytes[0] += 0x0E;
1730                 break;
1731             case R_DS:
1732                 bytes[0] += 0x1E;
1733                 break;
1734             case R_ES:
1735                 bytes[0] += 0x06;
1736                 break;
1737             case R_SS:
1738                 bytes[0] += 0x16;
1739                 break;
1740             default:
1741                 errfunc(ERR_PANIC,
1742                         "bizarre 8086 segment register received");
1743             }
1744             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1745             offset++;
1746             break;
1747
1748         case 0346:
1749         case 0347:
1750             bytes[0] = c & 1;
1751             switch (ins->oprs[0].basereg) {
1752             case R_FS:
1753                 bytes[0] += 0xA0;
1754                 break;
1755             case R_GS:
1756                 bytes[0] += 0xA8;
1757                 break;
1758             default:
1759                 errfunc(ERR_PANIC,
1760                         "bizarre 386 segment register received");
1761             }
1762             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1763             offset++;
1764             break;
1765
1766         case 0360:
1767             break;
1768
1769         case 0361:
1770             bytes[0] = 0x66;
1771             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1772             offset += 1;
1773             break;
1774
1775         case 0362:
1776         case 0363:
1777             bytes[0] = c - 0362 + 0xf2;
1778             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1779             offset += 1;
1780             break;
1781
1782         case 0364:
1783         case 0365:
1784             break;
1785
1786         case 0366:
1787         case 0367:
1788             *bytes = c - 0366 + 0x66;
1789             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1790             offset += 1;
1791             break;
1792
1793         case 0370:
1794         case 0371:
1795         case 0372:
1796             break;
1797
1798         case 0373:
1799             *bytes = bits == 16 ? 3 : 5;
1800             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1801             offset += 1;
1802             break;
1803
1804         case4(0100):
1805         case4(0110):
1806         case4(0120):
1807         case4(0130):
1808         case4(0200):
1809         case4(0204):
1810         case4(0210):
1811         case4(0214):
1812         case4(0220):
1813         case4(0224):
1814         case4(0230):
1815         case4(0234):
1816             {
1817                 ea ea_data;
1818                 int rfield;
1819                 opflags_t rflags;
1820                 uint8_t *p;
1821                 int32_t s;
1822                 enum out_type type;
1823                 struct operand *opy = &ins->oprs[op2];
1824
1825                 if (c <= 0177) {
1826                     /* pick rfield from operand b (opx) */
1827                     rflags = regflag(opx);
1828                     rfield = nasm_regvals[opx->basereg];
1829                 } else {
1830                     /* rfield is constant */
1831                     rflags = 0;
1832                     rfield = c & 7;
1833                 }
1834
1835                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1836                                 rfield, rflags)) {
1837                     errfunc(ERR_NONFATAL, "invalid effective address");
1838                 }
1839
1840
1841                 p = bytes;
1842                 *p++ = ea_data.modrm;
1843                 if (ea_data.sib_present)
1844                     *p++ = ea_data.sib;
1845
1846                 /* DREX suffixes come between the SIB and the displacement */
1847                 if (ins->rex & REX_D) {
1848                     *p++ = (ins->drexdst << 4) |
1849                            (ins->rex & REX_OC ? 0x08 : 0) |
1850                            (ins->rex & (REX_R|REX_X|REX_B));
1851                     ins->rex = 0;
1852                 }
1853
1854                 s = p - bytes;
1855                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1856
1857                 /*
1858                  * Make sure the address gets the right offset in case
1859                  * the line breaks in the .lst file (BR 1197827)
1860                  */
1861                 offset += s;
1862                 s = 0;
1863
1864                 switch (ea_data.bytes) {
1865                 case 0:
1866                     break;
1867                 case 1:
1868                 case 2:
1869                 case 4:
1870                 case 8:
1871                     data = opy->offset;
1872                     s += ea_data.bytes;
1873                     if (ea_data.rip) {
1874                         if (opy->segment == segment) {
1875                             data -= insn_end;
1876                             if (overflow_signed(data, ea_data.bytes))
1877                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1878                             out(offset, segment, &data, OUT_ADDRESS,
1879                                 ea_data.bytes, NO_SEG, NO_SEG);
1880                         } else {
1881                             /* overflow check in output/linker? */
1882                             out(offset, segment, &data,        OUT_REL4ADR,
1883                                 insn_end - offset, opy->segment, opy->wrt);
1884                         }
1885                     } else {
1886                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1887                             signed_bits(opy->offset, ins->addr_size) !=
1888                             signed_bits(opy->offset, ea_data.bytes * 8))
1889                             warn_overflow(ERR_PASS2, ea_data.bytes);
1890
1891                         type = OUT_ADDRESS;
1892                         out(offset, segment, &data, OUT_ADDRESS,
1893                             ea_data.bytes, opy->segment, opy->wrt);
1894                     }
1895                     break;
1896                 default:
1897                     /* Impossible! */
1898                     errfunc(ERR_PANIC,
1899                             "Invalid amount of bytes (%d) for offset?!",
1900                             ea_data.bytes);
1901                     break;
1902                 }
1903                 offset += s;
1904             }
1905             break;
1906
1907         default:
1908             errfunc(ERR_PANIC, "internal instruction table corrupt"
1909                     ": instruction code \\%o (0x%02X) given", c, c);
1910             break;
1911         }
1912     }
1913 }
1914
1915 static opflags_t regflag(const operand * o)
1916 {
1917     if (!is_register(o->basereg))
1918         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1919     return nasm_reg_flags[o->basereg];
1920 }
1921
1922 static int32_t regval(const operand * o)
1923 {
1924     if (!is_register(o->basereg))
1925         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1926     return nasm_regvals[o->basereg];
1927 }
1928
1929 static int op_rexflags(const operand * o, int mask)
1930 {
1931     opflags_t flags;
1932     int val;
1933
1934     if (!is_register(o->basereg))
1935         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1936
1937     flags = nasm_reg_flags[o->basereg];
1938     val = nasm_regvals[o->basereg];
1939
1940     return rexflags(val, flags, mask);
1941 }
1942
1943 static int rexflags(int val, opflags_t flags, int mask)
1944 {
1945     int rex = 0;
1946
1947     if (val >= 8)
1948         rex |= REX_B|REX_X|REX_R;
1949     if (flags & BITS64)
1950         rex |= REX_W;
1951     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1952         rex |= REX_H;
1953     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1954         rex |= REX_P;
1955
1956     return rex & mask;
1957 }
1958
1959 static enum match_result find_match(const struct itemplate **tempp,
1960                                     insn *instruction,
1961                                     int32_t segment, int64_t offset, int bits)
1962 {
1963     const struct itemplate *temp;
1964     enum match_result m, merr;
1965     opflags_t xsizeflags[MAX_OPERANDS];
1966     bool opsizemissing = false;
1967     int i;
1968
1969     for (i = 0; i < instruction->operands; i++)
1970         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1971
1972     merr = MERR_INVALOP;
1973
1974     for (temp = nasm_instructions[instruction->opcode];
1975          temp->opcode != I_none; temp++) {
1976         m = matches(temp, instruction, bits);
1977         if (m == MOK_JUMP) {
1978             if (jmp_match(segment, offset, bits, instruction, temp->code))
1979                 m = MOK_GOOD;
1980             else
1981                 m = MERR_INVALOP;
1982         } else if (m == MERR_OPSIZEMISSING &&
1983                    (temp->flags & IF_SMASK) != IF_SX) {
1984             /*
1985              * Missing operand size and a candidate for fuzzy matching...
1986              */
1987             for (i = 0; i < temp->operands; i++) {
1988                 if ((temp->opd[i] & SAME_AS) == 0)
1989                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1990             }
1991             opsizemissing = true;
1992         }
1993         if (m > merr)
1994             merr = m;
1995         if (merr == MOK_GOOD)
1996             goto done;
1997     }
1998
1999     /* No match, but see if we can get a fuzzy operand size match... */
2000     if (!opsizemissing)
2001         goto done;
2002
2003     for (i = 0; i < instruction->operands; i++) {
2004         /*
2005          * We ignore extrinsic operand sizes on registers, so we should
2006          * never try to fuzzy-match on them.  This also resolves the case
2007          * when we have e.g. "xmmrm128" in two different positions.
2008          */
2009         if (is_class(REGISTER, instruction->oprs[i].type))
2010             continue;
2011
2012         /* This tests if xsizeflags[i] has more than one bit set */
2013         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2014             goto done;                /* No luck */
2015
2016         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2017     }
2018
2019     /* Try matching again... */
2020     for (temp = nasm_instructions[instruction->opcode];
2021          temp->opcode != I_none; temp++) {
2022         m = matches(temp, instruction, bits);
2023         if (m == MOK_JUMP) {
2024             if (jmp_match(segment, offset, bits, instruction, temp->code))
2025                 m = MOK_GOOD;
2026             else
2027                 m = MERR_INVALOP;
2028         }
2029         if (m > merr)
2030             merr = m;
2031         if (merr == MOK_GOOD)
2032             goto done;
2033     }
2034
2035 done:
2036     *tempp = temp;
2037     return merr;
2038 }
2039
2040 static enum match_result matches(const struct itemplate *itemp,
2041                                  insn *instruction, int bits)
2042 {
2043     int i, size[MAX_OPERANDS], asize, oprs;
2044     bool opsizemissing = false;
2045
2046     /*
2047      * Check the opcode
2048      */
2049     if (itemp->opcode != instruction->opcode)
2050         return MERR_INVALOP;
2051
2052     /*
2053      * Count the operands
2054      */
2055     if (itemp->operands != instruction->operands)
2056         return MERR_INVALOP;
2057
2058     /*
2059      * Is it legal?
2060      */
2061     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2062         return MERR_INVALOP;
2063
2064     /*
2065      * Check that no spurious colons or TOs are present
2066      */
2067     for (i = 0; i < itemp->operands; i++)
2068         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2069             return MERR_INVALOP;
2070
2071     /*
2072      * Process size flags
2073      */
2074     switch (itemp->flags & IF_SMASK) {
2075     case IF_SB:
2076         asize = BITS8;
2077         break;
2078     case IF_SW:
2079         asize = BITS16;
2080         break;
2081     case IF_SD:
2082         asize = BITS32;
2083         break;
2084     case IF_SQ:
2085         asize = BITS64;
2086         break;
2087     case IF_SO:
2088         asize = BITS128;
2089         break;
2090     case IF_SY:
2091         asize = BITS256;
2092         break;
2093     case IF_SZ:
2094         switch (bits) {
2095         case 16:
2096             asize = BITS16;
2097             break;
2098         case 32:
2099             asize = BITS32;
2100             break;
2101         case 64:
2102             asize = BITS64;
2103             break;
2104         default:
2105             asize = 0;
2106             break;
2107         }
2108         break;
2109     default:
2110         asize = 0;
2111         break;
2112     }
2113
2114     if (itemp->flags & IF_ARMASK) {
2115         /* S- flags only apply to a specific operand */
2116         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2117         memset(size, 0, sizeof size);
2118         size[i] = asize;
2119     } else {
2120         /* S- flags apply to all operands */
2121         for (i = 0; i < MAX_OPERANDS; i++)
2122             size[i] = asize;
2123     }
2124
2125     /*
2126      * Check that the operand flags all match up,
2127      * it's a bit tricky so lets be verbose:
2128      *
2129      * 1) Find out the size of operand. If instruction
2130      *    doesn't have one specified -- we're trying to
2131      *    guess it either from template (IF_S* flag) or
2132      *    from code bits.
2133      *
2134      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2135      *    (ie the same operand as was specified somewhere in template, and
2136      *    this referred operand index is being achieved via ~SAME_AS)
2137      *    we are to be sure that both registers (in template and instruction)
2138      *    do exactly match.
2139      *
2140      * 3) If template operand do not match the instruction OR
2141      *    template has an operand size specified AND this size differ
2142      *    from which instruction has (perhaps we got it from code bits)
2143      *    we are:
2144      *      a)  Check that only size of instruction and operand is differ
2145      *          other characteristics do match
2146      *      b)  Perhaps it's a register specified in instruction so
2147      *          for such a case we just mark that operand as "size
2148      *          missing" and this will turn on fuzzy operand size
2149      *          logic facility (handled by a caller)
2150      */
2151     for (i = 0; i < itemp->operands; i++) {
2152         opflags_t type = instruction->oprs[i].type;
2153         if (!(type & SIZE_MASK))
2154             type |= size[i];
2155
2156         if (itemp->opd[i] & SAME_AS) {
2157             int j = itemp->opd[i] & ~SAME_AS;
2158             if (type != instruction->oprs[j].type ||
2159                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2160                 return MERR_INVALOP;
2161         } else if (itemp->opd[i] & ~type ||
2162             ((itemp->opd[i] & SIZE_MASK) &&
2163              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2164             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2165                 return MERR_INVALOP;
2166             } else if (!is_class(REGISTER, type)) {
2167                 /*
2168                  * Note: we don't honor extrinsic operand sizes for registers,
2169                  * so "missing operand size" for a register should be
2170                  * considered a wildcard match rather than an error.
2171                  */
2172                 opsizemissing = true;
2173             }
2174         }
2175     }
2176
2177     if (opsizemissing)
2178         return MERR_OPSIZEMISSING;
2179
2180     /*
2181      * Check operand sizes
2182      */
2183     if (itemp->flags & (IF_SM | IF_SM2)) {
2184         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2185         for (i = 0; i < oprs; i++) {
2186             asize = itemp->opd[i] & SIZE_MASK;
2187             if (asize) {
2188                 for (i = 0; i < oprs; i++)
2189                     size[i] = asize;
2190                 break;
2191             }
2192         }
2193     } else {
2194         oprs = itemp->operands;
2195     }
2196
2197     for (i = 0; i < itemp->operands; i++) {
2198         if (!(itemp->opd[i] & SIZE_MASK) &&
2199             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2200             return MERR_OPSIZEMISMATCH;
2201     }
2202
2203     /*
2204      * Check template is okay at the set cpu level
2205      */
2206     if (((itemp->flags & IF_PLEVEL) > cpu))
2207         return MERR_BADCPU;
2208
2209     /*
2210      * Verify the appropriate long mode flag.
2211      */
2212     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2213         return MERR_BADMODE;
2214
2215     /*
2216      * Check if special handling needed for Jumps
2217      */
2218     if ((itemp->code[0] & 0374) == 0370)
2219         return MOK_JUMP;
2220
2221     return MOK_GOOD;
2222 }
2223
2224 static ea *process_ea(operand * input, ea * output, int bits,
2225                       int addrbits, int rfield, opflags_t rflags)
2226 {
2227     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2228
2229     output->rip = false;
2230
2231     /* REX flags for the rfield operand */
2232     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2233
2234     if (is_class(REGISTER, input->type)) {  /* register direct */
2235         int i;
2236         opflags_t f;
2237
2238         if (!is_register(input->basereg))
2239             return NULL;
2240         f = regflag(input);
2241         i = nasm_regvals[input->basereg];
2242
2243         if (REG_EA & ~f)
2244             return NULL;        /* Invalid EA register */
2245
2246         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2247
2248         output->sib_present = false;    /* no SIB necessary */
2249         output->bytes = 0;              /* no offset necessary either */
2250         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2251     } else {                    /* it's a memory reference */
2252         if (input->basereg == -1 &&
2253             (input->indexreg == -1 || input->scale == 0)) {
2254             /* it's a pure offset */
2255
2256             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2257                 input->segment == NO_SEG) {
2258                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2259                 input->type &= ~IP_REL;
2260                 input->type |= MEMORY;
2261             }
2262
2263             if (input->eaflags & EAF_BYTEOFFS ||
2264                 (input->eaflags & EAF_WORDOFFS &&
2265                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2266                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2267             }
2268
2269             if (bits == 64 && (~input->type & IP_REL)) {
2270                 int scale, index, base;
2271                 output->sib_present = true;
2272                 scale = 0;
2273                 index = 4;
2274                 base = 5;
2275                 output->sib = (scale << 6) | (index << 3) | base;
2276                 output->bytes = 4;
2277                 output->modrm = 4 | ((rfield & 7) << 3);
2278                 output->rip = false;
2279             } else {
2280                 output->sib_present = false;
2281                 output->bytes = (addrbits != 16 ? 4 : 2);
2282                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2283                 output->rip = bits == 64;
2284             }
2285         } else {                /* it's an indirection */
2286             int i = input->indexreg, b = input->basereg, s = input->scale;
2287             int32_t seg = input->segment;
2288             int hb = input->hintbase, ht = input->hinttype;
2289             int t, it, bt;              /* register numbers */
2290             opflags_t x, ix, bx;        /* register flags */
2291
2292             if (s == 0)
2293                 i = -1;         /* make this easy, at least */
2294
2295             if (is_register(i)) {
2296                 it = nasm_regvals[i];
2297                 ix = nasm_reg_flags[i];
2298             } else {
2299                 it = -1;
2300                 ix = 0;
2301             }
2302
2303             if (is_register(b)) {
2304                 bt = nasm_regvals[b];
2305                 bx = nasm_reg_flags[b];
2306             } else {
2307                 bt = -1;
2308                 bx = 0;
2309             }
2310
2311             /* check for a 32/64-bit memory reference... */
2312             if ((ix|bx) & (BITS32|BITS64)) {
2313                 /*
2314                  * it must be a 32/64-bit memory reference. Firstly we have
2315                  * to check that all registers involved are type E/Rxx.
2316                  */
2317                 int32_t sok = BITS32 | BITS64, o = input->offset;
2318
2319                 if (it != -1) {
2320                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2321                         sok &= ix;
2322                     else
2323                         return NULL;
2324                 }
2325
2326                 if (bt != -1) {
2327                     if (REG_GPR & ~bx)
2328                         return NULL; /* Invalid register */
2329                     if (~sok & bx & SIZE_MASK)
2330                         return NULL; /* Invalid size */
2331                     sok &= bx;
2332                 }
2333
2334                 /*
2335                  * While we're here, ensure the user didn't specify
2336                  * WORD or QWORD
2337                  */
2338                 if (input->disp_size == 16 || input->disp_size == 64)
2339                     return NULL;
2340
2341                 if (addrbits == 16 ||
2342                     (addrbits == 32 && !(sok & BITS32)) ||
2343                     (addrbits == 64 && !(sok & BITS64)))
2344                     return NULL;
2345
2346                 /* now reorganize base/index */
2347                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2348                     ((hb == b && ht == EAH_NOTBASE) ||
2349                      (hb == i && ht == EAH_MAKEBASE))) {
2350                     /* swap if hints say so */
2351                     t = bt, bt = it, it = t;
2352                     x = bx, bx = ix, ix = x;
2353                 }
2354                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2355                     bt = -1, bx = 0, s++;
2356                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2357                     /* make single reg base, unless hint */
2358                     bt = it, bx = ix, it = -1, ix = 0;
2359                 }
2360                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2361                       s == 3 || s == 5 || s == 9) && bt == -1)
2362                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2363                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2364                     (input->eaflags & EAF_TIMESTWO))
2365                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2366                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2367                 if (s == 1 && it == REG_NUM_ESP) {
2368                     /* swap ESP into base if scale is 1 */
2369                     t = it, it = bt, bt = t;
2370                     x = ix, ix = bx, bx = x;
2371                 }
2372                 if (it == REG_NUM_ESP ||
2373                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2374                     return NULL;        /* wrong, for various reasons */
2375
2376                 output->rex |= rexflags(it, ix, REX_X);
2377                 output->rex |= rexflags(bt, bx, REX_B);
2378
2379                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2380                     /* no SIB needed */
2381                     int mod, rm;
2382
2383                     if (bt == -1) {
2384                         rm = 5;
2385                         mod = 0;
2386                     } else {
2387                         rm = (bt & 7);
2388                         if (rm != REG_NUM_EBP && o == 0 &&
2389                             seg == NO_SEG && !forw_ref &&
2390                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2391                             mod = 0;
2392                         else if (input->eaflags & EAF_BYTEOFFS ||
2393                                  (o >= -128 && o <= 127 &&
2394                                   seg == NO_SEG && !forw_ref &&
2395                                   !(input->eaflags & EAF_WORDOFFS)))
2396                             mod = 1;
2397                         else
2398                             mod = 2;
2399                     }
2400
2401                     output->sib_present = false;
2402                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2403                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2404                 } else {
2405                     /* we need a SIB */
2406                     int mod, scale, index, base;
2407
2408                     if (it == -1)
2409                         index = 4, s = 1;
2410                     else
2411                         index = (it & 7);
2412
2413                     switch (s) {
2414                     case 1:
2415                         scale = 0;
2416                         break;
2417                     case 2:
2418                         scale = 1;
2419                         break;
2420                     case 4:
2421                         scale = 2;
2422                         break;
2423                     case 8:
2424                         scale = 3;
2425                         break;
2426                     default:   /* then what the smeg is it? */
2427                         return NULL;    /* panic */
2428                     }
2429
2430                     if (bt == -1) {
2431                         base = 5;
2432                         mod = 0;
2433                     } else {
2434                         base = (bt & 7);
2435                         if (base != REG_NUM_EBP && o == 0 &&
2436                             seg == NO_SEG && !forw_ref &&
2437                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2438                             mod = 0;
2439                         else if (input->eaflags & EAF_BYTEOFFS ||
2440                                  (o >= -128 && o <= 127 &&
2441                                   seg == NO_SEG && !forw_ref &&
2442                                   !(input->eaflags & EAF_WORDOFFS)))
2443                             mod = 1;
2444                         else
2445                             mod = 2;
2446                     }
2447
2448                     output->sib_present = true;
2449                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2450                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2451                     output->sib = (scale << 6) | (index << 3) | base;
2452                 }
2453             } else {            /* it's 16-bit */
2454                 int mod, rm;
2455                 int16_t o = input->offset;
2456
2457                 /* check for 64-bit long mode */
2458                 if (addrbits == 64)
2459                     return NULL;
2460
2461                 /* check all registers are BX, BP, SI or DI */
2462                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2463                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2464                     return NULL;
2465
2466                 /* ensure the user didn't specify DWORD/QWORD */
2467                 if (input->disp_size == 32 || input->disp_size == 64)
2468                     return NULL;
2469
2470                 if (s != 1 && i != -1)
2471                     return NULL;        /* no can do, in 16-bit EA */
2472                 if (b == -1 && i != -1) {
2473                     int tmp = b;
2474                     b = i;
2475                     i = tmp;
2476                 }               /* swap */
2477                 if ((b == R_SI || b == R_DI) && i != -1) {
2478                     int tmp = b;
2479                     b = i;
2480                     i = tmp;
2481                 }
2482                 /* have BX/BP as base, SI/DI index */
2483                 if (b == i)
2484                     return NULL;        /* shouldn't ever happen, in theory */
2485                 if (i != -1 && b != -1 &&
2486                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2487                     return NULL;        /* invalid combinations */
2488                 if (b == -1)            /* pure offset: handled above */
2489                     return NULL;        /* so if it gets to here, panic! */
2490
2491                 rm = -1;
2492                 if (i != -1)
2493                     switch (i * 256 + b) {
2494                     case R_SI * 256 + R_BX:
2495                         rm = 0;
2496                         break;
2497                     case R_DI * 256 + R_BX:
2498                         rm = 1;
2499                         break;
2500                     case R_SI * 256 + R_BP:
2501                         rm = 2;
2502                         break;
2503                     case R_DI * 256 + R_BP:
2504                         rm = 3;
2505                         break;
2506                 } else
2507                     switch (b) {
2508                     case R_SI:
2509                         rm = 4;
2510                         break;
2511                     case R_DI:
2512                         rm = 5;
2513                         break;
2514                     case R_BP:
2515                         rm = 6;
2516                         break;
2517                     case R_BX:
2518                         rm = 7;
2519                         break;
2520                     }
2521                 if (rm == -1)           /* can't happen, in theory */
2522                     return NULL;        /* so panic if it does */
2523
2524                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2525                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2526                     mod = 0;
2527                 else if (input->eaflags & EAF_BYTEOFFS ||
2528                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2529                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2530                     mod = 1;
2531                 else
2532                     mod = 2;
2533
2534                 output->sib_present = false;    /* no SIB - it's 16-bit */
2535                 output->bytes = mod;            /* bytes of offset needed */
2536                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2537             }
2538         }
2539     }
2540
2541     output->size = 1 + output->sib_present + output->bytes;
2542     return output;
2543 }
2544
2545 static void add_asp(insn *ins, int addrbits)
2546 {
2547     int j, valid;
2548     int defdisp;
2549
2550     valid = (addrbits == 64) ? 64|32 : 32|16;
2551
2552     switch (ins->prefixes[PPS_ASIZE]) {
2553     case P_A16:
2554         valid &= 16;
2555         break;
2556     case P_A32:
2557         valid &= 32;
2558         break;
2559     case P_A64:
2560         valid &= 64;
2561         break;
2562     case P_ASP:
2563         valid &= (addrbits == 32) ? 16 : 32;
2564         break;
2565     default:
2566         break;
2567     }
2568
2569     for (j = 0; j < ins->operands; j++) {
2570         if (is_class(MEMORY, ins->oprs[j].type)) {
2571             opflags_t i, b;
2572
2573             /* Verify as Register */
2574             if (!is_register(ins->oprs[j].indexreg))
2575                 i = 0;
2576             else
2577                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2578
2579             /* Verify as Register */
2580             if (!is_register(ins->oprs[j].basereg))
2581                 b = 0;
2582             else
2583                 b = nasm_reg_flags[ins->oprs[j].basereg];
2584
2585             if (ins->oprs[j].scale == 0)
2586                 i = 0;
2587
2588             if (!i && !b) {
2589                 int ds = ins->oprs[j].disp_size;
2590                 if ((addrbits != 64 && ds > 8) ||
2591                     (addrbits == 64 && ds == 16))
2592                     valid &= ds;
2593             } else {
2594                 if (!(REG16 & ~b))
2595                     valid &= 16;
2596                 if (!(REG32 & ~b))
2597                     valid &= 32;
2598                 if (!(REG64 & ~b))
2599                     valid &= 64;
2600
2601                 if (!(REG16 & ~i))
2602                     valid &= 16;
2603                 if (!(REG32 & ~i))
2604                     valid &= 32;
2605                 if (!(REG64 & ~i))
2606                     valid &= 64;
2607             }
2608         }
2609     }
2610
2611     if (valid & addrbits) {
2612         ins->addr_size = addrbits;
2613     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2614         /* Add an address size prefix */
2615         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2616         ins->prefixes[PPS_ASIZE] = pref;
2617         ins->addr_size = (addrbits == 32) ? 16 : 32;
2618     } else {
2619         /* Impossible... */
2620         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2621         ins->addr_size = addrbits; /* Error recovery */
2622     }
2623
2624     defdisp = ins->addr_size == 16 ? 16 : 32;
2625
2626     for (j = 0; j < ins->operands; j++) {
2627         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2628             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2629             /*
2630              * mem_offs sizes must match the address size; if not,
2631              * strip the MEM_OFFS bit and match only EA instructions
2632              */
2633             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2634         }
2635     }
2636 }