assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 wwl lpp
  96  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  97  *                 [l1]  ll = 1 for L = 1 (.256)
  98  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  99  *
 100  *                 [w0]  ww = 0 for W = 0
 101  *                 [w1 ] ww = 1 for W = 1
 102  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 103  *                 [ww]  ww = 3 for W used as REX.W
 104  *
 105  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 106  *
 107  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 108  *                 which is to be extended to the operand size.
 109  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 110  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 111  * \312          - (disassembler only) invalid with non-default address size.
 112  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 113  * \314          - (disassembler only) invalid with REX.B
 114  * \315          - (disassembler only) invalid with REX.X
 115  * \316          - (disassembler only) invalid with REX.R
 116  * \317          - (disassembler only) invalid with REX.W
 117  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 118  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 119  * \322          - indicates that this instruction is only valid when the
 120  *                 operand size is the default (instruction to disassembler,
 121  *                 generates no code in the assembler)
 122  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 123  * \324          - indicates 64-bit operand size requiring REX prefix.
 124  * \325          - instruction which always uses spl/bpl/sil/dil
 125  * \330          - a literal byte follows in the code stream, to be added
 126  *                 to the condition code value of the instruction.
 127  * \331          - instruction not valid with REP prefix.  Hint for
 128  *                 disassembler only; for SSE instructions.
 129  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 130  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 131  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 132  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  133  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  134  * \337          - force a REPNE prefix (0xF2) even if not specified.
 135  *                 \336-\337 are still listed as prefixes in the disassembler.
 136  * \340          - reserve <operand 0> bytes of uninitialized storage.
 137  *                 Operand 0 had better be a segmentless constant.
 138  * \341          - this instruction needs a WAIT "prefix"
 139  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 140  *                 (POP is never used for CS) depending on operand 0
 141  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 142  *                 on operand 0
 143  * \360          - no SSE prefix (== \364\331)
 144  * \361          - 66 SSE prefix (== \366\331)
 145  * \362          - F2 SSE prefix (== \364\332)
 146  * \363          - F3 SSE prefix (== \364\333)
 147  * \364          - operand-size prefix (0x66) not permitted
 148  * \365          - address-size prefix (0x67) not permitted
 149  * \366          - operand-size prefix (0x66) used as opcode extension
 150  * \367          - address-size prefix (0x67) used as opcode extension
 151  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 152  *                 370 is used for Jcc, 371 is used for JMP.
 153  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 154  *                 used for conditional jump over longer jump
 155  */
 156
 157 #include "compiler.h"
 158
 159 #include <stdio.h>
 160 #include <string.h>
 161 #include <inttypes.h>
 162
 163 #include "nasm.h"
 164 #include "nasmlib.h"
 165 #include "assemble.h"
 166 #include "insns.h"
 167 #include "tables.h"
 168
/*
 * Outcome of matching an instruction against the instruction templates.
 *
 * The declaration order is significant: every MERR_* (failure) value
 * sorts below every MOK_* (success) value, and within the failures the
 * more specific ones are declared later.
 */
enum match_result {
    /*
     * Matching errors.  These should be sorted so that more specific
     * errors come later in the sequence.
     */
    MERR_INVALOP,           /* no dedicated message in assemble(); reported as
                               "invalid combination of opcode and operands" */
    MERR_OPSIZEMISSING,     /* reported as "operation size not specified" */
    MERR_OPSIZEMISMATCH,    /* reported as "mismatch in operand sizes" */
    MERR_BADCPU,            /* reported as "no instruction for this cpu level" */
    MERR_BADMODE,           /* reported as "instruction not supported in
                               %d-bit mode" */
    /*
     * Matching success; the conditional ones first
     */
    MOK_JUMP,   /* Matching OK but needs jmp_match() */
    MOK_GOOD    /* Matching unconditionally OK */
};
 185
 186 typedef struct {
 187     int sib_present;                 /* is a SIB byte necessary? */
 188     int bytes;                       /* # of bytes of offset needed */
 189     int size;                        /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 191 } ea;
 192
 193 static uint32_t cpu;            /* cpu level received from nasm.c */
 194 static efunc errfunc;
 195 static struct ofmt *outfmt;
 196 static ListGen *list;
 197
 198 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 199 static void gencode(int32_t segment, int64_t offset, int bits,
 200                     insn * ins, const struct itemplate *temp,
 201                     int64_t insn_end);
 202 static enum match_result find_match(const struct itemplate **tempp,
 203                                     insn *instruction,
 204                                     int32_t segment, int64_t offset, int bits);
 205 static enum match_result matches(const struct itemplate *, insn *, int bits);
 206 static opflags_t regflag(const operand *);
 207 static int32_t regval(const operand *);
 208 static int rexflags(int, opflags_t, int);
 209 static int op_rexflags(const operand *, int);
 210 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 211 static void add_asp(insn *, int);
 212
 213 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 214 {
 215     return ins->prefixes[pos] == prefix;
 216 }
 217
 218 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 219 {
 220     if (ins->prefixes[pos])
 221         errfunc(ERR_NONFATAL, "invalid %s prefix",
 222                 prefix_name(ins->prefixes[pos]));
 223 }
 224
 225 static const char *size_name(int size)
 226 {
 227     switch (size) {
 228     case 1:
 229         return "byte";
 230     case 2:
 231         return "word";
 232     case 4:
 233         return "dword";
 234     case 8:
 235         return "qword";
 236     case 10:
 237         return "tword";
 238     case 16:
 239         return "oword";
 240     case 32:
 241         return "yword";
 242     default:
 243         return "???";
 244     }
 245 }
 246
 247 static void warn_overflow(int pass, int size)
 248 {
 249     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 250             "%s data exceeds bounds", size_name(size));
 251 }
 252
 253 static void warn_overflow_const(int64_t data, int size)
 254 {
 255     if (overflow_general(data, size))
 256         warn_overflow(ERR_PASS1, size);
 257 }
 258
 259 static void warn_overflow_opd(const struct operand *o, int size)
 260 {
 261     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 262         if (overflow_general(o->offset, size))
 263             warn_overflow(ERR_PASS2, size);
 264     }
 265 }
 266
 267 /*
 268  * This routine wrappers the real output format's output routine,
 269  * in order to pass a copy of the data off to the listing file
 270  * generator at the same time.
 271  */
 272 static void out(int64_t offset, int32_t segto, const void *data,
 273                 enum out_type type, uint64_t size,
 274                 int32_t segment, int32_t wrt)
 275 {
 276     static int32_t lineno = 0;     /* static!!! */
 277     static char *lnfname = NULL;
 278     uint8_t p[8];
 279
 280     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 281         /*
 282          * This is a non-relocated address, and we're going to
 283          * convert it into RAWDATA format.
 284          */
 285         uint8_t *q = p;
 286
 287         if (size > 8) {
 288             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 289             return;
 290         }
 291
 292         WRITEADDR(q, *(int64_t *)data, size);
 293         data = p;
 294         type = OUT_RAWDATA;
 295     }
 296
 297     list->output(offset, data, type, size);
 298
 299     /*
 300      * this call to src_get determines when we call the
 301      * debug-format-specific "linenum" function
 302      * it updates lineno and lnfname to the current values
 303      * returning 0 if "same as last time", -2 if lnfname
 304      * changed, and the amount by which lineno changed,
 305      * if it did. thus, these variables must be static
 306      */
 307
 308     if (src_get(&lineno, &lnfname))
 309         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 310
 311     outfmt->output(segto, data, type, size, segment, wrt);
 312 }
 313
 314 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 315                      insn * ins, const uint8_t *code)
 316 {
 317     int64_t isize;
 318     uint8_t c = code[0];
 319
 320     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 321         return false;
 322     if (!optimizing)
 323         return false;
 324     if (optimizing < 0 && c == 0371)
 325         return false;
 326
 327     isize = calcsize(segment, offset, bits, ins, code);
 328
 329     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 330         /* Be optimistic in pass 1 */
 331         return true;
 332
 333     if (ins->oprs[0].segment != segment)
 334         return false;
 335
 336     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 337     return (isize >= -128 && isize <= 127); /* is it byte size? */
 338 }
 339
 340 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 341                  insn * instruction, struct ofmt *output, efunc error,
 342                  ListGen * listgen)
 343 {
 344     const struct itemplate *temp;
 345     int j;
 346     enum match_result m;
 347     int64_t insn_end;
 348     int32_t itimes;
 349     int64_t start = offset;
 350     int64_t wsize;              /* size for DB etc. */
 351
 352     errfunc = error;            /* to pass to other functions */
 353     cpu = cp;
 354     outfmt = output;            /* likewise */
 355     list = listgen;             /* and again */
 356
 357     wsize = idata_bytes(instruction->opcode);
 358     if (wsize == -1)
 359         return 0;
 360
 361     if (wsize) {
 362         extop *e;
 363         int32_t t = instruction->times;
 364         if (t < 0)
 365             errfunc(ERR_PANIC,
 366                     "instruction->times < 0 (%ld) in assemble()", t);
 367
 368         while (t--) {           /* repeat TIMES times */
 369             list_for_each(e, instruction->eops) {
 370                 if (e->type == EOT_DB_NUMBER) {
 371                     if (wsize > 8) {
 372                         errfunc(ERR_NONFATAL,
 373                                 "integer supplied to a DT, DO or DY"
 374                                 " instruction");
 375                     } else {
 376                         out(offset, segment, &e->offset,
 377                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 378                         offset += wsize;
 379                     }
 380                 } else if (e->type == EOT_DB_STRING ||
 381                            e->type == EOT_DB_STRING_FREE) {
 382                     int align;
 383
 384                     out(offset, segment, e->stringval,
 385                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 386                     align = e->stringlen % wsize;
 387
 388                     if (align) {
 389                         align = wsize - align;
 390                         out(offset, segment, zero_buffer,
 391                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 392                     }
 393                     offset += e->stringlen + align;
 394                 }
 395             }
 396             if (t > 0 && t == instruction->times - 1) {
 397                 /*
 398                  * Dummy call to list->output to give the offset to the
 399                  * listing module.
 400                  */
 401                 list->output(offset, NULL, OUT_RAWDATA, 0);
 402                 list->uplevel(LIST_TIMES);
 403             }
 404         }
 405         if (instruction->times > 1)
 406             list->downlevel(LIST_TIMES);
 407         return offset - start;
 408     }
 409
 410     if (instruction->opcode == I_INCBIN) {
 411         const char *fname = instruction->eops->stringval;
 412         FILE *fp;
 413
 414         fp = fopen(fname, "rb");
 415         if (!fp) {
 416             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 417                   fname);
 418         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 419             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 420                   fname);
 421         } else {
 422             static char buf[4096];
 423             size_t t = instruction->times;
 424             size_t base = 0;
 425             size_t len;
 426
 427             len = ftell(fp);
 428             if (instruction->eops->next) {
 429                 base = instruction->eops->next->offset;
 430                 len -= base;
 431                 if (instruction->eops->next->next &&
 432                     len > (size_t)instruction->eops->next->next->offset)
 433                     len = (size_t)instruction->eops->next->next->offset;
 434             }
 435             /*
 436              * Dummy call to list->output to give the offset to the
 437              * listing module.
 438              */
 439             list->output(offset, NULL, OUT_RAWDATA, 0);
 440             list->uplevel(LIST_INCBIN);
 441             while (t--) {
 442                 size_t l;
 443
 444                 fseek(fp, base, SEEK_SET);
 445                 l = len;
 446                 while (l > 0) {
 447                     int32_t m;
 448                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 449                     if (!m) {
 450                         /*
 451                          * This shouldn't happen unless the file
 452                          * actually changes while we are reading
 453                          * it.
 454                          */
 455                         error(ERR_NONFATAL,
 456                               "`incbin': unexpected EOF while"
 457                               " reading file `%s'", fname);
 458                         t = 0;  /* Try to exit cleanly */
 459                         break;
 460                     }
 461                     out(offset, segment, buf, OUT_RAWDATA, m,
 462                         NO_SEG, NO_SEG);
 463                     l -= m;
 464                 }
 465             }
 466             list->downlevel(LIST_INCBIN);
 467             if (instruction->times > 1) {
 468                 /*
 469                  * Dummy call to list->output to give the offset to the
 470                  * listing module.
 471                  */
 472                 list->output(offset, NULL, OUT_RAWDATA, 0);
 473                 list->uplevel(LIST_TIMES);
 474                 list->downlevel(LIST_TIMES);
 475             }
 476             fclose(fp);
 477             return instruction->times * len;
 478         }
 479         return 0;               /* if we're here, there's an error */
 480     }
 481
 482     /* Check to see if we need an address-size prefix */
 483     add_asp(instruction, bits);
 484
 485     m = find_match(&temp, instruction, segment, offset, bits);
 486
 487     if (m == MOK_GOOD) {
 488         /* Matches! */
 489         int64_t insn_size = calcsize(segment, offset, bits,
 490                                      instruction, temp->code);
 491         itimes = instruction->times;
 492         if (insn_size < 0)  /* shouldn't be, on pass two */
 493             error(ERR_PANIC, "errors made it through from pass one");
 494         else
 495             while (itimes--) {
 496                 for (j = 0; j < MAXPREFIX; j++) {
 497                     uint8_t c = 0;
 498                     switch (instruction->prefixes[j]) {
 499                     case P_WAIT:
 500                         c = 0x9B;
 501                         break;
 502                     case P_LOCK:
 503                         c = 0xF0;
 504                         break;
 505                     case P_REPNE:
 506                     case P_REPNZ:
 507                         c = 0xF2;
 508                         break;
 509                     case P_REPE:
 510                     case P_REPZ:
 511                     case P_REP:
 512                         c = 0xF3;
 513                         break;
 514                     case R_CS:
 515                         if (bits == 64) {
 516                             error(ERR_WARNING | ERR_PASS2,
 517                                   "cs segment base generated, but will be ignored in 64-bit mode");
 518                         }
 519                         c = 0x2E;
 520                         break;
 521                     case R_DS:
 522                         if (bits == 64) {
 523                             error(ERR_WARNING | ERR_PASS2,
 524                                   "ds segment base generated, but will be ignored in 64-bit mode");
 525                         }
 526                         c = 0x3E;
 527                         break;
 528                     case R_ES:
 529                         if (bits == 64) {
 530                             error(ERR_WARNING | ERR_PASS2,
 531                                   "es segment base generated, but will be ignored in 64-bit mode");
 532                         }
 533                         c = 0x26;
 534                         break;
 535                     case R_FS:
 536                         c = 0x64;
 537                         break;
 538                     case R_GS:
 539                         c = 0x65;
 540                         break;
 541                     case R_SS:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "ss segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x36;
 547                         break;
 548                     case R_SEGR6:
 549                     case R_SEGR7:
 550                         error(ERR_NONFATAL,
 551                               "segr6 and segr7 cannot be used as prefixes");
 552                         break;
 553                     case P_A16:
 554                         if (bits == 64) {
 555                             error(ERR_NONFATAL,
 556                                   "16-bit addressing is not supported "
 557                                   "in 64-bit mode");
 558                         } else if (bits != 16)
 559                             c = 0x67;
 560                         break;
 561                     case P_A32:
 562                         if (bits != 32)
 563                             c = 0x67;
 564                         break;
 565                     case P_A64:
 566                         if (bits != 64) {
 567                             error(ERR_NONFATAL,
 568                                   "64-bit addressing is only supported "
 569                                   "in 64-bit mode");
 570                         }
 571                         break;
 572                     case P_ASP:
 573                         c = 0x67;
 574                         break;
 575                     case P_O16:
 576                         if (bits != 16)
 577                             c = 0x66;
 578                         break;
 579                     case P_O32:
 580                         if (bits == 16)
 581                             c = 0x66;
 582                         break;
 583                     case P_O64:
 584                         /* REX.W */
 585                         break;
 586                     case P_OSP:
 587                         c = 0x66;
 588                         break;
 589                     case P_none:
 590                         break;
 591                     default:
 592                         error(ERR_PANIC, "invalid instruction prefix");
 593                     }
 594                     if (c != 0) {
 595                         out(offset, segment, &c, OUT_RAWDATA, 1,
 596                             NO_SEG, NO_SEG);
 597                         offset++;
 598                     }
 599                 }
 600                 insn_end = offset + insn_size;
 601                 gencode(segment, offset, bits, instruction,
 602                         temp, insn_end);
 603                 offset += insn_size;
 604                 if (itimes > 0 && itimes == instruction->times - 1) {
 605                     /*
 606                      * Dummy call to list->output to give the offset to the
 607                      * listing module.
 608                      */
 609                     list->output(offset, NULL, OUT_RAWDATA, 0);
 610                     list->uplevel(LIST_TIMES);
 611                 }
 612             }
 613         if (instruction->times > 1)
 614             list->downlevel(LIST_TIMES);
 615         return offset - start;
 616     } else {
 617         /* No match */
 618         switch (m) {
 619         case MERR_OPSIZEMISSING:
 620             error(ERR_NONFATAL, "operation size not specified");
 621             break;
 622         case MERR_OPSIZEMISMATCH:
 623             error(ERR_NONFATAL, "mismatch in operand sizes");
 624             break;
 625         case MERR_BADCPU:
 626             error(ERR_NONFATAL, "no instruction for this cpu level");
 627             break;
 628         case MERR_BADMODE:
 629             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 630                   bits);
 631             break;
 632         default:
 633             error(ERR_NONFATAL,
 634                   "invalid combination of opcode and operands");
 635             break;
 636         }
 637     }
 638     return 0;
 639 }
 640
 641 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 642                   insn * instruction, efunc error)
 643 {
 644     const struct itemplate *temp;
 645     enum match_result m;
 646
 647     errfunc = error;            /* to pass to other functions */
 648     cpu = cp;
 649
 650     if (instruction->opcode == I_none)
 651         return 0;
 652
 653     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 654         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 655         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 656         instruction->opcode == I_DY) {
 657         extop *e;
 658         int32_t isize, osize, wsize;
 659
 660         isize = 0;
 661         wsize = idata_bytes(instruction->opcode);
 662
 663         list_for_each(e, instruction->eops) {
 664             int32_t align;
 665
 666             osize = 0;
 667             if (e->type == EOT_DB_NUMBER) {
 668                 osize = 1;
 669                 warn_overflow_const(e->offset, wsize);
 670             } else if (e->type == EOT_DB_STRING ||
 671                        e->type == EOT_DB_STRING_FREE)
 672                 osize = e->stringlen;
 673
 674             align = (-osize) % wsize;
 675             if (align < 0)
 676                 align += wsize;
 677             isize += osize + align;
 678         }
 679         return isize * instruction->times;
 680     }
 681
 682     if (instruction->opcode == I_INCBIN) {
 683         const char *fname = instruction->eops->stringval;
 684         FILE *fp;
 685         int64_t val = 0;
 686         size_t len;
 687
 688         fp = fopen(fname, "rb");
 689         if (!fp)
 690             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 691                   fname);
 692         else if (fseek(fp, 0L, SEEK_END) < 0)
 693             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 694                   fname);
 695         else {
 696             len = ftell(fp);
 697             if (instruction->eops->next) {
 698                 len -= instruction->eops->next->offset;
 699                 if (instruction->eops->next->next &&
 700                     len > (size_t)instruction->eops->next->next->offset) {
 701                     len = (size_t)instruction->eops->next->next->offset;
 702                 }
 703             }
 704             val = instruction->times * len;
 705         }
 706         if (fp)
 707             fclose(fp);
 708         return val;
 709     }
 710
 711     /* Check to see if we need an address-size prefix */
 712     add_asp(instruction, bits);
 713
 714     m = find_match(&temp, instruction, segment, offset, bits);
 715     if (m == MOK_GOOD) {
 716         /* we've matched an instruction. */
 717         int64_t isize;
 718         const uint8_t *codes = temp->code;
 719         int j;
 720
 721         isize = calcsize(segment, offset, bits, instruction, codes);
 722         if (isize < 0)
 723             return -1;
 724         for (j = 0; j < MAXPREFIX; j++) {
 725             switch (instruction->prefixes[j]) {
 726             case P_A16:
 727                 if (bits != 16)
 728                     isize++;
 729                 break;
 730             case P_A32:
 731                 if (bits != 32)
 732                     isize++;
 733                 break;
 734             case P_O16:
 735                 if (bits != 16)
 736                     isize++;
 737                 break;
 738             case P_O32:
 739                 if (bits == 16)
 740                     isize++;
 741                 break;
 742             case P_A64:
 743             case P_O64:
 744             case P_none:
 745                 break;
 746             default:
 747                 isize++;
 748                 break;
 749             }
 750         }
 751         return isize * instruction->times;
 752     } else {
 753         return -1;                  /* didn't match any instruction */
 754     }
 755 }
 756
 757 static bool possible_sbyte(operand *o)
 758 {
 759     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 760         !(o->opflags & OPFLAG_UNKNOWN) &&
 761         optimizing >= 0 && !(o->type & STRICT);
 762 }
 763
 764 /* check that opn[op]  is a signed byte of size 16 or 32 */
 765 static bool is_sbyte16(operand *o)
 766 {
 767     int16_t v;
 768
 769     if (!possible_sbyte(o))
 770         return false;
 771
 772     v = o->offset;
 773     return v >= -128 && v <= 127;
 774 }
 775
 776 static bool is_sbyte32(operand *o)
 777 {
 778     int32_t v;
 779
 780     if (!possible_sbyte(o))
 781         return false;
 782
 783     v = o->offset;
 784     return v >= -128 && v <= 127;
 785 }
 786
 787 /* Common construct */
 788 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 789
 790 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 791                         insn * ins, const uint8_t *codes)
 792 {
 793     int64_t length = 0;
 794     uint8_t c;
 795     int rex_mask = ~0;
 796     int op1, op2;
 797     struct operand *opx;
 798     uint8_t opex = 0;
 799
 800     ins->rex = 0;               /* Ensure REX is reset */
 801
 802     if (ins->prefixes[PPS_OSIZE] == P_O64)
 803         ins->rex |= REX_W;
 804
 805     (void)segment;              /* Don't warn that this parameter is unused */
 806     (void)offset;               /* Don't warn that this parameter is unused */
 807
 808     while (*codes) {
 809         c = *codes++;
 810         op1 = (c & 3) + ((opex & 1) << 2);
 811         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 812         opx = &ins->oprs[op1];
 813         opex = 0;               /* For the next iteration */
 814
 815         switch (c) {
 816         case 01:
 817         case 02:
 818         case 03:
 819         case 04:
 820             codes += c, length += c;
 821             break;
 822
 823         case 05:
 824         case 06:
 825         case 07:
 826             opex = c;
 827             break;
 828
 829         case4(010):
 830             ins->rex |=
 831                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 832             codes++, length++;
 833             break;
 834
 835         case4(014):
 836         case4(020):
 837         case4(024):
 838             length++;
 839             break;
 840
 841         case4(030):
 842             length += 2;
 843             break;
 844
 845         case4(034):
 846             if (opx->type & (BITS16 | BITS32 | BITS64))
 847                 length += (opx->type & BITS16) ? 2 : 4;
 848             else
 849                 length += (bits == 16) ? 2 : 4;
 850             break;
 851
 852         case4(040):
 853             length += 4;
 854             break;
 855
 856         case4(044):
 857             length += ins->addr_size >> 3;
 858             break;
 859
 860         case4(050):
 861             length++;
 862             break;
 863
 864         case4(054):
 865             length += 8; /* MOV reg64/imm */
 866             break;
 867
 868         case4(060):
 869             length += 2;
 870             break;
 871
 872         case4(064):
 873             if (opx->type & (BITS16 | BITS32 | BITS64))
 874                 length += (opx->type & BITS16) ? 2 : 4;
 875             else
 876                 length += (bits == 16) ? 2 : 4;
 877             break;
 878
 879         case4(070):
 880             length += 4;
 881             break;
 882
 883         case4(074):
 884             length += 2;
 885             break;
 886
 887         case4(0140):
 888             length += is_sbyte16(opx) ? 1 : 2;
 889             break;
 890
 891         case4(0144):
 892             codes++;
 893             length++;
 894             break;
 895
 896         case4(0150):
 897             length += is_sbyte32(opx) ? 1 : 4;
 898             break;
 899
 900         case4(0154):
 901             codes++;
 902             length++;
 903             break;
 904
 905         case4(0160):
 906             length++;
 907             ins->rex |= REX_D;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case4(0164):
 912             length++;
 913             ins->rex |= REX_D|REX_OC;
 914             ins->drexdst = regval(opx);
 915             break;
 916
 917         case 0171:
 918             break;
 919
 920         case 0172:
 921         case 0173:
 922         case 0174:
 923             codes++;
 924             length++;
 925             break;
 926
 927         case4(0250):
 928             length += is_sbyte32(opx) ? 1 : 4;
 929             break;
 930
 931         case4(0254):
 932             length += 4;
 933             break;
 934
 935         case4(0260):
 936             ins->rex |= REX_V;
 937             ins->drexdst = regval(opx);
 938             ins->vex_cm = *codes++;
 939             ins->vex_wlp = *codes++;
 940             break;
 941
 942         case 0270:
 943             ins->rex |= REX_V;
 944             ins->drexdst = 0;
 945             ins->vex_cm = *codes++;
 946             ins->vex_wlp = *codes++;
 947             break;
 948
 949         case4(0274):
 950             length++;
 951             break;
 952
 953         case4(0300):
 954             break;
 955
 956         case 0310:
 957             if (bits == 64)
 958                 return -1;
 959             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 960             break;
 961
 962         case 0311:
 963             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 964             break;
 965
 966         case 0312:
 967             break;
 968
 969         case 0313:
 970             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 971                 has_prefix(ins, PPS_ASIZE, P_A32))
 972                 return -1;
 973             break;
 974
 975         case4(0314):
 976             break;
 977
 978         case 0320:
 979             length += (bits != 16);
 980             break;
 981
 982         case 0321:
 983             length += (bits == 16);
 984             break;
 985
 986         case 0322:
 987             break;
 988
 989         case 0323:
 990             rex_mask &= ~REX_W;
 991             break;
 992
 993         case 0324:
 994             ins->rex |= REX_W;
 995             break;
 996
 997         case 0325:
 998             ins->rex |= REX_NH;
 999             break;
1000
1001         case 0330:
1002             codes++, length++;
1003             break;
1004
1005         case 0331:
1006             break;
1007
1008         case 0332:
1009         case 0333:
1010             length++;
1011             break;
1012
1013         case 0334:
1014             ins->rex |= REX_L;
1015             break;
1016
1017         case 0335:
1018             break;
1019
1020         case 0336:
1021             if (!ins->prefixes[PPS_LREP])
1022                 ins->prefixes[PPS_LREP] = P_REP;
1023             break;
1024
1025         case 0337:
1026             if (!ins->prefixes[PPS_LREP])
1027                 ins->prefixes[PPS_LREP] = P_REPNE;
1028             break;
1029
1030         case 0340:
1031             if (ins->oprs[0].segment != NO_SEG)
1032                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1033                         " quantity of BSS space");
1034             else
1035                 length += ins->oprs[0].offset;
1036             break;
1037
1038         case 0341:
1039             if (!ins->prefixes[PPS_WAIT])
1040                 ins->prefixes[PPS_WAIT] = P_WAIT;
1041             break;
1042
1043         case4(0344):
1044             length++;
1045             break;
1046
1047         case 0360:
1048             break;
1049
1050         case 0361:
1051         case 0362:
1052         case 0363:
1053             length++;
1054             break;
1055
1056         case 0364:
1057         case 0365:
1058             break;
1059
1060         case 0366:
1061         case 0367:
1062             length++;
1063             break;
1064
1065         case 0370:
1066         case 0371:
1067         case 0372:
1068             break;
1069
1070         case 0373:
1071             length++;
1072             break;
1073
1074         case4(0100):
1075         case4(0110):
1076         case4(0120):
1077         case4(0130):
1078         case4(0200):
1079         case4(0204):
1080         case4(0210):
1081         case4(0214):
1082         case4(0220):
1083         case4(0224):
1084         case4(0230):
1085         case4(0234):
1086             {
1087                 ea ea_data;
1088                 int rfield;
1089                 opflags_t rflags;
1090                 struct operand *opy = &ins->oprs[op2];
1091
1092                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1093
1094                 if (c <= 0177) {
1095                     /* pick rfield from operand b (opx) */
1096                     rflags = regflag(opx);
1097                     rfield = nasm_regvals[opx->basereg];
1098                 } else {
1099                     rflags = 0;
1100                     rfield = c & 7;
1101                 }
1102                 if (!process_ea(opy, &ea_data, bits,
1103                                 ins->addr_size, rfield, rflags)) {
1104                     errfunc(ERR_NONFATAL, "invalid effective address");
1105                     return -1;
1106                 } else {
1107                     ins->rex |= ea_data.rex;
1108                     length += ea_data.size;
1109                 }
1110             }
1111             break;
1112
1113         default:
1114             errfunc(ERR_PANIC, "internal instruction table corrupt"
1115                     ": instruction code \\%o (0x%02X) given", c, c);
1116             break;
1117         }
1118     }
1119
1120     ins->rex &= rex_mask;
1121
1122     if (ins->rex & REX_NH) {
1123         if (ins->rex & REX_H) {
1124             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1125             return -1;
1126         }
1127         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1128     }
1129
1130     if (ins->rex & REX_V) {
1131         int bad32 = REX_R|REX_W|REX_X|REX_B;
1132
1133         if (ins->rex & REX_H) {
1134             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1135             return -1;
1136         }
1137         switch (ins->vex_wlp & 060) {
1138         case 000:
1139         case 040:
1140             ins->rex &= ~REX_W;
1141             break;
1142         case 020:
1143             ins->rex |= REX_W;
1144             bad32 &= ~REX_W;
1145             break;
1146         case 060:
1147             /* Follow REX_W */
1148             break;
1149         }
1150
1151         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1152             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1153             return -1;
1154         }
1155         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1156             length += 3;
1157         else
1158             length += 2;
1159     } else if (ins->rex & REX_D) {
1160         if (ins->rex & REX_H) {
1161             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1162             return -1;
1163         }
1164         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1165                            ins->drexdst > 7)) {
1166             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1167             return -1;
1168         }
1169         length++;
1170     } else if (ins->rex & REX_REAL) {
1171         if (ins->rex & REX_H) {
1172             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1173             return -1;
1174         } else if (bits == 64) {
1175             length++;
1176         } else if ((ins->rex & REX_L) &&
1177                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1178                    cpu >= IF_X86_64) {
1179             /* LOCK-as-REX.R */
1180             assert_no_prefix(ins, PPS_LREP);
1181             length++;
1182         } else {
1183             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1184             return -1;
1185         }
1186     }
1187
1188     return length;
1189 }
1190
/*
 * EMIT_REX() - output a pending REX prefix byte, if one is required.
 *
 * Expects `ins', `bits', `offset' and `segment' to be in scope at the
 * point of use.  When the instruction is neither DREX nor VEX encoded,
 * has at least one real REX bit set and the mode is 64-bit, the REX byte
 * (real bits plus REX_P) is written with out(), ins->rex is cleared so
 * that the prefix is emitted only once, and `offset' advances by one.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement and is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                                     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1198
1199 static void gencode(int32_t segment, int64_t offset, int bits,
1200                     insn * ins, const struct itemplate *temp,
1201                     int64_t insn_end)
1202 {
1203     static char condval[] = {   /* conditional opcodes */
1204         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1205         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1206         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1207     };
1208     uint8_t c;
1209     uint8_t bytes[4];
1210     int64_t size;
1211     int64_t data;
1212     int op1, op2;
1213     struct operand *opx;
1214     const uint8_t *codes = temp->code;
1215     uint8_t opex = 0;
1216
1217     while (*codes) {
1218         c = *codes++;
1219         op1 = (c & 3) + ((opex & 1) << 2);
1220         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1221         opx = &ins->oprs[op1];
1222         opex = 0;                /* For the next iteration */
1223
1224         switch (c) {
1225         case 01:
1226         case 02:
1227         case 03:
1228         case 04:
1229             EMIT_REX();
1230             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1231             codes += c;
1232             offset += c;
1233             break;
1234
1235         case 05:
1236         case 06:
1237         case 07:
1238             opex = c;
1239             break;
1240
1241         case4(010):
1242             EMIT_REX();
1243             bytes[0] = *codes++ + (regval(opx) & 7);
1244             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1245             offset += 1;
1246             break;
1247
1248         case4(014):
1249             /*
1250              * The test for BITS8 and SBYTE here is intended to avoid
1251              * warning on optimizer actions due to SBYTE, while still
1252              * warn on explicit BYTE directives.  Also warn, obviously,
1253              * if the optimizer isn't enabled.
1254              */
1255             if (((opx->type & BITS8) ||
1256                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1257                 (opx->offset < -128 || opx->offset > 127)) {
1258                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1259                         "signed byte value exceeds bounds");
1260             }
1261             if (opx->segment != NO_SEG) {
1262                 data = opx->offset;
1263                 out(offset, segment, &data, OUT_ADDRESS, 1,
1264                     opx->segment, opx->wrt);
1265             } else {
1266                 bytes[0] = opx->offset;
1267                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1268                     NO_SEG);
1269             }
1270             offset += 1;
1271             break;
1272
1273         case4(020):
1274             if (opx->offset < -256 || opx->offset > 255) {
1275                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1276                         "byte value exceeds bounds");
1277             }
1278             if (opx->segment != NO_SEG) {
1279                 data = opx->offset;
1280                 out(offset, segment, &data, OUT_ADDRESS, 1,
1281                     opx->segment, opx->wrt);
1282             } else {
1283                 bytes[0] = opx->offset;
1284                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1285                     NO_SEG);
1286             }
1287             offset += 1;
1288             break;
1289
1290         case4(024):
1291             if (opx->offset < 0 || opx->offset > 255)
1292                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1293                         "unsigned byte value exceeds bounds");
1294             if (opx->segment != NO_SEG) {
1295                 data = opx->offset;
1296                 out(offset, segment, &data, OUT_ADDRESS, 1,
1297                     opx->segment, opx->wrt);
1298             } else {
1299                 bytes[0] = opx->offset;
1300                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1301                     NO_SEG);
1302             }
1303             offset += 1;
1304             break;
1305
1306         case4(030):
1307             warn_overflow_opd(opx, 2);
1308             data = opx->offset;
1309             out(offset, segment, &data, OUT_ADDRESS, 2,
1310                 opx->segment, opx->wrt);
1311             offset += 2;
1312             break;
1313
1314         case4(034):
1315             if (opx->type & (BITS16 | BITS32))
1316                 size = (opx->type & BITS16) ? 2 : 4;
1317             else
1318                 size = (bits == 16) ? 2 : 4;
1319             warn_overflow_opd(opx, size);
1320             data = opx->offset;
1321             out(offset, segment, &data, OUT_ADDRESS, size,
1322                 opx->segment, opx->wrt);
1323             offset += size;
1324             break;
1325
1326         case4(040):
1327             warn_overflow_opd(opx, 4);
1328             data = opx->offset;
1329             out(offset, segment, &data, OUT_ADDRESS, 4,
1330                 opx->segment, opx->wrt);
1331             offset += 4;
1332             break;
1333
1334         case4(044):
1335             data = opx->offset;
1336             size = ins->addr_size >> 3;
1337             warn_overflow_opd(opx, size);
1338             out(offset, segment, &data, OUT_ADDRESS, size,
1339                 opx->segment, opx->wrt);
1340             offset += size;
1341             break;
1342
1343         case4(050):
1344             if (opx->segment != segment) {
1345                 data = opx->offset;
1346                 out(offset, segment, &data,
1347                     OUT_REL1ADR, insn_end - offset,
1348                     opx->segment, opx->wrt);
1349             } else {
1350                 data = opx->offset - insn_end;
1351                 if (data > 127 || data < -128)
1352                     errfunc(ERR_NONFATAL, "short jump is out of range");
1353                 out(offset, segment, &data,
1354                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1355             }
1356             offset += 1;
1357             break;
1358
1359         case4(054):
1360             data = (int64_t)opx->offset;
1361             out(offset, segment, &data, OUT_ADDRESS, 8,
1362                 opx->segment, opx->wrt);
1363             offset += 8;
1364             break;
1365
1366         case4(060):
1367             if (opx->segment != segment) {
1368                 data = opx->offset;
1369                 out(offset, segment, &data,
1370                     OUT_REL2ADR, insn_end - offset,
1371                     opx->segment, opx->wrt);
1372             } else {
1373                 data = opx->offset - insn_end;
1374                 out(offset, segment, &data,
1375                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1376             }
1377             offset += 2;
1378             break;
1379
1380         case4(064):
1381             if (opx->type & (BITS16 | BITS32 | BITS64))
1382                 size = (opx->type & BITS16) ? 2 : 4;
1383             else
1384                 size = (bits == 16) ? 2 : 4;
1385             if (opx->segment != segment) {
1386                 data = opx->offset;
1387                 out(offset, segment, &data,
1388                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1389                     insn_end - offset, opx->segment, opx->wrt);
1390             } else {
1391                 data = opx->offset - insn_end;
1392                 out(offset, segment, &data,
1393                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1394             }
1395             offset += size;
1396             break;
1397
1398         case4(070):
1399             if (opx->segment != segment) {
1400                 data = opx->offset;
1401                 out(offset, segment, &data,
1402                     OUT_REL4ADR, insn_end - offset,
1403                     opx->segment, opx->wrt);
1404             } else {
1405                 data = opx->offset - insn_end;
1406                 out(offset, segment, &data,
1407                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1408             }
1409             offset += 4;
1410             break;
1411
1412         case4(074):
1413             if (opx->segment == NO_SEG)
1414                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1415                         " relocatable");
1416             data = 0;
1417             out(offset, segment, &data, OUT_ADDRESS, 2,
1418                 outfmt->segbase(1 + opx->segment),
1419                 opx->wrt);
1420             offset += 2;
1421             break;
1422
1423         case4(0140):
1424             data = opx->offset;
1425             warn_overflow_opd(opx, 2);
1426             if (is_sbyte16(opx)) {
1427                 bytes[0] = data;
1428                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1429                     NO_SEG);
1430                 offset++;
1431             } else {
1432                 out(offset, segment, &data, OUT_ADDRESS, 2,
1433                     opx->segment, opx->wrt);
1434                 offset += 2;
1435             }
1436             break;
1437
1438         case4(0144):
1439             EMIT_REX();
1440             bytes[0] = *codes++;
1441             if (is_sbyte16(opx))
1442                 bytes[0] |= 2;  /* s-bit */
1443             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1444             offset++;
1445             break;
1446
1447         case4(0150):
1448             data = opx->offset;
1449             warn_overflow_opd(opx, 4);
1450             if (is_sbyte32(opx)) {
1451                 bytes[0] = data;
1452                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1453                     NO_SEG);
1454                 offset++;
1455             } else {
1456                 out(offset, segment, &data, OUT_ADDRESS, 4,
1457                     opx->segment, opx->wrt);
1458                 offset += 4;
1459             }
1460             break;
1461
1462         case4(0154):
1463             EMIT_REX();
1464             bytes[0] = *codes++;
1465             if (is_sbyte32(opx))
1466                 bytes[0] |= 2;  /* s-bit */
1467             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1468             offset++;
1469             break;
1470
1471         case4(0160):
1472         case4(0164):
1473             break;
1474
1475         case 0171:
1476             bytes[0] =
1477                 (ins->drexdst << 4) |
1478                 (ins->rex & REX_OC ? 0x08 : 0) |
1479                 (ins->rex & (REX_R|REX_X|REX_B));
1480             ins->rex = 0;
1481             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1482             offset++;
1483             break;
1484
1485         case 0172:
1486             c = *codes++;
1487             opx = &ins->oprs[c >> 3];
1488             bytes[0] = nasm_regvals[opx->basereg] << 4;
1489             opx = &ins->oprs[c & 7];
1490             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1491                 errfunc(ERR_NONFATAL,
1492                         "non-absolute expression not permitted as argument %d",
1493                         c & 7);
1494             } else {
1495                 if (opx->offset & ~15) {
1496                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1497                             "four-bit argument exceeds bounds");
1498                 }
1499                 bytes[0] |= opx->offset & 15;
1500             }
1501             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1502             offset++;
1503             break;
1504
1505         case 0173:
1506             c = *codes++;
1507             opx = &ins->oprs[c >> 4];
1508             bytes[0] = nasm_regvals[opx->basereg] << 4;
1509             bytes[0] |= c & 15;
1510             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1511             offset++;
1512             break;
1513
1514         case 0174:
1515             c = *codes++;
1516             opx = &ins->oprs[c];
1517             bytes[0] = nasm_regvals[opx->basereg] << 4;
1518             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1519             offset++;
1520             break;
1521
1522         case4(0250):
1523             data = opx->offset;
1524             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1525                 (int32_t)data != (int64_t)data) {
1526                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1527                         "signed dword immediate exceeds bounds");
1528             }
1529             if (is_sbyte32(opx)) {
1530                 bytes[0] = data;
1531                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1532                     NO_SEG);
1533                 offset++;
1534             } else {
1535                 out(offset, segment, &data, OUT_ADDRESS, 4,
1536                     opx->segment, opx->wrt);
1537                 offset += 4;
1538             }
1539             break;
1540
1541         case4(0254):
1542             data = opx->offset;
1543             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1544                 (int32_t)data != (int64_t)data) {
1545                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1546                         "signed dword immediate exceeds bounds");
1547             }
1548             out(offset, segment, &data, OUT_ADDRESS, 4,
1549                 opx->segment, opx->wrt);
1550             offset += 4;
1551             break;
1552
1553         case4(0260):
1554         case 0270:
1555             codes += 2;
1556             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1557                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1558                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1559                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1560                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1561                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1562                 offset += 3;
1563             } else {
1564                 bytes[0] = 0xc5;
1565                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1566                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1567                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1568                 offset += 2;
1569             }
1570             break;
1571
1572         case4(0274):
1573         {
1574             uint64_t uv, um;
1575             int s;
1576
1577             if (ins->rex & REX_W)
1578                 s = 64;
1579             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1580                 s = 16;
1581             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1582                 s = 32;
1583             else
1584                 s = bits;
1585
1586             um = (uint64_t)2 << (s-1);
1587             uv = opx->offset;
1588
1589             if (uv > 127 && uv < (uint64_t)-128 &&
1590                 (uv < um-128 || uv > um-1)) {
1591                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1592                         "signed byte value exceeds bounds");
1593             }
1594             if (opx->segment != NO_SEG) {
1595                 data = uv;
1596                 out(offset, segment, &data, OUT_ADDRESS, 1,
1597                     opx->segment, opx->wrt);
1598             } else {
1599                 bytes[0] = uv;
1600                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1601                     NO_SEG);
1602             }
1603             offset += 1;
1604             break;
1605         }
1606
1607         case4(0300):
1608             break;
1609
1610         case 0310:
1611             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1612                 *bytes = 0x67;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0311:
1620             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1621                 *bytes = 0x67;
1622                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1623                 offset += 1;
1624             } else
1625                 offset += 0;
1626             break;
1627
1628         case 0312:
1629             break;
1630
1631         case 0313:
1632             ins->rex = 0;
1633             break;
1634
1635         case4(0314):
1636             break;
1637
1638         case 0320:
1639             if (bits != 16) {
1640                 *bytes = 0x66;
1641                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1642                 offset += 1;
1643             } else
1644                 offset += 0;
1645             break;
1646
1647         case 0321:
1648             if (bits == 16) {
1649                 *bytes = 0x66;
1650                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1651                 offset += 1;
1652             } else
1653                 offset += 0;
1654             break;
1655
1656         case 0322:
1657         case 0323:
1658             break;
1659
1660         case 0324:
1661             ins->rex |= REX_W;
1662             break;
1663
1664         case 0325:
1665             break;
1666
1667         case 0330:
1668             *bytes = *codes++ ^ condval[ins->condition];
1669             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1670             offset += 1;
1671             break;
1672
1673         case 0331:
1674             break;
1675
1676         case 0332:
1677         case 0333:
1678             *bytes = c - 0332 + 0xF2;
1679             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1680             offset += 1;
1681             break;
1682
1683         case 0334:
1684             if (ins->rex & REX_R) {
1685                 *bytes = 0xF0;
1686                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1687                 offset += 1;
1688             }
1689             ins->rex &= ~(REX_L|REX_R);
1690             break;
1691
1692         case 0335:
1693             break;
1694
1695         case 0336:
1696         case 0337:
1697             break;
1698
1699         case 0340:
1700             if (ins->oprs[0].segment != NO_SEG)
1701                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1702             else {
1703                 int64_t size = ins->oprs[0].offset;
1704                 if (size > 0)
1705                     out(offset, segment, NULL,
1706                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1707                 offset += size;
1708             }
1709             break;
1710
1711         case 0341:
1712             break;
1713
1714         case 0344:
1715         case 0345:
1716             bytes[0] = c & 1;
1717             switch (ins->oprs[0].basereg) {
1718             case R_CS:
1719                 bytes[0] += 0x0E;
1720                 break;
1721             case R_DS:
1722                 bytes[0] += 0x1E;
1723                 break;
1724             case R_ES:
1725                 bytes[0] += 0x06;
1726                 break;
1727             case R_SS:
1728                 bytes[0] += 0x16;
1729                 break;
1730             default:
1731                 errfunc(ERR_PANIC,
1732                         "bizarre 8086 segment register received");
1733             }
1734             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1735             offset++;
1736             break;
1737
1738         case 0346:
1739         case 0347:
1740             bytes[0] = c & 1;
1741             switch (ins->oprs[0].basereg) {
1742             case R_FS:
1743                 bytes[0] += 0xA0;
1744                 break;
1745             case R_GS:
1746                 bytes[0] += 0xA8;
1747                 break;
1748             default:
1749                 errfunc(ERR_PANIC,
1750                         "bizarre 386 segment register received");
1751             }
1752             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1753             offset++;
1754             break;
1755
1756         case 0360:
1757             break;
1758
1759         case 0361:
1760             bytes[0] = 0x66;
1761             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1762             offset += 1;
1763             break;
1764
1765         case 0362:
1766         case 0363:
1767             bytes[0] = c - 0362 + 0xf2;
1768             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1769             offset += 1;
1770             break;
1771
1772         case 0364:
1773         case 0365:
1774             break;
1775
1776         case 0366:
1777         case 0367:
1778             *bytes = c - 0366 + 0x66;
1779             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1780             offset += 1;
1781             break;
1782
1783         case 0370:
1784         case 0371:
1785         case 0372:
1786             break;
1787
1788         case 0373:
1789             *bytes = bits == 16 ? 3 : 5;
1790             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1791             offset += 1;
1792             break;
1793
1794         case4(0100):
1795         case4(0110):
1796         case4(0120):
1797         case4(0130):
1798         case4(0200):
1799         case4(0204):
1800         case4(0210):
1801         case4(0214):
1802         case4(0220):
1803         case4(0224):
1804         case4(0230):
1805         case4(0234):
1806             {
1807                 ea ea_data;
1808                 int rfield;
1809                 opflags_t rflags;
1810                 uint8_t *p;
1811                 int32_t s;
1812                 enum out_type type;
1813                 struct operand *opy = &ins->oprs[op2];
1814
1815                 if (c <= 0177) {
1816                     /* pick rfield from operand b (opx) */
1817                     rflags = regflag(opx);
1818                     rfield = nasm_regvals[opx->basereg];
1819                 } else {
1820                     /* rfield is constant */
1821                     rflags = 0;
1822                     rfield = c & 7;
1823                 }
1824
1825                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1826                                 rfield, rflags)) {
1827                     errfunc(ERR_NONFATAL, "invalid effective address");
1828                 }
1829
1830
1831                 p = bytes;
1832                 *p++ = ea_data.modrm;
1833                 if (ea_data.sib_present)
1834                     *p++ = ea_data.sib;
1835
1836                 /* DREX suffixes come between the SIB and the displacement */
1837                 if (ins->rex & REX_D) {
1838                     *p++ = (ins->drexdst << 4) |
1839                            (ins->rex & REX_OC ? 0x08 : 0) |
1840                            (ins->rex & (REX_R|REX_X|REX_B));
1841                     ins->rex = 0;
1842                 }
1843
1844                 s = p - bytes;
1845                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1846
1847                 /*
1848                  * Make sure the address gets the right offset in case
1849                  * the line breaks in the .lst file (BR 1197827)
1850                  */
1851                 offset += s;
1852                 s = 0;
1853
1854                 switch (ea_data.bytes) {
1855                 case 0:
1856                     break;
1857                 case 1:
1858                 case 2:
1859                 case 4:
1860                 case 8:
1861                     data = opy->offset;
1862                     s += ea_data.bytes;
1863                     if (ea_data.rip) {
1864                         if (opy->segment == segment) {
1865                             data -= insn_end;
1866                             if (overflow_signed(data, ea_data.bytes))
1867                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1868                             out(offset, segment, &data, OUT_ADDRESS,
1869                                 ea_data.bytes, NO_SEG, NO_SEG);
1870                         } else {
1871                             /* overflow check in output/linker? */
1872                             out(offset, segment, &data,        OUT_REL4ADR,
1873                                 insn_end - offset, opy->segment, opy->wrt);
1874                         }
1875                     } else {
1876                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1877                             signed_bits(opy->offset, ins->addr_size) !=
1878                             signed_bits(opy->offset, ea_data.bytes * 8))
1879                             warn_overflow(ERR_PASS2, ea_data.bytes);
1880
1881                         type = OUT_ADDRESS;
1882                         out(offset, segment, &data, OUT_ADDRESS,
1883                             ea_data.bytes, opy->segment, opy->wrt);
1884                     }
1885                     break;
1886                 default:
1887                     /* Impossible! */
1888                     errfunc(ERR_PANIC,
1889                             "Invalid amount of bytes (%d) for offset?!",
1890                             ea_data.bytes);
1891                     break;
1892                 }
1893                 offset += s;
1894             }
1895             break;
1896
1897         default:
1898             errfunc(ERR_PANIC, "internal instruction table corrupt"
1899                     ": instruction code \\%o (0x%02X) given", c, c);
1900             break;
1901         }
1902     }
1903 }
1904
1905 static opflags_t regflag(const operand * o)
1906 {
1907     if (!is_register(o->basereg))
1908         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1909     return nasm_reg_flags[o->basereg];
1910 }
1911
1912 static int32_t regval(const operand * o)
1913 {
1914     if (!is_register(o->basereg))
1915         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1916     return nasm_regvals[o->basereg];
1917 }
1918
1919 static int op_rexflags(const operand * o, int mask)
1920 {
1921     opflags_t flags;
1922     int val;
1923
1924     if (!is_register(o->basereg))
1925         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1926
1927     flags = nasm_reg_flags[o->basereg];
1928     val = nasm_regvals[o->basereg];
1929
1930     return rexflags(val, flags, mask);
1931 }
1932
1933 static int rexflags(int val, opflags_t flags, int mask)
1934 {
1935     int rex = 0;
1936
1937     if (val >= 8)
1938         rex |= REX_B|REX_X|REX_R;
1939     if (flags & BITS64)
1940         rex |= REX_W;
1941     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1942         rex |= REX_H;
1943     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1944         rex |= REX_P;
1945
1946     return rex & mask;
1947 }
1948
1949 static enum match_result find_match(const struct itemplate **tempp,
1950                                     insn *instruction,
1951                                     int32_t segment, int64_t offset, int bits)
1952 {
1953     const struct itemplate *temp;
1954     enum match_result m, merr;
1955     opflags_t xsizeflags[MAX_OPERANDS];
1956     bool opsizemissing = false;
1957     int i;
1958
1959     for (i = 0; i < instruction->operands; i++)
1960         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1961
1962     merr = MERR_INVALOP;
1963
1964     for (temp = nasm_instructions[instruction->opcode];
1965          temp->opcode != I_none; temp++) {
1966         m = matches(temp, instruction, bits);
1967         if (m == MOK_JUMP) {
1968             if (jmp_match(segment, offset, bits, instruction, temp->code))
1969                 m = MOK_GOOD;
1970             else
1971                 m = MERR_INVALOP;
1972         } else if (m == MERR_OPSIZEMISSING &&
1973                    (temp->flags & IF_SMASK) != IF_SX) {
1974             /*
1975              * Missing operand size and a candidate for fuzzy matching...
1976              */
1977             for (i = 0; i < temp->operands; i++) {
1978                 if ((temp->opd[i] & SAME_AS) == 0)
1979                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1980             }
1981             opsizemissing = true;
1982         }
1983         if (m > merr)
1984             merr = m;
1985         if (merr == MOK_GOOD)
1986             goto done;
1987     }
1988
1989     /* No match, but see if we can get a fuzzy operand size match... */
1990     if (!opsizemissing)
1991         goto done;
1992
1993     for (i = 0; i < instruction->operands; i++) {
1994         /*
1995          * We ignore extrinsic operand sizes on registers, so we should
1996          * never try to fuzzy-match on them.  This also resolves the case
1997          * when we have e.g. "xmmrm128" in two different positions.
1998          */
1999         if (is_class(REGISTER, instruction->oprs[i].type))
2000             continue;
2001
2002         /* This tests if xsizeflags[i] has more than one bit set */
2003         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2004             goto done;                /* No luck */
2005
2006         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2007     }
2008
2009     /* Try matching again... */
2010     for (temp = nasm_instructions[instruction->opcode];
2011          temp->opcode != I_none; temp++) {
2012         m = matches(temp, instruction, bits);
2013         if (m == MOK_JUMP) {
2014             if (jmp_match(segment, offset, bits, instruction, temp->code))
2015                 m = MOK_GOOD;
2016             else
2017                 m = MERR_INVALOP;
2018         }
2019         if (m > merr)
2020             merr = m;
2021         if (merr == MOK_GOOD)
2022             goto done;
2023     }
2024
2025 done:
2026     *tempp = temp;
2027     return merr;
2028 }
2029
2030 static enum match_result matches(const struct itemplate *itemp,
2031                                  insn *instruction, int bits)
2032 {
2033     int i, size[MAX_OPERANDS], asize, oprs;
2034     bool opsizemissing = false;
2035
2036     /*
2037      * Check the opcode
2038      */
2039     if (itemp->opcode != instruction->opcode)
2040         return MERR_INVALOP;
2041
2042     /*
2043      * Count the operands
2044      */
2045     if (itemp->operands != instruction->operands)
2046         return MERR_INVALOP;
2047
2048     /*
2049      * Check that no spurious colons or TOs are present
2050      */
2051     for (i = 0; i < itemp->operands; i++)
2052         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2053             return MERR_INVALOP;
2054
2055     /*
2056      * Process size flags
2057      */
2058     switch (itemp->flags & IF_SMASK) {
2059     case IF_SB:
2060         asize = BITS8;
2061         break;
2062     case IF_SW:
2063         asize = BITS16;
2064         break;
2065     case IF_SD:
2066         asize = BITS32;
2067         break;
2068     case IF_SQ:
2069         asize = BITS64;
2070         break;
2071     case IF_SO:
2072         asize = BITS128;
2073         break;
2074     case IF_SY:
2075         asize = BITS256;
2076         break;
2077     case IF_SZ:
2078         switch (bits) {
2079         case 16:
2080             asize = BITS16;
2081             break;
2082         case 32:
2083             asize = BITS32;
2084             break;
2085         case 64:
2086             asize = BITS64;
2087             break;
2088         default:
2089             asize = 0;
2090             break;
2091         }
2092         break;
2093     default:
2094         asize = 0;
2095         break;
2096     }
2097
2098     if (itemp->flags & IF_ARMASK) {
2099         /* S- flags only apply to a specific operand */
2100         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2101         memset(size, 0, sizeof size);
2102         size[i] = asize;
2103     } else {
2104         /* S- flags apply to all operands */
2105         for (i = 0; i < MAX_OPERANDS; i++)
2106             size[i] = asize;
2107     }
2108
2109     /*
2110      * Check that the operand flags all match up,
2111      * it's a bit tricky so lets be verbose:
2112      *
2113      * 1) Find out the size of operand. If instruction
2114      *    doesn't have one specified -- we're trying to
2115      *    guess it either from template (IF_S* flag) or
2116      *    from code bits.
2117      *
2118      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2119      *    (ie the same operand as was specified somewhere in template, and
2120      *    this referred operand index is being achieved via ~SAME_AS)
2121      *    we are to be sure that both registers (in template and instruction)
2122      *    do exactly match.
2123      *
2124      * 3) If template operand do not match the instruction OR
2125      *    template has an operand size specified AND this size differ
2126      *    from which instruction has (perhaps we got it from code bits)
2127      *    we are:
2128      *      a)  Check that only size of instruction and operand is differ
2129      *          other characteristics do match
2130      *      b)  Perhaps it's a register specified in instruction so
2131      *          for such a case we just mark that operand as "size
2132      *          missing" and this will turn on fuzzy operand size
2133      *          logic facility (handled by a caller)
2134      */
2135     for (i = 0; i < itemp->operands; i++) {
2136         opflags_t type = instruction->oprs[i].type;
2137         if (!(type & SIZE_MASK))
2138             type |= size[i];
2139
2140         if (itemp->opd[i] & SAME_AS) {
2141             int j = itemp->opd[i] & ~SAME_AS;
2142             if (type != instruction->oprs[j].type ||
2143                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2144                 return MERR_INVALOP;
2145         } else if (itemp->opd[i] & ~type ||
2146             ((itemp->opd[i] & SIZE_MASK) &&
2147              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2148             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2149                 return MERR_INVALOP;
2150             } else if (!is_class(REGISTER, type)) {
2151                 /*
2152                  * Note: we don't honor extrinsic operand sizes for registers,
2153                  * so "missing operand size" for a register should be
2154                  * considered a wildcard match rather than an error.
2155                  */
2156                 opsizemissing = true;
2157             }
2158         }
2159     }
2160
2161     if (opsizemissing)
2162         return MERR_OPSIZEMISSING;
2163
2164     /*
2165      * Check operand sizes
2166      */
2167     if (itemp->flags & (IF_SM | IF_SM2)) {
2168         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2169         for (i = 0; i < oprs; i++) {
2170             asize = itemp->opd[i] & SIZE_MASK;
2171             if (asize) {
2172                 for (i = 0; i < oprs; i++)
2173                     size[i] = asize;
2174                 break;
2175             }
2176         }
2177     } else {
2178         oprs = itemp->operands;
2179     }
2180
2181     for (i = 0; i < itemp->operands; i++) {
2182         if (!(itemp->opd[i] & SIZE_MASK) &&
2183             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2184             return MERR_OPSIZEMISMATCH;
2185     }
2186
2187     /*
2188      * Check template is okay at the set cpu level
2189      */
2190     if (((itemp->flags & IF_PLEVEL) > cpu))
2191         return MERR_BADCPU;
2192
2193     /*
2194      * Verify the appropriate long mode flag.
2195      */
2196     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2197         return MERR_BADMODE;
2198
2199     /*
2200      * Check if special handling needed for Jumps
2201      */
2202     if ((itemp->code[0] & 0374) == 0370)
2203         return MOK_JUMP;
2204
2205     return MOK_GOOD;
2206 }
2207
/*
 * process_ea: work out the ModRM/SIB/displacement encoding of an operand
 * used as an effective address.
 *
 *   input    - operand to encode (a register or a memory reference);
 *              for a pure offset its type may be modified (IP_REL is
 *              cleared when the address is absolute in 64-bit mode)
 *   output   - receives modrm, sib_present, sib, bytes (size of the
 *              displacement in bytes), rip and size; REX bits are OR-ed
 *              into output->rex
 *   bits     - current code size; compared against 64 here
 *   addrbits - address size, checked against 16, 32 and 64 below
 *   rfield   - value for the ModRM reg field (only the low three bits
 *              are encoded; the full value is passed to rexflags())
 *   rflags   - register flags belonging to rfield, used for REX bits
 *
 * Returns output on success, or NULL if the operand cannot be encoded
 * as an effective address.
 *
 * NOTE(review): output->rex is only ever OR-ed into by this function and
 * never initialised here, so callers presumably have to clear it first
 * if they read it afterwards -- confirm against the call sites.
 */
2208 static ea *process_ea(operand * input, ea * output, int bits,
2209                       int addrbits, int rfield, opflags_t rflags)
2210 {
     /* forward reference: the offset value cannot be trusted yet */
2211     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2212
2213     output->rip = false;
2214
2215     /* REX flags for the rfield operand */
2216     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2217
2218     if (is_class(REGISTER, input->type)) {  /* register direct */
2219         int i;
2220         opflags_t f;
2221
2222         if (!is_register(input->basereg))
2223             return NULL;
2224         f = regflag(input);
2225         i = nasm_regvals[input->basereg];
2226
2227         if (REG_EA & ~f)
2228             return NULL;        /* Invalid EA register */
2229
2230         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2231
2232         output->sib_present = false;    /* no SIB necessary */
2233         output->bytes = 0;              /* no offset necessary either */
         /* mod = 3 (0xC0): r/m names a register */
2234         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2235     } else {                    /* it's a memory reference */
2236         if (input->basereg == -1 &&
2237             (input->indexreg == -1 || input->scale == 0)) {
2238             /* it's a pure offset */
2239
2240             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2241                 input->segment == NO_SEG) {
2242                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2243                 input->type &= ~IP_REL;
2244                 input->type |= MEMORY;
2245             }
2246
2247             if (input->eaflags & EAF_BYTEOFFS ||
2248                 (input->eaflags & EAF_WORDOFFS &&
2249                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2250                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2251             }
2252
2253             if (bits == 64 && (~input->type & IP_REL)) {
                 /*
                  * 64-bit mode, not RIP-relative: ModRM r/m = 4 (SIB follows)
                  * with SIB index = 4 and base = 5, plus a 4-byte displacement.
                  */
2254                 int scale, index, base;
2255                 output->sib_present = true;
2256                 scale = 0;
2257                 index = 4;
2258                 base = 5;
2259                 output->sib = (scale << 6) | (index << 3) | base;
2260                 output->bytes = 4;
2261                 output->modrm = 4 | ((rfield & 7) << 3);
2262                 output->rip = false;
2263             } else {
                 /*
                  * Displacement-only ModRM: r/m = 5 with a 4-byte offset, or
                  * r/m = 6 with a 2-byte offset for 16-bit addressing.  In
                  * 64-bit mode this form is marked as RIP-relative.
                  */
2264                 output->sib_present = false;
2265                 output->bytes = (addrbits != 16 ? 4 : 2);
2266                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2267                 output->rip = bits == 64;
2268             }
2269         } else {                /* it's an indirection */
2270             int i = input->indexreg, b = input->basereg, s = input->scale;
2271             int32_t seg = input->segment;
2272             int hb = input->hintbase, ht = input->hinttype;
2273             int t, it, bt;              /* register numbers */
2274             opflags_t x, ix, bx;        /* register flags */
2275
2276             if (s == 0)
2277                 i = -1;         /* make this easy, at least */
2278
             /* it/ix: number and flags of the index register, -1/0 if none */
2279             if (is_register(i)) {
2280                 it = nasm_regvals[i];
2281                 ix = nasm_reg_flags[i];
2282             } else {
2283                 it = -1;
2284                 ix = 0;
2285             }
2286
             /* bt/bx: number and flags of the base register, -1/0 if none */
2287             if (is_register(b)) {
2288                 bt = nasm_regvals[b];
2289                 bx = nasm_reg_flags[b];
2290             } else {
2291                 bt = -1;
2292                 bx = 0;
2293             }
2294
2295             /* check for a 32/64-bit memory reference... */
2296             if ((ix|bx) & (BITS32|BITS64)) {
2297                 /*
2298                  * it must be a 32/64-bit memory reference. Firstly we have
2299                  * to check that all registers involved are type E/Rxx.
2300                  */
                 /*
                  * sok: address sizes still permitted by the registers seen.
                  * Note that o holds the offset narrowed to 32 bits.
                  */
2301                 int32_t sok = BITS32 | BITS64, o = input->offset;
2302
2303                 if (it != -1) {
2304                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2305                         sok &= ix;
2306                     else
2307                         return NULL;
2308                 }
2309
2310                 if (bt != -1) {
2311                     if (REG_GPR & ~bx)
2312                         return NULL; /* Invalid register */
2313                     if (~sok & bx & SIZE_MASK)
2314                         return NULL; /* Invalid size */
2315                     sok &= bx;
2316                 }
2317
2318                 /*
2319                  * While we're here, ensure the user didn't specify
2320                  * WORD or QWORD
2321                  */
2322                 if (input->disp_size == 16 || input->disp_size == 64)
2323                     return NULL;
2324
                 /* the address size in effect must agree with the registers */
2325                 if (addrbits == 16 ||
2326                     (addrbits == 32 && !(sok & BITS32)) ||
2327                     (addrbits == 64 && !(sok & BITS64)))
2328                     return NULL;
2329
2330                 /* now reorganize base/index */
2331                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2332                     ((hb == b && ht == EAH_NOTBASE) ||
2333                      (hb == i && ht == EAH_MAKEBASE))) {
2334                     /* swap if hints say so */
2335                     t = bt, bt = it, it = t;
2336                     x = bx, bx = ix, ix = x;
2337                 }
2338                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2339                     bt = -1, bx = 0, s++;
2340                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2341                     /* make single reg base, unless hint */
2342                     bt = it, bx = ix, it = -1, ix = 0;
2343                 }
2344                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2345                       s == 3 || s == 5 || s == 9) && bt == -1)
2346                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2347                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2348                     (input->eaflags & EAF_TIMESTWO))
2349                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2350                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2351                 if (s == 1 && it == REG_NUM_ESP) {
2352                     /* swap ESP into base if scale is 1 */
2353                     t = it, it = bt, bt = t;
2354                     x = ix, ix = bx, bx = x;
2355                 }
2356                 if (it == REG_NUM_ESP ||
2357                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2358                     return NULL;        /* wrong, for various reasons */
2359
2360                 output->rex |= rexflags(it, ix, REX_X);
2361                 output->rex |= rexflags(bt, bx, REX_B);
2362
2363                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2364                     /* no SIB needed */
2365                     int mod, rm;
2366
2367                     if (bt == -1) {
2368                         rm = 5;
2369                         mod = 0;
2370                     } else {
                         /*
                          * mod 0: no displacement, only for a known zero
                          * offset without relocation or size override and a
                          * base other than EBP; mod 1: one-byte displacement;
                          * mod 2: full-size displacement.
                          */
2371                         rm = (bt & 7);
2372                         if (rm != REG_NUM_EBP && o == 0 &&
2373                             seg == NO_SEG && !forw_ref &&
2374                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2375                             mod = 0;
2376                         else if (input->eaflags & EAF_BYTEOFFS ||
2377                                  (o >= -128 && o <= 127 &&
2378                                   seg == NO_SEG && !forw_ref &&
2379                                   !(input->eaflags & EAF_WORDOFFS)))
2380                             mod = 1;
2381                         else
2382                             mod = 2;
2383                     }
2384
2385                     output->sib_present = false;
                     /* 4 bytes without a base or with mod 2, else mod (0 or 1) */
2386                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2387                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2388                 } else {
2389                     /* we need a SIB */
2390                     int mod, scale, index, base;
2391
                     /* index field 4 is used when there is no index register */
2392                     if (it == -1)
2393                         index = 4, s = 1;
2394                     else
2395                         index = (it & 7);
2396
                     /* translate the scale factor into the 2-bit SIB scale field */
2397                     switch (s) {
2398                     case 1:
2399                         scale = 0;
2400                         break;
2401                     case 2:
2402                         scale = 1;
2403                         break;
2404                     case 4:
2405                         scale = 2;
2406                         break;
2407                     case 8:
2408                         scale = 3;
2409                         break;
2410                     default:   /* then what the smeg is it? */
2411                         return NULL;    /* panic */
2412                     }
2413
2414                     if (bt == -1) {
2415                         base = 5;
2416                         mod = 0;
2417                     } else {
                         /* same mod selection as in the no-SIB case above */
2418                         base = (bt & 7);
2419                         if (base != REG_NUM_EBP && o == 0 &&
2420                             seg == NO_SEG && !forw_ref &&
2421                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2422                             mod = 0;
2423                         else if (input->eaflags & EAF_BYTEOFFS ||
2424                                  (o >= -128 && o <= 127 &&
2425                                   seg == NO_SEG && !forw_ref &&
2426                                   !(input->eaflags & EAF_WORDOFFS)))
2427                             mod = 1;
2428                         else
2429                             mod = 2;
2430                     }
2431
2432                     output->sib_present = true;
2433                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                     /* r/m = 4 announces the SIB byte */
2434                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2435                     output->sib = (scale << 6) | (index << 3) | base;
2436                 }
2437             } else {            /* it's 16-bit */
2438                 int mod, rm;
                 /* offset narrowed to 16 bits for the range checks below */
2439                 int16_t o = input->offset;
2440
2441                 /* check for 64-bit long mode */
2442                 if (addrbits == 64)
2443                     return NULL;
2444
2445                 /* check all registers are BX, BP, SI or DI */
2446                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2447                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2448                     return NULL;
2449
2450                 /* ensure the user didn't specify DWORD/QWORD */
2451                 if (input->disp_size == 32 || input->disp_size == 64)
2452                     return NULL;
2453
2454                 if (s != 1 && i != -1)
2455                     return NULL;        /* no can do, in 16-bit EA */
2456                 if (b == -1 && i != -1) {
2457                     int tmp = b;
2458                     b = i;
2459                     i = tmp;
2460                 }               /* swap */
2461                 if ((b == R_SI || b == R_DI) && i != -1) {
2462                     int tmp = b;
2463                     b = i;
2464                     i = tmp;
2465                 }
2466                 /* have BX/BP as base, SI/DI index */
2467                 if (b == i)
2468                     return NULL;        /* shouldn't ever happen, in theory */
2469                 if (i != -1 && b != -1 &&
2470                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2471                     return NULL;        /* invalid combinations */
2472                 if (b == -1)            /* pure offset: handled above */
2473                     return NULL;        /* so if it gets to here, panic! */
2474
                 /*
                  * Map the register combination to the r/m field: 0-3 for
                  * the base+index pairs, 4-7 for a single register.
                  */
2475                 rm = -1;
2476                 if (i != -1)
2477                     switch (i * 256 + b) {
2478                     case R_SI * 256 + R_BX:
2479                         rm = 0;
2480                         break;
2481                     case R_DI * 256 + R_BX:
2482                         rm = 1;
2483                         break;
2484                     case R_SI * 256 + R_BP:
2485                         rm = 2;
2486                         break;
2487                     case R_DI * 256 + R_BP:
2488                         rm = 3;
2489                         break;
2490                 } else
2491                     switch (b) {
2492                     case R_SI:
2493                         rm = 4;
2494                         break;
2495                     case R_DI:
2496                         rm = 5;
2497                         break;
2498                     case R_BP:
2499                         rm = 6;
2500                         break;
2501                     case R_BX:
2502                         rm = 7;
2503                         break;
2504                     }
2505                 if (rm == -1)           /* can't happen, in theory */
2506                     return NULL;        /* so panic if it does */
2507
                 /*
                  * rm 6 (BP alone) is excluded from mod 0; the pure-offset
                  * path above emits r/m = 6 with mod 0 for a bare 16-bit
                  * displacement.
                  */
2508                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2509                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2510                     mod = 0;
2511                 else if (input->eaflags & EAF_BYTEOFFS ||
2512                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2513                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2514                     mod = 1;
2515                 else
2516                     mod = 2;
2517
2518                 output->sib_present = false;    /* no SIB - it's 16-bit */
2519                 output->bytes = mod;            /* bytes of offset needed */
2520                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2521             }
2522         }
2523     }
2524
     /* total encoded size: ModRM byte + optional SIB byte + displacement */
2525     output->size = 1 + output->sib_present + output->bytes;
2526     return output;
2527 }
2528
2529 static void add_asp(insn *ins, int addrbits)
2530 {
2531     int j, valid;
2532     int defdisp;
2533
2534     valid = (addrbits == 64) ? 64|32 : 32|16;
2535
2536     switch (ins->prefixes[PPS_ASIZE]) {
2537     case P_A16:
2538         valid &= 16;
2539         break;
2540     case P_A32:
2541         valid &= 32;
2542         break;
2543     case P_A64:
2544         valid &= 64;
2545         break;
2546     case P_ASP:
2547         valid &= (addrbits == 32) ? 16 : 32;
2548         break;
2549     default:
2550         break;
2551     }
2552
2553     for (j = 0; j < ins->operands; j++) {
2554         if (is_class(MEMORY, ins->oprs[j].type)) {
2555             opflags_t i, b;
2556
2557             /* Verify as Register */
2558             if (!is_register(ins->oprs[j].indexreg))
2559                 i = 0;
2560             else
2561                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2562
2563             /* Verify as Register */
2564             if (!is_register(ins->oprs[j].basereg))
2565                 b = 0;
2566             else
2567                 b = nasm_reg_flags[ins->oprs[j].basereg];
2568
2569             if (ins->oprs[j].scale == 0)
2570                 i = 0;
2571
2572             if (!i && !b) {
2573                 int ds = ins->oprs[j].disp_size;
2574                 if ((addrbits != 64 && ds > 8) ||
2575                     (addrbits == 64 && ds == 16))
2576                     valid &= ds;
2577             } else {
2578                 if (!(REG16 & ~b))
2579                     valid &= 16;
2580                 if (!(REG32 & ~b))
2581                     valid &= 32;
2582                 if (!(REG64 & ~b))
2583                     valid &= 64;
2584
2585                 if (!(REG16 & ~i))
2586                     valid &= 16;
2587                 if (!(REG32 & ~i))
2588                     valid &= 32;
2589                 if (!(REG64 & ~i))
2590                     valid &= 64;
2591             }
2592         }
2593     }
2594
2595     if (valid & addrbits) {
2596         ins->addr_size = addrbits;
2597     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2598         /* Add an address size prefix */
2599         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2600         ins->prefixes[PPS_ASIZE] = pref;
2601         ins->addr_size = (addrbits == 32) ? 16 : 32;
2602     } else {
2603         /* Impossible... */
2604         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2605         ins->addr_size = addrbits; /* Error recovery */
2606     }
2607
2608     defdisp = ins->addr_size == 16 ? 16 : 32;
2609
2610     for (j = 0; j < ins->operands; j++) {
2611         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2612             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2613             /*
2614              * mem_offs sizes must match the address size; if not,
2615              * strip the MEM_OFFS bit and match only EA instructions
2616              */
2617             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2618         }
2619     }
2620 }