assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 wwl lpp
  96  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  97  *                 [l1]  ll = 1 for L = 1 (.256)
  98  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  99  *
 100  *                 [w0]  ww = 0 for W = 0
 101  *                 [w1 ] ww = 1 for W = 1
 102  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 103  *                 [ww]  ww = 3 for W used as REX.W
 104  *
 105  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 106  *
 107  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 108  *                 which is to be extended to the operand size.
 109  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 110  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 111  * \312          - (disassembler only) invalid with non-default address size.
 112  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 113  * \314          - (disassembler only) invalid with REX.B
 114  * \315          - (disassembler only) invalid with REX.X
 115  * \316          - (disassembler only) invalid with REX.R
 116  * \317          - (disassembler only) invalid with REX.W
 117  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 118  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 119  * \322          - indicates that this instruction is only valid when the
 120  *                 operand size is the default (instruction to disassembler,
 121  *                 generates no code in the assembler)
 122  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 123  * \324          - indicates 64-bit operand size requiring REX prefix.
 124  * \325          - instruction which always uses spl/bpl/sil/dil
 125  * \330          - a literal byte follows in the code stream, to be added
 126  *                 to the condition code value of the instruction.
 127  * \331          - instruction not valid with REP prefix.  Hint for
 128  *                 disassembler only; for SSE instructions.
 129  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 130  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 131  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 132  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  133  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  134  * \337          - force a REPNE prefix (0xF2) even if not specified.
 135  *                 \336-\337 are still listed as prefixes in the disassembler.
 136  * \340          - reserve <operand 0> bytes of uninitialized storage.
 137  *                 Operand 0 had better be a segmentless constant.
 138  * \341          - this instruction needs a WAIT "prefix"
 139  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 140  *                 (POP is never used for CS) depending on operand 0
 141  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 142  *                 on operand 0
 143  * \360          - no SSE prefix (== \364\331)
 144  * \361          - 66 SSE prefix (== \366\331)
 145  * \362          - F2 SSE prefix (== \364\332)
 146  * \363          - F3 SSE prefix (== \364\333)
 147  * \364          - operand-size prefix (0x66) not permitted
 148  * \365          - address-size prefix (0x67) not permitted
 149  * \366          - operand-size prefix (0x66) used as opcode extension
 150  * \367          - address-size prefix (0x67) used as opcode extension
 151  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 152  *                 370 is used for Jcc, 371 is used for JMP.
 153  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 154  *                 used for conditional jump over longer jump
 155  */
 156
 157 #include "compiler.h"
 158
 159 #include <stdio.h>
 160 #include <string.h>
 161 #include <inttypes.h>
 162
 163 #include "nasm.h"
 164 #include "nasmlib.h"
 165 #include "assemble.h"
 166 #include "insns.h"
 167 #include "tables.h"
 168
 169 enum match_result {
 170     /*
 171      * Matching errors.  These should be sorted so that more specific
 172      * errors come later in the sequence.
 173      */
 174     MERR_INVALOP,
 175     MERR_OPSIZEMISSING,
 176     MERR_OPSIZEMISMATCH,
 177     MERR_BADCPU,
 178     MERR_BADMODE,
 179     /*
 180      * Matching success; the conditional ones first
 181      */
 182     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 183     MOK_GOOD    /* Matching unconditionally OK */
 184 };
 185
 186 typedef struct {
 187     int sib_present;                 /* is a SIB byte necessary? */
 188     int bytes;                       /* # of bytes of offset needed */
 189     int size;                        /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 191 } ea;
 192
 193 static uint32_t cpu;            /* cpu level received from nasm.c */
 194 static efunc errfunc;
 195 static struct ofmt *outfmt;
 196 static ListGen *list;
 197
 198 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 199 static void gencode(int32_t segment, int64_t offset, int bits,
 200                     insn * ins, const struct itemplate *temp,
 201                     int64_t insn_end);
 202 static enum match_result find_match(const struct itemplate **tempp,
 203                                     insn *instruction,
 204                                     int32_t segment, int64_t offset, int bits);
 205 static enum match_result matches(const struct itemplate *, insn *, int bits);
 206 static opflags_t regflag(const operand *);
 207 static int32_t regval(const operand *);
 208 static int rexflags(int, opflags_t, int);
 209 static int op_rexflags(const operand *, int);
 210 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 211 static void add_asp(insn *, int);
 212
 213 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 214 {
 215     return ins->prefixes[pos] == prefix;
 216 }
 217
 218 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 219 {
 220     if (ins->prefixes[pos])
 221         errfunc(ERR_NONFATAL, "invalid %s prefix",
 222                 prefix_name(ins->prefixes[pos]));
 223 }
 224
 225 static const char *size_name(int size)
 226 {
 227     switch (size) {
 228     case 1:
 229         return "byte";
 230     case 2:
 231         return "word";
 232     case 4:
 233         return "dword";
 234     case 8:
 235         return "qword";
 236     case 10:
 237         return "tword";
 238     case 16:
 239         return "oword";
 240     case 32:
 241         return "yword";
 242     default:
 243         return "???";
 244     }
 245 }
 246
 247 static void warn_overflow(int pass, int size)
 248 {
 249     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 250             "%s data exceeds bounds", size_name(size));
 251 }
 252
 253 static void warn_overflow_const(int64_t data, int size)
 254 {
 255     if (overflow_general(data, size))
 256         warn_overflow(ERR_PASS1, size);
 257 }
 258
 259 static void warn_overflow_opd(const struct operand *o, int size)
 260 {
 261     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 262         if (overflow_general(o->offset, size))
 263             warn_overflow(ERR_PASS2, size);
 264     }
 265 }
 266
 267 /*
 268  * This routine wrappers the real output format's output routine,
 269  * in order to pass a copy of the data off to the listing file
 270  * generator at the same time.
 271  */
 272 static void out(int64_t offset, int32_t segto, const void *data,
 273                 enum out_type type, uint64_t size,
 274                 int32_t segment, int32_t wrt)
 275 {
 276     static int32_t lineno = 0;     /* static!!! */
 277     static char *lnfname = NULL;
 278     uint8_t p[8];
 279
 280     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 281         /*
 282          * This is a non-relocated address, and we're going to
 283          * convert it into RAWDATA format.
 284          */
 285         uint8_t *q = p;
 286
 287         if (size > 8) {
 288             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 289             return;
 290         }
 291
 292         WRITEADDR(q, *(int64_t *)data, size);
 293         data = p;
 294         type = OUT_RAWDATA;
 295     }
 296
 297     list->output(offset, data, type, size);
 298
 299     /*
 300      * this call to src_get determines when we call the
 301      * debug-format-specific "linenum" function
 302      * it updates lineno and lnfname to the current values
 303      * returning 0 if "same as last time", -2 if lnfname
 304      * changed, and the amount by which lineno changed,
 305      * if it did. thus, these variables must be static
 306      */
 307
 308     if (src_get(&lineno, &lnfname))
 309         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 310
 311     outfmt->output(segto, data, type, size, segment, wrt);
 312 }
 313
 314 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 315                      insn * ins, const uint8_t *code)
 316 {
 317     int64_t isize;
 318     uint8_t c = code[0];
 319
 320     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 321         return false;
 322     if (!optimizing)
 323         return false;
 324     if (optimizing < 0 && c == 0371)
 325         return false;
 326
 327     isize = calcsize(segment, offset, bits, ins, code);
 328
 329     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 330         /* Be optimistic in pass 1 */
 331         return true;
 332
 333     if (ins->oprs[0].segment != segment)
 334         return false;
 335
 336     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 337     return (isize >= -128 && isize <= 127); /* is it byte size? */
 338 }
 339
 340 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 341                  insn * instruction, struct ofmt *output, efunc error,
 342                  ListGen * listgen)
 343 {
 344     const struct itemplate *temp;
 345     int j;
 346     enum match_result m;
 347     int64_t insn_end;
 348     int32_t itimes;
 349     int64_t start = offset;
 350     int64_t wsize;              /* size for DB etc. */
 351
 352     errfunc = error;            /* to pass to other functions */
 353     cpu = cp;
 354     outfmt = output;            /* likewise */
 355     list = listgen;             /* and again */
 356
 357     wsize = idata_bytes(instruction->opcode);
 358     if (wsize == -1)
 359         return 0;
 360
 361     if (wsize) {
 362         extop *e;
 363         int32_t t = instruction->times;
 364         if (t < 0)
 365             errfunc(ERR_PANIC,
 366                     "instruction->times < 0 (%ld) in assemble()", t);
 367
 368         while (t--) {           /* repeat TIMES times */
 369             list_for_each(e, instruction->eops) {
 370                 if (e->type == EOT_DB_NUMBER) {
 371                     if (wsize > 8) {
 372                         errfunc(ERR_NONFATAL,
 373                                 "integer supplied to a DT, DO or DY"
 374                                 " instruction");
 375                     } else {
 376                         out(offset, segment, &e->offset,
 377                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 378                         offset += wsize;
 379                     }
 380                 } else if (e->type == EOT_DB_STRING ||
 381                            e->type == EOT_DB_STRING_FREE) {
 382                     int align;
 383
 384                     out(offset, segment, e->stringval,
 385                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 386                     align = e->stringlen % wsize;
 387
 388                     if (align) {
 389                         align = wsize - align;
 390                         out(offset, segment, zero_buffer,
 391                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 392                     }
 393                     offset += e->stringlen + align;
 394                 }
 395             }
 396             if (t > 0 && t == instruction->times - 1) {
 397                 /*
 398                  * Dummy call to list->output to give the offset to the
 399                  * listing module.
 400                  */
 401                 list->output(offset, NULL, OUT_RAWDATA, 0);
 402                 list->uplevel(LIST_TIMES);
 403             }
 404         }
 405         if (instruction->times > 1)
 406             list->downlevel(LIST_TIMES);
 407         return offset - start;
 408     }
 409
 410     if (instruction->opcode == I_INCBIN) {
 411         const char *fname = instruction->eops->stringval;
 412         FILE *fp;
 413
 414         fp = fopen(fname, "rb");
 415         if (!fp) {
 416             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 417                   fname);
 418         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 419             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 420                   fname);
 421         } else {
 422             static char buf[4096];
 423             size_t t = instruction->times;
 424             size_t base = 0;
 425             size_t len;
 426
 427             len = ftell(fp);
 428             if (instruction->eops->next) {
 429                 base = instruction->eops->next->offset;
 430                 len -= base;
 431                 if (instruction->eops->next->next &&
 432                     len > (size_t)instruction->eops->next->next->offset)
 433                     len = (size_t)instruction->eops->next->next->offset;
 434             }
 435             /*
 436              * Dummy call to list->output to give the offset to the
 437              * listing module.
 438              */
 439             list->output(offset, NULL, OUT_RAWDATA, 0);
 440             list->uplevel(LIST_INCBIN);
 441             while (t--) {
 442                 size_t l;
 443
 444                 fseek(fp, base, SEEK_SET);
 445                 l = len;
 446                 while (l > 0) {
 447                     int32_t m;
 448                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 449                     if (!m) {
 450                         /*
 451                          * This shouldn't happen unless the file
 452                          * actually changes while we are reading
 453                          * it.
 454                          */
 455                         error(ERR_NONFATAL,
 456                               "`incbin': unexpected EOF while"
 457                               " reading file `%s'", fname);
 458                         t = 0;  /* Try to exit cleanly */
 459                         break;
 460                     }
 461                     out(offset, segment, buf, OUT_RAWDATA, m,
 462                         NO_SEG, NO_SEG);
 463                     l -= m;
 464                 }
 465             }
 466             list->downlevel(LIST_INCBIN);
 467             if (instruction->times > 1) {
 468                 /*
 469                  * Dummy call to list->output to give the offset to the
 470                  * listing module.
 471                  */
 472                 list->output(offset, NULL, OUT_RAWDATA, 0);
 473                 list->uplevel(LIST_TIMES);
 474                 list->downlevel(LIST_TIMES);
 475             }
 476             fclose(fp);
 477             return instruction->times * len;
 478         }
 479         return 0;               /* if we're here, there's an error */
 480     }
 481
 482     /* Check to see if we need an address-size prefix */
 483     add_asp(instruction, bits);
 484
 485     m = find_match(&temp, instruction, segment, offset, bits);
 486
 487     if (m == MOK_GOOD) {
 488         /* Matches! */
 489         int64_t insn_size = calcsize(segment, offset, bits,
 490                                      instruction, temp->code);
 491         itimes = instruction->times;
 492         if (insn_size < 0)  /* shouldn't be, on pass two */
 493             error(ERR_PANIC, "errors made it through from pass one");
 494         else
 495             while (itimes--) {
 496                 for (j = 0; j < MAXPREFIX; j++) {
 497                     uint8_t c = 0;
 498                     switch (instruction->prefixes[j]) {
 499                     case P_WAIT:
 500                         c = 0x9B;
 501                         break;
 502                     case P_LOCK:
 503                         c = 0xF0;
 504                         break;
 505                     case P_REPNE:
 506                     case P_REPNZ:
 507                         c = 0xF2;
 508                         break;
 509                     case P_REPE:
 510                     case P_REPZ:
 511                     case P_REP:
 512                         c = 0xF3;
 513                         break;
 514                     case R_CS:
 515                         if (bits == 64) {
 516                             error(ERR_WARNING | ERR_PASS2,
 517                                   "cs segment base generated, but will be ignored in 64-bit mode");
 518                         }
 519                         c = 0x2E;
 520                         break;
 521                     case R_DS:
 522                         if (bits == 64) {
 523                             error(ERR_WARNING | ERR_PASS2,
 524                                   "ds segment base generated, but will be ignored in 64-bit mode");
 525                         }
 526                         c = 0x3E;
 527                         break;
 528                     case R_ES:
 529                         if (bits == 64) {
 530                             error(ERR_WARNING | ERR_PASS2,
 531                                   "es segment base generated, but will be ignored in 64-bit mode");
 532                         }
 533                         c = 0x26;
 534                         break;
 535                     case R_FS:
 536                         c = 0x64;
 537                         break;
 538                     case R_GS:
 539                         c = 0x65;
 540                         break;
 541                     case R_SS:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "ss segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x36;
 547                         break;
 548                     case R_SEGR6:
 549                     case R_SEGR7:
 550                         error(ERR_NONFATAL,
 551                               "segr6 and segr7 cannot be used as prefixes");
 552                         break;
 553                     case P_A16:
 554                         if (bits == 64) {
 555                             error(ERR_NONFATAL,
 556                                   "16-bit addressing is not supported "
 557                                   "in 64-bit mode");
 558                         } else if (bits != 16)
 559                             c = 0x67;
 560                         break;
 561                     case P_A32:
 562                         if (bits != 32)
 563                             c = 0x67;
 564                         break;
 565                     case P_A64:
 566                         if (bits != 64) {
 567                             error(ERR_NONFATAL,
 568                                   "64-bit addressing is only supported "
 569                                   "in 64-bit mode");
 570                         }
 571                         break;
 572                     case P_ASP:
 573                         c = 0x67;
 574                         break;
 575                     case P_O16:
 576                         if (bits != 16)
 577                             c = 0x66;
 578                         break;
 579                     case P_O32:
 580                         if (bits == 16)
 581                             c = 0x66;
 582                         break;
 583                     case P_O64:
 584                         /* REX.W */
 585                         break;
 586                     case P_OSP:
 587                         c = 0x66;
 588                         break;
 589                     case P_none:
 590                         break;
 591                     default:
 592                         error(ERR_PANIC, "invalid instruction prefix");
 593                     }
 594                     if (c != 0) {
 595                         out(offset, segment, &c, OUT_RAWDATA, 1,
 596                             NO_SEG, NO_SEG);
 597                         offset++;
 598                     }
 599                 }
 600                 insn_end = offset + insn_size;
 601                 gencode(segment, offset, bits, instruction,
 602                         temp, insn_end);
 603                 offset += insn_size;
 604                 if (itimes > 0 && itimes == instruction->times - 1) {
 605                     /*
 606                      * Dummy call to list->output to give the offset to the
 607                      * listing module.
 608                      */
 609                     list->output(offset, NULL, OUT_RAWDATA, 0);
 610                     list->uplevel(LIST_TIMES);
 611                 }
 612             }
 613         if (instruction->times > 1)
 614             list->downlevel(LIST_TIMES);
 615         return offset - start;
 616     } else {
 617         /* No match */
 618         switch (m) {
 619         case MERR_OPSIZEMISSING:
 620             error(ERR_NONFATAL, "operation size not specified");
 621             break;
 622         case MERR_OPSIZEMISMATCH:
 623             error(ERR_NONFATAL, "mismatch in operand sizes");
 624             break;
 625         case MERR_BADCPU:
 626             error(ERR_NONFATAL, "no instruction for this cpu level");
 627             break;
 628         case MERR_BADMODE:
 629             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 630                   bits);
 631             break;
 632         default:
 633             error(ERR_NONFATAL,
 634                   "invalid combination of opcode and operands");
 635             break;
 636         }
 637     }
 638     return 0;
 639 }
 640
 641 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 642                   insn * instruction, efunc error)
 643 {
 644     const struct itemplate *temp;
 645     enum match_result m;
 646
 647     errfunc = error;            /* to pass to other functions */
 648     cpu = cp;
 649
 650     if (instruction->opcode == I_none)
 651         return 0;
 652
 653     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 654         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 655         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 656         instruction->opcode == I_DY) {
 657         extop *e;
 658         int32_t isize, osize, wsize;
 659
 660         isize = 0;
 661         wsize = idata_bytes(instruction->opcode);
 662
 663         list_for_each(e, instruction->eops) {
 664             int32_t align;
 665
 666             osize = 0;
 667             if (e->type == EOT_DB_NUMBER) {
 668                 osize = 1;
 669                 warn_overflow_const(e->offset, wsize);
 670             } else if (e->type == EOT_DB_STRING ||
 671                        e->type == EOT_DB_STRING_FREE)
 672                 osize = e->stringlen;
 673
 674             align = (-osize) % wsize;
 675             if (align < 0)
 676                 align += wsize;
 677             isize += osize + align;
 678         }
 679         return isize * instruction->times;
 680     }
 681
 682     if (instruction->opcode == I_INCBIN) {
 683         const char *fname = instruction->eops->stringval;
 684         FILE *fp;
 685         int64_t val = 0;
 686         size_t len;
 687
 688         fp = fopen(fname, "rb");
 689         if (!fp)
 690             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 691                   fname);
 692         else if (fseek(fp, 0L, SEEK_END) < 0)
 693             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 694                   fname);
 695         else {
 696             len = ftell(fp);
 697             if (instruction->eops->next) {
 698                 len -= instruction->eops->next->offset;
 699                 if (instruction->eops->next->next &&
 700                     len > (size_t)instruction->eops->next->next->offset) {
 701                     len = (size_t)instruction->eops->next->next->offset;
 702                 }
 703             }
 704             val = instruction->times * len;
 705         }
 706         if (fp)
 707             fclose(fp);
 708         return val;
 709     }
 710
 711     /* Check to see if we need an address-size prefix */
 712     add_asp(instruction, bits);
 713
 714     m = find_match(&temp, instruction, segment, offset, bits);
 715     if (m == MOK_GOOD) {
 716         /* we've matched an instruction. */
 717         int64_t isize;
 718         const uint8_t *codes = temp->code;
 719         int j;
 720
 721         isize = calcsize(segment, offset, bits, instruction, codes);
 722         if (isize < 0)
 723             return -1;
 724         for (j = 0; j < MAXPREFIX; j++) {
 725             switch (instruction->prefixes[j]) {
 726             case P_A16:
 727                 if (bits != 16)
 728                     isize++;
 729                 break;
 730             case P_A32:
 731                 if (bits != 32)
 732                     isize++;
 733                 break;
 734             case P_O16:
 735                 if (bits != 16)
 736                     isize++;
 737                 break;
 738             case P_O32:
 739                 if (bits == 16)
 740                     isize++;
 741                 break;
 742             case P_A64:
 743             case P_O64:
 744             case P_none:
 745                 break;
 746             default:
 747                 isize++;
 748                 break;
 749             }
 750         }
 751         return isize * instruction->times;
 752     } else {
 753         return -1;                  /* didn't match any instruction */
 754     }
 755 }
 756
 757 static bool possible_sbyte(operand *o)
 758 {
 759     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 760         !(o->opflags & OPFLAG_UNKNOWN) &&
 761         optimizing >= 0 && !(o->type & STRICT);
 762 }
 763
 764 /* check that opn[op]  is a signed byte of size 16 or 32 */
 765 static bool is_sbyte16(operand *o)
 766 {
 767     int16_t v;
 768
 769     if (!possible_sbyte(o))
 770         return false;
 771
 772     v = o->offset;
 773     return v >= -128 && v <= 127;
 774 }
 775
 776 static bool is_sbyte32(operand *o)
 777 {
 778     int32_t v;
 779
 780     if (!possible_sbyte(o))
 781         return false;
 782
 783     v = o->offset;
 784     return v >= -128 && v <= 127;
 785 }
 786
 787 /* Common construct */
 788 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 789
 790 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 791                         insn * ins, const uint8_t *codes)
 792 {
 793     int64_t length = 0;
 794     uint8_t c;
 795     int rex_mask = ~0;
 796     int op1, op2;
 797     struct operand *opx;
 798     uint8_t opex = 0;
 799
 800     ins->rex = 0;               /* Ensure REX is reset */
 801
 802     if (ins->prefixes[PPS_OSIZE] == P_O64)
 803         ins->rex |= REX_W;
 804
 805     (void)segment;              /* Don't warn that this parameter is unused */
 806     (void)offset;               /* Don't warn that this parameter is unused */
 807
 808     while (*codes) {
 809         c = *codes++;
 810         op1 = (c & 3) + ((opex & 1) << 2);
 811         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 812         opx = &ins->oprs[op1];
 813         opex = 0;               /* For the next iteration */
 814
 815         switch (c) {
 816         case 01:
 817         case 02:
 818         case 03:
 819         case 04:
 820             codes += c, length += c;
 821             break;
 822
 823         case 05:
 824         case 06:
 825         case 07:
 826             opex = c;
 827             break;
 828
 829         case4(010):
 830             ins->rex |=
 831                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 832             codes++, length++;
 833             break;
 834
 835         case4(014):
 836         case4(020):
 837         case4(024):
 838             length++;
 839             break;
 840
 841         case4(030):
 842             length += 2;
 843             break;
 844
 845         case4(034):
 846             if (opx->type & (BITS16 | BITS32 | BITS64))
 847                 length += (opx->type & BITS16) ? 2 : 4;
 848             else
 849                 length += (bits == 16) ? 2 : 4;
 850             break;
 851
 852         case4(040):
 853             length += 4;
 854             break;
 855
 856         case4(044):
 857             length += ins->addr_size >> 3;
 858             break;
 859
 860         case4(050):
 861             length++;
 862             break;
 863
 864         case4(054):
 865             length += 8; /* MOV reg64/imm */
 866             break;
 867
 868         case4(060):
 869             length += 2;
 870             break;
 871
 872         case4(064):
 873             if (opx->type & (BITS16 | BITS32 | BITS64))
 874                 length += (opx->type & BITS16) ? 2 : 4;
 875             else
 876                 length += (bits == 16) ? 2 : 4;
 877             break;
 878
 879         case4(070):
 880             length += 4;
 881             break;
 882
 883         case4(074):
 884             length += 2;
 885             break;
 886
 887         case4(0140):
 888             length += is_sbyte16(opx) ? 1 : 2;
 889             break;
 890
 891         case4(0144):
 892             codes++;
 893             length++;
 894             break;
 895
 896         case4(0150):
 897             length += is_sbyte32(opx) ? 1 : 4;
 898             break;
 899
 900         case4(0154):
 901             codes++;
 902             length++;
 903             break;
 904
 905         case4(0160):
 906             length++;
 907             ins->rex |= REX_D;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case4(0164):
 912             length++;
 913             ins->rex |= REX_D|REX_OC;
 914             ins->drexdst = regval(opx);
 915             break;
 916
 917         case 0171:
 918             break;
 919
 920         case 0172:
 921         case 0173:
 922         case 0174:
 923             codes++;
 924             length++;
 925             break;
 926
 927         case4(0250):
 928             length += is_sbyte32(opx) ? 1 : 4;
 929             break;
 930
 931         case4(0254):
 932             length += 4;
 933             break;
 934
 935         case4(0260):
 936             ins->rex |= REX_V;
 937             ins->drexdst = regval(opx);
 938             ins->vex_cm = *codes++;
 939             ins->vex_wlp = *codes++;
 940             break;
 941
 942         case 0270:
 943             ins->rex |= REX_V;
 944             ins->drexdst = 0;
 945             ins->vex_cm = *codes++;
 946             ins->vex_wlp = *codes++;
 947             break;
 948
 949         case4(0274):
 950             length++;
 951             break;
 952
 953         case4(0300):
 954             break;
 955
 956         case 0310:
 957             if (bits == 64)
 958                 return -1;
 959             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 960             break;
 961
 962         case 0311:
 963             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 964             break;
 965
 966         case 0312:
 967             break;
 968
 969         case 0313:
 970             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 971                 has_prefix(ins, PPS_ASIZE, P_A32))
 972                 return -1;
 973             break;
 974
 975         case4(0314):
 976             break;
 977
 978         case 0320:
 979         {
 980             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
 981             if (pfx == P_O16)
 982                 break;
 983             if (pfx != P_none)
 984                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
 985             else
 986                 ins->prefixes[PPS_OSIZE] = P_O16;
 987             break;
 988         }
 989
 990         case 0321:
 991         {
 992             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
 993             if (pfx == P_O32)
 994                 break;
 995             if (pfx != P_none)
 996                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
 997             else
 998                 ins->prefixes[PPS_OSIZE] = P_O32;
 999             break;
1000         }
1001
1002         case 0322:
1003             break;
1004
1005         case 0323:
1006             rex_mask &= ~REX_W;
1007             break;
1008
1009         case 0324:
1010             ins->rex |= REX_W;
1011             break;
1012
1013         case 0325:
1014             ins->rex |= REX_NH;
1015             break;
1016
1017         case 0330:
1018             codes++, length++;
1019             break;
1020
1021         case 0331:
1022             break;
1023
1024         case 0332:
1025         case 0333:
1026             length++;
1027             break;
1028
1029         case 0334:
1030             ins->rex |= REX_L;
1031             break;
1032
1033         case 0335:
1034             break;
1035
1036         case 0336:
1037             if (!ins->prefixes[PPS_LREP])
1038                 ins->prefixes[PPS_LREP] = P_REP;
1039             break;
1040
1041         case 0337:
1042             if (!ins->prefixes[PPS_LREP])
1043                 ins->prefixes[PPS_LREP] = P_REPNE;
1044             break;
1045
1046         case 0340:
1047             if (ins->oprs[0].segment != NO_SEG)
1048                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1049                         " quantity of BSS space");
1050             else
1051                 length += ins->oprs[0].offset;
1052             break;
1053
1054         case 0341:
1055             if (!ins->prefixes[PPS_WAIT])
1056                 ins->prefixes[PPS_WAIT] = P_WAIT;
1057             break;
1058
1059         case4(0344):
1060             length++;
1061             break;
1062
1063         case 0360:
1064             break;
1065
1066         case 0361:
1067         case 0362:
1068         case 0363:
1069             length++;
1070             break;
1071
1072         case 0364:
1073         case 0365:
1074             break;
1075
1076         case 0366:
1077         case 0367:
1078             length++;
1079             break;
1080
1081         case 0370:
1082         case 0371:
1083         case 0372:
1084             break;
1085
1086         case 0373:
1087             length++;
1088             break;
1089
1090         case4(0100):
1091         case4(0110):
1092         case4(0120):
1093         case4(0130):
1094         case4(0200):
1095         case4(0204):
1096         case4(0210):
1097         case4(0214):
1098         case4(0220):
1099         case4(0224):
1100         case4(0230):
1101         case4(0234):
1102             {
1103                 ea ea_data;
1104                 int rfield;
1105                 opflags_t rflags;
1106                 struct operand *opy = &ins->oprs[op2];
1107
1108                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1109
1110                 if (c <= 0177) {
1111                     /* pick rfield from operand b (opx) */
1112                     rflags = regflag(opx);
1113                     rfield = nasm_regvals[opx->basereg];
1114                 } else {
1115                     rflags = 0;
1116                     rfield = c & 7;
1117                 }
1118                 if (!process_ea(opy, &ea_data, bits,
1119                                 ins->addr_size, rfield, rflags)) {
1120                     errfunc(ERR_NONFATAL, "invalid effective address");
1121                     return -1;
1122                 } else {
1123                     ins->rex |= ea_data.rex;
1124                     length += ea_data.size;
1125                 }
1126             }
1127             break;
1128
1129         default:
1130             errfunc(ERR_PANIC, "internal instruction table corrupt"
1131                     ": instruction code \\%o (0x%02X) given", c, c);
1132             break;
1133         }
1134     }
1135
1136     ins->rex &= rex_mask;
1137
1138     if (ins->rex & REX_NH) {
1139         if (ins->rex & REX_H) {
1140             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1141             return -1;
1142         }
1143         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1144     }
1145
1146     if (ins->rex & REX_V) {
1147         int bad32 = REX_R|REX_W|REX_X|REX_B;
1148
1149         if (ins->rex & REX_H) {
1150             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1151             return -1;
1152         }
1153         switch (ins->vex_wlp & 060) {
1154         case 000:
1155         case 040:
1156             ins->rex &= ~REX_W;
1157             break;
1158         case 020:
1159             ins->rex |= REX_W;
1160             bad32 &= ~REX_W;
1161             break;
1162         case 060:
1163             /* Follow REX_W */
1164             break;
1165         }
1166
1167         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1168             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1169             return -1;
1170         }
1171         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1172             length += 3;
1173         else
1174             length += 2;
1175     } else if (ins->rex & REX_D) {
1176         if (ins->rex & REX_H) {
1177             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1178             return -1;
1179         }
1180         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1181                            ins->drexdst > 7)) {
1182             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1183             return -1;
1184         }
1185         length++;
1186     } else if (ins->rex & REX_REAL) {
1187         if (ins->rex & REX_H) {
1188             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1189             return -1;
1190         } else if (bits == 64) {
1191             length++;
1192         } else if ((ins->rex & REX_L) &&
1193                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1194                    cpu >= IF_X86_64) {
1195             /* LOCK-as-REX.R */
1196             assert_no_prefix(ins, PPS_LREP);
1197             length++;
1198         } else {
1199             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1200             return -1;
1201         }
1202     }
1203
1204     return length;
1205 }
1206
/*
 * Emit the REX prefix byte, if one is due, and advance `offset' past it.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and `segment'.
 * A byte is written only in 64-bit mode, when some REX_REAL bit is set
 * in ins->rex and the instruction is neither DREX nor VEX (REX_D/REX_V).
 * The byte is the REX_REAL bits of ins->rex with REX_P OR'ed in;
 * afterwards ins->rex is cleared so the prefix is not emitted twice.
 *
 * Wrapped in do { } while (0) so that "EMIT_REX();" is one statement
 * and can sit safely in an unbraced if/else.  The value is staged in a
 * byte-sized local so the single byte handed to out() does not depend
 * on the width or host byte order of ins->rex.
 */
#define EMIT_REX()                                                          \
    do {                                                                    \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&         \
            (bits == 64)) {                                                 \
            uint8_t emit_rex_byte = (ins->rex & REX_REAL)|REX_P;            \
            out(offset, segment, &emit_rex_byte, OUT_RAWDATA, 1,            \
                NO_SEG, NO_SEG);                                            \
            ins->rex = 0;                                                   \
            offset += 1;                                                    \
        }                                                                   \
    } while (0)
1214
1215 static void gencode(int32_t segment, int64_t offset, int bits,
1216                     insn * ins, const struct itemplate *temp,
1217                     int64_t insn_end)
1218 {
1219     static char condval[] = {   /* conditional opcodes */
1220         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1221         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1222         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1223     };
1224     uint8_t c;
1225     uint8_t bytes[4];
1226     int64_t size;
1227     int64_t data;
1228     int op1, op2;
1229     struct operand *opx;
1230     const uint8_t *codes = temp->code;
1231     uint8_t opex = 0;
1232
1233     while (*codes) {
1234         c = *codes++;
1235         op1 = (c & 3) + ((opex & 1) << 2);
1236         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1237         opx = &ins->oprs[op1];
1238         opex = 0;                /* For the next iteration */
1239
1240         switch (c) {
1241         case 01:
1242         case 02:
1243         case 03:
1244         case 04:
1245             EMIT_REX();
1246             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1247             codes += c;
1248             offset += c;
1249             break;
1250
1251         case 05:
1252         case 06:
1253         case 07:
1254             opex = c;
1255             break;
1256
1257         case4(010):
1258             EMIT_REX();
1259             bytes[0] = *codes++ + (regval(opx) & 7);
1260             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1261             offset += 1;
1262             break;
1263
1264         case4(014):
1265             /*
1266              * The test for BITS8 and SBYTE here is intended to avoid
1267              * warning on optimizer actions due to SBYTE, while still
1268              * warn on explicit BYTE directives.  Also warn, obviously,
1269              * if the optimizer isn't enabled.
1270              */
1271             if (((opx->type & BITS8) ||
1272                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1273                 (opx->offset < -128 || opx->offset > 127)) {
1274                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1275                         "signed byte value exceeds bounds");
1276             }
1277             if (opx->segment != NO_SEG) {
1278                 data = opx->offset;
1279                 out(offset, segment, &data, OUT_ADDRESS, 1,
1280                     opx->segment, opx->wrt);
1281             } else {
1282                 bytes[0] = opx->offset;
1283                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1284                     NO_SEG);
1285             }
1286             offset += 1;
1287             break;
1288
1289         case4(020):
1290             if (opx->offset < -256 || opx->offset > 255) {
1291                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1292                         "byte value exceeds bounds");
1293             }
1294             if (opx->segment != NO_SEG) {
1295                 data = opx->offset;
1296                 out(offset, segment, &data, OUT_ADDRESS, 1,
1297                     opx->segment, opx->wrt);
1298             } else {
1299                 bytes[0] = opx->offset;
1300                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1301                     NO_SEG);
1302             }
1303             offset += 1;
1304             break;
1305
1306         case4(024):
1307             if (opx->offset < 0 || opx->offset > 255)
1308                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1309                         "unsigned byte value exceeds bounds");
1310             if (opx->segment != NO_SEG) {
1311                 data = opx->offset;
1312                 out(offset, segment, &data, OUT_ADDRESS, 1,
1313                     opx->segment, opx->wrt);
1314             } else {
1315                 bytes[0] = opx->offset;
1316                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1317                     NO_SEG);
1318             }
1319             offset += 1;
1320             break;
1321
1322         case4(030):
1323             warn_overflow_opd(opx, 2);
1324             data = opx->offset;
1325             out(offset, segment, &data, OUT_ADDRESS, 2,
1326                 opx->segment, opx->wrt);
1327             offset += 2;
1328             break;
1329
1330         case4(034):
1331             if (opx->type & (BITS16 | BITS32))
1332                 size = (opx->type & BITS16) ? 2 : 4;
1333             else
1334                 size = (bits == 16) ? 2 : 4;
1335             warn_overflow_opd(opx, size);
1336             data = opx->offset;
1337             out(offset, segment, &data, OUT_ADDRESS, size,
1338                 opx->segment, opx->wrt);
1339             offset += size;
1340             break;
1341
1342         case4(040):
1343             warn_overflow_opd(opx, 4);
1344             data = opx->offset;
1345             out(offset, segment, &data, OUT_ADDRESS, 4,
1346                 opx->segment, opx->wrt);
1347             offset += 4;
1348             break;
1349
1350         case4(044):
1351             data = opx->offset;
1352             size = ins->addr_size >> 3;
1353             warn_overflow_opd(opx, size);
1354             out(offset, segment, &data, OUT_ADDRESS, size,
1355                 opx->segment, opx->wrt);
1356             offset += size;
1357             break;
1358
1359         case4(050):
1360             if (opx->segment != segment) {
1361                 data = opx->offset;
1362                 out(offset, segment, &data,
1363                     OUT_REL1ADR, insn_end - offset,
1364                     opx->segment, opx->wrt);
1365             } else {
1366                 data = opx->offset - insn_end;
1367                 if (data > 127 || data < -128)
1368                     errfunc(ERR_NONFATAL, "short jump is out of range");
1369                 out(offset, segment, &data,
1370                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1371             }
1372             offset += 1;
1373             break;
1374
1375         case4(054):
1376             data = (int64_t)opx->offset;
1377             out(offset, segment, &data, OUT_ADDRESS, 8,
1378                 opx->segment, opx->wrt);
1379             offset += 8;
1380             break;
1381
1382         case4(060):
1383             if (opx->segment != segment) {
1384                 data = opx->offset;
1385                 out(offset, segment, &data,
1386                     OUT_REL2ADR, insn_end - offset,
1387                     opx->segment, opx->wrt);
1388             } else {
1389                 data = opx->offset - insn_end;
1390                 out(offset, segment, &data,
1391                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1392             }
1393             offset += 2;
1394             break;
1395
1396         case4(064):
1397             if (opx->type & (BITS16 | BITS32 | BITS64))
1398                 size = (opx->type & BITS16) ? 2 : 4;
1399             else
1400                 size = (bits == 16) ? 2 : 4;
1401             if (opx->segment != segment) {
1402                 data = opx->offset;
1403                 out(offset, segment, &data,
1404                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1405                     insn_end - offset, opx->segment, opx->wrt);
1406             } else {
1407                 data = opx->offset - insn_end;
1408                 out(offset, segment, &data,
1409                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1410             }
1411             offset += size;
1412             break;
1413
1414         case4(070):
1415             if (opx->segment != segment) {
1416                 data = opx->offset;
1417                 out(offset, segment, &data,
1418                     OUT_REL4ADR, insn_end - offset,
1419                     opx->segment, opx->wrt);
1420             } else {
1421                 data = opx->offset - insn_end;
1422                 out(offset, segment, &data,
1423                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1424             }
1425             offset += 4;
1426             break;
1427
1428         case4(074):
1429             if (opx->segment == NO_SEG)
1430                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1431                         " relocatable");
1432             data = 0;
1433             out(offset, segment, &data, OUT_ADDRESS, 2,
1434                 outfmt->segbase(1 + opx->segment),
1435                 opx->wrt);
1436             offset += 2;
1437             break;
1438
1439         case4(0140):
1440             data = opx->offset;
1441             warn_overflow_opd(opx, 2);
1442             if (is_sbyte16(opx)) {
1443                 bytes[0] = data;
1444                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1445                     NO_SEG);
1446                 offset++;
1447             } else {
1448                 out(offset, segment, &data, OUT_ADDRESS, 2,
1449                     opx->segment, opx->wrt);
1450                 offset += 2;
1451             }
1452             break;
1453
1454         case4(0144):
1455             EMIT_REX();
1456             bytes[0] = *codes++;
1457             if (is_sbyte16(opx))
1458                 bytes[0] |= 2;  /* s-bit */
1459             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1460             offset++;
1461             break;
1462
1463         case4(0150):
1464             data = opx->offset;
1465             warn_overflow_opd(opx, 4);
1466             if (is_sbyte32(opx)) {
1467                 bytes[0] = data;
1468                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1469                     NO_SEG);
1470                 offset++;
1471             } else {
1472                 out(offset, segment, &data, OUT_ADDRESS, 4,
1473                     opx->segment, opx->wrt);
1474                 offset += 4;
1475             }
1476             break;
1477
1478         case4(0154):
1479             EMIT_REX();
1480             bytes[0] = *codes++;
1481             if (is_sbyte32(opx))
1482                 bytes[0] |= 2;  /* s-bit */
1483             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1484             offset++;
1485             break;
1486
1487         case4(0160):
1488         case4(0164):
1489             break;
1490
1491         case 0171:
1492             bytes[0] =
1493                 (ins->drexdst << 4) |
1494                 (ins->rex & REX_OC ? 0x08 : 0) |
1495                 (ins->rex & (REX_R|REX_X|REX_B));
1496             ins->rex = 0;
1497             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1498             offset++;
1499             break;
1500
1501         case 0172:
1502             c = *codes++;
1503             opx = &ins->oprs[c >> 3];
1504             bytes[0] = nasm_regvals[opx->basereg] << 4;
1505             opx = &ins->oprs[c & 7];
1506             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1507                 errfunc(ERR_NONFATAL,
1508                         "non-absolute expression not permitted as argument %d",
1509                         c & 7);
1510             } else {
1511                 if (opx->offset & ~15) {
1512                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1513                             "four-bit argument exceeds bounds");
1514                 }
1515                 bytes[0] |= opx->offset & 15;
1516             }
1517             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1518             offset++;
1519             break;
1520
1521         case 0173:
1522             c = *codes++;
1523             opx = &ins->oprs[c >> 4];
1524             bytes[0] = nasm_regvals[opx->basereg] << 4;
1525             bytes[0] |= c & 15;
1526             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1527             offset++;
1528             break;
1529
1530         case 0174:
1531             c = *codes++;
1532             opx = &ins->oprs[c];
1533             bytes[0] = nasm_regvals[opx->basereg] << 4;
1534             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1535             offset++;
1536             break;
1537
1538         case4(0250):
1539             data = opx->offset;
1540             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1541                 (int32_t)data != (int64_t)data) {
1542                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1543                         "signed dword immediate exceeds bounds");
1544             }
1545             if (is_sbyte32(opx)) {
1546                 bytes[0] = data;
1547                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1548                     NO_SEG);
1549                 offset++;
1550             } else {
1551                 out(offset, segment, &data, OUT_ADDRESS, 4,
1552                     opx->segment, opx->wrt);
1553                 offset += 4;
1554             }
1555             break;
1556
1557         case4(0254):
1558             data = opx->offset;
1559             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1560                 (int32_t)data != (int64_t)data) {
1561                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1562                         "signed dword immediate exceeds bounds");
1563             }
1564             out(offset, segment, &data, OUT_ADDRESS, 4,
1565                 opx->segment, opx->wrt);
1566             offset += 4;
1567             break;
1568
1569         case4(0260):
1570         case 0270:
1571             codes += 2;
1572             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1573                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1574                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1575                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1576                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1577                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1578                 offset += 3;
1579             } else {
1580                 bytes[0] = 0xc5;
1581                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1582                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1583                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1584                 offset += 2;
1585             }
1586             break;
1587
1588         case4(0274):
1589         {
1590             uint64_t uv, um;
1591             int s;
1592
1593             if (ins->rex & REX_W)
1594                 s = 64;
1595             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1596                 s = 16;
1597             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1598                 s = 32;
1599             else
1600                 s = bits;
1601
1602             um = (uint64_t)2 << (s-1);
1603             uv = opx->offset;
1604
1605             if (uv > 127 && uv < (uint64_t)-128 &&
1606                 (uv < um-128 || uv > um-1)) {
1607                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1608                         "signed byte value exceeds bounds");
1609             }
1610             if (opx->segment != NO_SEG) {
1611                 data = uv;
1612                 out(offset, segment, &data, OUT_ADDRESS, 1,
1613                     opx->segment, opx->wrt);
1614             } else {
1615                 bytes[0] = uv;
1616                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1617                     NO_SEG);
1618             }
1619             offset += 1;
1620             break;
1621         }
1622
1623         case4(0300):
1624             break;
1625
1626         case 0310:
1627             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1628                 *bytes = 0x67;
1629                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1630                 offset += 1;
1631             } else
1632                 offset += 0;
1633             break;
1634
1635         case 0311:
1636             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1637                 *bytes = 0x67;
1638                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1639                 offset += 1;
1640             } else
1641                 offset += 0;
1642             break;
1643
1644         case 0312:
1645             break;
1646
1647         case 0313:
1648             ins->rex = 0;
1649             break;
1650
1651         case4(0314):
1652             break;
1653
1654         case 0320:
1655         case 0321:
1656             break;
1657
1658         case 0322:
1659         case 0323:
1660             break;
1661
1662         case 0324:
1663             ins->rex |= REX_W;
1664             break;
1665
1666         case 0325:
1667             break;
1668
1669         case 0330:
1670             *bytes = *codes++ ^ condval[ins->condition];
1671             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1672             offset += 1;
1673             break;
1674
1675         case 0331:
1676             break;
1677
1678         case 0332:
1679         case 0333:
1680             *bytes = c - 0332 + 0xF2;
1681             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1682             offset += 1;
1683             break;
1684
1685         case 0334:
1686             if (ins->rex & REX_R) {
1687                 *bytes = 0xF0;
1688                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1689                 offset += 1;
1690             }
1691             ins->rex &= ~(REX_L|REX_R);
1692             break;
1693
1694         case 0335:
1695             break;
1696
1697         case 0336:
1698         case 0337:
1699             break;
1700
1701         case 0340:
1702             if (ins->oprs[0].segment != NO_SEG)
1703                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1704             else {
1705                 int64_t size = ins->oprs[0].offset;
1706                 if (size > 0)
1707                     out(offset, segment, NULL,
1708                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1709                 offset += size;
1710             }
1711             break;
1712
1713         case 0341:
1714             break;
1715
1716         case 0344:
1717         case 0345:
1718             bytes[0] = c & 1;
1719             switch (ins->oprs[0].basereg) {
1720             case R_CS:
1721                 bytes[0] += 0x0E;
1722                 break;
1723             case R_DS:
1724                 bytes[0] += 0x1E;
1725                 break;
1726             case R_ES:
1727                 bytes[0] += 0x06;
1728                 break;
1729             case R_SS:
1730                 bytes[0] += 0x16;
1731                 break;
1732             default:
1733                 errfunc(ERR_PANIC,
1734                         "bizarre 8086 segment register received");
1735             }
1736             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1737             offset++;
1738             break;
1739
1740         case 0346:
1741         case 0347:
1742             bytes[0] = c & 1;
1743             switch (ins->oprs[0].basereg) {
1744             case R_FS:
1745                 bytes[0] += 0xA0;
1746                 break;
1747             case R_GS:
1748                 bytes[0] += 0xA8;
1749                 break;
1750             default:
1751                 errfunc(ERR_PANIC,
1752                         "bizarre 386 segment register received");
1753             }
1754             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1755             offset++;
1756             break;
1757
1758         case 0360:
1759             break;
1760
1761         case 0361:
1762             bytes[0] = 0x66;
1763             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1764             offset += 1;
1765             break;
1766
1767         case 0362:
1768         case 0363:
1769             bytes[0] = c - 0362 + 0xf2;
1770             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1771             offset += 1;
1772             break;
1773
1774         case 0364:
1775         case 0365:
1776             break;
1777
1778         case 0366:
1779         case 0367:
1780             *bytes = c - 0366 + 0x66;
1781             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1782             offset += 1;
1783             break;
1784
1785         case 0370:
1786         case 0371:
1787         case 0372:
1788             break;
1789
1790         case 0373:
1791             *bytes = bits == 16 ? 3 : 5;
1792             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1793             offset += 1;
1794             break;
1795
1796         case4(0100):
1797         case4(0110):
1798         case4(0120):
1799         case4(0130):
1800         case4(0200):
1801         case4(0204):
1802         case4(0210):
1803         case4(0214):
1804         case4(0220):
1805         case4(0224):
1806         case4(0230):
1807         case4(0234):
1808             {
1809                 ea ea_data;
1810                 int rfield;
1811                 opflags_t rflags;
1812                 uint8_t *p;
1813                 int32_t s;
1814                 enum out_type type;
1815                 struct operand *opy = &ins->oprs[op2];
1816
1817                 if (c <= 0177) {
1818                     /* pick rfield from operand b (opx) */
1819                     rflags = regflag(opx);
1820                     rfield = nasm_regvals[opx->basereg];
1821                 } else {
1822                     /* rfield is constant */
1823                     rflags = 0;
1824                     rfield = c & 7;
1825                 }
1826
1827                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1828                                 rfield, rflags)) {
1829                     errfunc(ERR_NONFATAL, "invalid effective address");
1830                 }
1831
1832
1833                 p = bytes;
1834                 *p++ = ea_data.modrm;
1835                 if (ea_data.sib_present)
1836                     *p++ = ea_data.sib;
1837
1838                 /* DREX suffixes come between the SIB and the displacement */
1839                 if (ins->rex & REX_D) {
1840                     *p++ = (ins->drexdst << 4) |
1841                            (ins->rex & REX_OC ? 0x08 : 0) |
1842                            (ins->rex & (REX_R|REX_X|REX_B));
1843                     ins->rex = 0;
1844                 }
1845
1846                 s = p - bytes;
1847                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1848
1849                 /*
1850                  * Make sure the address gets the right offset in case
1851                  * the line breaks in the .lst file (BR 1197827)
1852                  */
1853                 offset += s;
1854                 s = 0;
1855
1856                 switch (ea_data.bytes) {
1857                 case 0:
1858                     break;
1859                 case 1:
1860                 case 2:
1861                 case 4:
1862                 case 8:
1863                     data = opy->offset;
1864                     s += ea_data.bytes;
1865                     if (ea_data.rip) {
1866                         if (opy->segment == segment) {
1867                             data -= insn_end;
1868                             if (overflow_signed(data, ea_data.bytes))
1869                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1870                             out(offset, segment, &data, OUT_ADDRESS,
1871                                 ea_data.bytes, NO_SEG, NO_SEG);
1872                         } else {
1873                             /* overflow check in output/linker? */
1874                             out(offset, segment, &data,        OUT_REL4ADR,
1875                                 insn_end - offset, opy->segment, opy->wrt);
1876                         }
1877                     } else {
1878                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1879                             signed_bits(opy->offset, ins->addr_size) !=
1880                             signed_bits(opy->offset, ea_data.bytes * 8))
1881                             warn_overflow(ERR_PASS2, ea_data.bytes);
1882
1883                         type = OUT_ADDRESS;
1884                         out(offset, segment, &data, OUT_ADDRESS,
1885                             ea_data.bytes, opy->segment, opy->wrt);
1886                     }
1887                     break;
1888                 default:
1889                     /* Impossible! */
1890                     errfunc(ERR_PANIC,
1891                             "Invalid amount of bytes (%d) for offset?!",
1892                             ea_data.bytes);
1893                     break;
1894                 }
1895                 offset += s;
1896             }
1897             break;
1898
1899         default:
1900             errfunc(ERR_PANIC, "internal instruction table corrupt"
1901                     ": instruction code \\%o (0x%02X) given", c, c);
1902             break;
1903         }
1904     }
1905 }
1906
1907 static opflags_t regflag(const operand * o)
1908 {
1909     if (!is_register(o->basereg))
1910         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1911     return nasm_reg_flags[o->basereg];
1912 }
1913
1914 static int32_t regval(const operand * o)
1915 {
1916     if (!is_register(o->basereg))
1917         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1918     return nasm_regvals[o->basereg];
1919 }
1920
1921 static int op_rexflags(const operand * o, int mask)
1922 {
1923     opflags_t flags;
1924     int val;
1925
1926     if (!is_register(o->basereg))
1927         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1928
1929     flags = nasm_reg_flags[o->basereg];
1930     val = nasm_regvals[o->basereg];
1931
1932     return rexflags(val, flags, mask);
1933 }
1934
1935 static int rexflags(int val, opflags_t flags, int mask)
1936 {
1937     int rex = 0;
1938
1939     if (val >= 8)
1940         rex |= REX_B|REX_X|REX_R;
1941     if (flags & BITS64)
1942         rex |= REX_W;
1943     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1944         rex |= REX_H;
1945     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1946         rex |= REX_P;
1947
1948     return rex & mask;
1949 }
1950
1951 static enum match_result find_match(const struct itemplate **tempp,
1952                                     insn *instruction,
1953                                     int32_t segment, int64_t offset, int bits)
1954 {
1955     const struct itemplate *temp;
1956     enum match_result m, merr;
1957     opflags_t xsizeflags[MAX_OPERANDS];
1958     bool opsizemissing = false;
1959     int i;
1960
1961     for (i = 0; i < instruction->operands; i++)
1962         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1963
1964     merr = MERR_INVALOP;
1965
1966     for (temp = nasm_instructions[instruction->opcode];
1967          temp->opcode != I_none; temp++) {
1968         m = matches(temp, instruction, bits);
1969         if (m == MOK_JUMP) {
1970             if (jmp_match(segment, offset, bits, instruction, temp->code))
1971                 m = MOK_GOOD;
1972             else
1973                 m = MERR_INVALOP;
1974         } else if (m == MERR_OPSIZEMISSING &&
1975                    (temp->flags & IF_SMASK) != IF_SX) {
1976             /*
1977              * Missing operand size and a candidate for fuzzy matching...
1978              */
1979             for (i = 0; i < temp->operands; i++) {
1980                 if ((temp->opd[i] & SAME_AS) == 0)
1981                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1982             }
1983             opsizemissing = true;
1984         }
1985         if (m > merr)
1986             merr = m;
1987         if (merr == MOK_GOOD)
1988             goto done;
1989     }
1990
1991     /* No match, but see if we can get a fuzzy operand size match... */
1992     if (!opsizemissing)
1993         goto done;
1994
1995     for (i = 0; i < instruction->operands; i++) {
1996         /*
1997          * We ignore extrinsic operand sizes on registers, so we should
1998          * never try to fuzzy-match on them.  This also resolves the case
1999          * when we have e.g. "xmmrm128" in two different positions.
2000          */
2001         if (is_class(REGISTER, instruction->oprs[i].type))
2002             continue;
2003
2004         /* This tests if xsizeflags[i] has more than one bit set */
2005         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2006             goto done;                /* No luck */
2007
2008         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2009     }
2010
2011     /* Try matching again... */
2012     for (temp = nasm_instructions[instruction->opcode];
2013          temp->opcode != I_none; temp++) {
2014         m = matches(temp, instruction, bits);
2015         if (m == MOK_JUMP) {
2016             if (jmp_match(segment, offset, bits, instruction, temp->code))
2017                 m = MOK_GOOD;
2018             else
2019                 m = MERR_INVALOP;
2020         }
2021         if (m > merr)
2022             merr = m;
2023         if (merr == MOK_GOOD)
2024             goto done;
2025     }
2026
2027 done:
2028     *tempp = temp;
2029     return merr;
2030 }
2031
2032 static enum match_result matches(const struct itemplate *itemp,
2033                                  insn *instruction, int bits)
2034 {
2035     int i, size[MAX_OPERANDS], asize, oprs;
2036     bool opsizemissing = false;
2037
2038     /*
2039      * Check the opcode
2040      */
2041     if (itemp->opcode != instruction->opcode)
2042         return MERR_INVALOP;
2043
2044     /*
2045      * Count the operands
2046      */
2047     if (itemp->operands != instruction->operands)
2048         return MERR_INVALOP;
2049
2050     /*
2051      * Is it legal?
2052      */
2053     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2054         return MERR_INVALOP;
2055
2056     /*
2057      * Check that no spurious colons or TOs are present
2058      */
2059     for (i = 0; i < itemp->operands; i++)
2060         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2061             return MERR_INVALOP;
2062
2063     /*
2064      * Process size flags
2065      */
2066     switch (itemp->flags & IF_SMASK) {
2067     case IF_SB:
2068         asize = BITS8;
2069         break;
2070     case IF_SW:
2071         asize = BITS16;
2072         break;
2073     case IF_SD:
2074         asize = BITS32;
2075         break;
2076     case IF_SQ:
2077         asize = BITS64;
2078         break;
2079     case IF_SO:
2080         asize = BITS128;
2081         break;
2082     case IF_SY:
2083         asize = BITS256;
2084         break;
2085     case IF_SZ:
2086         switch (bits) {
2087         case 16:
2088             asize = BITS16;
2089             break;
2090         case 32:
2091             asize = BITS32;
2092             break;
2093         case 64:
2094             asize = BITS64;
2095             break;
2096         default:
2097             asize = 0;
2098             break;
2099         }
2100         break;
2101     default:
2102         asize = 0;
2103         break;
2104     }
2105
2106     if (itemp->flags & IF_ARMASK) {
2107         /* S- flags only apply to a specific operand */
2108         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2109         memset(size, 0, sizeof size);
2110         size[i] = asize;
2111     } else {
2112         /* S- flags apply to all operands */
2113         for (i = 0; i < MAX_OPERANDS; i++)
2114             size[i] = asize;
2115     }
2116
2117     /*
2118      * Check that the operand flags all match up,
2119      * it's a bit tricky so lets be verbose:
2120      *
2121      * 1) Find out the size of operand. If instruction
2122      *    doesn't have one specified -- we're trying to
2123      *    guess it either from template (IF_S* flag) or
2124      *    from code bits.
2125      *
2126      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2127      *    (ie the same operand as was specified somewhere in template, and
2128      *    this referred operand index is being achieved via ~SAME_AS)
2129      *    we are to be sure that both registers (in template and instruction)
2130      *    do exactly match.
2131      *
2132      * 3) If template operand do not match the instruction OR
2133      *    template has an operand size specified AND this size differ
2134      *    from which instruction has (perhaps we got it from code bits)
2135      *    we are:
2136      *      a)  Check that only size of instruction and operand is differ
2137      *          other characteristics do match
2138      *      b)  Perhaps it's a register specified in instruction so
2139      *          for such a case we just mark that operand as "size
2140      *          missing" and this will turn on fuzzy operand size
2141      *          logic facility (handled by a caller)
2142      */
2143     for (i = 0; i < itemp->operands; i++) {
2144         opflags_t type = instruction->oprs[i].type;
2145         if (!(type & SIZE_MASK))
2146             type |= size[i];
2147
2148         if (itemp->opd[i] & SAME_AS) {
2149             int j = itemp->opd[i] & ~SAME_AS;
2150             if (type != instruction->oprs[j].type ||
2151                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2152                 return MERR_INVALOP;
2153         } else if (itemp->opd[i] & ~type ||
2154             ((itemp->opd[i] & SIZE_MASK) &&
2155              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2156             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2157                 return MERR_INVALOP;
2158             } else if (!is_class(REGISTER, type)) {
2159                 /*
2160                  * Note: we don't honor extrinsic operand sizes for registers,
2161                  * so "missing operand size" for a register should be
2162                  * considered a wildcard match rather than an error.
2163                  */
2164                 opsizemissing = true;
2165             }
2166         }
2167     }
2168
2169     if (opsizemissing)
2170         return MERR_OPSIZEMISSING;
2171
2172     /*
2173      * Check operand sizes
2174      */
2175     if (itemp->flags & (IF_SM | IF_SM2)) {
2176         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2177         for (i = 0; i < oprs; i++) {
2178             asize = itemp->opd[i] & SIZE_MASK;
2179             if (asize) {
2180                 for (i = 0; i < oprs; i++)
2181                     size[i] = asize;
2182                 break;
2183             }
2184         }
2185     } else {
2186         oprs = itemp->operands;
2187     }
2188
2189     for (i = 0; i < itemp->operands; i++) {
2190         if (!(itemp->opd[i] & SIZE_MASK) &&
2191             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2192             return MERR_OPSIZEMISMATCH;
2193     }
2194
2195     /*
2196      * Check template is okay at the set cpu level
2197      */
2198     if (((itemp->flags & IF_PLEVEL) > cpu))
2199         return MERR_BADCPU;
2200
2201     /*
2202      * Verify the appropriate long mode flag.
2203      */
2204     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2205         return MERR_BADMODE;
2206
2207     /*
2208      * Check if special handling needed for Jumps
2209      */
2210     if ((itemp->code[0] & 0374) == 0370)
2211         return MOK_JUMP;
2212
2213     return MOK_GOOD;
2214 }
2215
/*
 * process_ea() - work out the ModRM/SIB encoding of an effective address.
 *
 * input    - the operand to encode: either a register (register-direct
 *            form) or a memory reference.  It may be modified: in 64-bit
 *            mode an IP_REL request on an absolute address without a
 *            segment is dropped (with a warning) and MEMORY is set.
 * output   - receives rip, sib_present, bytes (size of the displacement),
 *            modrm, sib (only when sib_present) and size.
 *            NOTE: output->rex is only ever OR-ed into by this function
 *            and never cleared, so the caller has to initialise it if
 *            it intends to read it afterwards.
 * bits     - current assembly mode; only compared against 64 here.
 * addrbits - address size in effect (16, 32 or 64).
 * rfield   - value placed in bits 5:3 of the ModRM byte.
 * rflags   - register flags belonging to rfield, used to derive REX bits.
 *
 * Returns output on success, or NULL when the operand cannot be encoded
 * as an effective address.
 */
2216 static ea *process_ea(operand * input, ea * output, int bits,
2217                       int addrbits, int rfield, opflags_t rflags)
2218 {
         /*
          * forw_ref is set when the operand has OPFLAG_UNKNOWN (presumably
          * a forward reference whose value is not final yet).  Below, such
          * operands never get the no-displacement form, and get the 8-bit
          * form only when EAF_BYTEOFFS asks for it.
          */
2219     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2220
2221     output->rip = false;
2222
2223     /* REX flags for the rfield operand */
2224     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2225
2226     if (is_class(REGISTER, input->type)) {  /* register direct */
2227         int i;
2228         opflags_t f;
2229
2230         if (!is_register(input->basereg))
2231             return NULL;
2232         f = regflag(input);
2233         i = nasm_regvals[input->basereg];
2234
2235         if (REG_EA & ~f)
2236             return NULL;        /* Invalid EA register */
2237
2238         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2239
2240         output->sib_present = false;    /* no SIB necessary */
2241         output->bytes = 0;              /* no offset necessary either */
             /* mod = 3 (0xC0), reg = rfield, rm = low 3 bits of register */
2242         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2243     } else {                    /* it's a memory reference */
2244         if (input->basereg == -1 &&
2245             (input->indexreg == -1 || input->scale == 0)) {
2246             /* it's a pure offset */
2247
2248             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2249                 input->segment == NO_SEG) {
2250                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2251                 input->type &= ~IP_REL;
2252                 input->type |= MEMORY;
2253             }
2254
2255             if (input->eaflags & EAF_BYTEOFFS ||
2256                 (input->eaflags & EAF_WORDOFFS &&
2257                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2258                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2259             }
2260
2261             if (bits == 64 && (~input->type & IP_REL)) {
                     /*
                      * 64-bit mode, not RIP-relative: emit ModRM rm = 4
                      * plus a SIB byte with index field 4 and base field 5,
                      * followed by a 4-byte displacement.
                      */
2262                 int scale, index, base;
2263                 output->sib_present = true;
2264                 scale = 0;
2265                 index = 4;
2266                 base = 5;
2267                 output->sib = (scale << 6) | (index << 3) | base;
2268                 output->bytes = 4;
2269                 output->modrm = 4 | ((rfield & 7) << 3);
2270                 output->rip = false;
2271             } else {
                     /*
                      * mod = 0 with rm = 5 (32/64-bit addressing, 4-byte
                      * offset) or rm = 6 (16-bit addressing, 2-byte offset).
                      * In 64-bit mode this form is flagged as RIP-relative.
                      */
2272                 output->sib_present = false;
2273                 output->bytes = (addrbits != 16 ? 4 : 2);
2274                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2275                 output->rip = bits == 64;
2276             }
2277         } else {                /* it's an indirection */
2278             int i = input->indexreg, b = input->basereg, s = input->scale;
2279             int32_t seg = input->segment;
2280             int hb = input->hintbase, ht = input->hinttype;
2281             int t, it, bt;              /* register numbers */
2282             opflags_t x, ix, bx;        /* register flags */
2283
2284             if (s == 0)
2285                 i = -1;         /* make this easy, at least */
2286
                 /* it/ix and bt/bx: number and flags of index and base; -1/0 if absent */
2287             if (is_register(i)) {
2288                 it = nasm_regvals[i];
2289                 ix = nasm_reg_flags[i];
2290             } else {
2291                 it = -1;
2292                 ix = 0;
2293             }
2294
2295             if (is_register(b)) {
2296                 bt = nasm_regvals[b];
2297                 bx = nasm_reg_flags[b];
2298             } else {
2299                 bt = -1;
2300                 bx = 0;
2301             }
2302
2303             /* check for a 32/64-bit memory reference... */
2304             if ((ix|bx) & (BITS32|BITS64)) {
2305                 /*
2306                  * it must be a 32/64-bit memory reference. Firstly we have
2307                  * to check that all registers involved are type E/Rxx.
2308                  */
                     /*
                      * sok: address sizes still acceptable for the registers
                      * seen so far.  o: the offset, truncated to 32 bits.
                      */
2309                 int32_t sok = BITS32 | BITS64, o = input->offset;
2310
2311                 if (it != -1) {
2312                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2313                         sok &= ix;
2314                     else
2315                         return NULL;
2316                 }
2317
2318                 if (bt != -1) {
2319                     if (REG_GPR & ~bx)
2320                         return NULL; /* Invalid register */
2321                     if (~sok & bx & SIZE_MASK)
2322                         return NULL; /* Invalid size */
2323                     sok &= bx;
2324                 }
2325
2326                 /*
2327                  * While we're here, ensure the user didn't specify
2328                  * WORD or QWORD
2329                  */
2330                 if (input->disp_size == 16 || input->disp_size == 64)
2331                     return NULL;
2332
                     /* the register size must agree with the address size in effect */
2333                 if (addrbits == 16 ||
2334                     (addrbits == 32 && !(sok & BITS32)) ||
2335                     (addrbits == 64 && !(sok & BITS64)))
2336                     return NULL;
2337
                     /*
                      * The steps below are order dependent: each one works on
                      * the bt/it/s values left behind by the previous one.
                      */
2338                 /* now reorganize base/index */
2339                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2340                     ((hb == b && ht == EAH_NOTBASE) ||
2341                      (hb == i && ht == EAH_MAKEBASE))) {
2342                     /* swap if hints say so */
2343                     t = bt, bt = it, it = t;
2344                     x = bx, bx = ix, ix = x;
2345                 }
2346                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2347                     bt = -1, bx = 0, s++;
2348                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2349                     /* make single reg base, unless hint */
2350                     bt = it, bx = ix, it = -1, ix = 0;
2351                 }
2352                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2353                       s == 3 || s == 5 || s == 9) && bt == -1)
2354                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                     /* EAF_TIMESTWO: turn a lone base (other than ESP) into an index with scale 1 */
2355                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2356                     (input->eaflags & EAF_TIMESTWO))
2357                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2358                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2359                 if (s == 1 && it == REG_NUM_ESP) {
2360                     /* swap ESP into base if scale is 1 */
2361                     t = it, it = bt, bt = t;
2362                     x = ix, ix = bx, bx = x;
2363                 }
2364                 if (it == REG_NUM_ESP ||
2365                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2366                     return NULL;        /* wrong, for various reasons */
2367
2368                 output->rex |= rexflags(it, ix, REX_X);
2369                 output->rex |= rexflags(bt, bx, REX_B);
2370
2371                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2372                     /* no SIB needed */
2373                     int mod, rm;
2374
2375                     if (bt == -1) {
2376                         rm = 5;
2377                         mod = 0;
2378                     } else {
                             /*
                              * mod 0: no displacement; mod 1: 1-byte
                              * displacement; mod 2: 4-byte displacement.
                              * mod 0 is never chosen when rm is
                              * REG_NUM_EBP (presumably because that
                              * encoding is the displacement-only form
                              * used just above).
                              */
2379                         rm = (bt & 7);
2380                         if (rm != REG_NUM_EBP && o == 0 &&
2381                             seg == NO_SEG && !forw_ref &&
2382                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2383                             mod = 0;
2384                         else if (input->eaflags & EAF_BYTEOFFS ||
2385                                  (o >= -128 && o <= 127 &&
2386                                   seg == NO_SEG && !forw_ref &&
2387                                   !(input->eaflags & EAF_WORDOFFS)))
2388                             mod = 1;
2389                         else
2390                             mod = 2;
2391                     }
2392
2393                     output->sib_present = false;
                         /* no base, or mod 2: 4 bytes; else mod (0 or 1) bytes */
2394                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2395                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2396                 } else {
2397                     /* we need a SIB */
2398                     int mod, scale, index, base;
2399
2400                     if (it == -1)
2401                         index = 4, s = 1;
2402                     else
2403                         index = (it & 7);
2404
                         /* translate the scale factor into the 2-bit SIB scale field */
2405                     switch (s) {
2406                     case 1:
2407                         scale = 0;
2408                         break;
2409                     case 2:
2410                         scale = 1;
2411                         break;
2412                     case 4:
2413                         scale = 2;
2414                         break;
2415                     case 8:
2416                         scale = 3;
2417                         break;
2418                     default:   /* then what the smeg is it? */
2419                         return NULL;    /* panic */
2420                     }
2421
                         /* same mod selection as in the no-SIB case, keyed on the SIB base */
2422                     if (bt == -1) {
2423                         base = 5;
2424                         mod = 0;
2425                     } else {
2426                         base = (bt & 7);
2427                         if (base != REG_NUM_EBP && o == 0 &&
2428                             seg == NO_SEG && !forw_ref &&
2429                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2430                             mod = 0;
2431                         else if (input->eaflags & EAF_BYTEOFFS ||
2432                                  (o >= -128 && o <= 127 &&
2433                                   seg == NO_SEG && !forw_ref &&
2434                                   !(input->eaflags & EAF_WORDOFFS)))
2435                             mod = 1;
2436                         else
2437                             mod = 2;
2438                     }
2439
2440                     output->sib_present = true;
2441                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                         /* rm = 4 in the ModRM byte means a SIB byte follows */
2442                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2443                     output->sib = (scale << 6) | (index << 3) | base;
2444                 }
2445             } else {            /* it's 16-bit */
2446                 int mod, rm;
                     /* the offset is truncated to 16 bits for the range tests below */
2447                 int16_t o = input->offset;
2448
2449                 /* check for 64-bit long mode */
2450                 if (addrbits == 64)
2451                     return NULL;
2452
2453                 /* check all registers are BX, BP, SI or DI */
2454                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2455                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2456                     return NULL;
2457
2458                 /* ensure the user didn't specify DWORD/QWORD */
2459                 if (input->disp_size == 32 || input->disp_size == 64)
2460                     return NULL;
2461
2462                 if (s != 1 && i != -1)
2463                     return NULL;        /* no can do, in 16-bit EA */
2464                 if (b == -1 && i != -1) {
2465                     int tmp = b;
2466                     b = i;
2467                     i = tmp;
2468                 }               /* swap */
2469                 if ((b == R_SI || b == R_DI) && i != -1) {
2470                     int tmp = b;
2471                     b = i;
2472                     i = tmp;
2473                 }
2474                 /* have BX/BP as base, SI/DI index */
2475                 if (b == i)
2476                     return NULL;        /* shouldn't ever happen, in theory */
2477                 if (i != -1 && b != -1 &&
2478                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2479                     return NULL;        /* invalid combinations */
2480                 if (b == -1)            /* pure offset: handled above */
2481                     return NULL;        /* so if it gets to here, panic! */
2482
                     /*
                      * Map the register combination to the rm field:
                      * 0..3 for index+base pairs, 4..7 for a single register.
                      */
2483                 rm = -1;
2484                 if (i != -1)
2485                     switch (i * 256 + b) {
2486                     case R_SI * 256 + R_BX:
2487                         rm = 0;
2488                         break;
2489                     case R_DI * 256 + R_BX:
2490                         rm = 1;
2491                         break;
2492                     case R_SI * 256 + R_BP:
2493                         rm = 2;
2494                         break;
2495                     case R_DI * 256 + R_BP:
2496                         rm = 3;
2497                         break;
2498                 } else
2499                     switch (b) {
2500                     case R_SI:
2501                         rm = 4;
2502                         break;
2503                     case R_DI:
2504                         rm = 5;
2505                         break;
2506                     case R_BP:
2507                         rm = 6;
2508                         break;
2509                     case R_BX:
2510                         rm = 7;
2511                         break;
2512                     }
2513                 if (rm == -1)           /* can't happen, in theory */
2514                     return NULL;        /* so panic if it does */
2515
                     /*
                      * mod 0 is excluded for rm 6 ([BP]): mod 0 with rm 6 is
                      * what the pure-offset case above emits for 16-bit
                      * addressing, so [BP] always carries a displacement.
                      */
2516                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2517                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2518                     mod = 0;
2519                 else if (input->eaflags & EAF_BYTEOFFS ||
2520                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2521                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2522                     mod = 1;
2523                 else
2524                     mod = 2;
2525
2526                 output->sib_present = false;    /* no SIB - it's 16-bit */
2527                 output->bytes = mod;            /* bytes of offset needed */
2528                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2529             }
2530         }
2531     }
2532
         /* total EA length: ModRM byte + optional SIB byte + displacement bytes */
2533     output->size = 1 + output->sib_present + output->bytes;
2534     return output;
2535 }
2536
2537 static void add_asp(insn *ins, int addrbits)
2538 {
2539     int j, valid;
2540     int defdisp;
2541
2542     valid = (addrbits == 64) ? 64|32 : 32|16;
2543
2544     switch (ins->prefixes[PPS_ASIZE]) {
2545     case P_A16:
2546         valid &= 16;
2547         break;
2548     case P_A32:
2549         valid &= 32;
2550         break;
2551     case P_A64:
2552         valid &= 64;
2553         break;
2554     case P_ASP:
2555         valid &= (addrbits == 32) ? 16 : 32;
2556         break;
2557     default:
2558         break;
2559     }
2560
2561     for (j = 0; j < ins->operands; j++) {
2562         if (is_class(MEMORY, ins->oprs[j].type)) {
2563             opflags_t i, b;
2564
2565             /* Verify as Register */
2566             if (!is_register(ins->oprs[j].indexreg))
2567                 i = 0;
2568             else
2569                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2570
2571             /* Verify as Register */
2572             if (!is_register(ins->oprs[j].basereg))
2573                 b = 0;
2574             else
2575                 b = nasm_reg_flags[ins->oprs[j].basereg];
2576
2577             if (ins->oprs[j].scale == 0)
2578                 i = 0;
2579
2580             if (!i && !b) {
2581                 int ds = ins->oprs[j].disp_size;
2582                 if ((addrbits != 64 && ds > 8) ||
2583                     (addrbits == 64 && ds == 16))
2584                     valid &= ds;
2585             } else {
2586                 if (!(REG16 & ~b))
2587                     valid &= 16;
2588                 if (!(REG32 & ~b))
2589                     valid &= 32;
2590                 if (!(REG64 & ~b))
2591                     valid &= 64;
2592
2593                 if (!(REG16 & ~i))
2594                     valid &= 16;
2595                 if (!(REG32 & ~i))
2596                     valid &= 32;
2597                 if (!(REG64 & ~i))
2598                     valid &= 64;
2599             }
2600         }
2601     }
2602
2603     if (valid & addrbits) {
2604         ins->addr_size = addrbits;
2605     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2606         /* Add an address size prefix */
2607         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2608         ins->prefixes[PPS_ASIZE] = pref;
2609         ins->addr_size = (addrbits == 32) ? 16 : 32;
2610     } else {
2611         /* Impossible... */
2612         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2613         ins->addr_size = addrbits; /* Error recovery */
2614     }
2615
2616     defdisp = ins->addr_size == 16 ? 16 : 32;
2617
2618     for (j = 0; j < ins->operands; j++) {
2619         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2620             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2621             /*
2622              * mem_offs sizes must match the address size; if not,
2623              * strip the MEM_OFFS bit and match only EA instructions
2624              */
2625             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2626         }
2627     }
2628 }