assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 wwl lpp
  96  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  97  *                 [l1]  ll = 1 for L = 1 (.256)
  98  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  99  *
 100  *                 [w0]  ww = 0 for W = 0
 101  *                 [w1 ] ww = 1 for W = 1
 102  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 103  *                 [ww]  ww = 3 for W used as REX.W
 104  *
 105  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 106  *
 107  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 108  *                 which is to be extended to the operand size.
 109  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 110  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 111  * \312          - (disassembler only) invalid with non-default address size.
 112  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 113  * \314          - (disassembler only) invalid with REX.B
 114  * \315          - (disassembler only) invalid with REX.X
 115  * \316          - (disassembler only) invalid with REX.R
 116  * \317          - (disassembler only) invalid with REX.W
 117  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 118  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 119  * \322          - indicates that this instruction is only valid when the
 120  *                 operand size is the default (instruction to disassembler,
 121  *                 generates no code in the assembler)
 122  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 123  * \324          - indicates 64-bit operand size requiring REX prefix.
 124  * \325          - instruction which always uses spl/bpl/sil/dil
 125  * \330          - a literal byte follows in the code stream, to be added
 126  *                 to the condition code value of the instruction.
 127  * \331          - instruction not valid with REP prefix.  Hint for
 128  *                 disassembler only; for SSE instructions.
 129  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 130  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 131  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 132  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  133  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  134  * \337          - force a REPNE prefix (0xF2) even if not specified.
 135  *                 \336-\337 are still listed as prefixes in the disassembler.
 136  * \340          - reserve <operand 0> bytes of uninitialized storage.
 137  *                 Operand 0 had better be a segmentless constant.
 138  * \341          - this instruction needs a WAIT "prefix"
 139  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 140  *                 (POP is never used for CS) depending on operand 0
 141  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 142  *                 on operand 0
 143  * \360          - no SSE prefix (== \364\331)
 144  * \361          - 66 SSE prefix (== \366\331)
 145  * \362          - F2 SSE prefix (== \364\332)
 146  * \363          - F3 SSE prefix (== \364\333)
 147  * \364          - operand-size prefix (0x66) not permitted
 148  * \365          - address-size prefix (0x67) not permitted
 149  * \366          - operand-size prefix (0x66) used as opcode extension
 150  * \367          - address-size prefix (0x67) used as opcode extension
 151  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 152  *                 370 is used for Jcc, 371 is used for JMP.
 153  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 154  *                 used for conditional jump over longer jump
 155  */
 156
 157 #include "compiler.h"
 158
 159 #include <stdio.h>
 160 #include <string.h>
 161 #include <inttypes.h>
 162
 163 #include "nasm.h"
 164 #include "nasmlib.h"
 165 #include "assemble.h"
 166 #include "insns.h"
 167 #include "tables.h"
 168
 169 enum match_result {
 170     /*
 171      * Matching errors.  These should be sorted so that more specific
 172      * errors come later in the sequence.
 173      */
 174     MERR_INVALOP,
 175     MERR_OPSIZEMISSING,
 176     MERR_OPSIZEMISMATCH,
 177     MERR_BADCPU,
 178     MERR_BADMODE,
 179     /*
 180      * Matching success; the conditional ones first
 181      */
 182     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 183     MOK_GOOD    /* Matching unconditionally OK */
 184 };
 185
 186 typedef struct {
 187     int sib_present;                 /* is a SIB byte necessary? */
 188     int bytes;                       /* # of bytes of offset needed */
 189     int size;                        /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 191 } ea;
 192
 193 static uint32_t cpu;            /* cpu level received from nasm.c */
 194 static efunc errfunc;
 195 static struct ofmt *outfmt;
 196 static ListGen *list;
 197
 198 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 199 static void gencode(int32_t segment, int64_t offset, int bits,
 200                     insn * ins, const struct itemplate *temp,
 201                     int64_t insn_end);
 202 static enum match_result find_match(const struct itemplate **tempp,
 203                                     insn *instruction,
 204                                     int32_t segment, int64_t offset, int bits);
 205 static enum match_result matches(const struct itemplate *, insn *, int bits);
 206 static opflags_t regflag(const operand *);
 207 static int32_t regval(const operand *);
 208 static int rexflags(int, opflags_t, int);
 209 static int op_rexflags(const operand *, int);
 210 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 211 static void add_asp(insn *, int);
 212
 213 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 214 {
 215     return ins->prefixes[pos] == prefix;
 216 }
 217
 218 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 219 {
 220     if (ins->prefixes[pos])
 221         errfunc(ERR_NONFATAL, "invalid %s prefix",
 222                 prefix_name(ins->prefixes[pos]));
 223 }
 224
 225 static const char *size_name(int size)
 226 {
 227     switch (size) {
 228     case 1:
 229         return "byte";
 230     case 2:
 231         return "word";
 232     case 4:
 233         return "dword";
 234     case 8:
 235         return "qword";
 236     case 10:
 237         return "tword";
 238     case 16:
 239         return "oword";
 240     case 32:
 241         return "yword";
 242     default:
 243         return "???";
 244     }
 245 }
 246
 247 static void warn_overflow(int pass, int size)
 248 {
 249     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 250             "%s data exceeds bounds", size_name(size));
 251 }
 252
 253 static void warn_overflow_const(int64_t data, int size)
 254 {
 255     if (overflow_general(data, size))
 256         warn_overflow(ERR_PASS1, size);
 257 }
 258
 259 static void warn_overflow_opd(const struct operand *o, int size)
 260 {
 261     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 262         if (overflow_general(o->offset, size))
 263             warn_overflow(ERR_PASS2, size);
 264     }
 265 }
 266
 267 /*
 268  * This routine wrappers the real output format's output routine,
 269  * in order to pass a copy of the data off to the listing file
 270  * generator at the same time.
 271  */
 272 static void out(int64_t offset, int32_t segto, const void *data,
 273                 enum out_type type, uint64_t size,
 274                 int32_t segment, int32_t wrt)
 275 {
 276     static int32_t lineno = 0;     /* static!!! */
 277     static char *lnfname = NULL;
 278     uint8_t p[8];
 279
 280     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 281         /*
 282          * This is a non-relocated address, and we're going to
 283          * convert it into RAWDATA format.
 284          */
 285         uint8_t *q = p;
 286
 287         if (size > 8) {
 288             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 289             return;
 290         }
 291
 292         WRITEADDR(q, *(int64_t *)data, size);
 293         data = p;
 294         type = OUT_RAWDATA;
 295     }
 296
 297     list->output(offset, data, type, size);
 298
 299     /*
 300      * this call to src_get determines when we call the
 301      * debug-format-specific "linenum" function
 302      * it updates lineno and lnfname to the current values
 303      * returning 0 if "same as last time", -2 if lnfname
 304      * changed, and the amount by which lineno changed,
 305      * if it did. thus, these variables must be static
 306      */
 307
 308     if (src_get(&lineno, &lnfname))
 309         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 310
 311     outfmt->output(segto, data, type, size, segment, wrt);
 312 }
 313
 314 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 315                      insn * ins, const uint8_t *code)
 316 {
 317     int64_t isize;
 318     uint8_t c = code[0];
 319
 320     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 321         return false;
 322     if (!optimizing)
 323         return false;
 324     if (optimizing < 0 && c == 0371)
 325         return false;
 326
 327     isize = calcsize(segment, offset, bits, ins, code);
 328
 329     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 330         /* Be optimistic in pass 1 */
 331         return true;
 332
 333     if (ins->oprs[0].segment != segment)
 334         return false;
 335
 336     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 337     return (isize >= -128 && isize <= 127); /* is it byte size? */
 338 }
 339
 340 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 341                  insn * instruction, struct ofmt *output, efunc error,
 342                  ListGen * listgen)
 343 {
 344     const struct itemplate *temp;
 345     int j;
 346     enum match_result m;
 347     int64_t insn_end;
 348     int32_t itimes;
 349     int64_t start = offset;
 350     int64_t wsize;              /* size for DB etc. */
 351
 352     errfunc = error;            /* to pass to other functions */
 353     cpu = cp;
 354     outfmt = output;            /* likewise */
 355     list = listgen;             /* and again */
 356
 357     wsize = idata_bytes(instruction->opcode);
 358     if (wsize == -1)
 359         return 0;
 360
 361     if (wsize) {
 362         extop *e;
 363         int32_t t = instruction->times;
 364         if (t < 0)
 365             errfunc(ERR_PANIC,
 366                     "instruction->times < 0 (%ld) in assemble()", t);
 367
 368         while (t--) {           /* repeat TIMES times */
 369             list_for_each(e, instruction->eops) {
 370                 if (e->type == EOT_DB_NUMBER) {
 371                     if (wsize > 8) {
 372                         errfunc(ERR_NONFATAL,
 373                                 "integer supplied to a DT, DO or DY"
 374                                 " instruction");
 375                     } else {
 376                         out(offset, segment, &e->offset,
 377                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 378                         offset += wsize;
 379                     }
 380                 } else if (e->type == EOT_DB_STRING ||
 381                            e->type == EOT_DB_STRING_FREE) {
 382                     int align;
 383
 384                     out(offset, segment, e->stringval,
 385                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 386                     align = e->stringlen % wsize;
 387
 388                     if (align) {
 389                         align = wsize - align;
 390                         out(offset, segment, zero_buffer,
 391                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 392                     }
 393                     offset += e->stringlen + align;
 394                 }
 395             }
 396             if (t > 0 && t == instruction->times - 1) {
 397                 /*
 398                  * Dummy call to list->output to give the offset to the
 399                  * listing module.
 400                  */
 401                 list->output(offset, NULL, OUT_RAWDATA, 0);
 402                 list->uplevel(LIST_TIMES);
 403             }
 404         }
 405         if (instruction->times > 1)
 406             list->downlevel(LIST_TIMES);
 407         return offset - start;
 408     }
 409
 410     if (instruction->opcode == I_INCBIN) {
 411         const char *fname = instruction->eops->stringval;
 412         FILE *fp;
 413
 414         fp = fopen(fname, "rb");
 415         if (!fp) {
 416             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 417                   fname);
 418         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 419             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 420                   fname);
 421         } else {
 422             static char buf[4096];
 423             size_t t = instruction->times;
 424             size_t base = 0;
 425             size_t len;
 426
 427             len = ftell(fp);
 428             if (instruction->eops->next) {
 429                 base = instruction->eops->next->offset;
 430                 len -= base;
 431                 if (instruction->eops->next->next &&
 432                     len > (size_t)instruction->eops->next->next->offset)
 433                     len = (size_t)instruction->eops->next->next->offset;
 434             }
 435             /*
 436              * Dummy call to list->output to give the offset to the
 437              * listing module.
 438              */
 439             list->output(offset, NULL, OUT_RAWDATA, 0);
 440             list->uplevel(LIST_INCBIN);
 441             while (t--) {
 442                 size_t l;
 443
 444                 fseek(fp, base, SEEK_SET);
 445                 l = len;
 446                 while (l > 0) {
 447                     int32_t m;
 448                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 449                     if (!m) {
 450                         /*
 451                          * This shouldn't happen unless the file
 452                          * actually changes while we are reading
 453                          * it.
 454                          */
 455                         error(ERR_NONFATAL,
 456                               "`incbin': unexpected EOF while"
 457                               " reading file `%s'", fname);
 458                         t = 0;  /* Try to exit cleanly */
 459                         break;
 460                     }
 461                     out(offset, segment, buf, OUT_RAWDATA, m,
 462                         NO_SEG, NO_SEG);
 463                     l -= m;
 464                 }
 465             }
 466             list->downlevel(LIST_INCBIN);
 467             if (instruction->times > 1) {
 468                 /*
 469                  * Dummy call to list->output to give the offset to the
 470                  * listing module.
 471                  */
 472                 list->output(offset, NULL, OUT_RAWDATA, 0);
 473                 list->uplevel(LIST_TIMES);
 474                 list->downlevel(LIST_TIMES);
 475             }
 476             fclose(fp);
 477             return instruction->times * len;
 478         }
 479         return 0;               /* if we're here, there's an error */
 480     }
 481
 482     /* Check to see if we need an address-size prefix */
 483     add_asp(instruction, bits);
 484
 485     m = find_match(&temp, instruction, segment, offset, bits);
 486
 487     if (m == MOK_GOOD) {
 488         /* Matches! */
 489         int64_t insn_size = calcsize(segment, offset, bits,
 490                                      instruction, temp->code);
 491         itimes = instruction->times;
 492         if (insn_size < 0)  /* shouldn't be, on pass two */
 493             error(ERR_PANIC, "errors made it through from pass one");
 494         else
 495             while (itimes--) {
 496                 for (j = 0; j < MAXPREFIX; j++) {
 497                     uint8_t c = 0;
 498                     switch (instruction->prefixes[j]) {
 499                     case P_WAIT:
 500                         c = 0x9B;
 501                         break;
 502                     case P_LOCK:
 503                         c = 0xF0;
 504                         break;
 505                     case P_REPNE:
 506                     case P_REPNZ:
 507                         c = 0xF2;
 508                         break;
 509                     case P_REPE:
 510                     case P_REPZ:
 511                     case P_REP:
 512                         c = 0xF3;
 513                         break;
 514                     case R_CS:
 515                         if (bits == 64) {
 516                             error(ERR_WARNING | ERR_PASS2,
 517                                   "cs segment base generated, but will be ignored in 64-bit mode");
 518                         }
 519                         c = 0x2E;
 520                         break;
 521                     case R_DS:
 522                         if (bits == 64) {
 523                             error(ERR_WARNING | ERR_PASS2,
 524                                   "ds segment base generated, but will be ignored in 64-bit mode");
 525                         }
 526                         c = 0x3E;
 527                         break;
 528                     case R_ES:
 529                         if (bits == 64) {
 530                             error(ERR_WARNING | ERR_PASS2,
 531                                   "es segment base generated, but will be ignored in 64-bit mode");
 532                         }
 533                         c = 0x26;
 534                         break;
 535                     case R_FS:
 536                         c = 0x64;
 537                         break;
 538                     case R_GS:
 539                         c = 0x65;
 540                         break;
 541                     case R_SS:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "ss segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x36;
 547                         break;
 548                     case R_SEGR6:
 549                     case R_SEGR7:
 550                         error(ERR_NONFATAL,
 551                               "segr6 and segr7 cannot be used as prefixes");
 552                         break;
 553                     case P_A16:
 554                         if (bits == 64) {
 555                             error(ERR_NONFATAL,
 556                                   "16-bit addressing is not supported "
 557                                   "in 64-bit mode");
 558                         } else if (bits != 16)
 559                             c = 0x67;
 560                         break;
 561                     case P_A32:
 562                         if (bits != 32)
 563                             c = 0x67;
 564                         break;
 565                     case P_A64:
 566                         if (bits != 64) {
 567                             error(ERR_NONFATAL,
 568                                   "64-bit addressing is only supported "
 569                                   "in 64-bit mode");
 570                         }
 571                         break;
 572                     case P_ASP:
 573                         c = 0x67;
 574                         break;
 575                     case P_O16:
 576                         if (bits != 16)
 577                             c = 0x66;
 578                         break;
 579                     case P_O32:
 580                         if (bits == 16)
 581                             c = 0x66;
 582                         break;
 583                     case P_O64:
 584                         /* REX.W */
 585                         break;
 586                     case P_OSP:
 587                         c = 0x66;
 588                         break;
 589                     case P_none:
 590                         break;
 591                     default:
 592                         error(ERR_PANIC, "invalid instruction prefix");
 593                     }
 594                     if (c != 0) {
 595                         out(offset, segment, &c, OUT_RAWDATA, 1,
 596                             NO_SEG, NO_SEG);
 597                         offset++;
 598                     }
 599                 }
 600                 insn_end = offset + insn_size;
 601                 gencode(segment, offset, bits, instruction,
 602                         temp, insn_end);
 603                 offset += insn_size;
 604                 if (itimes > 0 && itimes == instruction->times - 1) {
 605                     /*
 606                      * Dummy call to list->output to give the offset to the
 607                      * listing module.
 608                      */
 609                     list->output(offset, NULL, OUT_RAWDATA, 0);
 610                     list->uplevel(LIST_TIMES);
 611                 }
 612             }
 613         if (instruction->times > 1)
 614             list->downlevel(LIST_TIMES);
 615         return offset - start;
 616     } else {
 617         /* No match */
 618         switch (m) {
 619         case MERR_OPSIZEMISSING:
 620             error(ERR_NONFATAL, "operation size not specified");
 621             break;
 622         case MERR_OPSIZEMISMATCH:
 623             error(ERR_NONFATAL, "mismatch in operand sizes");
 624             break;
 625         case MERR_BADCPU:
 626             error(ERR_NONFATAL, "no instruction for this cpu level");
 627             break;
 628         case MERR_BADMODE:
 629             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 630                   bits);
 631             break;
 632         default:
 633             error(ERR_NONFATAL,
 634                   "invalid combination of opcode and operands");
 635             break;
 636         }
 637     }
 638     return 0;
 639 }
 640
 641 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 642                   insn * instruction, efunc error)
 643 {
 644     const struct itemplate *temp;
 645     enum match_result m;
 646
 647     errfunc = error;            /* to pass to other functions */
 648     cpu = cp;
 649
 650     if (instruction->opcode == I_none)
 651         return 0;
 652
 653     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 654         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 655         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 656         instruction->opcode == I_DY) {
 657         extop *e;
 658         int32_t isize, osize, wsize;
 659
 660         isize = 0;
 661         wsize = idata_bytes(instruction->opcode);
 662
 663         list_for_each(e, instruction->eops) {
 664             int32_t align;
 665
 666             osize = 0;
 667             if (e->type == EOT_DB_NUMBER) {
 668                 osize = 1;
 669                 warn_overflow_const(e->offset, wsize);
 670             } else if (e->type == EOT_DB_STRING ||
 671                        e->type == EOT_DB_STRING_FREE)
 672                 osize = e->stringlen;
 673
 674             align = (-osize) % wsize;
 675             if (align < 0)
 676                 align += wsize;
 677             isize += osize + align;
 678         }
 679         return isize * instruction->times;
 680     }
 681
 682     if (instruction->opcode == I_INCBIN) {
 683         const char *fname = instruction->eops->stringval;
 684         FILE *fp;
 685         int64_t val = 0;
 686         size_t len;
 687
 688         fp = fopen(fname, "rb");
 689         if (!fp)
 690             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 691                   fname);
 692         else if (fseek(fp, 0L, SEEK_END) < 0)
 693             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 694                   fname);
 695         else {
 696             len = ftell(fp);
 697             if (instruction->eops->next) {
 698                 len -= instruction->eops->next->offset;
 699                 if (instruction->eops->next->next &&
 700                     len > (size_t)instruction->eops->next->next->offset) {
 701                     len = (size_t)instruction->eops->next->next->offset;
 702                 }
 703             }
 704             val = instruction->times * len;
 705         }
 706         if (fp)
 707             fclose(fp);
 708         return val;
 709     }
 710
 711     /* Check to see if we need an address-size prefix */
 712     add_asp(instruction, bits);
 713
 714     m = find_match(&temp, instruction, segment, offset, bits);
 715     if (m == MOK_GOOD) {
 716         /* we've matched an instruction. */
 717         int64_t isize;
 718         const uint8_t *codes = temp->code;
 719         int j;
 720
 721         isize = calcsize(segment, offset, bits, instruction, codes);
 722         if (isize < 0)
 723             return -1;
 724         for (j = 0; j < MAXPREFIX; j++) {
 725             switch (instruction->prefixes[j]) {
 726             case P_A16:
 727                 if (bits != 16)
 728                     isize++;
 729                 break;
 730             case P_A32:
 731                 if (bits != 32)
 732                     isize++;
 733                 break;
 734             case P_O16:
 735                 if (bits != 16)
 736                     isize++;
 737                 break;
 738             case P_O32:
 739                 if (bits == 16)
 740                     isize++;
 741                 break;
 742             case P_A64:
 743             case P_O64:
 744             case P_none:
 745                 break;
 746             default:
 747                 isize++;
 748                 break;
 749             }
 750         }
 751         return isize * instruction->times;
 752     } else {
 753         return -1;                  /* didn't match any instruction */
 754     }
 755 }
 756
 757 static bool possible_sbyte(operand *o)
 758 {
         /*
          * Precondition shared by is_sbyte16() and is_sbyte32(): the operand
          * has neither a WRT nor a segment attached, OPFLAG_UNKNOWN is clear,
          * `optimizing' is non-negative and the operand is not marked STRICT.
          */
 759     bool no_seg = o->wrt == NO_SEG && o->segment == NO_SEG;
 760     bool known  = !(o->opflags & OPFLAG_UNKNOWN);
 761     return no_seg && known && optimizing >= 0 && !(o->type & STRICT);
 762 }
 763
 764 /* Operand passes possible_sbyte() and its 16-bit value lies in -128..127 */
 765 static bool is_sbyte16(operand *o)
 766 {
 767     int16_t narrowed;
 768
 769     if (possible_sbyte(o)) {
 770         narrowed = o->offset;       /* value converted to int16_t first */
 771         return narrowed >= -128 && narrowed <= 127;
 772     }
 773     return false;
 774 }
 775
 776 static bool is_sbyte32(operand *o)
 777 {
         /*
          * Same test as is_sbyte16(), but the operand value is converted to
          * int32_t before being range-checked against -128..127.
          */
 778     bool fits = false;
 779
 780     if (possible_sbyte(o)) {
 781         int32_t narrowed = o->offset;
 782         fits = narrowed >= -128 && narrowed <= 127;
 783     }
 784     return fits;
 785 }
 786
 787 /* Common construct: case labels x, x+1, x+2, x+3 (caller adds the last ':') */
 788 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 789
 790 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 791                         insn * ins, const uint8_t *codes)
 792 {
         /*
          * Walk the zero-terminated template code string `codes' and add up
          * the number of bytes instruction `ins' will occupy.  Returns that
          * length, or -1 when a code rejects the current mode or operands.
          * `segment' and `offset' are accepted but not used.
          *
          * As side effects ins->rex is rebuilt from scratch, the DREX/VEX
          * codes fill in ins->drexdst, ins->vex_cm and ins->vex_wlp, and
          * codes 0336/0337/0341 store a default prefix into ins->prefixes[]
          * when the corresponding slot is still empty.
          */
 793     int64_t length = 0;
 794     uint8_t c;
 795     int rex_mask = ~0;
 796     int op1, op2;
 797     struct operand *opx;
 798     uint8_t opex = 0;
 799
 800     ins->rex = 0;               /* Ensure REX is reset */
 801
 802     if (ins->prefixes[PPS_OSIZE] == P_O64)
 803         ins->rex |= REX_W;
 804
 805     (void)segment;              /* Don't warn that this parameter is unused */
 806     (void)offset;               /* Don't warn that this parameter is unused */
 807
 808     while (*codes) {
 809         c = *codes++;
             /*
              * op1 comes from the low two bits of the code byte and op2 from
              * bits 3-4; a preceding 05..07 code (held in opex) adds 4 to
              * op1 (opex bit 0) and/or to op2 (opex bit 1).
              */
 810         op1 = (c & 3) + ((opex & 1) << 2);
 811         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 812         opx = &ins->oprs[op1];
 813         opex = 0;               /* For the next iteration */
 814
 815         switch (c) {
             /* c literal bytes follow in the template: skip and count them */
 816         case 01:
 817         case 02:
 818         case 03:
 819         case 04:
 820             codes += c, length += c;
 821             break;
 822
             /* operand-index extension, applies to the next code only */
 823         case 05:
 824         case 06:
 825         case 07:
 826             opex = c;
 827             break;
 828
             /* one template byte is consumed and one byte is counted */
 829         case4(010):
 830             ins->rex |=
 831                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 832             codes++, length++;
 833             break;
 834
 835         case4(014):
 836         case4(020):
 837         case4(024):
 838             length++;
 839             break;
 840
 841         case4(030):
 842             length += 2;
 843             break;
 844
             /* 2 or 4 bytes: from the operand's size flags if any, else from bits */
 845         case4(034):
 846             if (opx->type & (BITS16 | BITS32 | BITS64))
 847                 length += (opx->type & BITS16) ? 2 : 4;
 848             else
 849                 length += (bits == 16) ? 2 : 4;
 850             break;
 851
 852         case4(040):
 853             length += 4;
 854             break;
 855
             /* NOTE(review): presumably addr_size is in bits, hence >> 3 -- confirm */
 856         case4(044):
 857             length += ins->addr_size >> 3;
 858             break;
 859
 860         case4(050):
 861             length++;
 862             break;
 863
 864         case4(054):
 865             length += 8; /* MOV reg64/imm */
 866             break;
 867
 868         case4(060):
 869             length += 2;
 870             break;
 871
             /* same 2-or-4 sizing rule as code 034 above */
 872         case4(064):
 873             if (opx->type & (BITS16 | BITS32 | BITS64))
 874                 length += (opx->type & BITS16) ? 2 : 4;
 875             else
 876                 length += (bits == 16) ? 2 : 4;
 877             break;
 878
 879         case4(070):
 880             length += 4;
 881             break;
 882
 883         case4(074):
 884             length += 2;
 885             break;
 886
             /* one byte when is_sbyte16() accepts the operand, else two */
 887         case4(0140):
 888             length += is_sbyte16(opx) ? 1 : 2;
 889             break;
 890
 891         case4(0144):
 892             codes++;
 893             length++;
 894             break;
 895
             /* one byte when is_sbyte32() accepts the operand, else four */
 896         case4(0150):
 897             length += is_sbyte32(opx) ? 1 : 4;
 898             break;
 899
 900         case4(0154):
 901             codes++;
 902             length++;
 903             break;
 904
 905         case4(0160):
 906             length++;
 907             ins->rex |= REX_D;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case4(0164):
 912             length++;
 913             ins->rex |= REX_D|REX_OC;
 914             ins->drexdst = regval(opx);
 915             break;
 916
 917         case 0171:
 918             break;
 919
             /* one template byte is consumed and one byte is counted */
 920         case 0172:
 921         case 0173:
 922         case 0174:
 923             codes++;
 924             length++;
 925             break;
 926
 927         case4(0250):
 928             length += is_sbyte32(opx) ? 1 : 4;
 929             break;
 930
 931         case4(0254):
 932             length += 4;
 933             break;
 934
             /*
              * VEX codes: two template bytes follow (vex_cm, then vex_wlp).
              * Nothing is counted here; the prefix length is added after
              * the loop once the REX flags are known.
              */
 935         case4(0260):
 936             ins->rex |= REX_V;
 937             ins->drexdst = regval(opx);
 938             ins->vex_cm = *codes++;
 939             ins->vex_wlp = *codes++;
 940             break;
 941
 942         case 0270:
 943             ins->rex |= REX_V;
 944             ins->drexdst = 0;
 945             ins->vex_cm = *codes++;
 946             ins->vex_wlp = *codes++;
 947             break;
 948
 949         case4(0274):
 950             length++;
 951             break;
 952
 953         case4(0300):
 954             break;
 955
             /*
              * Rejected when bits == 64; otherwise counts one byte unless
              * bits == 16 or an explicit A16 prefix is already present.
              */
 956         case 0310:
 957             if (bits == 64)
 958                 return -1;
 959             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 960             break;
 961
             /* one byte unless bits == 32 or an explicit A32 prefix is present */
 962         case 0311:
 963             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 964             break;
 965
 966         case 0312:
 967             break;
 968
             /* only accepted with bits == 64 and no A16/A32 prefix */
 969         case 0313:
 970             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 971                 has_prefix(ins, PPS_ASIZE, P_A32))
 972                 return -1;
 973             break;
 974
 975         case4(0314):
 976             break;
 977
 978         case 0320:
 979             length += (bits != 16);
 980             break;
 981
 982         case 0321:
 983             length += (bits == 16);
 984             break;
 985
 986         case 0322:
 987             break;
 988
             /* REX_W will be stripped from ins->rex once the loop is done */
 989         case 0323:
 990             rex_mask &= ~REX_W;
 991             break;
 992
 993         case 0324:
 994             ins->rex |= REX_W;
 995             break;
 996
 997         case 0325:
 998             ins->rex |= REX_NH;
 999             break;
1000
1001         case 0330:
1002             codes++, length++;
1003             break;
1004
1005         case 0331:
1006             break;
1007
1008         case 0332:
1009         case 0333:
1010             length++;
1011             break;
1012
1013         case 0334:
1014             ins->rex |= REX_L;
1015             break;
1016
1017         case 0335:
1018             break;
1019
             /* default the PPS_LREP slot to REP / REPNE if it is still empty */
1020         case 0336:
1021             if (!ins->prefixes[PPS_LREP])
1022                 ins->prefixes[PPS_LREP] = P_REP;
1023             break;
1024
1025         case 0337:
1026             if (!ins->prefixes[PPS_LREP])
1027                 ins->prefixes[PPS_LREP] = P_REPNE;
1028             break;
1029
             /* operand 0's value is added directly to the length */
1030         case 0340:
1031             if (ins->oprs[0].segment != NO_SEG)
1032                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1033                         " quantity of BSS space");
1034             else
1035                 length += ins->oprs[0].offset;
1036             break;
1037
             /* default the PPS_WAIT slot to WAIT if it is still empty */
1038         case 0341:
1039             if (!ins->prefixes[PPS_WAIT])
1040                 ins->prefixes[PPS_WAIT] = P_WAIT;
1041             break;
1042
1043         case4(0344):
1044             length++;
1045             break;
1046
1047         case 0360:
1048             break;
1049
1050         case 0361:
1051         case 0362:
1052         case 0363:
1053             length++;
1054             break;
1055
1056         case 0364:
1057         case 0365:
1058             break;
1059
1060         case 0366:
1061         case 0367:
1062             length++;
1063             break;
1064
1065         case 0370:
1066         case 0371:
1067         case 0372:
1068             break;
1069
1070         case 0373:
1071             length++;
1072             break;
1073
             /*
              * Effective-address codes.  For codes up to 0177 the register
              * field is taken from operand opx; for the 02xx codes it is
              * the low three bits of the code byte itself.  The operand
              * encoded as the effective address is always opy (index op2).
              */
1074         case4(0100):
1075         case4(0110):
1076         case4(0120):
1077         case4(0130):
1078         case4(0200):
1079         case4(0204):
1080         case4(0210):
1081         case4(0214):
1082         case4(0220):
1083         case4(0224):
1084         case4(0230):
1085         case4(0234):
1086             {
1087                 ea ea_data;
1088                 int rfield;
1089                 opflags_t rflags;
1090                 struct operand *opy = &ins->oprs[op2];
1091
1092                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1093
1094                 if (c <= 0177) {
1095                     /* pick rfield from operand b (opx) */
1096                     rflags = regflag(opx);
1097                     rfield = nasm_regvals[opx->basereg];
1098                 } else {
1099                     rflags = 0;
1100                     rfield = c & 7;
1101                 }
1102                 if (!process_ea(opy, &ea_data, bits,
1103                                 ins->addr_size, rfield, rflags)) {
1104                     errfunc(ERR_NONFATAL, "invalid effective address");
1105                     return -1;
1106                 } else {
1107                     ins->rex |= ea_data.rex;
1108                     length += ea_data.size;
1109                 }
1110             }
1111             break;
1112
1113         default:
1114             errfunc(ERR_PANIC, "internal instruction table corrupt"
1115                     ": instruction code \\%o (0x%02X) given", c, c);
1116             break;
1117         }
1118     }
1119
         /* drop any flags a code asked to suppress (0323 clears REX_W) */
1120     ins->rex &= rex_mask;
1121
1122     if (ins->rex & REX_NH) {
1123         if (ins->rex & REX_H) {
1124             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1125             return -1;
1126         }
1127         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1128     }
1129
         /*
          * Account for the prefix selected by the accumulated flags.  The
          * three forms are mutually exclusive and tested in this order:
          * VEX (2 or 3 bytes), DREX (1 byte), plain REX (1 byte).
          */
1130     if (ins->rex & REX_V) {
             /* REX bits that are rejected below when bits != 64 */
1131         int bad32 = REX_R|REX_W|REX_X|REX_B;
1132
1133         if (ins->rex & REX_H) {
1134             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1135             return -1;
1136         }
             /* bits 4-5 of vex_wlp decide how REX_W is treated */
1137         switch (ins->vex_wlp & 060) {
1138         case 000:
1139         case 040:
1140             ins->rex &= ~REX_W;
1141             break;
1142         case 020:
1143             ins->rex |= REX_W;
1144             bad32 &= ~REX_W;
1145             break;
1146         case 060:
1147             /* Follow REX_W */
1148             break;
1149         }
1150
1151         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1152             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1153             return -1;
1154         }
             /* two-byte form only when vex_cm == 1 and none of W/X/B is set */
1155         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1156             length += 3;
1157         else
1158             length += 2;
1159     } else if (ins->rex & REX_D) {
1160         if (ins->rex & REX_H) {
1161             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1162             return -1;
1163         }
1164         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1165                            ins->drexdst > 7)) {
1166             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1167             return -1;
1168         }
1169         length++;
1170     } else if (ins->rex & REX_REAL) {
1171         if (ins->rex & REX_H) {
1172             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1173             return -1;
1174         } else if (bits == 64) {
1175             length++;
1176         } else if ((ins->rex & REX_L) &&
1177                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1178                    cpu >= IF_X86_64) {
1179             /* LOCK-as-REX.R */
1180             assert_no_prefix(ins, PPS_LREP);
1181             length++;
1182         } else {
1183             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1184             return -1;
1185         }
1186     }
1187
1188     return length;
1189 }
1190
1191 #define EMIT_REX()                                                              \
1192     if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) && (bits == 64)) { \
1193         ins->rex = (ins->rex & REX_REAL)|REX_P;                                 \
1194         out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);        \
1195         ins->rex = 0;                                                           \
1196         offset += 1;                                                            \
1197     }
1198
1199 static void gencode(int32_t segment, int64_t offset, int bits,
1200                     insn * ins, const struct itemplate *temp,
1201                     int64_t insn_end)
1202 {
1203     static char condval[] = {   /* conditional opcodes */
1204         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1205         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1206         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1207     };
1208     uint8_t c;
1209     uint8_t bytes[4];
1210     int64_t size;
1211     int64_t data;
1212     int op1, op2;
1213     struct operand *opx;
1214     const uint8_t *codes = temp->code;
1215     uint8_t opex = 0;
1216
1217     while (*codes) {
1218         c = *codes++;
1219         op1 = (c & 3) + ((opex & 1) << 2);
1220         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1221         opx = &ins->oprs[op1];
1222         opex = 0;                /* For the next iteration */
1223
1224         switch (c) {
1225         case 01:
1226         case 02:
1227         case 03:
1228         case 04:
1229             EMIT_REX();
1230             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1231             codes += c;
1232             offset += c;
1233             break;
1234
1235         case 05:
1236         case 06:
1237         case 07:
1238             opex = c;
1239             break;
1240
1241         case4(010):
1242             EMIT_REX();
1243             bytes[0] = *codes++ + (regval(opx) & 7);
1244             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1245             offset += 1;
1246             break;
1247
1248         case4(014):
1249             /*
1250              * The test for BITS8 and SBYTE here is intended to avoid
1251              * warning on optimizer actions due to SBYTE, while still
1252              * warn on explicit BYTE directives.  Also warn, obviously,
1253              * if the optimizer isn't enabled.
1254              */
1255             if (((opx->type & BITS8) ||
1256                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1257                 (opx->offset < -128 || opx->offset > 127)) {
1258                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1259                         "signed byte value exceeds bounds");
1260             }
1261             if (opx->segment != NO_SEG) {
1262                 data = opx->offset;
1263                 out(offset, segment, &data, OUT_ADDRESS, 1,
1264                     opx->segment, opx->wrt);
1265             } else {
1266                 bytes[0] = opx->offset;
1267                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1268                     NO_SEG);
1269             }
1270             offset += 1;
1271             break;
1272
1273         case4(020):
1274             if (opx->offset < -256 || opx->offset > 255) {
1275                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1276                         "byte value exceeds bounds");
1277             }
1278             if (opx->segment != NO_SEG) {
1279                 data = opx->offset;
1280                 out(offset, segment, &data, OUT_ADDRESS, 1,
1281                     opx->segment, opx->wrt);
1282             } else {
1283                 bytes[0] = opx->offset;
1284                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1285                     NO_SEG);
1286             }
1287             offset += 1;
1288             break;
1289
1290         case4(024):
1291             if (opx->offset < 0 || opx->offset > 255)
1292                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1293                         "unsigned byte value exceeds bounds");
1294             if (opx->segment != NO_SEG) {
1295                 data = opx->offset;
1296                 out(offset, segment, &data, OUT_ADDRESS, 1,
1297                     opx->segment, opx->wrt);
1298             } else {
1299                 bytes[0] = opx->offset;
1300                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1301                     NO_SEG);
1302             }
1303             offset += 1;
1304             break;
1305
1306         case4(030):
1307             warn_overflow_opd(opx, 2);
1308             data = opx->offset;
1309             out(offset, segment, &data, OUT_ADDRESS, 2,
1310                 opx->segment, opx->wrt);
1311             offset += 2;
1312             break;
1313
1314         case4(034):
1315             if (opx->type & (BITS16 | BITS32))
1316                 size = (opx->type & BITS16) ? 2 : 4;
1317             else
1318                 size = (bits == 16) ? 2 : 4;
1319             warn_overflow_opd(opx, size);
1320             data = opx->offset;
1321             out(offset, segment, &data, OUT_ADDRESS, size,
1322                 opx->segment, opx->wrt);
1323             offset += size;
1324             break;
1325
1326         case4(040):
1327             warn_overflow_opd(opx, 4);
1328             data = opx->offset;
1329             out(offset, segment, &data, OUT_ADDRESS, 4,
1330                 opx->segment, opx->wrt);
1331             offset += 4;
1332             break;
1333
1334         case4(044):
1335             data = opx->offset;
1336             size = ins->addr_size >> 3;
1337             warn_overflow_opd(opx, size);
1338             out(offset, segment, &data, OUT_ADDRESS, size,
1339                 opx->segment, opx->wrt);
1340             offset += size;
1341             break;
1342
1343         case4(050):
1344             if (opx->segment != segment) {
1345                 data = opx->offset;
1346                 out(offset, segment, &data,
1347                     OUT_REL1ADR, insn_end - offset,
1348                     opx->segment, opx->wrt);
1349             } else {
1350                 data = opx->offset - insn_end;
1351                 if (data > 127 || data < -128)
1352                     errfunc(ERR_NONFATAL, "short jump is out of range");
1353                 out(offset, segment, &data,
1354                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1355             }
1356             offset += 1;
1357             break;
1358
1359         case4(054):
1360             data = (int64_t)opx->offset;
1361             out(offset, segment, &data, OUT_ADDRESS, 8,
1362                 opx->segment, opx->wrt);
1363             offset += 8;
1364             break;
1365
1366         case4(060):
1367             if (opx->segment != segment) {
1368                 data = opx->offset;
1369                 out(offset, segment, &data,
1370                     OUT_REL2ADR, insn_end - offset,
1371                     opx->segment, opx->wrt);
1372             } else {
1373                 data = opx->offset - insn_end;
1374                 out(offset, segment, &data,
1375                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1376             }
1377             offset += 2;
1378             break;
1379
1380         case4(064):
1381             if (opx->type & (BITS16 | BITS32 | BITS64))
1382                 size = (opx->type & BITS16) ? 2 : 4;
1383             else
1384                 size = (bits == 16) ? 2 : 4;
1385             if (opx->segment != segment) {
1386                 data = opx->offset;
1387                 out(offset, segment, &data,
1388                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1389                     insn_end - offset, opx->segment, opx->wrt);
1390             } else {
1391                 data = opx->offset - insn_end;
1392                 out(offset, segment, &data,
1393                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1394             }
1395             offset += size;
1396             break;
1397
1398         case4(070):
1399             if (opx->segment != segment) {
1400                 data = opx->offset;
1401                 out(offset, segment, &data,
1402                     OUT_REL4ADR, insn_end - offset,
1403                     opx->segment, opx->wrt);
1404             } else {
1405                 data = opx->offset - insn_end;
1406                 out(offset, segment, &data,
1407                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1408             }
1409             offset += 4;
1410             break;
1411
1412         case4(074):
1413             if (opx->segment == NO_SEG)
1414                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1415                         " relocatable");
1416             data = 0;
1417             out(offset, segment, &data, OUT_ADDRESS, 2,
1418                 outfmt->segbase(1 + opx->segment),
1419                 opx->wrt);
1420             offset += 2;
1421             break;
1422
1423         case4(0140):
1424             data = opx->offset;
1425             warn_overflow_opd(opx, 2);
1426             if (is_sbyte16(opx)) {
1427                 bytes[0] = data;
1428                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1429                     NO_SEG);
1430                 offset++;
1431             } else {
1432                 out(offset, segment, &data, OUT_ADDRESS, 2,
1433                     opx->segment, opx->wrt);
1434                 offset += 2;
1435             }
1436             break;
1437
1438         case4(0144):
1439             EMIT_REX();
1440             bytes[0] = *codes++;
1441             if (is_sbyte16(opx))
1442                 bytes[0] |= 2;  /* s-bit */
1443             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1444             offset++;
1445             break;
1446
1447         case4(0150):
1448             data = opx->offset;
1449             warn_overflow_opd(opx, 4);
1450             if (is_sbyte32(opx)) {
1451                 bytes[0] = data;
1452                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1453                     NO_SEG);
1454                 offset++;
1455             } else {
1456                 out(offset, segment, &data, OUT_ADDRESS, 4,
1457                     opx->segment, opx->wrt);
1458                 offset += 4;
1459             }
1460             break;
1461
1462         case4(0154):
1463             EMIT_REX();
1464             bytes[0] = *codes++;
1465             if (is_sbyte32(opx))
1466                 bytes[0] |= 2;  /* s-bit */
1467             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1468             offset++;
1469             break;
1470
1471         case4(0160):
1472         case4(0164):
1473             break;
1474
1475         case 0171:
1476             bytes[0] =
1477                 (ins->drexdst << 4) |
1478                 (ins->rex & REX_OC ? 0x08 : 0) |
1479                 (ins->rex & (REX_R|REX_X|REX_B));
1480             ins->rex = 0;
1481             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1482             offset++;
1483             break;
1484
1485         case 0172:
1486             c = *codes++;
1487             opx = &ins->oprs[c >> 3];
1488             bytes[0] = nasm_regvals[opx->basereg] << 4;
1489             opx = &ins->oprs[c & 7];
1490             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1491                 errfunc(ERR_NONFATAL,
1492                         "non-absolute expression not permitted as argument %d",
1493                         c & 7);
1494             } else {
1495                 if (opx->offset & ~15) {
1496                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1497                             "four-bit argument exceeds bounds");
1498                 }
1499                 bytes[0] |= opx->offset & 15;
1500             }
1501             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1502             offset++;
1503             break;
1504
1505         case 0173:
1506             c = *codes++;
1507             opx = &ins->oprs[c >> 4];
1508             bytes[0] = nasm_regvals[opx->basereg] << 4;
1509             bytes[0] |= c & 15;
1510             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1511             offset++;
1512             break;
1513
1514         case 0174:
1515             c = *codes++;
1516             opx = &ins->oprs[c];
1517             bytes[0] = nasm_regvals[opx->basereg] << 4;
1518             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1519             offset++;
1520             break;
1521
1522         case4(0250):
1523             data = opx->offset;
1524             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1525                 (int32_t)data != (int64_t)data) {
1526                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1527                         "signed dword immediate exceeds bounds");
1528             }
1529             if (is_sbyte32(opx)) {
1530                 bytes[0] = data;
1531                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1532                     NO_SEG);
1533                 offset++;
1534             } else {
1535                 out(offset, segment, &data, OUT_ADDRESS, 4,
1536                     opx->segment, opx->wrt);
1537                 offset += 4;
1538             }
1539             break;
1540
1541         case4(0254):
1542             data = opx->offset;
1543             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1544                 (int32_t)data != (int64_t)data) {
1545                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1546                         "signed dword immediate exceeds bounds");
1547             }
1548             out(offset, segment, &data, OUT_ADDRESS, 4,
1549                 opx->segment, opx->wrt);
1550             offset += 4;
1551             break;
1552
1553         case4(0260):
1554         case 0270:
1555             codes += 2;
1556             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1557                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1558                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1559                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1560                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1561                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1562                 offset += 3;
1563             } else {
1564                 bytes[0] = 0xc5;
1565                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1566                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1567                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1568                 offset += 2;
1569             }
1570             break;
1571
1572         case4(0274):
1573         {
1574             uint64_t uv, um;
1575             int s;
1576
1577             if (ins->rex & REX_W)
1578                 s = 64;
1579             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1580                 s = 16;
1581             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1582                 s = 32;
1583             else
1584                 s = bits;
1585
1586             um = (uint64_t)2 << (s-1);
1587             uv = opx->offset;
1588
1589             if (uv > 127 && uv < (uint64_t)-128 &&
1590                 (uv < um-128 || uv > um-1)) {
1591                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1592                         "signed byte value exceeds bounds");
1593             }
1594             if (opx->segment != NO_SEG) {
1595                 data = uv;
1596                 out(offset, segment, &data, OUT_ADDRESS, 1,
1597                     opx->segment, opx->wrt);
1598             } else {
1599                 bytes[0] = uv;
1600                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1601                     NO_SEG);
1602             }
1603             offset += 1;
1604             break;
1605         }
1606
1607         case4(0300):
1608             break;
1609
1610         case 0310:
1611             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1612                 *bytes = 0x67;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0311:
1620             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1621                 *bytes = 0x67;
1622                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1623                 offset += 1;
1624             } else
1625                 offset += 0;
1626             break;
1627
1628         case 0312:
1629             break;
1630
1631         case 0313:
1632             ins->rex = 0;
1633             break;
1634
1635         case4(0314):
1636             break;
1637
1638         case 0320:
1639         {
1640             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1641             if (pfx != P_O16 && pfx != P_none)
1642                 nasm_error(ERR_WARNING, "Invalid operand size prefix");
1643             if (pfx != P_O16 && bits != 16) {
1644                 ins->prefixes[PPS_OSIZE] = P_O16;
1645                 *bytes = 0x66;
1646                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1647                 offset += 1;
1648             }
1649             break;
1650         }
1651
1652         case 0321:
1653         {
1654             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1655             if (pfx != P_O32 && pfx != P_none)
1656                 nasm_error(ERR_WARNING, "Invalid operand size prefix");
1657             if (pfx != P_O32 && bits == 16) {
1658                 ins->prefixes[PPS_OSIZE] = P_O32;
1659                 *bytes = 0x66;
1660                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1661                 offset += 1;
1662             }
1663             break;
1664         }
1665
1666         case 0322:
1667         case 0323:
1668             break;
1669
1670         case 0324:
1671             ins->rex |= REX_W;
1672             break;
1673
1674         case 0325:
1675             break;
1676
1677         case 0330:
1678             *bytes = *codes++ ^ condval[ins->condition];
1679             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1680             offset += 1;
1681             break;
1682
1683         case 0331:
1684             break;
1685
1686         case 0332:
1687         case 0333:
1688             *bytes = c - 0332 + 0xF2;
1689             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1690             offset += 1;
1691             break;
1692
1693         case 0334:
1694             if (ins->rex & REX_R) {
1695                 *bytes = 0xF0;
1696                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1697                 offset += 1;
1698             }
1699             ins->rex &= ~(REX_L|REX_R);
1700             break;
1701
1702         case 0335:
1703             break;
1704
1705         case 0336:
1706         case 0337:
1707             break;
1708
1709         case 0340:
1710             if (ins->oprs[0].segment != NO_SEG)
1711                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1712             else {
1713                 int64_t size = ins->oprs[0].offset;
1714                 if (size > 0)
1715                     out(offset, segment, NULL,
1716                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1717                 offset += size;
1718             }
1719             break;
1720
1721         case 0341:
1722             break;
1723
1724         case 0344:
1725         case 0345:
1726             bytes[0] = c & 1;
1727             switch (ins->oprs[0].basereg) {
1728             case R_CS:
1729                 bytes[0] += 0x0E;
1730                 break;
1731             case R_DS:
1732                 bytes[0] += 0x1E;
1733                 break;
1734             case R_ES:
1735                 bytes[0] += 0x06;
1736                 break;
1737             case R_SS:
1738                 bytes[0] += 0x16;
1739                 break;
1740             default:
1741                 errfunc(ERR_PANIC,
1742                         "bizarre 8086 segment register received");
1743             }
1744             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1745             offset++;
1746             break;
1747
1748         case 0346:
1749         case 0347:
1750             bytes[0] = c & 1;
1751             switch (ins->oprs[0].basereg) {
1752             case R_FS:
1753                 bytes[0] += 0xA0;
1754                 break;
1755             case R_GS:
1756                 bytes[0] += 0xA8;
1757                 break;
1758             default:
1759                 errfunc(ERR_PANIC,
1760                         "bizarre 386 segment register received");
1761             }
1762             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1763             offset++;
1764             break;
1765
1766         case 0360:
1767             break;
1768
1769         case 0361:
1770             bytes[0] = 0x66;
1771             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1772             offset += 1;
1773             break;
1774
1775         case 0362:
1776         case 0363:
1777             bytes[0] = c - 0362 + 0xf2;
1778             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1779             offset += 1;
1780             break;
1781
1782         case 0364:
1783         case 0365:
1784             break;
1785
1786         case 0366:
1787         case 0367:
1788             *bytes = c - 0366 + 0x66;
1789             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1790             offset += 1;
1791             break;
1792
1793         case 0370:
1794         case 0371:
1795         case 0372:
1796             break;
1797
1798         case 0373:
1799             *bytes = bits == 16 ? 3 : 5;
1800             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1801             offset += 1;
1802             break;
1803
1804         case4(0100):
1805         case4(0110):
1806         case4(0120):
1807         case4(0130):
1808         case4(0200):
1809         case4(0204):
1810         case4(0210):
1811         case4(0214):
1812         case4(0220):
1813         case4(0224):
1814         case4(0230):
1815         case4(0234):
1816             {
1817                 ea ea_data;
1818                 int rfield;
1819                 opflags_t rflags;
1820                 uint8_t *p;
1821                 int32_t s;
1822                 enum out_type type;
1823                 struct operand *opy = &ins->oprs[op2];
1824
1825                 if (c <= 0177) {
1826                     /* pick rfield from operand b (opx) */
1827                     rflags = regflag(opx);
1828                     rfield = nasm_regvals[opx->basereg];
1829                 } else {
1830                     /* rfield is constant */
1831                     rflags = 0;
1832                     rfield = c & 7;
1833                 }
1834
1835                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1836                                 rfield, rflags)) {
1837                     errfunc(ERR_NONFATAL, "invalid effective address");
1838                 }
1839
1840
1841                 p = bytes;
1842                 *p++ = ea_data.modrm;
1843                 if (ea_data.sib_present)
1844                     *p++ = ea_data.sib;
1845
1846                 /* DREX suffixes come between the SIB and the displacement */
1847                 if (ins->rex & REX_D) {
1848                     *p++ = (ins->drexdst << 4) |
1849                            (ins->rex & REX_OC ? 0x08 : 0) |
1850                            (ins->rex & (REX_R|REX_X|REX_B));
1851                     ins->rex = 0;
1852                 }
1853
1854                 s = p - bytes;
1855                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1856
1857                 /*
1858                  * Make sure the address gets the right offset in case
1859                  * the line breaks in the .lst file (BR 1197827)
1860                  */
1861                 offset += s;
1862                 s = 0;
1863
1864                 switch (ea_data.bytes) {
1865                 case 0:
1866                     break;
1867                 case 1:
1868                 case 2:
1869                 case 4:
1870                 case 8:
1871                     data = opy->offset;
1872                     s += ea_data.bytes;
1873                     if (ea_data.rip) {
1874                         if (opy->segment == segment) {
1875                             data -= insn_end;
1876                             if (overflow_signed(data, ea_data.bytes))
1877                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1878                             out(offset, segment, &data, OUT_ADDRESS,
1879                                 ea_data.bytes, NO_SEG, NO_SEG);
1880                         } else {
1881                             /* overflow check in output/linker? */
1882                             out(offset, segment, &data,        OUT_REL4ADR,
1883                                 insn_end - offset, opy->segment, opy->wrt);
1884                         }
1885                     } else {
1886                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1887                             signed_bits(opy->offset, ins->addr_size) !=
1888                             signed_bits(opy->offset, ea_data.bytes * 8))
1889                             warn_overflow(ERR_PASS2, ea_data.bytes);
1890
1891                         type = OUT_ADDRESS;
1892                         out(offset, segment, &data, OUT_ADDRESS,
1893                             ea_data.bytes, opy->segment, opy->wrt);
1894                     }
1895                     break;
1896                 default:
1897                     /* Impossible! */
1898                     errfunc(ERR_PANIC,
1899                             "Invalid amount of bytes (%d) for offset?!",
1900                             ea_data.bytes);
1901                     break;
1902                 }
1903                 offset += s;
1904             }
1905             break;
1906
1907         default:
1908             errfunc(ERR_PANIC, "internal instruction table corrupt"
1909                     ": instruction code \\%o (0x%02X) given", c, c);
1910             break;
1911         }
1912     }
1913 }
1914
/*
 * Return the nasm_reg_flags[] entry for the base register of operand o.
 *
 * If is_register() rejects o->basereg, an ERR_PANIC is reported through
 * errfunc() first.
 * NOTE(review): the table lookup is not guarded by that test, so this
 * relies on errfunc(ERR_PANIC, ...) not returning -- confirm.
 */
1915 static opflags_t regflag(const operand * o)
1916 {
1917     if (!is_register(o->basereg))
1918         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1919     return nasm_reg_flags[o->basereg];
1920 }
1921
/*
 * Return the nasm_regvals[] entry for the base register of operand o.
 *
 * If is_register() rejects o->basereg, an ERR_PANIC is reported through
 * errfunc() first.
 * NOTE(review): the table lookup is not guarded by that test, so this
 * relies on errfunc(ERR_PANIC, ...) not returning -- confirm.
 */
1922 static int32_t regval(const operand * o)
1923 {
1924     if (!is_register(o->basereg))
1925         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1926     return nasm_regvals[o->basereg];
1927 }
1928
/*
 * Compute the REX flag bits implied by the base register of operand o.
 *
 *   o    - operand whose basereg must satisfy is_register(); otherwise
 *          an ERR_PANIC is reported through errfunc()
 *   mask - set of REX_* bits the caller wants back
 *
 * Looks up the register's flags and value and returns the result of
 * rexflags(val, flags, mask).
 */
1929 static int op_rexflags(const operand * o, int mask)
1930 {
1931     opflags_t flags;
1932     int val;
1933
1934     if (!is_register(o->basereg))
1935         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1936
         /* Same two tables that regflag() and regval() read. */
1937     flags = nasm_reg_flags[o->basereg];
1938     val = nasm_regvals[o->basereg];
1939
1940     return rexflags(val, flags, mask);
1941 }
1942
/*
 * Compute the REX flag bits implied by a single register.
 *
 *   val   - register number; callers in this file pass a nasm_regvals[]
 *           entry, an rfield value, or -1 when there is no register
 *   flags - register flags (callers pass a nasm_reg_flags[] entry or 0)
 *   mask  - set of REX_* bits the caller wants back
 *
 * Returns the computed bits ANDed with mask.
 */
1943 static int rexflags(int val, opflags_t flags, int mask)
1944 {
1945     int rex = 0;
1946
         /*
          * A register number of 8 or more sets all three of B, X and R;
          * the caller's mask keeps only the bit for the field the
          * register is actually encoded in.
          */
1947     if (val >= 8)
1948         rex |= REX_B|REX_X|REX_R;
1949     if (flags & BITS64)
1950         rex |= REX_W;
         /*
          * !(CLASS & ~flags) is true when every bit of CLASS is present
          * in flags.
          */
1951     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1952         rex |= REX_H;
1953     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1954         rex |= REX_P;
1955
1956     return rex & mask;
1957 }
1958
/*
 * Find the instruction template that fits `instruction`.
 *
 *   tempp       - out: receives the template pointer at which the search
 *                 stopped.  That is the matching template when MOK_GOOD
 *                 is returned; otherwise it is the entry that ends the
 *                 template list (the one whose opcode is I_none).
 *   instruction - instruction to match; on the fuzzy path below its
 *                 operand types may get size bits OR-ed in
 *   segment     - passed through to jmp_match()
 *   offset      - passed through to jmp_match()
 *   bits        - passed through to matches() and jmp_match()
 *
 * Returns MOK_GOOD on success; otherwise the greatest (by enum value)
 * result produced by any template, starting from MERR_INVALOP.
 */
1959 static enum match_result find_match(const struct itemplate **tempp,
1960                                     insn *instruction,
1961                                     int32_t segment, int64_t offset, int bits)
1962 {
1963     const struct itemplate *temp;
1964     enum match_result m, merr;
1965     opflags_t xsizeflags[MAX_OPERANDS];
1966     bool opsizemissing = false;
1967     int i;
1968
         /* Start from the size bits the source gave for each operand. */
1969     for (i = 0; i < instruction->operands; i++)
1970         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1971
1972     merr = MERR_INVALOP;
1973
         /* First pass: try every template for this opcode as written. */
1974     for (temp = nasm_instructions[instruction->opcode];
1975          temp->opcode != I_none; temp++) {
1976         m = matches(temp, instruction, bits);
1977         if (m == MOK_JUMP) {
                 /* Let jmp_match() decide whether this jump form is usable. */
1978             if (jmp_match(segment, offset, bits, instruction, temp->code))
1979                 m = MOK_GOOD;
1980             else
1981                 m = MERR_INVALOP;
1982         } else if (m == MERR_OPSIZEMISSING &&
1983                    (temp->flags & IF_SMASK) != IF_SX) {
1984             /*
1985              * Missing operand size and a candidate for fuzzy matching...
1986              */
                 /*
                  * Collect the sizes this template would accept for each
                  * operand; SAME_AS operands contribute nothing.
                  */
1987             for (i = 0; i < temp->operands; i++) {
1988                 if ((temp->opd[i] & SAME_AS) == 0)
1989                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1990             }
1991             opsizemissing = true;
1992         }
             /* Remember the greatest result seen so far. */
1993         if (m > merr)
1994             merr = m;
1995         if (merr == MOK_GOOD)
1996             goto done;
1997     }
1998
1999     /* No match, but see if we can get a fuzzy operand size match... */
2000     if (!opsizemissing)
2001         goto done;
2002
         /*
          * Note that operands are updated as the loop goes: if a later
          * operand turns out to be ambiguous, earlier ones have already
          * had their size set.
          */
2003     for (i = 0; i < instruction->operands; i++) {
2004         /*
2005          * We ignore extrinsic operand sizes on registers, so we should
2006          * never try to fuzzy-match on them.  This also resolves the case
2007          * when we have e.g. "xmmrm128" in two different positions.
2008          */
2009         if (is_class(REGISTER, instruction->oprs[i].type))
2010             continue;
2011
2012         /* This tests if xsizeflags[i] has more than one bit set */
2013         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2014             goto done;                /* No luck */
2015
2016         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2017     }
2018
2019     /* Try matching again... */
2020     for (temp = nasm_instructions[instruction->opcode];
2021          temp->opcode != I_none; temp++) {
2022         m = matches(temp, instruction, bits);
2023         if (m == MOK_JUMP) {
2024             if (jmp_match(segment, offset, bits, instruction, temp->code))
2025                 m = MOK_GOOD;
2026             else
2027                 m = MERR_INVALOP;
2028         }
2029         if (m > merr)
2030             merr = m;
2031         if (merr == MOK_GOOD)
2032             goto done;
2033     }
2034
2035 done:
         /* temp is the matching template, or the terminating entry. */
2036     *tempp = temp;
2037     return merr;
2038 }
2039
/*
 * Test one instruction template against a parsed instruction.
 *
 *   itemp       - candidate template
 *   instruction - the instruction being assembled (only read here)
 *   bits        - current code size; 16, 32 and 64 are handled explicitly
 *
 * Returns the first of the following that applies, in this order:
 *   MERR_INVALOP        opcode, operand count, COLON/TO usage or operand
 *                       flags do not fit the template
 *   MERR_OPSIZEMISSING  a non-register operand differs from the template
 *                       only by not having a size
 *   MERR_OPSIZEMISMATCH an operand size given in the instruction conflicts
 *                       with the size derived from the template flags
 *   MERR_BADCPU         (itemp->flags & IF_PLEVEL) is greater than `cpu`
 *   MERR_BADMODE        template is excluded by IF_NOLONG / IF_LONG
 *   MOK_JUMP            template fits and its first code byte is in
 *                       0370..0373; find_match() resolves this through
 *                       jmp_match()
 *   MOK_GOOD            template fits
 */
2040 static enum match_result matches(const struct itemplate *itemp,
2041                                  insn *instruction, int bits)
2042 {
2043     int i, size[MAX_OPERANDS], asize, oprs;
2044     bool opsizemissing = false;
2045
2046     /*
2047      * Check the opcode
2048      */
2049     if (itemp->opcode != instruction->opcode)
2050         return MERR_INVALOP;
2051
2052     /*
2053      * Count the operands
2054      */
2055     if (itemp->operands != instruction->operands)
2056         return MERR_INVALOP;
2057
2058     /*
2059      * Check that no spurious colons or TOs are present
2060      */
2061     for (i = 0; i < itemp->operands; i++)
2062         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2063             return MERR_INVALOP;
2064
2065     /*
2066      * Process size flags
2067      */
         /* asize becomes the default operand size implied by the S- flag. */
2068     switch (itemp->flags & IF_SMASK) {
2069     case IF_SB:
2070         asize = BITS8;
2071         break;
2072     case IF_SW:
2073         asize = BITS16;
2074         break;
2075     case IF_SD:
2076         asize = BITS32;
2077         break;
2078     case IF_SQ:
2079         asize = BITS64;
2080         break;
2081     case IF_SO:
2082         asize = BITS128;
2083         break;
2084     case IF_SY:
2085         asize = BITS256;
2086         break;
2087     case IF_SZ:
             /* IF_SZ: the default size follows the current code size. */
2088         switch (bits) {
2089         case 16:
2090             asize = BITS16;
2091             break;
2092         case 32:
2093             asize = BITS32;
2094             break;
2095         case 64:
2096             asize = BITS64;
2097             break;
2098         default:
2099             asize = 0;
2100             break;
2101         }
2102         break;
2103     default:
2104         asize = 0;
2105         break;
2106     }
2107
2108     if (itemp->flags & IF_ARMASK) {
2109         /* S- flags only apply to a specific operand */
             /* The IF_ARMASK field holds that operand's index plus one. */
2110         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2111         memset(size, 0, sizeof size);
2112         size[i] = asize;
2113     } else {
2114         /* S- flags apply to all operands */
2115         for (i = 0; i < MAX_OPERANDS; i++)
2116             size[i] = asize;
2117     }
2118
2119     /*
2120      * Check that the operand flags all match up.
2121      * It's a bit tricky, so let's be verbose:
2122      *
2123      * 1) Find out the size of the operand. If the instruction
2124      *    doesn't specify one, we try to guess it either
2125      *    from the template (IF_S* flag) or from the
2126      *    code bits.
2127      *
2128      * 2) If template operand (i) has the SAME_AS flag [used for registers
2129      *    only] (i.e. it must be the same operand as another one in the
2130      *    template, whose index is obtained by masking with ~SAME_AS),
2131      *    make sure that both operands of the instruction match
2132      *    exactly (same type and same base register).
2133      *
2134      * 3) If the template operand does not match the instruction OR
2135      *    the template specifies an operand size AND that size differs
2136      *    from the one the instruction has (perhaps taken from code bits):
2137      *
2138      *      a)  Reject the template unless the size is the only
2139      *          difference and the operand has no size at all.
2140      *      b)  Otherwise, if the operand is not a register, mark
2141      *          it as "size missing"; this turns on the fuzzy
2142      *          operand size logic (handled by the caller). A register
2143      *          in this situation is treated as a wildcard match.
2144      */
2145     for (i = 0; i < itemp->operands; i++) {
2146         opflags_t type = instruction->oprs[i].type;
             /* No size in the instruction: fall back to the S- flag size. */
2147         if (!(type & SIZE_MASK))
2148             type |= size[i];
2149
2150         if (itemp->opd[i] & SAME_AS) {
2151             int j = itemp->opd[i] & ~SAME_AS;
2152             if (type != instruction->oprs[j].type ||
2153                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2154                 return MERR_INVALOP;
2155         } else if (itemp->opd[i] & ~type ||
2156             ((itemp->opd[i] & SIZE_MASK) &&
2157              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
                 /*
                  * Either the template wants a flag the operand lacks, or
                  * both have size bits and those bits differ.
                  */
2158             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2159                 return MERR_INVALOP;
2160             } else if (!is_class(REGISTER, type)) {
2161                 /*
2162                  * Note: we don't honor extrinsic operand sizes for registers,
2163                  * so "missing operand size" for a register should be
2164                  * considered a wildcard match rather than an error.
2165                  */
2166                 opsizemissing = true;
2167             }
2168         }
2169     }
2170
2171     if (opsizemissing)
2172         return MERR_OPSIZEMISSING;
2173
2174     /*
2175      * Check operand sizes
2176      */
2177     if (itemp->flags & (IF_SM | IF_SM2)) {
             /*
              * Take the size of the first sized template operand among the
              * first `oprs` operands and apply it to all of them.  The inner
              * loop reuses i; that is harmless only because the break leaves
              * the outer loop straight afterwards.
              */
2178         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2179         for (i = 0; i < oprs; i++) {
2180             asize = itemp->opd[i] & SIZE_MASK;
2181             if (asize) {
2182                 for (i = 0; i < oprs; i++)
2183                     size[i] = asize;
2184                 break;
2185             }
2186         }
2187     } else {
             /* oprs is not read again after this point. */
2188         oprs = itemp->operands;
2189     }
2190
         /*
          * Where the template operand itself carries no size, any size bits
          * given in the instruction must be covered by size[i].
          */
2191     for (i = 0; i < itemp->operands; i++) {
2192         if (!(itemp->opd[i] & SIZE_MASK) &&
2193             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2194             return MERR_OPSIZEMISMATCH;
2195     }
2196
2197     /*
2198      * Check template is okay at the set cpu level
2199      */
2200     if (((itemp->flags & IF_PLEVEL) > cpu))
2201         return MERR_BADCPU;
2202
2203     /*
2204      * Verify the appropriate long mode flag.
2205      */
         /* In 64-bit mode IF_NOLONG disqualifies; otherwise IF_LONG does. */
2206     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2207         return MERR_BADMODE;
2208
2209     /*
2210      * Check if special handling needed for Jumps
2211      */
         /* True when the first code byte is one of 0370..0373. */
2212     if ((itemp->code[0] & 0374) == 0370)
2213         return MOK_JUMP;
2214
2215     return MOK_GOOD;
2216 }
2217
/*
 * Work out the ModRM byte, the optional SIB byte and the number of
 * displacement bytes for an effective-address operand.
 *
 *   input    - the operand to encode.  It is normally only read, but an
 *              absolute address marked IP_REL in 64-bit mode has IP_REL
 *              cleared and MEMORY set in input->type (with a warning).
 *   output   - receives rip, sib_present, sib (when present), bytes (the
 *              number of displacement bytes), modrm and size.  REX bits
 *              are OR-ed into output->rex, which is never cleared here,
 *              so whatever the caller left there is kept.
 *   bits     - current code size
 *   addrbits - address size in effect for the instruction
 *   rfield   - value for the ModRM reg field; only its low three bits are
 *              encoded, the whole value goes to rexflags()
 *   rflags   - register flags that go with rfield, for rexflags()
 *
 * Returns output on success, or NULL when the operand cannot be encoded.
 * On a NULL return output may already have been partly written.
 */
2218 static ea *process_ea(operand * input, ea * output, int bits,
2219                       int addrbits, int rfield, opflags_t rflags)
2220 {
         /*
          * Set when the operand carries OPFLAG_UNKNOWN; it rules out the
          * mod 0 encoding and the automatic choice of mod 1 below.
          */
2221     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2222
2223     output->rip = false;
2224
2225     /* REX flags for the rfield operand */
2226     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2227
2228     if (is_class(REGISTER, input->type)) {  /* register direct */
2229         int i;
2230         opflags_t f;
2231
2232         if (!is_register(input->basereg))
2233             return NULL;
2234         f = regflag(input);
2235         i = nasm_regvals[input->basereg];
2236
             /* The register must carry every bit of REG_EA. */
2237         if (REG_EA & ~f)
2238             return NULL;        /* Invalid EA register */
2239
2240         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2241
2242         output->sib_present = false;    /* no SIB necessary */
2243         output->bytes = 0;              /* no offset necessary either */
2244         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2245     } else {                    /* it's a memory reference */
2246         if (input->basereg == -1 &&
2247             (input->indexreg == -1 || input->scale == 0)) {
2248             /* it's a pure offset */
2249
2250             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2251                 input->segment == NO_SEG) {
2252                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2253                 input->type &= ~IP_REL;
2254                 input->type |= MEMORY;
2255             }
2256
                 /*
                  * Warn when EAF_BYTEOFFS is set, or when EAF_WORDOFFS is
                  * set and disp_size is not the default for addrbits
                  * (16 for 16-bit addressing, otherwise 32).
                  */
2257             if (input->eaflags & EAF_BYTEOFFS ||
2258                 (input->eaflags & EAF_WORDOFFS &&
2259                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2260                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2261             }
2262
                 /*
                  * 64-bit code and not all IP_REL bits set: encode with a
                  * SIB byte (index field 4, base field 5) and four
                  * displacement bytes.  Every other case uses ModRM alone.
                  */
2263             if (bits == 64 && (~input->type & IP_REL)) {
2264                 int scale, index, base;
2265                 output->sib_present = true;
2266                 scale = 0;
2267                 index = 4;
2268                 base = 5;
2269                 output->sib = (scale << 6) | (index << 3) | base;
2270                 output->bytes = 4;
2271                 output->modrm = 4 | ((rfield & 7) << 3);
2272                 output->rip = false;
2273             } else {
2274                 output->sib_present = false;
2275                 output->bytes = (addrbits != 16 ? 4 : 2);
2276                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
                     /* Only reached in 64-bit code when IP_REL is fully set. */
2277                 output->rip = bits == 64;
2278             }
2279         } else {                /* it's an indirection */
2280             int i = input->indexreg, b = input->basereg, s = input->scale;
2281             int32_t seg = input->segment;
2282             int hb = input->hintbase, ht = input->hinttype;
2283             int t, it, bt;              /* register numbers */
2284             opflags_t x, ix, bx;        /* register flags */
2285
2286             if (s == 0)
2287                 i = -1;         /* make this easy, at least */
2288
                 /* it/ix: index register value and flags, or -1/0 if none. */
2289             if (is_register(i)) {
2290                 it = nasm_regvals[i];
2291                 ix = nasm_reg_flags[i];
2292             } else {
2293                 it = -1;
2294                 ix = 0;
2295             }
2296
                 /* bt/bx: base register value and flags, or -1/0 if none. */
2297             if (is_register(b)) {
2298                 bt = nasm_regvals[b];
2299                 bx = nasm_reg_flags[b];
2300             } else {
2301                 bt = -1;
2302                 bx = 0;
2303             }
2304
2305             /* check for a 32/64-bit memory reference... */
2306             if ((ix|bx) & (BITS32|BITS64)) {
2307                 /*
2308                  * it must be a 32/64-bit memory reference. Firstly we have
2309                  * to check that all registers involved are type E/Rxx.
2310                  */
                     /*
                      * sok holds the address sizes every register seen so
                      * far agrees with; o is the offset stored in an int32_t.
                      */
2311                 int32_t sok = BITS32 | BITS64, o = input->offset;
2312
2313                 if (it != -1) {
                         /* Index must have all REG64 bits or all REG32 bits. */
2314                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2315                         sok &= ix;
2316                     else
2317                         return NULL;
2318                 }
2319
2320                 if (bt != -1) {
2321                     if (REG_GPR & ~bx)
2322                         return NULL; /* Invalid register */
2323                     if (~sok & bx & SIZE_MASK)
2324                         return NULL; /* Invalid size */
2325                     sok &= bx;
2326                 }
2327
2328                 /*
2329                  * While we're here, ensure the user didn't specify
2330                  * WORD or QWORD
2331                  */
2332                 if (input->disp_size == 16 || input->disp_size == 64)
2333                     return NULL;
2334
                     /* The registers must suit the address size in effect. */
2335                 if (addrbits == 16 ||
2336                     (addrbits == 32 && !(sok & BITS32)) ||
2337                     (addrbits == 64 && !(sok & BITS64)))
2338                     return NULL;
2339
2340                 /* now reorganize base/index */
2341                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2342                     ((hb == b && ht == EAH_NOTBASE) ||
2343                      (hb == i && ht == EAH_MAKEBASE))) {
2344                     /* swap if hints say so */
2345                     t = bt, bt = it, it = t;
2346                     x = bx, bx = ix, ix = x;
2347                 }
2348                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2349                     bt = -1, bx = 0, s++;
2350                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2351                     /* make single reg base, unless hint */
2352                     bt = it, bx = ix, it = -1, ix = 0;
2353                 }
2354                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2355                       s == 3 || s == 5 || s == 9) && bt == -1)
2356                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                     /*
                      * With EAF_TIMESTWO and no index, move the base into
                      * the index slot with scale 1 (unless its low three
                      * bits equal REG_NUM_ESP).
                      */
2357                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2358                     (input->eaflags & EAF_TIMESTWO))
2359                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2360                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2361                 if (s == 1 && it == REG_NUM_ESP) {
2362                     /* swap ESP into base if scale is 1 */
2363                     t = it, it = bt, bt = t;
2364                     x = ix, ix = bx, bx = x;
2365                 }
2366                 if (it == REG_NUM_ESP ||
2367                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2368                     return NULL;        /* wrong, for various reasons */
2369
2370                 output->rex |= rexflags(it, ix, REX_X);
2371                 output->rex |= rexflags(bt, bx, REX_B);
2372
2373                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2374                     /* no SIB needed */
2375                     int mod, rm;
2376
2377                     if (bt == -1) {
2378                         rm = 5;
2379                         mod = 0;
2380                     } else {
2381                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement (zero offset, no
                              *        segment, not a forward reference, no
                              *        explicit offset size, base not EBP).
                              * mod 1: EAF_BYTEOFFS was given, or the offset
                              *        fits a signed byte under the same
                              *        conditions and EAF_WORDOFFS is unset.
                              * mod 2: everything else.
                              */
2382                         if (rm != REG_NUM_EBP && o == 0 &&
2383                             seg == NO_SEG && !forw_ref &&
2384                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2385                             mod = 0;
2386                         else if (input->eaflags & EAF_BYTEOFFS ||
2387                                  (o >= -128 && o <= 127 &&
2388                                   seg == NO_SEG && !forw_ref &&
2389                                   !(input->eaflags & EAF_WORDOFFS)))
2390                             mod = 1;
2391                         else
2392                             mod = 2;
2393                     }
2394
2395                     output->sib_present = false;
                         /* 4 bytes with no base or with mod 2, else mod (0 or 1). */
2396                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2397                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2398                 } else {
2399                     /* we need a SIB */
2400                     int mod, scale, index, base;
2401
                         /* Without an index register the index field is 4. */
2402                     if (it == -1)
2403                         index = 4, s = 1;
2404                     else
2405                         index = (it & 7);
2406
                         /* Translate the scale factor into the 2-bit field. */
2407                     switch (s) {
2408                     case 1:
2409                         scale = 0;
2410                         break;
2411                     case 2:
2412                         scale = 1;
2413                         break;
2414                     case 4:
2415                         scale = 2;
2416                         break;
2417                     case 8:
2418                         scale = 3;
2419                         break;
2420                     default:   /* then what the smeg is it? */
2421                         return NULL;    /* panic */
2422                     }
2423
2424                     if (bt == -1) {
2425                         base = 5;
2426                         mod = 0;
2427                     } else {
2428                         base = (bt & 7);
                             /* Same mod selection as in the no-SIB case. */
2429                         if (base != REG_NUM_EBP && o == 0 &&
2430                             seg == NO_SEG && !forw_ref &&
2431                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2432                             mod = 0;
2433                         else if (input->eaflags & EAF_BYTEOFFS ||
2434                                  (o >= -128 && o <= 127 &&
2435                                   seg == NO_SEG && !forw_ref &&
2436                                   !(input->eaflags & EAF_WORDOFFS)))
2437                             mod = 1;
2438                         else
2439                             mod = 2;
2440                     }
2441
2442                     output->sib_present = true;
2443                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2444                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2445                     output->sib = (scale << 6) | (index << 3) | base;
2446                 }
2447             } else {            /* it's 16-bit */
2448                 int mod, rm;
                     /* The offset is stored in an int16_t on this path. */
2449                 int16_t o = input->offset;
2450
2451                 /* check for 64-bit long mode */
2452                 if (addrbits == 64)
2453                     return NULL;
2454
2455                 /* check all registers are BX, BP, SI or DI */
2456                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2457                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2458                     return NULL;
2459
2460                 /* ensure the user didn't specify DWORD/QWORD */
2461                 if (input->disp_size == 32 || input->disp_size == 64)
2462                     return NULL;
2463
2464                 if (s != 1 && i != -1)
2465                     return NULL;        /* no can do, in 16-bit EA */
2466                 if (b == -1 && i != -1) {
2467                     int tmp = b;
2468                     b = i;
2469                     i = tmp;
2470                 }               /* swap */
2471                 if ((b == R_SI || b == R_DI) && i != -1) {
2472                     int tmp = b;
2473                     b = i;
2474                     i = tmp;
2475                 }
2476                 /* have BX/BP as base, SI/DI index */
2477                 if (b == i)
2478                     return NULL;        /* shouldn't ever happen, in theory */
2479                 if (i != -1 && b != -1 &&
2480                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2481                     return NULL;        /* invalid combinations */
2482                 if (b == -1)            /* pure offset: handled above */
2483                     return NULL;        /* so if it gets to here, panic! */
2484
                     /*
                      * Pick the rm value.  With an index, index and base are
                      * folded into one switch key; otherwise the base alone
                      * selects it.  rm stays -1 if nothing matched.
                      */
2485                 rm = -1;
2486                 if (i != -1)
2487                     switch (i * 256 + b) {
2488                     case R_SI * 256 + R_BX:
2489                         rm = 0;
2490                         break;
2491                     case R_DI * 256 + R_BX:
2492                         rm = 1;
2493                         break;
2494                     case R_SI * 256 + R_BP:
2495                         rm = 2;
2496                         break;
2497                     case R_DI * 256 + R_BP:
2498                         rm = 3;
2499                         break;
2500                 } else
2501                     switch (b) {
2502                     case R_SI:
2503                         rm = 4;
2504                         break;
2505                     case R_DI:
2506                         rm = 5;
2507                         break;
2508                     case R_BP:
2509                         rm = 6;
2510                         break;
2511                     case R_BX:
2512                         rm = 7;
2513                         break;
2514                     }
2515                 if (rm == -1)           /* can't happen, in theory */
2516                     return NULL;        /* so panic if it does */
2517
                     /*
                      * Same mod selection as for 32/64-bit addressing,
                      * except that rm 6 (BP alone) takes the place of the
                      * EBP test and can never use mod 0.
                      */
2518                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2519                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2520                     mod = 0;
2521                 else if (input->eaflags & EAF_BYTEOFFS ||
2522                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2523                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2524                     mod = 1;
2525                 else
2526                     mod = 2;
2527
2528                 output->sib_present = false;    /* no SIB - it's 16-bit */
2529                 output->bytes = mod;            /* bytes of offset needed */
2530                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2531             }
2532         }
2533     }
2534
         /* One ModRM byte, plus the SIB byte if any, plus the displacement. */
2535     output->size = 1 + output->sib_present + output->bytes;
2536     return output;
2537 }
2538
2539 static void add_asp(insn *ins, int addrbits)
2540 {
2541     int j, valid;
2542     int defdisp;
2543
2544     valid = (addrbits == 64) ? 64|32 : 32|16;
2545
2546     switch (ins->prefixes[PPS_ASIZE]) {
2547     case P_A16:
2548         valid &= 16;
2549         break;
2550     case P_A32:
2551         valid &= 32;
2552         break;
2553     case P_A64:
2554         valid &= 64;
2555         break;
2556     case P_ASP:
2557         valid &= (addrbits == 32) ? 16 : 32;
2558         break;
2559     default:
2560         break;
2561     }
2562
2563     for (j = 0; j < ins->operands; j++) {
2564         if (is_class(MEMORY, ins->oprs[j].type)) {
2565             opflags_t i, b;
2566
2567             /* Verify as Register */
2568             if (!is_register(ins->oprs[j].indexreg))
2569                 i = 0;
2570             else
2571                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2572
2573             /* Verify as Register */
2574             if (!is_register(ins->oprs[j].basereg))
2575                 b = 0;
2576             else
2577                 b = nasm_reg_flags[ins->oprs[j].basereg];
2578
2579             if (ins->oprs[j].scale == 0)
2580                 i = 0;
2581
2582             if (!i && !b) {
2583                 int ds = ins->oprs[j].disp_size;
2584                 if ((addrbits != 64 && ds > 8) ||
2585                     (addrbits == 64 && ds == 16))
2586                     valid &= ds;
2587             } else {
2588                 if (!(REG16 & ~b))
2589                     valid &= 16;
2590                 if (!(REG32 & ~b))
2591                     valid &= 32;
2592                 if (!(REG64 & ~b))
2593                     valid &= 64;
2594
2595                 if (!(REG16 & ~i))
2596                     valid &= 16;
2597                 if (!(REG32 & ~i))
2598                     valid &= 32;
2599                 if (!(REG64 & ~i))
2600                     valid &= 64;
2601             }
2602         }
2603     }
2604
2605     if (valid & addrbits) {
2606         ins->addr_size = addrbits;
2607     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2608         /* Add an address size prefix */
2609         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2610         ins->prefixes[PPS_ASIZE] = pref;
2611         ins->addr_size = (addrbits == 32) ? 16 : 32;
2612     } else {
2613         /* Impossible... */
2614         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2615         ins->addr_size = addrbits; /* Error recovery */
2616     }
2617
2618     defdisp = ins->addr_size == 16 ? 16 : 32;
2619
2620     for (j = 0; j < ins->operands; j++) {
2621         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2622             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2623             /*
2624              * mem_offs sizes must match the address size; if not,
2625              * strip the MEM_OFFS bit and match only EA instructions
2626              */
2627             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2628         }
2629     }
2630 }