asm/parser.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2019 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * parser.c   source line parser for the Netwide Assembler
  36  */
  37
  38 #include "compiler.h"
  39
  40 #include "nctype.h"
  41
  42 #include "nasm.h"
  43 #include "insns.h"
  44 #include "nasmlib.h"
  45 #include "error.h"
  46 #include "stdscan.h"
  47 #include "eval.h"
  48 #include "parser.h"
  49 #include "float.h"
  50 #include "assemble.h"
  51 #include "tables.h"
  52
  53
  54 static int is_comma_next(void);
  55
  56 static struct tokenval tokval;
  57
  58 static int prefix_slot(int prefix)
  59 {
  60     switch (prefix) {
  61     case P_WAIT:
  62         return PPS_WAIT;
  63     case R_CS:
  64     case R_DS:
  65     case R_SS:
  66     case R_ES:
  67     case R_FS:
  68     case R_GS:
  69         return PPS_SEG;
  70     case P_LOCK:
  71         return PPS_LOCK;
  72     case P_REP:
  73     case P_REPE:
  74     case P_REPZ:
  75     case P_REPNE:
  76     case P_REPNZ:
  77     case P_XACQUIRE:
  78     case P_XRELEASE:
  79     case P_BND:
  80     case P_NOBND:
  81         return PPS_REP;
  82     case P_O16:
  83     case P_O32:
  84     case P_O64:
  85     case P_OSP:
  86         return PPS_OSIZE;
  87     case P_A16:
  88     case P_A32:
  89     case P_A64:
  90     case P_ASP:
  91         return PPS_ASIZE;
  92     case P_EVEX:
  93     case P_VEX3:
  94     case P_VEX2:
  95         return PPS_VEX;
  96     default:
  97         nasm_panic("Invalid value %d passed to prefix_slot()", prefix);
  98         return -1;
  99     }
 100 }
 101
 102 static void process_size_override(insn *result, operand *op)
 103 {
 104     if (tasm_compatible_mode) {
 105         switch (tokval.t_integer) {
 106             /* For TASM compatibility a size override inside the
 107              * brackets changes the size of the operand, not the
 108              * address type of the operand as it does in standard
 109              * NASM syntax. Hence:
 110              *
 111              *  mov     eax,[DWORD val]
 112              *
 113              * is valid syntax in TASM compatibility mode. Note that
 114              * you lose the ability to override the default address
 115              * type for the instruction, but we never use anything
 116              * but 32-bit flat model addressing in our code.
 117              */
 118         case S_BYTE:
 119             op->type |= BITS8;
 120             break;
 121         case S_WORD:
 122             op->type |= BITS16;
 123             break;
 124         case S_DWORD:
 125         case S_LONG:
 126             op->type |= BITS32;
 127             break;
 128         case S_QWORD:
 129             op->type |= BITS64;
 130             break;
 131         case S_TWORD:
 132             op->type |= BITS80;
 133             break;
 134         case S_OWORD:
 135             op->type |= BITS128;
 136             break;
 137         default:
 138             nasm_nonfatal("invalid operand size specification");
 139             break;
 140         }
 141     } else {
 142         /* Standard NASM compatible syntax */
 143         switch (tokval.t_integer) {
 144         case S_NOSPLIT:
 145             op->eaflags |= EAF_TIMESTWO;
 146             break;
 147         case S_REL:
 148             op->eaflags |= EAF_REL;
 149             break;
 150         case S_ABS:
 151             op->eaflags |= EAF_ABS;
 152             break;
 153         case S_BYTE:
 154             op->disp_size = 8;
 155             op->eaflags |= EAF_BYTEOFFS;
 156             break;
 157         case P_A16:
 158         case P_A32:
 159         case P_A64:
 160             if (result->prefixes[PPS_ASIZE] &&
 161                 result->prefixes[PPS_ASIZE] != tokval.t_integer)
 162                 nasm_nonfatal("conflicting address size specifications");
 163             else
 164                 result->prefixes[PPS_ASIZE] = tokval.t_integer;
 165             break;
 166         case S_WORD:
 167             op->disp_size = 16;
 168             op->eaflags |= EAF_WORDOFFS;
 169             break;
 170         case S_DWORD:
 171         case S_LONG:
 172             op->disp_size = 32;
 173             op->eaflags |= EAF_WORDOFFS;
 174             break;
 175         case S_QWORD:
 176             op->disp_size = 64;
 177             op->eaflags |= EAF_WORDOFFS;
 178             break;
 179         default:
 180             nasm_nonfatal("invalid size specification in"
 181                           " effective address");
 182             break;
 183         }
 184     }
 185 }
 186
 187 /*
 188  * Brace decorators are are parsed here.  opmask and zeroing
 189  * decorators can be placed in any order.  e.g. zmm1 {k2}{z} or zmm2
 190  * {z}{k3} decorator(s) are placed at the end of an operand.
 191  */
 192 static bool parse_braces(decoflags_t *decoflags)
 193 {
 194     int i, j;
 195
 196     i = tokval.t_type;
 197
 198     while (true) {
 199         switch (i) {
 200         case TOKEN_OPMASK:
 201             if (*decoflags & OPMASK_MASK) {
 202                 nasm_nonfatal("opmask k%"PRIu64" is already set",
 203                               *decoflags & OPMASK_MASK);
 204                 *decoflags &= ~OPMASK_MASK;
 205             }
 206             *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
 207             break;
 208         case TOKEN_DECORATOR:
 209             j = tokval.t_integer;
 210             switch (j) {
 211             case BRC_Z:
 212                 *decoflags |= Z_MASK;
 213                 break;
 214             case BRC_1TO2:
 215             case BRC_1TO4:
 216             case BRC_1TO8:
 217             case BRC_1TO16:
 218                 *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
 219                 break;
 220             default:
 221                 nasm_nonfatal("{%s} is not an expected decorator",
 222                               tokval.t_charptr);
 223                 break;
 224             }
 225             break;
 226         case ',':
 227         case TOKEN_EOS:
 228             return false;
 229         default:
 230             nasm_nonfatal("only a series of valid decorators expected");
 231             return true;
 232         }
 233         i = stdscan(NULL, &tokval);
 234     }
 235 }
 236
 237 static inline const expr *next_expr(const expr *e, const expr **next_list)
 238 {
 239     e++;
 240     if (!e->type) {
 241         if (next_list) {
 242             e = *next_list;
 243             *next_list = NULL;
 244         } else {
 245             e = NULL;
 246         }
 247     }
 248     return e;
 249 }
 250
 251 static inline void init_operand(operand *op)
 252 {
 253     memset(op, 0, sizeof *op);
 254
 255     op->basereg  = -1;
 256     op->indexreg = -1;
 257     op->segment  = NO_SEG;
 258     op->wrt      = NO_SEG;
 259 }
 260
 261 static int parse_mref(operand *op, const expr *e)
 262 {
 263     int b, i, s;        /* basereg, indexreg, scale */
 264     int64_t o;          /* offset */
 265
 266     b = op->basereg;
 267     i = op->indexreg;
 268     s = op->scale;
 269     o = op->offset;
 270
 271     for (; e->type; e++) {
 272         if (e->type <= EXPR_REG_END) {
 273             bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 274
 275             if (is_gpr && e->value == 1 && b == -1) {
 276                 /* It can be basereg */
 277                 b = e->type;
 278             } else if (i == -1) {
 279                 /* Must be index register */
 280                 i = e->type;
 281                 s = e->value;
 282             } else {
 283                 if (b == -1)
 284                     nasm_nonfatal("invalid effective address: two index registers");
 285                 else if (!is_gpr)
 286                     nasm_nonfatal("invalid effective address: impossible register");
 287                 else
 288                     nasm_nonfatal("invalid effective address: too many registers");
 289                 return -1;
 290             }
 291         } else if (e->type == EXPR_UNKNOWN) {
 292             op->opflags |= OPFLAG_UNKNOWN;
 293         } else if (e->type == EXPR_SIMPLE) {
 294             o += e->value;
 295         } else if  (e->type == EXPR_WRT) {
 296             op->wrt = e->value;
 297         } else if (e->type >= EXPR_SEGBASE) {
 298             if (e->value == 1) {
 299                 if (op->segment != NO_SEG) {
 300                     nasm_nonfatal("invalid effective address: multiple base segments");
 301                     return -1;
 302                 }
 303                 op->segment = e->type - EXPR_SEGBASE;
 304             } else if (e->value == -1 &&
 305                        e->type == location.segment + EXPR_SEGBASE &&
 306                        !(op->opflags & OPFLAG_RELATIVE)) {
 307                 op->opflags |= OPFLAG_RELATIVE;
 308             } else {
 309                 nasm_nonfatal("invalid effective address: impossible segment base multiplier");
 310                 return -1;
 311             }
 312         } else {
 313             nasm_nonfatal("invalid effective address: bad subexpression type");
 314             return -1;
 315         }
 316    }
 317
 318     op->basereg  = b;
 319     op->indexreg = i;
 320     op->scale    = s;
 321     op->offset   = o;
 322     return 0;
 323 }
 324
 325 static void mref_set_optype(operand *op)
 326 {
 327     int b = op->basereg;
 328     int i = op->indexreg;
 329     int s = op->scale;
 330
 331     /* It is memory, but it can match any r/m operand */
 332     op->type |= MEMORY_ANY;
 333
 334     if (b == -1 && (i == -1 || s == 0)) {
 335         int is_rel = globalbits == 64 &&
 336             !(op->eaflags & EAF_ABS) &&
 337             ((globalrel &&
 338               !(op->eaflags & EAF_FSGS)) ||
 339              (op->eaflags & EAF_REL));
 340
 341         op->type |= is_rel ? IP_REL : MEM_OFFS;
 342     }
 343
 344     if (i != -1) {
 345         opflags_t iclass = nasm_reg_flags[i];
 346
 347         if (is_class(XMMREG,iclass))
 348             op->type |= XMEM;
 349         else if (is_class(YMMREG,iclass))
 350             op->type |= YMEM;
 351         else if (is_class(ZMMREG,iclass))
 352             op->type |= ZMEM;
 353     }
 354 }
 355
 356 /*
 357  * Convert an expression vector returned from evaluate() into an
 358  * extop structure.  Return zero on success.
 359  */
 360 static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
 361 {
 362     eop->type = EOT_DB_NUMBER;
 363     eop->offset = 0;
 364     eop->segment = eop->wrt = NO_SEG;
 365     eop->relative = false;
 366
 367     for (; vect->type; vect++) {
 368         if (!vect->value)       /* zero term, safe to ignore */
 369             continue;
 370
 371         if (vect->type <= EXPR_REG_END) /* false if a register is present */
 372             return -1;
 373
 374         if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
 375             return 0;
 376
 377         if (vect->type == EXPR_SIMPLE) {
 378             /* Simple number expression */
 379             eop->offset += vect->value;
 380             continue;
 381         }
 382         if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
 383             /* WRT term */
 384             eop->wrt = vect->value;
 385             continue;
 386         }
 387
 388         if (!eop->relative &&
 389             vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
 390             /* Expression of the form: foo - $ */
 391             eop->relative = true;
 392             continue;
 393         }
 394
 395         if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
 396             vect->value == 1) {
 397             eop->segment = vect->type - EXPR_SEGBASE;
 398             continue;
 399         }
 400
 401         /* Otherwise, badness */
 402         return -1;
 403     }
 404
 405     /* We got to the end and it was all okay */
 406     return 0;
 407 }
 408
 409 insn *parse_line(char *buffer, insn *result)
 410 {
 411     bool insn_is_label = false;
 412     struct eval_hints hints;
 413     int opnum;
 414     bool critical;
 415     bool first;
 416     bool recover;
 417     bool far_jmp_ok;
 418     int i;
 419
 420     nasm_static_assert(P_none == 0);
 421
 422 restart_parse:
 423     first               = true;
 424     result->forw_ref    = false;
 425
 426     stdscan_reset();
 427     stdscan_set(buffer);
 428     i = stdscan(NULL, &tokval);
 429
 430     memset(result->prefixes, P_none, sizeof(result->prefixes));
 431     result->times       = 1;    /* No TIMES either yet */
 432     result->label       = NULL; /* Assume no label */
 433     result->eops        = NULL; /* must do this, whatever happens */
 434     result->operands    = 0;    /* must initialize this */
 435     result->evex_rm     = 0;    /* Ensure EVEX rounding mode is reset */
 436     result->evex_brerop = -1;   /* Reset EVEX broadcasting/ER op position */
 437
 438     /* Ignore blank lines */
 439     if (i == TOKEN_EOS)
 440         goto fail;
 441
 442     if (i != TOKEN_ID       &&
 443         i != TOKEN_INSN     &&
 444         i != TOKEN_PREFIX   &&
 445         (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
 446         nasm_nonfatal("label or instruction expected at start of line");
 447         goto fail;
 448     }
 449
 450     if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
 451         /* there's a label here */
 452         first = false;
 453         result->label = tokval.t_charptr;
 454         i = stdscan(NULL, &tokval);
 455         if (i == ':') {         /* skip over the optional colon */
 456             i = stdscan(NULL, &tokval);
 457         } else if (i == 0) {
 458             /*!
 459              *!label-orphan [on] labels alone on lines without trailing `:'
 460              *!=orphan-labels
 461              *!  warns about source lines which contain no instruction but define
 462              *!  a label without a trailing colon. This is most likely indicative
 463              *!  of a typo, but is technically correct NASM syntax (see \k{syntax}.)
 464              */
 465             nasm_warn(WARN_LABEL_ORPHAN ,
 466                        "label alone on a line without a colon might be in error");
 467         }
 468         if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
 469             /*
 470              * FIXME: location.segment could be NO_SEG, in which case
 471              * it is possible we should be passing 'absolute.segment'. Look into this.
 472              * Work out whether that is *really* what we should be doing.
 473              * Generally fix things. I think this is right as it is, but
 474              * am still not certain.
 475              */
 476             define_label(result->label,
 477                          in_absolute ? absolute.segment : location.segment,
 478                          location.offset, true);
 479         }
 480     }
 481
 482     /* Just a label here */
 483     if (i == TOKEN_EOS)
 484         goto fail;
 485
 486     while (i == TOKEN_PREFIX ||
 487            (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
 488         first = false;
 489
 490         /*
 491          * Handle special case: the TIMES prefix.
 492          */
 493         if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
 494             expr *value;
 495
 496             i = stdscan(NULL, &tokval);
 497             value = evaluate(stdscan, NULL, &tokval, NULL, pass_stable(), NULL);
 498             i = tokval.t_type;
 499             if (!value)                  /* Error in evaluator */
 500                 goto fail;
 501             if (!is_simple(value)) {
 502                 nasm_nonfatal("non-constant argument supplied to TIMES");
 503                 result->times = 1L;
 504             } else {
 505                 result->times = value->value;
 506                 if (value->value < 0) {
 507                     nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
 508                     result->times = 0;
 509                 }
 510             }
 511         } else {
 512             int slot = prefix_slot(tokval.t_integer);
 513             if (result->prefixes[slot]) {
 514                if (result->prefixes[slot] == tokval.t_integer)
 515                     nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
 516                else
 517                     nasm_nonfatal("instruction has conflicting prefixes");
 518             }
 519             result->prefixes[slot] = tokval.t_integer;
 520             i = stdscan(NULL, &tokval);
 521         }
 522     }
 523
 524     if (i != TOKEN_INSN) {
 525         int j;
 526         enum prefixes pfx;
 527
 528         for (j = 0; j < MAXPREFIX; j++) {
 529             if ((pfx = result->prefixes[j]) != P_none)
 530                 break;
 531         }
 532
 533         if (i == 0 && pfx != P_none) {
 534             /*
 535              * Instruction prefixes are present, but no actual
 536              * instruction. This is allowed: at this point we
 537              * invent a notional instruction of RESB 0.
 538              */
 539             result->opcode          = I_RESB;
 540             result->operands        = 1;
 541             nasm_zero(result->oprs);
 542             result->oprs[0].type    = IMMEDIATE;
 543             result->oprs[0].offset  = 0L;
 544             result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
 545             return result;
 546         } else {
 547             nasm_nonfatal("parser: instruction expected");
 548             goto fail;
 549         }
 550     }
 551
 552     result->opcode = tokval.t_integer;
 553     result->condition = tokval.t_inttwo;
 554
 555     /*
 556      * INCBIN cannot be satisfied with incorrectly
 557      * evaluated operands, since the correct values _must_ be known
 558      * on the first pass. Hence, even in pass one, we set the
 559      * `critical' flag on calling evaluate(), so that it will bomb
 560      * out on undefined symbols.
 561      */
 562     critical = pass_final() || (result->opcode == I_INCBIN);
 563
 564     if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
 565         extop *eop, **tail = &result->eops, **fixptr;
 566         int oper_num = 0;
 567         int32_t sign;
 568
 569         result->eops_float = false;
 570
 571         /*
 572          * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
 573          */
 574         while (1) {
 575             i = stdscan(NULL, &tokval);
 576             if (i == TOKEN_EOS)
 577                 break;
 578             else if (first && i == ':') {
 579                 insn_is_label = true;
 580                 goto restart_parse;
 581             }
 582             first = false;
 583             fixptr = tail;
 584             eop = *tail = nasm_malloc(sizeof(extop));
 585             tail = &eop->next;
 586             eop->next = NULL;
 587             eop->type = EOT_NOTHING;
 588             oper_num++;
 589             sign = +1;
 590
 591             /*
 592              * is_comma_next() here is to distinguish this from
 593              * a string used as part of an expression...
 594              */
 595             if (i == TOKEN_STR && is_comma_next()) {
 596                 eop->type       = EOT_DB_STRING;
 597                 eop->stringval  = tokval.t_charptr;
 598                 eop->stringlen  = tokval.t_inttwo;
 599                 i = stdscan(NULL, &tokval);     /* eat the comma */
 600             } else if (i == TOKEN_STRFUNC) {
 601                 bool parens = false;
 602                 const char *funcname = tokval.t_charptr;
 603                 enum strfunc func = tokval.t_integer;
 604                 i = stdscan(NULL, &tokval);
 605                 if (i == '(') {
 606                     parens = true;
 607                     i = stdscan(NULL, &tokval);
 608                 }
 609                 if (i != TOKEN_STR) {
 610                     nasm_nonfatal("%s must be followed by a string constant",
 611                                   funcname);
 612                     eop->type = EOT_NOTHING;
 613                 } else {
 614                     eop->type = EOT_DB_STRING_FREE;
 615                     eop->stringlen =
 616                         string_transform(tokval.t_charptr, tokval.t_inttwo,
 617                                          &eop->stringval, func);
 618                     if (eop->stringlen == (size_t)-1) {
 619                         nasm_nonfatal("invalid input string to %s", funcname);
 620                         eop->type = EOT_NOTHING;
 621                     }
 622                 }
 623                 if (parens && i && i != ')') {
 624                     i = stdscan(NULL, &tokval);
 625                     if (i != ')')
 626                         nasm_nonfatal("unterminated %s function", funcname);
 627                 }
 628                 if (i && i != ',')
 629                     i = stdscan(NULL, &tokval);
 630             } else if (i == '-' || i == '+') {
 631                 char *save = stdscan_get();
 632                 int token = i;
 633                 sign = (i == '-') ? -1 : 1;
 634                 i = stdscan(NULL, &tokval);
 635                 if (i != TOKEN_FLOAT) {
 636                     stdscan_set(save);
 637                     i = tokval.t_type = token;
 638                     goto is_expression;
 639                 } else {
 640                     goto is_float;
 641                 }
 642             } else if (i == TOKEN_FLOAT) {
 643 is_float:
 644                 eop->type = EOT_DB_STRING;
 645                 result->eops_float = true;
 646
 647                 eop->stringlen = db_bytes(result->opcode);
 648                 if (eop->stringlen > 16) {
 649                     nasm_nonfatal("floating-point constant"
 650                                   " encountered in DY or DZ instruction");
 651                     eop->stringlen = 0;
 652                 } else if (eop->stringlen < 1) {
 653                     nasm_nonfatal("floating-point constant"
 654                                   " encountered in unknown instruction");
 655                     /*
 656                      * fix suggested by Pedro Gimeno... original line was:
 657                      * eop->type = EOT_NOTHING;
 658                      */
 659                     eop->stringlen = 0;
 660                 }
 661
 662                 eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
 663                 tail = &eop->next;
 664                 *fixptr = eop;
 665                 eop->stringval = (char *)eop + sizeof(extop);
 666                 if (!eop->stringlen ||
 667                     !float_const(tokval.t_charptr, sign,
 668                                  (uint8_t *)eop->stringval, eop->stringlen))
 669                     eop->type = EOT_NOTHING;
 670                 i = stdscan(NULL, &tokval); /* eat the comma */
 671             } else {
 672                 /* anything else, assume it is an expression */
 673                 expr *value;
 674
 675 is_expression:
 676                 value = evaluate(stdscan, NULL, &tokval, NULL,
 677                                  critical, NULL);
 678                 i = tokval.t_type;
 679                 if (!value)                  /* Error in evaluator */
 680                     goto fail;
 681                 if (value_to_extop(value, eop, location.segment)) {
 682                     nasm_nonfatal("operand %d: expression is not simple or relocatable",
 683                                   oper_num);
 684                 }
 685             }
 686
 687             /*
 688              * We're about to call stdscan(), which will eat the
 689              * comma that we're currently sitting on between
 690              * arguments. However, we'd better check first that it
 691              * _is_ a comma.
 692              */
 693             if (i == TOKEN_EOS) /* also could be EOL */
 694                 break;
 695             if (i != ',') {
 696                 nasm_nonfatal("comma expected after operand %d", oper_num);
 697                 goto fail;
 698             }
 699         }
 700
 701         if (result->opcode == I_INCBIN) {
 702             /*
 703              * Correct syntax for INCBIN is that there should be
 704              * one string operand, followed by one or two numeric
 705              * operands.
 706              */
 707             if (!result->eops || result->eops->type != EOT_DB_STRING)
 708                 nasm_nonfatal("`incbin' expects a file name");
 709             else if (result->eops->next &&
 710                      result->eops->next->type != EOT_DB_NUMBER)
 711                 nasm_nonfatal("`incbin': second parameter is"
 712                               " non-numeric");
 713             else if (result->eops->next && result->eops->next->next &&
 714                      result->eops->next->next->type != EOT_DB_NUMBER)
 715                 nasm_nonfatal("`incbin': third parameter is"
 716                               " non-numeric");
 717             else if (result->eops->next && result->eops->next->next &&
 718                      result->eops->next->next->next)
 719                 nasm_nonfatal("`incbin': more than three parameters");
 720             else
 721                 return result;
 722             /*
 723              * If we reach here, one of the above errors happened.
 724              * Throw the instruction away.
 725              */
 726             goto fail;
 727         } else /* DB ... */ if (oper_num == 0)
 728             nasm_warn(WARN_OTHER, "no operand for data declaration");
 729         else
 730             result->operands = oper_num;
 731
 732         return result;
 733     }
 734
 735     /*
 736      * Now we begin to parse the operands. There may be up to four
 737      * of these, separated by commas, and terminated by a zero token.
 738      */
 739     far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
 740
 741     for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
 742         operand *op = &result->oprs[opnum];
 743         expr *value;            /* used most of the time */
 744         bool mref = false;      /* is this going to be a memory ref? */
 745         int bracket = 0;        /* is it a [] mref, or a "naked" mref? */
 746         bool mib;               /* compound (mib) mref? */
 747         int setsize = 0;
 748         decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 749
 750         init_operand(op);
 751
 752         i = stdscan(NULL, &tokval);
 753         if (i == TOKEN_EOS)
 754             break;              /* end of operands: get out of here */
 755         else if (first && i == ':') {
 756             insn_is_label = true;
 757             goto restart_parse;
 758         }
 759         first = false;
 760         op->type = 0; /* so far, no override */
 761         /* size specifiers */
 762         while (i == TOKEN_SPECIAL || i == TOKEN_SIZE) {
 763             switch (tokval.t_integer) {
 764             case S_BYTE:
 765                 if (!setsize)   /* we want to use only the first */
 766                     op->type |= BITS8;
 767                 setsize = 1;
 768                 break;
 769             case S_WORD:
 770                 if (!setsize)
 771                     op->type |= BITS16;
 772                 setsize = 1;
 773                 break;
 774             case S_DWORD:
 775             case S_LONG:
 776                 if (!setsize)
 777                     op->type |= BITS32;
 778                 setsize = 1;
 779                 break;
 780             case S_QWORD:
 781                 if (!setsize)
 782                     op->type |= BITS64;
 783                 setsize = 1;
 784                 break;
 785             case S_TWORD:
 786                 if (!setsize)
 787                     op->type |= BITS80;
 788                 setsize = 1;
 789                 break;
 790             case S_OWORD:
 791                 if (!setsize)
 792                     op->type |= BITS128;
 793                 setsize = 1;
 794                 break;
 795             case S_YWORD:
 796                 if (!setsize)
 797                     op->type |= BITS256;
 798                 setsize = 1;
 799                 break;
 800             case S_ZWORD:
 801                 if (!setsize)
 802                     op->type |= BITS512;
 803                 setsize = 1;
 804                 break;
 805             case S_TO:
 806                 op->type |= TO;
 807                 break;
 808             case S_STRICT:
 809                 op->type |= STRICT;
 810                 break;
 811             case S_FAR:
 812                 op->type |= FAR;
 813                 break;
 814             case S_NEAR:
 815                 op->type |= NEAR;
 816                 break;
 817             case S_SHORT:
 818                 op->type |= SHORT;
 819                 break;
 820             default:
 821                 nasm_nonfatal("invalid operand size specification");
 822             }
 823             i = stdscan(NULL, &tokval);
 824         }
 825
 826         if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
 827             /* memory reference */
 828             mref = true;
 829             bracket += (i == '[');
 830             i = stdscan(NULL, &tokval);
 831         }
 832
 833     mref_more:
 834         if (mref) {
 835             bool done = false;
 836             bool nofw = false;
 837
 838             while (!done) {
 839                 switch (i) {
 840                 case TOKEN_SPECIAL:
 841                 case TOKEN_SIZE:
 842                 case TOKEN_PREFIX:
 843                     process_size_override(result, op);
 844                     break;
 845
 846                 case '[':
 847                     bracket++;
 848                     break;
 849
 850                 case ',':
 851                     tokval.t_type = TOKEN_NUM;
 852                     tokval.t_integer = 0;
 853                     stdscan_set(stdscan_get() - 1);     /* rewind the comma */
 854                     done = nofw = true;
 855                     break;
 856
 857                 case TOKEN_MASM_FLAT:
 858                     i = stdscan(NULL, &tokval);
 859                     if (i != ':') {
 860                         nasm_nonfatal("unknown use of FLAT in MASM emulation");
 861                         nofw = true;
 862                     }
 863                     done = true;
 864                     break;
 865
 866                 default:
 867                     done = nofw = true;
 868                     break;
 869                 }
 870
 871                 if (!nofw)
 872                     i = stdscan(NULL, &tokval);
 873             }
 874         }
 875
 876         value = evaluate(stdscan, NULL, &tokval,
 877                          &op->opflags, critical, &hints);
 878         i = tokval.t_type;
 879         if (op->opflags & OPFLAG_FORWARD) {
 880             result->forw_ref = true;
 881         }
 882         if (!value)                  /* Error in evaluator */
 883             goto fail;
 884
 885         if (i == '[' && !bracket) {
 886             /* displacement[regs] syntax */
 887             mref = true;
 888             parse_mref(op, value); /* Process what we have so far */
 889             goto mref_more;
 890         }
 891
 892         if (i == ':' && (mref || !far_jmp_ok)) {
 893             /* segment override? */
 894             mref = true;
 895
 896             /*
 897              * Process the segment override.
 898              */
 899             if (value[1].type   != 0    ||
 900                 value->value    != 1    ||
 901                 !IS_SREG(value->type))
 902                 nasm_nonfatal("invalid segment override");
 903             else if (result->prefixes[PPS_SEG])
 904                 nasm_nonfatal("instruction has conflicting segment overrides");
 905             else {
 906                 result->prefixes[PPS_SEG] = value->type;
 907                 if (IS_FSGS(value->type))
 908                     op->eaflags |= EAF_FSGS;
 909             }
 910
 911             i = stdscan(NULL, &tokval); /* then skip the colon */
 912             goto mref_more;
 913         }
 914
 915         mib = false;
 916         if (mref && bracket && i == ',') {
 917             /* [seg:base+offset,index*scale] syntax (mib) */
 918             operand o2;         /* Index operand */
 919
 920             if (parse_mref(op, value))
 921                 goto fail;
 922
 923             i = stdscan(NULL, &tokval); /* Eat comma */
 924             value = evaluate(stdscan, NULL, &tokval, &op->opflags,
 925                              critical, &hints);
 926             i = tokval.t_type;
 927             if (!value)
 928                 goto fail;
 929
 930             init_operand(&o2);
 931             if (parse_mref(&o2, value))
 932                 goto fail;
 933
 934             if (o2.basereg != -1 && o2.indexreg == -1) {
 935                 o2.indexreg = o2.basereg;
 936                 o2.scale = 1;
 937                 o2.basereg = -1;
 938             }
 939
 940             if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
 941                 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
 942                 nasm_nonfatal("invalid mib expression");
 943                 goto fail;
 944             }
 945
 946             op->indexreg = o2.indexreg;
 947             op->scale = o2.scale;
 948
 949             if (op->basereg != -1) {
 950                 op->hintbase = op->basereg;
 951                 op->hinttype = EAH_MAKEBASE;
 952             } else if (op->indexreg != -1) {
 953                 op->hintbase = op->indexreg;
 954                 op->hinttype = EAH_NOTBASE;
 955             } else {
 956                 op->hintbase = -1;
 957                 op->hinttype = EAH_NOHINT;
 958             }
 959
 960             mib = true;
 961         }
 962
 963         recover = false;
 964         if (mref) {
 965             if (bracket == 1) {
 966                 if (i == ']') {
 967                     bracket--;
 968                     i = stdscan(NULL, &tokval);
 969                 } else {
 970                     nasm_nonfatal("expecting ] at end of memory operand");
 971                     recover = true;
 972                 }
 973             } else if (bracket == 0) {
 974                 /* Do nothing */
 975             } else if (bracket > 0) {
 976                 nasm_nonfatal("excess brackets in memory operand");
 977                 recover = true;
 978             } else if (bracket < 0) {
 979                 nasm_nonfatal("unmatched ] in memory operand");
 980                 recover = true;
 981             }
 982
 983             if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
 984                 /* parse opmask (and zeroing) after an operand */
 985                 recover = parse_braces(&brace_flags);
 986                 i = tokval.t_type;
 987             }
 988             if (!recover && i != 0 && i != ',') {
 989                 nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
 990                 recover = true;
 991             }
 992         } else {                /* immediate operand */
 993             if (i != 0 && i != ',' && i != ':' &&
 994                 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
 995                 nasm_nonfatal("comma, colon, decorator or end of "
 996                               "line expected after operand");
 997                 recover = true;
 998             } else if (i == ':') {
 999                 op->type |= COLON;
1000             } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
1001                 /* parse opmask (and zeroing) after an operand */
1002                 recover = parse_braces(&brace_flags);
1003             }
1004         }
1005         if (recover) {
1006             do {                /* error recovery */
1007                 i = stdscan(NULL, &tokval);
1008             } while (i != 0 && i != ',');
1009         }
1010
1011         /*
1012          * now convert the exprs returned from evaluate()
1013          * into operand descriptions...
1014          */
1015         op->decoflags |= brace_flags;
1016
1017         if (mref) {             /* it's a memory reference */
1018             /* A mib reference was fully parsed already */
1019             if (!mib) {
1020                 if (parse_mref(op, value))
1021                     goto fail;
1022                 op->hintbase = hints.base;
1023                 op->hinttype = hints.type;
1024             }
1025             mref_set_optype(op);
1026         } else if ((op->type & FAR) && !far_jmp_ok) {
1027                 nasm_nonfatal("invalid use of FAR operand specifier");
1028                 recover = true;
1029         } else {                /* it's not a memory reference */
1030             if (is_just_unknown(value)) {       /* it's immediate but unknown */
1031                 op->type      |= IMMEDIATE;
1032                 op->opflags   |= OPFLAG_UNKNOWN;
1033                 op->offset    = 0;        /* don't care */
1034                 op->segment   = NO_SEG;   /* don't care again */
1035                 op->wrt       = NO_SEG;   /* still don't care */
1036
1037                 if(optimizing.level >= 0 && !(op->type & STRICT)) {
1038                     /* Be optimistic */
1039                     op->type |=
1040                         UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
1041                 }
1042             } else if (is_reloc(value)) {       /* it's immediate */
1043                 uint64_t n = reloc_value(value);
1044
1045                 op->type      |= IMMEDIATE;
1046                 op->offset    = n;
1047                 op->segment   = reloc_seg(value);
1048                 op->wrt       = reloc_wrt(value);
1049                 op->opflags   |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
1050
1051                 if (is_simple(value)) {
1052                     if (n == 1)
1053                         op->type |= UNITY;
1054                     if (optimizing.level >= 0 && !(op->type & STRICT)) {
1055                         if ((uint32_t) (n + 128) <= 255)
1056                             op->type |= SBYTEDWORD;
1057                         if ((uint16_t) (n + 128) <= 255)
1058                             op->type |= SBYTEWORD;
1059                         if (n <= UINT64_C(0xFFFFFFFF))
1060                             op->type |= UDWORD;
1061                         if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
1062                             op->type |= SDWORD;
1063                     }
1064                 }
1065             } else if (value->type == EXPR_RDSAE) {
1066                 /*
1067                  * it's not an operand but a rounding or SAE decorator.
1068                  * put the decorator information in the (opflag_t) type field
1069                  * of previous operand.
1070                  */
1071                 opnum--; op--;
1072                 switch (value->value) {
1073                 case BRC_RN:
1074                 case BRC_RU:
1075                 case BRC_RD:
1076                 case BRC_RZ:
1077                 case BRC_SAE:
1078                     op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
1079                     result->evex_rm = value->value;
1080                     break;
1081                 default:
1082                     nasm_nonfatal("invalid decorator");
1083                     break;
1084                 }
1085             } else {            /* it's a register */
1086                 opflags_t rs;
1087                 uint64_t regset_size = 0;
1088
1089                 if (value->type >= EXPR_SIMPLE || value->value != 1) {
1090                     nasm_nonfatal("invalid operand type");
1091                     goto fail;
1092                 }
1093
1094                 /*
1095                  * We do not allow any kind of expression, except for
1096                  * reg+value in which case it is a register set.
1097                  */
1098                 for (i = 1; value[i].type; i++) {
1099                     if (!value[i].value)
1100                         continue;
1101
1102                     switch (value[i].type) {
1103                     case EXPR_SIMPLE:
1104                         if (!regset_size) {
1105                             regset_size = value[i].value + 1;
1106                             break;
1107                         }
1108                         /* fallthrough */
1109                     default:
1110                         nasm_nonfatal("invalid operand type");
1111                         goto fail;
1112                     }
1113                 }
1114
1115                 if ((regset_size & (regset_size - 1)) ||
1116                     regset_size >= (UINT64_C(1) << REGSET_BITS)) {
1117                     nasm_nonfatalf(ERR_PASS2, "invalid register set size");
1118                     regset_size = 0;
1119                 }
1120
1121                 /* clear overrides, except TO which applies to FPU regs */
1122                 if (op->type & ~TO) {
1123                     /*
1124                      * we want to produce a warning iff the specified size
1125                      * is different from the register size
1126                      */
1127                     rs = op->type & SIZE_MASK;
1128                 } else {
1129                     rs = 0;
1130                 }
1131
1132                 /*
1133                  * Make sure we're not out of nasm_reg_flags, still
1134                  * probably this should be fixed when we're defining
1135                  * the label.
1136                  *
1137                  * An easy trigger is
1138                  *
1139                  *      e equ 0x80000000:0
1140                  *      pshufw word e-0
1141                  *
1142                  */
1143                 if (value->type < EXPR_REG_START ||
1144                     value->type > EXPR_REG_END) {
1145                         nasm_nonfatal("invalid operand type");
1146                         goto fail;
1147                 }
1148
1149                 op->type      &= TO;
1150                 op->type      |= REGISTER;
1151                 op->type      |= nasm_reg_flags[value->type];
1152                 op->type      |= (regset_size >> 1) << REGSET_SHIFT;
1153                 op->decoflags |= brace_flags;
1154                 op->basereg   = value->type;
1155
1156                 if (rs) {
1157                     opflags_t opsize = nasm_reg_flags[value->type] & SIZE_MASK;
1158                     if (!opsize) {
1159                         op->type |= rs; /* For non-size-specific registers, permit size override */
1160                     } else if (opsize != rs) {
1161                         /*!
1162                          *!regsize [on] register size specification ignored
1163                          *!
1164                          *!  warns about a register with implicit size (such as \c{EAX}, which is always 32 bits)
1165                          *!  been given an explicit size specification which is inconsistent with the size
1166                          *!  of the named register, e.g. \c{WORD EAX}. \c{DWORD EAX} or \c{WORD AX} are
1167                          *!  permitted, and do not trigger this warning. Some registers which \e{do not} imply
1168                          *!  a specific size, such as \c{K0}, may need this specification unless the instruction
1169                          *!  itself implies the instruction size:
1170                          *!
1171                          *!  \c      KMOVW K0,[foo]          ; Permitted, KMOVW implies 16 bits
1172                          *!  \c      KMOV  WORD K0,[foo]     ; Permitted, WORD K0 specifies instruction size
1173                          *!  \c      KMOV  K0,WORD [foo]     ; Permitted, WORD [foo] specifies instruction size
1174                          *!  \c      KMOV  K0,[foo]          ; Not permitted, instruction size ambiguous
1175                          */
1176                         nasm_warn(WARN_REGSIZE, "invalid register size specification ignored");
1177                     }
1178                 }
1179             }
1180         }
1181
1182         /* remember the position of operand having broadcasting/ER mode */
1183         if (op->decoflags & (BRDCAST_MASK | ER | SAE))
1184             result->evex_brerop = opnum;
1185     }
1186
1187     result->operands = opnum; /* set operand count */
1188
1189     /* clear remaining operands */
1190     while (opnum < MAX_OPERANDS)
1191         result->oprs[opnum++].type = 0;
1192
1193     return result;
1194
1195 fail:
1196     result->opcode = I_none;
1197     return result;
1198 }
1199
1200 static int is_comma_next(void)
1201 {
1202     struct tokenval tv;
1203     char *p;
1204     int i;
1205
1206     p = stdscan_get();
1207     i = stdscan(NULL, &tv);
1208     stdscan_set(p);
1209
1210     return (i == ',' || i == ';' || !i);
1211 }
1212
1213 void cleanup_insn(insn * i)
1214 {
1215     extop *e;
1216
1217     while ((e = i->eops)) {
1218         i->eops = e->next;
1219         if (e->type == EOT_DB_STRING_FREE)
1220             nasm_free(e->stringval);
1221         nasm_free(e);
1222     }
1223 }