preproc: fix multiple memory corruption issues
[nasm.git] / asm / parser.c
blob012364ac9058860b46df135959bdeb8408a1c099
1 /* ----------------------------------------------------------------------- *
3 * Copyright 1996-2019 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * parser.c source line parser for the Netwide Assembler
38 #include "compiler.h"
40 #include "nctype.h"
42 #include "nasm.h"
43 #include "insns.h"
44 #include "nasmlib.h"
45 #include "error.h"
46 #include "stdscan.h"
47 #include "eval.h"
48 #include "parser.h"
49 #include "float.h"
50 #include "assemble.h"
51 #include "tables.h"
/* Forward declaration: is_comma_next() is defined after parse_line(). */
54 static int is_comma_next(void);
/* Token filled in by stdscan()/evaluate() and read by the parsing routines below. */
56 static struct tokenval tokval;
58 static int prefix_slot(int prefix)
60 switch (prefix) {
61 case P_WAIT:
62 return PPS_WAIT;
63 case R_CS:
64 case R_DS:
65 case R_SS:
66 case R_ES:
67 case R_FS:
68 case R_GS:
69 return PPS_SEG;
70 case P_LOCK:
71 return PPS_LOCK;
72 case P_REP:
73 case P_REPE:
74 case P_REPZ:
75 case P_REPNE:
76 case P_REPNZ:
77 case P_XACQUIRE:
78 case P_XRELEASE:
79 case P_BND:
80 case P_NOBND:
81 return PPS_REP;
82 case P_O16:
83 case P_O32:
84 case P_O64:
85 case P_OSP:
86 return PPS_OSIZE;
87 case P_A16:
88 case P_A32:
89 case P_A64:
90 case P_ASP:
91 return PPS_ASIZE;
92 case P_EVEX:
93 case P_VEX3:
94 case P_VEX2:
95 return PPS_VEX;
96 default:
97 nasm_panic("Invalid value %d passed to prefix_slot()", prefix);
98 return -1;
102 static void process_size_override(insn *result, operand *op)
104 if (tasm_compatible_mode) {
105 switch (tokval.t_integer) {
106 /* For TASM compatibility a size override inside the
107 * brackets changes the size of the operand, not the
108 * address type of the operand as it does in standard
109 * NASM syntax. Hence:
111 * mov eax,[DWORD val]
113 * is valid syntax in TASM compatibility mode. Note that
114 * you lose the ability to override the default address
115 * type for the instruction, but we never use anything
116 * but 32-bit flat model addressing in our code.
118 case S_BYTE:
119 op->type |= BITS8;
120 break;
121 case S_WORD:
122 op->type |= BITS16;
123 break;
124 case S_DWORD:
125 case S_LONG:
126 op->type |= BITS32;
127 break;
128 case S_QWORD:
129 op->type |= BITS64;
130 break;
131 case S_TWORD:
132 op->type |= BITS80;
133 break;
134 case S_OWORD:
135 op->type |= BITS128;
136 break;
137 default:
138 nasm_nonfatal("invalid operand size specification");
139 break;
141 } else {
142 /* Standard NASM compatible syntax */
143 switch (tokval.t_integer) {
144 case S_NOSPLIT:
145 op->eaflags |= EAF_TIMESTWO;
146 break;
147 case S_REL:
148 op->eaflags |= EAF_REL;
149 break;
150 case S_ABS:
151 op->eaflags |= EAF_ABS;
152 break;
153 case S_BYTE:
154 op->disp_size = 8;
155 op->eaflags |= EAF_BYTEOFFS;
156 break;
157 case P_A16:
158 case P_A32:
159 case P_A64:
160 if (result->prefixes[PPS_ASIZE] &&
161 result->prefixes[PPS_ASIZE] != tokval.t_integer)
162 nasm_nonfatal("conflicting address size specifications");
163 else
164 result->prefixes[PPS_ASIZE] = tokval.t_integer;
165 break;
166 case S_WORD:
167 op->disp_size = 16;
168 op->eaflags |= EAF_WORDOFFS;
169 break;
170 case S_DWORD:
171 case S_LONG:
172 op->disp_size = 32;
173 op->eaflags |= EAF_WORDOFFS;
174 break;
175 case S_QWORD:
176 op->disp_size = 64;
177 op->eaflags |= EAF_WORDOFFS;
178 break;
179 default:
180 nasm_nonfatal("invalid size specification in"
181 " effective address");
182 break;
188 * Brace decorators are are parsed here. opmask and zeroing
189 * decorators can be placed in any order. e.g. zmm1 {k2}{z} or zmm2
190 * {z}{k3} decorator(s) are placed at the end of an operand.
192 static bool parse_braces(decoflags_t *decoflags)
194 int i, j;
196 i = tokval.t_type;
198 while (true) {
199 switch (i) {
200 case TOKEN_OPMASK:
201 if (*decoflags & OPMASK_MASK) {
202 nasm_nonfatal("opmask k%"PRIu64" is already set",
203 *decoflags & OPMASK_MASK);
204 *decoflags &= ~OPMASK_MASK;
206 *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
207 break;
208 case TOKEN_DECORATOR:
209 j = tokval.t_integer;
210 switch (j) {
211 case BRC_Z:
212 *decoflags |= Z_MASK;
213 break;
214 case BRC_1TO2:
215 case BRC_1TO4:
216 case BRC_1TO8:
217 case BRC_1TO16:
218 *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
219 break;
220 default:
221 nasm_nonfatal("{%s} is not an expected decorator",
222 tokval.t_charptr);
223 break;
225 break;
226 case ',':
227 case TOKEN_EOS:
228 return false;
229 default:
230 nasm_nonfatal("only a series of valid decorators expected");
231 return true;
233 i = stdscan(NULL, &tokval);
237 static inline const expr *next_expr(const expr *e, const expr **next_list)
239 e++;
240 if (!e->type) {
241 if (next_list) {
242 e = *next_list;
243 *next_list = NULL;
244 } else {
245 e = NULL;
248 return e;
251 static inline void init_operand(operand *op)
253 memset(op, 0, sizeof *op);
255 op->basereg = -1;
256 op->indexreg = -1;
257 op->segment = NO_SEG;
258 op->wrt = NO_SEG;
261 static int parse_mref(operand *op, const expr *e)
263 int b, i, s; /* basereg, indexreg, scale */
264 int64_t o; /* offset */
266 b = op->basereg;
267 i = op->indexreg;
268 s = op->scale;
269 o = op->offset;
271 for (; e->type; e++) {
272 if (e->type <= EXPR_REG_END) {
273 bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
275 if (is_gpr && e->value == 1 && b == -1) {
276 /* It can be basereg */
277 b = e->type;
278 } else if (i == -1) {
279 /* Must be index register */
280 i = e->type;
281 s = e->value;
282 } else {
283 if (b == -1)
284 nasm_nonfatal("invalid effective address: two index registers");
285 else if (!is_gpr)
286 nasm_nonfatal("invalid effective address: impossible register");
287 else
288 nasm_nonfatal("invalid effective address: too many registers");
289 return -1;
291 } else if (e->type == EXPR_UNKNOWN) {
292 op->opflags |= OPFLAG_UNKNOWN;
293 } else if (e->type == EXPR_SIMPLE) {
294 o += e->value;
295 } else if (e->type == EXPR_WRT) {
296 op->wrt = e->value;
297 } else if (e->type >= EXPR_SEGBASE) {
298 if (e->value == 1) {
299 if (op->segment != NO_SEG) {
300 nasm_nonfatal("invalid effective address: multiple base segments");
301 return -1;
303 op->segment = e->type - EXPR_SEGBASE;
304 } else if (e->value == -1 &&
305 e->type == location.segment + EXPR_SEGBASE &&
306 !(op->opflags & OPFLAG_RELATIVE)) {
307 op->opflags |= OPFLAG_RELATIVE;
308 } else {
309 nasm_nonfatal("invalid effective address: impossible segment base multiplier");
310 return -1;
312 } else {
313 nasm_nonfatal("invalid effective address: bad subexpression type");
314 return -1;
318 op->basereg = b;
319 op->indexreg = i;
320 op->scale = s;
321 op->offset = o;
322 return 0;
325 static void mref_set_optype(operand *op)
327 int b = op->basereg;
328 int i = op->indexreg;
329 int s = op->scale;
331 /* It is memory, but it can match any r/m operand */
332 op->type |= MEMORY_ANY;
334 if (b == -1 && (i == -1 || s == 0)) {
335 int is_rel = globalbits == 64 &&
336 !(op->eaflags & EAF_ABS) &&
337 ((globalrel &&
338 !(op->eaflags & EAF_FSGS)) ||
339 (op->eaflags & EAF_REL));
341 op->type |= is_rel ? IP_REL : MEM_OFFS;
344 if (i != -1) {
345 opflags_t iclass = nasm_reg_flags[i];
347 if (is_class(XMMREG,iclass))
348 op->type |= XMEM;
349 else if (is_class(YMMREG,iclass))
350 op->type |= YMEM;
351 else if (is_class(ZMMREG,iclass))
352 op->type |= ZMEM;
357 * Convert an expression vector returned from evaluate() into an
358 * extop structure. Return zero on success.
360 static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
362 eop->type = EOT_DB_NUMBER;
363 eop->offset = 0;
364 eop->segment = eop->wrt = NO_SEG;
365 eop->relative = false;
367 for (; vect->type; vect++) {
368 if (!vect->value) /* zero term, safe to ignore */
369 continue;
371 if (vect->type <= EXPR_REG_END) /* false if a register is present */
372 return -1;
374 if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
375 return 0;
377 if (vect->type == EXPR_SIMPLE) {
378 /* Simple number expression */
379 eop->offset += vect->value;
380 continue;
382 if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
383 /* WRT term */
384 eop->wrt = vect->value;
385 continue;
388 if (!eop->relative &&
389 vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
390 /* Expression of the form: foo - $ */
391 eop->relative = true;
392 continue;
395 if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
396 vect->value == 1) {
397 eop->segment = vect->type - EXPR_SEGBASE;
398 continue;
401 /* Otherwise, badness */
402 return -1;
405 /* We got to the end and it was all okay */
406 return 0;
409 insn *parse_line(char *buffer, insn *result)
411 bool insn_is_label = false;
412 struct eval_hints hints;
413 int opnum;
414 bool critical;
415 bool first;
416 bool recover;
417 bool far_jmp_ok;
418 int i;
420 nasm_static_assert(P_none == 0);
422 restart_parse:
423 first = true;
424 result->forw_ref = false;
426 stdscan_reset();
427 stdscan_set(buffer);
428 i = stdscan(NULL, &tokval);
430 memset(result->prefixes, P_none, sizeof(result->prefixes));
431 result->times = 1; /* No TIMES either yet */
432 result->label = NULL; /* Assume no label */
433 result->eops = NULL; /* must do this, whatever happens */
434 result->operands = 0; /* must initialize this */
435 result->evex_rm = 0; /* Ensure EVEX rounding mode is reset */
436 result->evex_brerop = -1; /* Reset EVEX broadcasting/ER op position */
438 /* Ignore blank lines */
439 if (i == TOKEN_EOS)
440 goto fail;
442 if (i != TOKEN_ID &&
443 i != TOKEN_INSN &&
444 i != TOKEN_PREFIX &&
445 (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
446 nasm_nonfatal("label or instruction expected at start of line");
447 goto fail;
450 if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
451 /* there's a label here */
452 first = false;
453 result->label = tokval.t_charptr;
454 i = stdscan(NULL, &tokval);
455 if (i == ':') { /* skip over the optional colon */
456 i = stdscan(NULL, &tokval);
457 } else if (i == 0) {
459 *!label-orphan [on] labels alone on lines without trailing `:'
460 *!=orphan-labels
461 *! warns about source lines which contain no instruction but define
462 *! a label without a trailing colon. This is most likely indicative
463 *! of a typo, but is technically correct NASM syntax (see \k{syntax}.)
465 nasm_warn(WARN_LABEL_ORPHAN ,
466 "label alone on a line without a colon might be in error");
468 if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
470 * FIXME: location.segment could be NO_SEG, in which case
471 * it is possible we should be passing 'absolute.segment'. Look into this.
472 * Work out whether that is *really* what we should be doing.
473 * Generally fix things. I think this is right as it is, but
474 * am still not certain.
476 define_label(result->label,
477 in_absolute ? absolute.segment : location.segment,
478 location.offset, true);
482 /* Just a label here */
483 if (i == TOKEN_EOS)
484 goto fail;
486 while (i == TOKEN_PREFIX ||
487 (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
488 first = false;
491 * Handle special case: the TIMES prefix.
493 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
494 expr *value;
496 i = stdscan(NULL, &tokval);
497 value = evaluate(stdscan, NULL, &tokval, NULL, pass_stable(), NULL);
498 i = tokval.t_type;
499 if (!value) /* Error in evaluator */
500 goto fail;
501 if (!is_simple(value)) {
502 nasm_nonfatal("non-constant argument supplied to TIMES");
503 result->times = 1L;
504 } else {
505 result->times = value->value;
506 if (value->value < 0) {
507 nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
508 result->times = 0;
511 } else {
512 int slot = prefix_slot(tokval.t_integer);
513 if (result->prefixes[slot]) {
514 if (result->prefixes[slot] == tokval.t_integer)
515 nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
516 else
517 nasm_nonfatal("instruction has conflicting prefixes");
519 result->prefixes[slot] = tokval.t_integer;
520 i = stdscan(NULL, &tokval);
524 if (i != TOKEN_INSN) {
525 int j;
526 enum prefixes pfx;
528 for (j = 0; j < MAXPREFIX; j++) {
529 if ((pfx = result->prefixes[j]) != P_none)
530 break;
533 if (i == 0 && pfx != P_none) {
535 * Instruction prefixes are present, but no actual
536 * instruction. This is allowed: at this point we
537 * invent a notional instruction of RESB 0.
539 result->opcode = I_RESB;
540 result->operands = 1;
541 nasm_zero(result->oprs);
542 result->oprs[0].type = IMMEDIATE;
543 result->oprs[0].offset = 0L;
544 result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
545 return result;
546 } else {
547 nasm_nonfatal("parser: instruction expected");
548 goto fail;
552 result->opcode = tokval.t_integer;
553 result->condition = tokval.t_inttwo;
556 * INCBIN cannot be satisfied with incorrectly
557 * evaluated operands, since the correct values _must_ be known
558 * on the first pass. Hence, even in pass one, we set the
559 * `critical' flag on calling evaluate(), so that it will bomb
560 * out on undefined symbols.
562 critical = pass_final() || (result->opcode == I_INCBIN);
564 if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
565 extop *eop, **tail = &result->eops, **fixptr;
566 int oper_num = 0;
567 int32_t sign;
569 result->eops_float = false;
572 * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
574 while (1) {
575 i = stdscan(NULL, &tokval);
576 if (i == TOKEN_EOS)
577 break;
578 else if (first && i == ':') {
579 insn_is_label = true;
580 goto restart_parse;
582 first = false;
583 fixptr = tail;
584 eop = *tail = nasm_malloc(sizeof(extop));
585 tail = &eop->next;
586 eop->next = NULL;
587 eop->type = EOT_NOTHING;
588 oper_num++;
589 sign = +1;
592 * is_comma_next() here is to distinguish this from
593 * a string used as part of an expression...
595 if (i == TOKEN_STR && is_comma_next()) {
596 eop->type = EOT_DB_STRING;
597 eop->stringval = tokval.t_charptr;
598 eop->stringlen = tokval.t_inttwo;
599 i = stdscan(NULL, &tokval); /* eat the comma */
600 } else if (i == TOKEN_STRFUNC) {
601 bool parens = false;
602 const char *funcname = tokval.t_charptr;
603 enum strfunc func = tokval.t_integer;
604 i = stdscan(NULL, &tokval);
605 if (i == '(') {
606 parens = true;
607 i = stdscan(NULL, &tokval);
609 if (i != TOKEN_STR) {
610 nasm_nonfatal("%s must be followed by a string constant",
611 funcname);
612 eop->type = EOT_NOTHING;
613 } else {
614 eop->type = EOT_DB_STRING_FREE;
615 eop->stringlen =
616 string_transform(tokval.t_charptr, tokval.t_inttwo,
617 &eop->stringval, func);
618 if (eop->stringlen == (size_t)-1) {
619 nasm_nonfatal("invalid input string to %s", funcname);
620 eop->type = EOT_NOTHING;
623 if (parens && i && i != ')') {
624 i = stdscan(NULL, &tokval);
625 if (i != ')')
626 nasm_nonfatal("unterminated %s function", funcname);
628 if (i && i != ',')
629 i = stdscan(NULL, &tokval);
630 } else if (i == '-' || i == '+') {
631 char *save = stdscan_get();
632 int token = i;
633 sign = (i == '-') ? -1 : 1;
634 i = stdscan(NULL, &tokval);
635 if (i != TOKEN_FLOAT) {
636 stdscan_set(save);
637 i = tokval.t_type = token;
638 goto is_expression;
639 } else {
640 goto is_float;
642 } else if (i == TOKEN_FLOAT) {
643 is_float:
644 eop->type = EOT_DB_STRING;
645 result->eops_float = true;
647 eop->stringlen = db_bytes(result->opcode);
648 if (eop->stringlen > 16) {
649 nasm_nonfatal("floating-point constant"
650 " encountered in DY or DZ instruction");
651 eop->stringlen = 0;
652 } else if (eop->stringlen < 1) {
653 nasm_nonfatal("floating-point constant"
654 " encountered in unknown instruction");
656 * fix suggested by Pedro Gimeno... original line was:
657 * eop->type = EOT_NOTHING;
659 eop->stringlen = 0;
662 eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
663 tail = &eop->next;
664 *fixptr = eop;
665 eop->stringval = (char *)eop + sizeof(extop);
666 if (!eop->stringlen ||
667 !float_const(tokval.t_charptr, sign,
668 (uint8_t *)eop->stringval, eop->stringlen))
669 eop->type = EOT_NOTHING;
670 i = stdscan(NULL, &tokval); /* eat the comma */
671 } else {
672 /* anything else, assume it is an expression */
673 expr *value;
675 is_expression:
676 value = evaluate(stdscan, NULL, &tokval, NULL,
677 critical, NULL);
678 i = tokval.t_type;
679 if (!value) /* Error in evaluator */
680 goto fail;
681 if (value_to_extop(value, eop, location.segment)) {
682 nasm_nonfatal("operand %d: expression is not simple or relocatable",
683 oper_num);
688 * We're about to call stdscan(), which will eat the
689 * comma that we're currently sitting on between
690 * arguments. However, we'd better check first that it
691 * _is_ a comma.
693 if (i == TOKEN_EOS) /* also could be EOL */
694 break;
695 if (i != ',') {
696 nasm_nonfatal("comma expected after operand %d", oper_num);
697 goto fail;
701 if (result->opcode == I_INCBIN) {
703 * Correct syntax for INCBIN is that there should be
704 * one string operand, followed by one or two numeric
705 * operands.
707 if (!result->eops || result->eops->type != EOT_DB_STRING)
708 nasm_nonfatal("`incbin' expects a file name");
709 else if (result->eops->next &&
710 result->eops->next->type != EOT_DB_NUMBER)
711 nasm_nonfatal("`incbin': second parameter is"
712 " non-numeric");
713 else if (result->eops->next && result->eops->next->next &&
714 result->eops->next->next->type != EOT_DB_NUMBER)
715 nasm_nonfatal("`incbin': third parameter is"
716 " non-numeric");
717 else if (result->eops->next && result->eops->next->next &&
718 result->eops->next->next->next)
719 nasm_nonfatal("`incbin': more than three parameters");
720 else
721 return result;
723 * If we reach here, one of the above errors happened.
724 * Throw the instruction away.
726 goto fail;
727 } else /* DB ... */ if (oper_num == 0)
728 nasm_warn(WARN_OTHER, "no operand for data declaration");
729 else
730 result->operands = oper_num;
732 return result;
736 * Now we begin to parse the operands. There may be up to four
737 * of these, separated by commas, and terminated by a zero token.
739 far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
741 for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
742 operand *op = &result->oprs[opnum];
743 expr *value; /* used most of the time */
744 bool mref = false; /* is this going to be a memory ref? */
745 int bracket = 0; /* is it a [] mref, or a "naked" mref? */
746 bool mib; /* compound (mib) mref? */
747 int setsize = 0;
748 decoflags_t brace_flags = 0; /* flags for decorators in braces */
750 init_operand(op);
752 i = stdscan(NULL, &tokval);
753 if (i == TOKEN_EOS)
754 break; /* end of operands: get out of here */
755 else if (first && i == ':') {
756 insn_is_label = true;
757 goto restart_parse;
759 first = false;
760 op->type = 0; /* so far, no override */
761 /* size specifiers */
762 while (i == TOKEN_SPECIAL || i == TOKEN_SIZE) {
763 switch (tokval.t_integer) {
764 case S_BYTE:
765 if (!setsize) /* we want to use only the first */
766 op->type |= BITS8;
767 setsize = 1;
768 break;
769 case S_WORD:
770 if (!setsize)
771 op->type |= BITS16;
772 setsize = 1;
773 break;
774 case S_DWORD:
775 case S_LONG:
776 if (!setsize)
777 op->type |= BITS32;
778 setsize = 1;
779 break;
780 case S_QWORD:
781 if (!setsize)
782 op->type |= BITS64;
783 setsize = 1;
784 break;
785 case S_TWORD:
786 if (!setsize)
787 op->type |= BITS80;
788 setsize = 1;
789 break;
790 case S_OWORD:
791 if (!setsize)
792 op->type |= BITS128;
793 setsize = 1;
794 break;
795 case S_YWORD:
796 if (!setsize)
797 op->type |= BITS256;
798 setsize = 1;
799 break;
800 case S_ZWORD:
801 if (!setsize)
802 op->type |= BITS512;
803 setsize = 1;
804 break;
805 case S_TO:
806 op->type |= TO;
807 break;
808 case S_STRICT:
809 op->type |= STRICT;
810 break;
811 case S_FAR:
812 op->type |= FAR;
813 break;
814 case S_NEAR:
815 op->type |= NEAR;
816 break;
817 case S_SHORT:
818 op->type |= SHORT;
819 break;
820 default:
821 nasm_nonfatal("invalid operand size specification");
823 i = stdscan(NULL, &tokval);
826 if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
827 /* memory reference */
828 mref = true;
829 bracket += (i == '[');
830 i = stdscan(NULL, &tokval);
833 mref_more:
834 if (mref) {
835 bool done = false;
836 bool nofw = false;
838 while (!done) {
839 switch (i) {
840 case TOKEN_SPECIAL:
841 case TOKEN_SIZE:
842 case TOKEN_PREFIX:
843 process_size_override(result, op);
844 break;
846 case '[':
847 bracket++;
848 break;
850 case ',':
851 tokval.t_type = TOKEN_NUM;
852 tokval.t_integer = 0;
853 stdscan_set(stdscan_get() - 1); /* rewind the comma */
854 done = nofw = true;
855 break;
857 case TOKEN_MASM_FLAT:
858 i = stdscan(NULL, &tokval);
859 if (i != ':') {
860 nasm_nonfatal("unknown use of FLAT in MASM emulation");
861 nofw = true;
863 done = true;
864 break;
866 default:
867 done = nofw = true;
868 break;
871 if (!nofw)
872 i = stdscan(NULL, &tokval);
876 value = evaluate(stdscan, NULL, &tokval,
877 &op->opflags, critical, &hints);
878 i = tokval.t_type;
879 if (op->opflags & OPFLAG_FORWARD) {
880 result->forw_ref = true;
882 if (!value) /* Error in evaluator */
883 goto fail;
885 if (i == '[' && !bracket) {
886 /* displacement[regs] syntax */
887 mref = true;
888 parse_mref(op, value); /* Process what we have so far */
889 goto mref_more;
892 if (i == ':' && (mref || !far_jmp_ok)) {
893 /* segment override? */
894 mref = true;
897 * Process the segment override.
899 if (value[1].type != 0 ||
900 value->value != 1 ||
901 !IS_SREG(value->type))
902 nasm_nonfatal("invalid segment override");
903 else if (result->prefixes[PPS_SEG])
904 nasm_nonfatal("instruction has conflicting segment overrides");
905 else {
906 result->prefixes[PPS_SEG] = value->type;
907 if (IS_FSGS(value->type))
908 op->eaflags |= EAF_FSGS;
911 i = stdscan(NULL, &tokval); /* then skip the colon */
912 goto mref_more;
915 mib = false;
916 if (mref && bracket && i == ',') {
917 /* [seg:base+offset,index*scale] syntax (mib) */
918 operand o2; /* Index operand */
920 if (parse_mref(op, value))
921 goto fail;
923 i = stdscan(NULL, &tokval); /* Eat comma */
924 value = evaluate(stdscan, NULL, &tokval, &op->opflags,
925 critical, &hints);
926 i = tokval.t_type;
927 if (!value)
928 goto fail;
930 init_operand(&o2);
931 if (parse_mref(&o2, value))
932 goto fail;
934 if (o2.basereg != -1 && o2.indexreg == -1) {
935 o2.indexreg = o2.basereg;
936 o2.scale = 1;
937 o2.basereg = -1;
940 if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
941 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
942 nasm_nonfatal("invalid mib expression");
943 goto fail;
946 op->indexreg = o2.indexreg;
947 op->scale = o2.scale;
949 if (op->basereg != -1) {
950 op->hintbase = op->basereg;
951 op->hinttype = EAH_MAKEBASE;
952 } else if (op->indexreg != -1) {
953 op->hintbase = op->indexreg;
954 op->hinttype = EAH_NOTBASE;
955 } else {
956 op->hintbase = -1;
957 op->hinttype = EAH_NOHINT;
960 mib = true;
963 recover = false;
964 if (mref) {
965 if (bracket == 1) {
966 if (i == ']') {
967 bracket--;
968 i = stdscan(NULL, &tokval);
969 } else {
970 nasm_nonfatal("expecting ] at end of memory operand");
971 recover = true;
973 } else if (bracket == 0) {
974 /* Do nothing */
975 } else if (bracket > 0) {
976 nasm_nonfatal("excess brackets in memory operand");
977 recover = true;
978 } else if (bracket < 0) {
979 nasm_nonfatal("unmatched ] in memory operand");
980 recover = true;
983 if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
984 /* parse opmask (and zeroing) after an operand */
985 recover = parse_braces(&brace_flags);
986 i = tokval.t_type;
988 if (!recover && i != 0 && i != ',') {
989 nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
990 recover = true;
992 } else { /* immediate operand */
993 if (i != 0 && i != ',' && i != ':' &&
994 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
995 nasm_nonfatal("comma, colon, decorator or end of "
996 "line expected after operand");
997 recover = true;
998 } else if (i == ':') {
999 op->type |= COLON;
1000 } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
1001 /* parse opmask (and zeroing) after an operand */
1002 recover = parse_braces(&brace_flags);
1005 if (recover) {
1006 do { /* error recovery */
1007 i = stdscan(NULL, &tokval);
1008 } while (i != 0 && i != ',');
1012 * now convert the exprs returned from evaluate()
1013 * into operand descriptions...
1015 op->decoflags |= brace_flags;
1017 if (mref) { /* it's a memory reference */
1018 /* A mib reference was fully parsed already */
1019 if (!mib) {
1020 if (parse_mref(op, value))
1021 goto fail;
1022 op->hintbase = hints.base;
1023 op->hinttype = hints.type;
1025 mref_set_optype(op);
1026 } else if ((op->type & FAR) && !far_jmp_ok) {
1027 nasm_nonfatal("invalid use of FAR operand specifier");
1028 recover = true;
1029 } else { /* it's not a memory reference */
1030 if (is_just_unknown(value)) { /* it's immediate but unknown */
1031 op->type |= IMMEDIATE;
1032 op->opflags |= OPFLAG_UNKNOWN;
1033 op->offset = 0; /* don't care */
1034 op->segment = NO_SEG; /* don't care again */
1035 op->wrt = NO_SEG; /* still don't care */
1037 if(optimizing.level >= 0 && !(op->type & STRICT)) {
1038 /* Be optimistic */
1039 op->type |=
1040 UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
1042 } else if (is_reloc(value)) { /* it's immediate */
1043 uint64_t n = reloc_value(value);
1045 op->type |= IMMEDIATE;
1046 op->offset = n;
1047 op->segment = reloc_seg(value);
1048 op->wrt = reloc_wrt(value);
1049 op->opflags |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
1051 if (is_simple(value)) {
1052 if (n == 1)
1053 op->type |= UNITY;
1054 if (optimizing.level >= 0 && !(op->type & STRICT)) {
1055 if ((uint32_t) (n + 128) <= 255)
1056 op->type |= SBYTEDWORD;
1057 if ((uint16_t) (n + 128) <= 255)
1058 op->type |= SBYTEWORD;
1059 if (n <= UINT64_C(0xFFFFFFFF))
1060 op->type |= UDWORD;
1061 if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
1062 op->type |= SDWORD;
1065 } else if (value->type == EXPR_RDSAE) {
1067 * it's not an operand but a rounding or SAE decorator.
1068 * put the decorator information in the (opflag_t) type field
1069 * of previous operand.
1071 opnum--; op--;
1072 switch (value->value) {
1073 case BRC_RN:
1074 case BRC_RU:
1075 case BRC_RD:
1076 case BRC_RZ:
1077 case BRC_SAE:
1078 op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
1079 result->evex_rm = value->value;
1080 break;
1081 default:
1082 nasm_nonfatal("invalid decorator");
1083 break;
1085 } else { /* it's a register */
1086 opflags_t rs;
1087 uint64_t regset_size = 0;
1089 if (value->type >= EXPR_SIMPLE || value->value != 1) {
1090 nasm_nonfatal("invalid operand type");
1091 goto fail;
1095 * We do not allow any kind of expression, except for
1096 * reg+value in which case it is a register set.
1098 for (i = 1; value[i].type; i++) {
1099 if (!value[i].value)
1100 continue;
1102 switch (value[i].type) {
1103 case EXPR_SIMPLE:
1104 if (!regset_size) {
1105 regset_size = value[i].value + 1;
1106 break;
1108 /* fallthrough */
1109 default:
1110 nasm_nonfatal("invalid operand type");
1111 goto fail;
1115 if ((regset_size & (regset_size - 1)) ||
1116 regset_size >= (UINT64_C(1) << REGSET_BITS)) {
1117 nasm_nonfatalf(ERR_PASS2, "invalid register set size");
1118 regset_size = 0;
1121 /* clear overrides, except TO which applies to FPU regs */
1122 if (op->type & ~TO) {
1124 * we want to produce a warning iff the specified size
1125 * is different from the register size
1127 rs = op->type & SIZE_MASK;
1128 } else {
1129 rs = 0;
1133 * Make sure we're not out of nasm_reg_flags, still
1134 * probably this should be fixed when we're defining
1135 * the label.
1137 * An easy trigger is
1139 * e equ 0x80000000:0
1140 * pshufw word e-0
1143 if (value->type < EXPR_REG_START ||
1144 value->type > EXPR_REG_END) {
1145 nasm_nonfatal("invalid operand type");
1146 goto fail;
1149 op->type &= TO;
1150 op->type |= REGISTER;
1151 op->type |= nasm_reg_flags[value->type];
1152 op->type |= (regset_size >> 1) << REGSET_SHIFT;
1153 op->decoflags |= brace_flags;
1154 op->basereg = value->type;
1156 if (rs) {
1157 opflags_t opsize = nasm_reg_flags[value->type] & SIZE_MASK;
1158 if (!opsize) {
1159 op->type |= rs; /* For non-size-specific registers, permit size override */
1160 } else if (opsize != rs) {
1162 *!regsize [on] register size specification ignored
1164 *! warns about a register with implicit size (such as \c{EAX}, which is always 32 bits)
1165 *! been given an explicit size specification which is inconsistent with the size
1166 *! of the named register, e.g. \c{WORD EAX}. \c{DWORD EAX} or \c{WORD AX} are
1167 *! permitted, and do not trigger this warning. Some registers which \e{do not} imply
1168 *! a specific size, such as \c{K0}, may need this specification unless the instruction
1169 *! itself implies the instruction size:
1171 *! \c KMOVW K0,[foo] ; Permitted, KMOVW implies 16 bits
1172 *! \c KMOV WORD K0,[foo] ; Permitted, WORD K0 specifies instruction size
1173 *! \c KMOV K0,WORD [foo] ; Permitted, WORD [foo] specifies instruction size
1174 *! \c KMOV K0,[foo] ; Not permitted, instruction size ambiguous
1176 nasm_warn(WARN_REGSIZE, "invalid register size specification ignored");
1182 /* remember the position of operand having broadcasting/ER mode */
1183 if (op->decoflags & (BRDCAST_MASK | ER | SAE))
1184 result->evex_brerop = opnum;
1187 result->operands = opnum; /* set operand count */
1189 /* clear remaining operands */
1190 while (opnum < MAX_OPERANDS)
1191 result->oprs[opnum++].type = 0;
1193 return result;
1195 fail:
1196 result->opcode = I_none;
1197 return result;
1200 static int is_comma_next(void)
1202 struct tokenval tv;
1203 char *p;
1204 int i;
1206 p = stdscan_get();
1207 i = stdscan(NULL, &tv);
1208 stdscan_set(p);
1210 return (i == ',' || i == ';' || !i);
1213 void cleanup_insn(insn * i)
1215 extop *e;
1217 while ((e = i->eops)) {
1218 i->eops = e->next;
1219 if (e->type == EOT_DB_STRING_FREE)
1220 nasm_free(e->stringval);
1221 nasm_free(e);