NASM 0.93
[nasm/avx512.git] / parser.c
bloba45bf0daaed185fffcee2d2b8383a62cd10fa93d
/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <stddef.h>
14 #include <string.h>
15 #include <ctype.h>
17 #include "nasm.h"
18 #include "nasmlib.h"
19 #include "parser.h"
20 #include "float.h"
22 #include "names.c"
25 static long reg_flags[] = { /* sizes and special flags */
26 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
27 REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
28 REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
29 REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
30 REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
31 MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
32 REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
33 FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
34 REG_TREG
/*
 * Indices of the size/distance keywords.  The order must stay the
 * same as in special_names[]: nexttoken() stores the index found in
 * that table in tokval.t_integer, and parse_line() switches on these
 * constants.
 */
enum {                                 /* special tokens */
    S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
    S_TWORD, S_WORD
};
/*
 * Text of the special tokens, in the same order as the S_xxx enum.
 * The table is searched with bsi(), so it must stay sorted in
 * strcmp() order.
 */
static char *special_names[] = {       /* and the actual text */
    "byte", "dword", "far", "long", "near", "qword", "short", "to",
    "tword", "word"
};
/*
 * Instruction prefix keywords.  Searched with bsi(), so the table must
 * stay sorted in strcmp() order; nexttoken() adds PREFIX_ENUM_START to
 * the index it finds to form the prefix code.
 */
static char *prefix_names[] = {
    "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
    "repnz", "repz", "times"
};
/*
 * Evaluator datatype. Expressions, within the evaluator, are
 * stored as an array of these beasts, terminated by a record with
 * type==0. Mostly, it's a vector type: each type denotes some kind
 * of a component, and the value denotes the multiple of that
 * component present in the expression. The exception is the WRT
 * type, whose `value' field denotes the segment to which the
 * expression is relative. These segments will be segment-base
 * types, i.e. either odd segment values or SEG_ABS types. So it is
 * still valid to assume that anything with a `value' field of zero
 * is insignificant.
 */
typedef struct {
    long type;                         /* a register, or EXPR_xxx */
    long value;                        /* must be >= 32 bits */
} expr;
69 static void eval_reset(void);
70 static expr *evaluate(int);
/*
 * ASSUMPTION MADE HERE. The number of distinct register names
 * (i.e. possible "type" fields for an expr structure) does not
 * exceed 126.
 */
#define EXPR_SIMPLE 126                /* pure scalar term */
#define EXPR_WRT 127                   /* WRT term; value is the segment */
#define EXPR_SEGBASE 128               /* segment-base terms start here */
81 static int is_reloc(expr *);
82 static int is_simple(expr *);
83 static int is_really_simple (expr *);
84 static long reloc_value(expr *);
85 static long reloc_seg(expr *);
86 static long reloc_wrt(expr *);
/*
 * Token codes returned by nexttoken().  Single characters are returned
 * as their own (unsigned char) value, so the named tokens start at 256.
 */
enum {                                 /* token types, other than chars */
    TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
    TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
    TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
    TOKEN_FLOAT
};
/*
 * Value attached to the token most recently returned by nexttoken().
 * t_integer holds a number, or a table index for registers,
 * instructions, prefixes and special tokens; t_inttwo holds the
 * condition index of a conditional instruction or the length of a
 * quoted string; t_charptr points at identifier, float or string text
 * and is NULL for a plain number.
 */
struct tokenval {
    long t_integer, t_inttwo;
    char *t_charptr;
};
100 static char tempstorage[1024], *q;
101 static int bsi (char *string, char **array, int size);/* binary search */
103 static int nexttoken (void);
104 static int is_comma_next (void);
106 static char *bufptr;
107 static int i;
108 static struct tokenval tokval;
109 static lfunc labelfunc;
110 static efunc error;
111 static char *label;
112 static struct ofmt *outfmt;
114 static long seg, ofs;
116 static int forward;
118 insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
119 char *buffer, insn *result, struct ofmt *output,
120 efunc errfunc) {
121 int operand;
122 int critical;
124 forward = result->forw_ref = FALSE;
125 q = tempstorage;
126 bufptr = buffer;
127 labelfunc = lookup_label;
128 outfmt = output;
129 error = errfunc;
130 seg = segment;
131 ofs = offset;
132 label = "";
134 i = nexttoken();
136 result->eops = NULL; /* must do this, whatever happens */
138 if (i==0) { /* blank line - ignore */
139 result->label = NULL; /* so, no label on it */
140 result->opcode = -1; /* and no instruction either */
141 return result;
143 if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
144 (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
145 error (ERR_NONFATAL, "label or instruction expected"
146 " at start of line");
147 result->label = NULL;
148 result->opcode = -1;
149 return result;
152 if (i == TOKEN_ID) { /* there's a label here */
153 label = result->label = tokval.t_charptr;
154 i = nexttoken();
155 if (i == ':') { /* skip over the optional colon */
156 i = nexttoken();
158 } else /* no label; so, moving swiftly on */
159 result->label = NULL;
161 if (i==0) {
162 result->opcode = -1; /* this line contains just a label */
163 return result;
166 result->nprefix = 0;
167 result->times = 1;
169 while (i == TOKEN_PREFIX ||
170 (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
172 * Handle special case: the TIMES prefix.
174 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
175 expr *value;
177 i = nexttoken();
178 eval_reset();
179 value = evaluate (pass);
180 if (!value) { /* but, error in evaluator */
181 result->opcode = -1; /* unrecoverable parse error: */
182 return result; /* ignore this instruction */
184 if (!is_simple (value)) {
185 error (ERR_NONFATAL,
186 "non-constant argument supplied to TIMES");
187 result->times = 1;
188 } else
189 result->times = value->value;
190 } else {
191 if (result->nprefix == MAXPREFIX)
192 error (ERR_NONFATAL,
193 "instruction has more than %d prefixes", MAXPREFIX);
194 else
195 result->prefixes[result->nprefix++] = tokval.t_integer;
196 i = nexttoken();
200 if (i != TOKEN_INSN) {
201 error (ERR_NONFATAL, "parser: instruction expected");
202 result->opcode = -1;
203 return result;
206 result->opcode = tokval.t_integer;
207 result->condition = tokval.t_inttwo;
210 * RESB, RESW and RESD cannot be satisfied with incorrectly
211 * evaluated operands, since the correct values _must_ be known
212 * on the first pass. Hence, even in pass one, we set the
213 * `critical' flag on calling evaluate(), so that it will bomb
214 * out on undefined symbols. Nasty, but there's nothing we can
215 * do about it.
217 * For the moment, EQU has the same difficulty, so we'll
218 * include that.
220 if (result->opcode == I_RESB ||
221 result->opcode == I_RESW ||
222 result->opcode == I_RESD ||
223 result->opcode == I_RESQ ||
224 result->opcode == I_REST ||
225 result->opcode == I_EQU)
226 critical = pass;
227 else
228 critical = (pass==2 ? 2 : 0);
230 if (result->opcode == I_DB ||
231 result->opcode == I_DW ||
232 result->opcode == I_DD ||
233 result->opcode == I_DQ ||
234 result->opcode == I_DT) {
235 extop *eop, **tail = &result->eops;
236 int oper_num = 0;
239 * Begin to read the DB/DW/DD/DQ/DT operands.
241 while (1) {
242 i = nexttoken();
243 if (i == 0)
244 break;
245 eop = *tail = nasm_malloc(sizeof(extop));
246 tail = &eop->next;
247 eop->next = NULL;
248 eop->type = EOT_NOTHING;
249 oper_num++;
251 if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
252 eop->type = EOT_DB_STRING;
253 eop->stringval = tokval.t_charptr;
254 eop->stringlen = tokval.t_inttwo;
255 i = nexttoken(); /* eat the comma */
256 continue;
259 if (i == TOKEN_FLOAT || i == '-') {
260 long sign = +1L;
262 if (i == '-') {
263 char *save = bufptr;
264 i = nexttoken();
265 sign = -1L;
266 if (i != TOKEN_FLOAT) {
267 bufptr = save;
268 i = '-';
272 if (i == TOKEN_FLOAT) {
273 eop->type = EOT_DB_STRING;
274 eop->stringval = q;
275 if (result->opcode == I_DD)
276 eop->stringlen = 4;
277 else if (result->opcode == I_DQ)
278 eop->stringlen = 8;
279 else if (result->opcode == I_DT)
280 eop->stringlen = 10;
281 else {
282 error(ERR_NONFATAL, "floating-point constant"
283 " encountered in `D%c' instruction",
284 result->opcode == I_DW ? 'W' : 'B');
285 eop->type = EOT_NOTHING;
287 q += eop->stringlen;
288 if (!float_const (tokval.t_charptr, sign,
289 (unsigned char *)eop->stringval,
290 eop->stringlen, error))
291 eop->type = EOT_NOTHING;
292 i = nexttoken(); /* eat the comma */
293 continue;
297 /* anything else */ {
298 expr *value;
299 eval_reset();
300 value = evaluate (critical);
301 if (!value) { /* but, error in evaluator */
302 result->opcode = -1;/* unrecoverable parse error: */
303 return result; /* ignore this instruction */
305 if (is_reloc(value)) {
306 eop->type = EOT_DB_NUMBER;
307 eop->offset = reloc_value(value);
308 eop->segment = reloc_seg(value);
309 eop->wrt = reloc_wrt(value);
310 } else {
311 error (ERR_NONFATAL,
312 "`%s' operand %d: expression is not simple"
313 " or relocatable",
314 insn_names[result->opcode], oper_num);
318 return result;
321 /* right. Now we begin to parse the operands. There may be up to three
322 * of these, separated by commas, and terminated by a zero token. */
324 for (operand = 0; operand < 3; operand++) {
325 expr *seg, *value; /* used most of the time */
326 int mref; /* is this going to be a memory ref? */
328 result->oprs[operand].addr_size = 0;/* have to zero this whatever */
329 i = nexttoken();
330 if (i == 0) break; /* end of operands: get out of here */
331 result->oprs[operand].type = 0; /* so far, no override */
332 while (i == TOKEN_SPECIAL) {/* size specifiers */
333 switch ((int)tokval.t_integer) {
334 case S_BYTE:
335 result->oprs[operand].type |= BITS8;
336 break;
337 case S_WORD:
338 result->oprs[operand].type |= BITS16;
339 break;
340 case S_DWORD:
341 case S_LONG:
342 result->oprs[operand].type |= BITS32;
343 break;
344 case S_QWORD:
345 result->oprs[operand].type |= BITS64;
346 break;
347 case S_TWORD:
348 result->oprs[operand].type |= BITS80;
349 break;
350 case S_TO:
351 result->oprs[operand].type |= TO;
352 break;
353 case S_FAR:
354 result->oprs[operand].type |= FAR;
355 break;
356 case S_NEAR:
357 result->oprs[operand].type |= NEAR;
358 break;
359 case S_SHORT:
360 result->oprs[operand].type |= SHORT;
361 break;
363 i = nexttoken();
366 if (i == '[') { /* memory reference */
367 i = nexttoken();
368 mref = TRUE;
369 if (i == TOKEN_SPECIAL) { /* check for address size override */
370 switch ((int)tokval.t_integer) {
371 case S_WORD:
372 result->oprs[operand].addr_size = 16;
373 break;
374 case S_DWORD:
375 case S_LONG:
376 result->oprs[operand].addr_size = 32;
377 break;
378 default:
379 error (ERR_NONFATAL, "invalid size specification in"
380 " effective address");
382 i = nexttoken();
384 } else /* immediate operand, or register */
385 mref = FALSE;
387 eval_reset();
389 value = evaluate (critical);
390 if (forward)
391 result->forw_ref = TRUE;
392 if (!value) { /* error in evaluator */
393 result->opcode = -1; /* unrecoverable parse error: */
394 return result; /* ignore this instruction */
396 if (i == ':' && mref) { /* it was seg:offset */
397 seg = value; /* so shift this into the segment */
398 i = nexttoken(); /* then skip the colon */
399 if (i == TOKEN_SPECIAL) { /* another check for size override */
400 switch ((int)tokval.t_integer) {
401 case S_WORD:
402 result->oprs[operand].addr_size = 16;
403 break;
404 case S_DWORD:
405 case S_LONG:
406 result->oprs[operand].addr_size = 32;
407 break;
408 default:
409 error (ERR_NONFATAL, "invalid size specification in"
410 " effective address");
412 i = nexttoken();
414 value = evaluate (critical);
415 if (forward)
416 result->forw_ref = TRUE;
417 /* and get the offset */
418 if (!value) { /* but, error in evaluator */
419 result->opcode = -1; /* unrecoverable parse error: */
420 return result; /* ignore this instruction */
422 } else seg = NULL;
423 if (mref) { /* find ] at the end */
424 if (i != ']') {
425 error (ERR_NONFATAL, "parser: expecting ]");
426 do { /* error recovery again */
427 i = nexttoken();
428 } while (i != 0 && i != ',');
429 } else /* we got the required ] */
430 i = nexttoken();
431 } else { /* immediate operand */
432 if (i != 0 && i != ',' && i != ':') {
433 error (ERR_NONFATAL, "comma or end of line expected");
434 do { /* error recovery */
435 i = nexttoken();
436 } while (i != 0 && i != ',');
437 } else if (i == ':') {
438 result->oprs[operand].type |= COLON;
442 /* now convert the exprs returned from evaluate() into operand
443 * descriptions... */
445 if (mref) { /* it's a memory reference */
446 expr *e = value;
447 int b, i, s; /* basereg, indexreg, scale */
448 long o; /* offset */
450 if (seg) { /* segment override */
451 if (seg[1].type!=0 || seg->value!=1 ||
452 REG_SREG & ~reg_flags[seg->type])
453 error (ERR_NONFATAL, "invalid segment override");
454 else if (result->nprefix == MAXPREFIX)
455 error (ERR_NONFATAL,
456 "instruction has more than %d prefixes",
457 MAXPREFIX);
458 else
459 result->prefixes[result->nprefix++] = seg->type;
462 b = i = -1, o = s = 0;
464 if (e->type < EXPR_SIMPLE) { /* this bit's a register */
465 if (e->value == 1) /* in fact it can be basereg */
466 b = e->type;
467 else /* no, it has to be indexreg */
468 i = e->type, s = e->value;
469 e++;
471 if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
472 if (e->value != 1) { /* it has to be indexreg */
473 if (i != -1) { /* but it can't be */
474 error(ERR_NONFATAL, "invalid effective address");
475 result->opcode = -1;
476 return result;
477 } else
478 i = e->type, s = e->value;
479 } else { /* it can be basereg */
480 if (b != -1) /* or can it? */
481 i = e->type, s = 1;
482 else
483 b = e->type;
485 e++;
487 if (e->type != 0) { /* is there an offset? */
488 if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
489 error (ERR_NONFATAL, "invalid effective address");
490 result->opcode = -1;
491 return result;
492 } else {
493 if (e->type == EXPR_SIMPLE) {
494 o = e->value;
495 e++;
497 if (e->type == EXPR_WRT) {
498 result->oprs[operand].wrt = e->value;
499 e++;
500 } else
501 result->oprs[operand].wrt = NO_SEG;
503 * Look for a segment base type.
505 if (e->type && e->type < EXPR_SEGBASE) {
506 error (ERR_NONFATAL, "invalid effective address");
507 result->opcode = -1;
508 return result;
510 while (e->type && e->value == 0)
511 e++;
512 if (e->type && e->value != 1) {
513 error (ERR_NONFATAL, "invalid effective address");
514 result->opcode = -1;
515 return result;
517 if (e->type) {
518 result->oprs[operand].segment = e->type-EXPR_SEGBASE;
519 e++;
520 } else
521 result->oprs[operand].segment = NO_SEG;
522 while (e->type && e->value == 0)
523 e++;
524 if (e->type) {
525 error (ERR_NONFATAL, "invalid effective address");
526 result->opcode = -1;
527 return result;
530 } else {
531 o = 0;
532 result->oprs[operand].wrt = NO_SEG;
533 result->oprs[operand].segment = NO_SEG;
536 if (e->type != 0) { /* there'd better be nothing left! */
537 error (ERR_NONFATAL, "invalid effective address");
538 result->opcode = -1;
539 return result;
542 result->oprs[operand].type |= MEMORY;
543 if (b==-1 && (i==-1 || s==0))
544 result->oprs[operand].type |= MEM_OFFS;
545 result->oprs[operand].basereg = b;
546 result->oprs[operand].indexreg = i;
547 result->oprs[operand].scale = s;
548 result->oprs[operand].offset = o;
549 } else { /* it's not a memory reference */
550 if (is_reloc(value)) { /* it's immediate */
551 result->oprs[operand].type |= IMMEDIATE;
552 result->oprs[operand].offset = reloc_value(value);
553 result->oprs[operand].segment = reloc_seg(value);
554 result->oprs[operand].wrt = reloc_wrt(value);
555 if (is_simple(value) && reloc_value(value)==1)
556 result->oprs[operand].type |= UNITY;
557 } else { /* it's a register */
558 if (value->type>=EXPR_SIMPLE || value->value!=1) {
559 error (ERR_NONFATAL, "invalid operand type");
560 result->opcode = -1;
561 return result;
563 /* clear overrides, except TO which applies to FPU regs */
564 result->oprs[operand].type &= TO;
565 result->oprs[operand].type |= REGISTER;
566 result->oprs[operand].type |= reg_flags[value->type];
567 result->oprs[operand].basereg = value->type;
572 result->operands = operand; /* set operand count */
574 while (operand<3) /* clear remaining operands */
575 result->oprs[operand++].type = 0;
578 * Transform RESW, RESD, RESQ, REST into RESB.
580 switch (result->opcode) {
581 case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
582 case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
583 case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
584 case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
587 return result;
590 static int is_comma_next (void) {
591 char *p;
593 p = bufptr;
594 while (isspace(*p)) p++;
595 return (*p == ',' || *p == ';' || !*p);
/* isidstart matches any character that may start an identifier, and isidchar
 * matches any character that may appear at places other than the start of an
 * identifier. E.g. a period may only appear at the start of an identifier
 * (for local labels), whereas a number may appear anywhere *but* at the
 * start.
 *
 * The arguments are evaluated more than once, so they must be free of
 * side effects.  They are converted to unsigned char before being handed
 * to the <ctype.h> functions, which are undefined for negative values
 * other than EOF. */

#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || \
                       (c)=='.' || (c)=='?' )
#define isidchar(c)  ( isidstart(c) || isdigit((unsigned char)(c)) || \
                       (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )

/* Ditto for numeric constants. */

#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
#define isnumchar(c)  ( isalnum((unsigned char)(c)) )

/* This returns the numeric value of a given 'digit'. */

#define numvalue(c)  ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
618 * This tokeniser routine has only one side effect, that of
619 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
620 * performed.
623 static int nexttoken (void) {
624 char ourcopy[256], *r, *s;
626 while (isspace(*bufptr)) bufptr++;
627 if (!*bufptr) return 0;
629 /* we have a token; either an id, a number or a char */
630 if (isidstart(*bufptr) ||
631 (*bufptr == '$' && isidstart(bufptr[1]))) {
632 /* now we've got an identifier */
633 int i;
634 int is_sym = FALSE;
636 if (*bufptr == '$') {
637 is_sym = TRUE;
638 bufptr++;
641 tokval.t_charptr = q;
642 *q++ = *bufptr++;
643 while (isidchar(*bufptr)) *q++ = *bufptr++;
644 *q++ = '\0';
645 for (s=tokval.t_charptr, r=ourcopy; *s; s++)
646 *r++ = tolower (*s);
647 *r = '\0';
648 if (is_sym)
649 return TOKEN_ID; /* bypass all other checks */
650 /* right, so we have an identifier sitting in temp storage. now,
651 * is it actually a register or instruction name, or what? */
652 if ((tokval.t_integer=bsi(ourcopy, reg_names,
653 elements(reg_names)))>=0)
654 return TOKEN_REG;
655 if ((tokval.t_integer=bsi(ourcopy, insn_names,
656 elements(insn_names)))>=0)
657 return TOKEN_INSN;
658 for (i=0; i<elements(icn); i++)
659 if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
660 char *p = ourcopy + strlen(icn[i]);
661 tokval.t_integer = ico[i];
662 if ((tokval.t_inttwo=bsi(p, conditions,
663 elements(conditions)))>=0)
664 return TOKEN_INSN;
666 if ((tokval.t_integer=bsi(ourcopy, prefix_names,
667 elements(prefix_names)))>=0) {
668 tokval.t_integer += PREFIX_ENUM_START;
669 return TOKEN_PREFIX;
671 if ((tokval.t_integer=bsi(ourcopy, special_names,
672 elements(special_names)))>=0)
673 return TOKEN_SPECIAL;
674 if (!strcmp(ourcopy, "seg"))
675 return TOKEN_SEG;
676 if (!strcmp(ourcopy, "wrt"))
677 return TOKEN_WRT;
678 return TOKEN_ID;
679 } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
681 * It's a $ sign with no following hex number; this must
682 * mean it's a Here token ($), evaluating to the current
683 * assembly location, or a Base token ($$), evaluating to
684 * the base of the current segment.
686 bufptr++;
687 if (*bufptr == '$') {
688 bufptr++;
689 return TOKEN_BASE;
691 return TOKEN_HERE;
692 } else if (isnumstart(*bufptr)) { /* now we've got a number */
693 char *r = q;
694 int rn_error;
696 *q++ = *bufptr++;
697 while (isnumchar(*bufptr)) {
698 *q++ = *bufptr++;
700 if (*bufptr == '.') {
702 * a floating point constant
704 *q++ = *bufptr++;
705 while (isnumchar(*bufptr)) {
706 *q++ = *bufptr++;
708 *q++ = '\0';
709 tokval.t_charptr = r;
710 return TOKEN_FLOAT;
712 *q++ = '\0';
713 tokval.t_integer = readnum(r, &rn_error);
714 if (rn_error)
715 return TOKEN_ERRNUM; /* some malformation occurred */
716 tokval.t_charptr = NULL;
717 return TOKEN_NUM;
718 } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
719 char quote = *bufptr++, *r;
720 r = tokval.t_charptr = bufptr;
721 while (*bufptr && *bufptr != quote) bufptr++;
722 tokval.t_inttwo = bufptr - r; /* store full version */
723 if (!*bufptr)
724 return TOKEN_ERRNUM; /* unmatched quotes */
725 tokval.t_integer = 0;
726 r = bufptr++; /* skip over final quote */
727 while (quote != *--r) {
728 tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
730 return TOKEN_NUM;
731 } else if (*bufptr == ';') { /* a comment has happened - stay */
732 return 0;
733 } else if ((*bufptr == '>' || *bufptr == '<' ||
734 *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
735 bufptr += 2;
736 return (bufptr[-2] == '>' ? TOKEN_SHR :
737 bufptr[-2] == '<' ? TOKEN_SHL :
738 bufptr[-2] == '/' ? TOKEN_SDIV :
739 TOKEN_SMOD);
740 } else /* just an ordinary char */
741 return (unsigned char) (*bufptr++);
/* return index of "string" in "array", or -1 if no match.
 *
 * Binary search: "array" must hold "size" strings sorted in strcmp()
 * order.  A "size" of zero is fine and yields -1. */
static int bsi (char *string, char **array, int size) {
    int i = -1, j = size;              /* always, i < index < j */
    while (j-i >= 2) {
        int k = (i+j)/2;
        int l = strcmp(string, array[k]);
        if (l<0)                       /* it's in the first half */
            j = k;
        else if (l>0)                  /* it's in the second half */
            i = k;
        else                           /* we've got it :) */
            return k;
    }
    return -1;                         /* we haven't got it :( */
}
760 void cleanup_insn (insn *i) {
761 extop *e;
763 while (i->eops) {
764 e = i->eops;
765 i->eops = i->eops->next;
766 nasm_free (e);
770 /* ------------- Evaluator begins here ------------------ */
772 static expr exprtempstorage[1024], *tempptr; /* store exprs in here */
775 * Add two vector datatypes. We have some bizarre behaviour on far-
776 * absolute segment types: we preserve them during addition _only_
777 * if one of the segments is a truly pure scalar.
779 static expr *add_vectors(expr *p, expr *q) {
780 expr *r = tempptr;
781 int preserve;
783 preserve = is_really_simple(p) || is_really_simple(q);
785 while (p->type && q->type &&
786 p->type < EXPR_SEGBASE+SEG_ABS &&
787 q->type < EXPR_SEGBASE+SEG_ABS)
788 if (p->type > q->type) {
789 tempptr->type = q->type;
790 tempptr->value = q->value;
791 tempptr++, q++;
792 } else if (p->type < q->type) {
793 tempptr->type = p->type;
794 tempptr->value = p->value;
795 tempptr++, p++;
796 } else { /* *p and *q have same type */
797 tempptr->type = p->type;
798 tempptr->value = p->value + q->value;
799 tempptr++, p++, q++;
801 while (p->type &&
802 (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
803 tempptr->type = p->type;
804 tempptr->value = p->value;
805 tempptr++, p++;
807 while (q->type &&
808 (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
809 tempptr->type = q->type;
810 tempptr->value = q->value;
811 tempptr++, q++;
813 (tempptr++)->type = 0;
815 return r;
819 * Multiply a vector by a scalar. Strip far-absolute segment part
820 * if present.
822 static expr *scalar_mult(expr *vect, long scalar) {
823 expr *p = vect;
825 while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
826 p->value = scalar * (p->value);
827 p++;
829 p->type = 0;
831 return vect;
834 static expr *scalarvect (long scalar) {
835 expr *p = tempptr;
836 tempptr->type = EXPR_SIMPLE;
837 tempptr->value = scalar;
838 tempptr++;
839 tempptr->type = 0;
840 tempptr++;
841 return p;
845 * Return TRUE if the argument is a simple scalar. (Or a far-
846 * absolute, which counts.)
848 static int is_simple (expr *vect) {
849 while (vect->type && !vect->value)
850 vect++;
851 if (!vect->type)
852 return 1;
853 if (vect->type != EXPR_SIMPLE)
854 return 0;
855 do {
856 vect++;
857 } while (vect->type && !vect->value);
858 if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
859 return 1;
863 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
864 * absolute.
866 static int is_really_simple (expr *vect) {
867 while (vect->type && !vect->value)
868 vect++;
869 if (!vect->type)
870 return 1;
871 if (vect->type != EXPR_SIMPLE)
872 return 0;
873 do {
874 vect++;
875 } while (vect->type && !vect->value);
876 if (vect->type) return 0;
877 return 1;
881 * Return TRUE if the argument is relocatable (i.e. a simple
882 * scalar, plus at most one segment-base, plus possibly a WRT).
884 static int is_reloc (expr *vect) {
885 while (vect->type && !vect->value)
886 vect++;
887 if (!vect->type)
888 return 1;
889 if (vect->type < EXPR_SIMPLE)
890 return 0;
891 if (vect->type == EXPR_SIMPLE) {
892 do {
893 vect++;
894 } while (vect->type && !vect->value);
895 if (!vect->type)
896 return 1;
898 do {
899 vect++;
900 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
901 if (!vect->type)
902 return 1;
903 return 1;
907 * Return the scalar part of a relocatable vector. (Including
908 * simple scalar vectors - those qualify as relocatable.)
910 static long reloc_value (expr *vect) {
911 while (vect->type && !vect->value)
912 vect++;
913 if (!vect->type) return 0;
914 if (vect->type == EXPR_SIMPLE)
915 return vect->value;
916 else
917 return 0;
921 * Return the segment number of a relocatable vector, or NO_SEG for
922 * simple scalars.
924 static long reloc_seg (expr *vect) {
925 while (vect->type && (vect->type == EXPR_WRT || !vect->value))
926 vect++;
927 if (vect->type == EXPR_SIMPLE) {
928 do {
929 vect++;
930 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
932 if (!vect->type)
933 return NO_SEG;
934 else
935 return vect->type - EXPR_SEGBASE;
939 * Return the WRT segment number of a relocatable vector, or NO_SEG
940 * if no WRT part is present.
942 static long reloc_wrt (expr *vect) {
943 while (vect->type && vect->type < EXPR_WRT)
944 vect++;
945 if (vect->type == EXPR_WRT) {
946 return vect->value;
947 } else
948 return NO_SEG;
951 static void eval_reset(void) {
952 tempptr = exprtempstorage; /* initialise temporary storage */
956 * The SEG operator: calculate the segment part of a relocatable
957 * value. Return NULL, as usual, if an error occurs. Report the
958 * error too.
960 static expr *segment_part (expr *e) {
961 long seg;
963 if (!is_reloc(e)) {
964 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
965 return NULL;
968 seg = reloc_seg(e);
969 if (seg == NO_SEG) {
970 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
971 return NULL;
972 } else if (seg & SEG_ABS)
973 return scalarvect(seg & ~SEG_ABS);
974 else {
975 expr *f = tempptr++;
976 tempptr++->type = 0;
977 f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
978 f->value = 1;
979 return f;
984 * Recursive-descent parser. Called with a single boolean operand,
985 * which is TRUE if the evaluation is critical (i.e. unresolved
986 * symbols are an error condition). Must update the global `i' to
987 * reflect the token after the parsed string. May return NULL.
989 * evaluate() should report its own errors: on return it is assumed
990 * that if NULL has been returned, the error has already been
991 * reported.
995 * Grammar parsed is:
997 * expr : expr0 [ WRT expr6 ]
998 * expr0 : expr1 [ {|} expr1]
999 * expr1 : expr2 [ {^} expr2]
1000 * expr2 : expr3 [ {&} expr3]
1001 * expr3 : expr4 [ {<<,>>} expr4...]
1002 * expr4 : expr5 [ {+,-} expr5...]
1003 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
1004 * expr6 : { ~,+,-,SEG } expr6
1005 * | (expr0)
1006 * | symbol
1007 * | $
1008 * | number
1011 static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
1012 static expr *expr4(int), *expr5(int), *expr6(int);
1014 static expr *expr0(int critical) {
1015 expr *e, *f;
1017 e = expr1(critical);
1018 if (!e)
1019 return NULL;
1020 while (i == '|') {
1021 i = nexttoken();
1022 f = expr1(critical);
1023 if (!f)
1024 return NULL;
1025 if (!is_simple(e) || !is_simple(f)) {
1026 error(ERR_NONFATAL, "`|' operator may only be applied to"
1027 " scalar values");
1029 e = scalarvect (reloc_value(e) | reloc_value(f));
1031 return e;
1034 static expr *expr1(int critical) {
1035 expr *e, *f;
1037 e = expr2(critical);
1038 if (!e)
1039 return NULL;
1040 while (i == '^') {
1041 i = nexttoken();
1042 f = expr2(critical);
1043 if (!f)
1044 return NULL;
1045 if (!is_simple(e) || !is_simple(f)) {
1046 error(ERR_NONFATAL, "`^' operator may only be applied to"
1047 " scalar values");
1049 e = scalarvect (reloc_value(e) ^ reloc_value(f));
1051 return e;
1054 static expr *expr2(int critical) {
1055 expr *e, *f;
1057 e = expr3(critical);
1058 if (!e)
1059 return NULL;
1060 while (i == '&') {
1061 i = nexttoken();
1062 f = expr3(critical);
1063 if (!f)
1064 return NULL;
1065 if (!is_simple(e) || !is_simple(f)) {
1066 error(ERR_NONFATAL, "`&' operator may only be applied to"
1067 " scalar values");
1069 e = scalarvect (reloc_value(e) & reloc_value(f));
1071 return e;
1074 static expr *expr3(int critical) {
1075 expr *e, *f;
1077 e = expr4(critical);
1078 if (!e)
1079 return NULL;
1080 while (i == TOKEN_SHL || i == TOKEN_SHR) {
1081 int j = i;
1082 i = nexttoken();
1083 f = expr4(critical);
1084 if (!f)
1085 return NULL;
1086 if (!is_simple(e) || !is_simple(f)) {
1087 error(ERR_NONFATAL, "shift operator may only be applied to"
1088 " scalar values");
1090 switch (j) {
1091 case TOKEN_SHL:
1092 e = scalarvect (reloc_value(e) << reloc_value(f));
1093 break;
1094 case TOKEN_SHR:
1095 e = scalarvect (((unsigned long)reloc_value(e)) >>
1096 reloc_value(f));
1097 break;
1100 return e;
1103 static expr *expr4(int critical) {
1104 expr *e, *f;
1106 e = expr5(critical);
1107 if (!e)
1108 return NULL;
1109 while (i == '+' || i == '-') {
1110 int j = i;
1111 i = nexttoken();
1112 f = expr5(critical);
1113 if (!f)
1114 return NULL;
1115 switch (j) {
1116 case '+':
1117 e = add_vectors (e, f);
1118 break;
1119 case '-':
1120 e = add_vectors (e, scalar_mult(f, -1L));
1121 break;
1124 return e;
1127 static expr *expr5(int critical) {
1128 expr *e, *f;
1130 e = expr6(critical);
1131 if (!e)
1132 return NULL;
1133 while (i == '*' || i == '/' || i == '*' ||
1134 i == TOKEN_SDIV || i == TOKEN_SMOD) {
1135 int j = i;
1136 i = nexttoken();
1137 f = expr6(critical);
1138 if (!f)
1139 return NULL;
1140 if (j != '*' && (!is_simple(e) || !is_simple(f))) {
1141 error(ERR_NONFATAL, "division operator may only be applied to"
1142 " scalar values");
1143 return NULL;
1145 if (j != '*' && reloc_value(f) == 0) {
1146 error(ERR_NONFATAL, "division by zero");
1147 return NULL;
1149 switch (j) {
1150 case '*':
1151 if (is_simple(e))
1152 e = scalar_mult (f, reloc_value(e));
1153 else if (is_simple(f))
1154 e = scalar_mult (e, reloc_value(f));
1155 else {
1156 error(ERR_NONFATAL, "unable to multiply two "
1157 "non-scalar objects");
1158 return NULL;
1160 break;
1161 case '/':
1162 e = scalarvect (((unsigned long)reloc_value(e)) /
1163 ((unsigned long)reloc_value(f)));
1164 break;
1165 case '%':
1166 e = scalarvect (((unsigned long)reloc_value(e)) %
1167 ((unsigned long)reloc_value(f)));
1168 break;
1169 case TOKEN_SDIV:
1170 e = scalarvect (((signed long)reloc_value(e)) /
1171 ((signed long)reloc_value(f)));
1172 break;
1173 case TOKEN_SMOD:
1174 e = scalarvect (((signed long)reloc_value(e)) %
1175 ((signed long)reloc_value(f)));
1176 break;
1179 return e;
1182 static expr *expr6(int critical) {
1183 expr *e;
1184 long label_seg, label_ofs;
1186 if (i == '-') {
1187 i = nexttoken();
1188 e = expr6(critical);
1189 if (!e)
1190 return NULL;
1191 return scalar_mult (e, -1L);
1192 } else if (i == '+') {
1193 i = nexttoken();
1194 return expr6(critical);
1195 } else if (i == '~') {
1196 i = nexttoken();
1197 e = expr6(critical);
1198 if (!e)
1199 return NULL;
1200 if (!is_simple(e)) {
1201 error(ERR_NONFATAL, "`~' operator may only be applied to"
1202 " scalar values");
1203 return NULL;
1205 return scalarvect(~reloc_value(e));
1206 } else if (i == TOKEN_SEG) {
1207 i = nexttoken();
1208 e = expr6(critical);
1209 if (!e)
1210 return NULL;
1211 return segment_part(e);
1212 } else if (i == '(') {
1213 i = nexttoken();
1214 e = expr0(critical);
1215 if (!e)
1216 return NULL;
1217 if (i != ')') {
1218 error(ERR_NONFATAL, "expecting `)'");
1219 return NULL;
1221 i = nexttoken();
1222 return e;
1223 } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
1224 i == TOKEN_HERE || i == TOKEN_BASE) {
1225 e = tempptr;
1226 switch (i) {
1227 case TOKEN_NUM:
1228 e->type = EXPR_SIMPLE;
1229 e->value = tokval.t_integer;
1230 break;
1231 case TOKEN_REG:
1232 e->type = tokval.t_integer;
1233 e->value = 1;
1234 break;
1235 case TOKEN_ID:
1236 case TOKEN_HERE:
1237 case TOKEN_BASE:
1239 * Since the whole line is parsed before the label it
1240 * defines is given to the label manager, we have
1241 * problems with lines such as
1243 * end: TIMES 512-(end-start) DB 0
1245 * where `end' is not known on pass one, despite not
1246 * really being a forward reference, and due to
1247 * criticality it is _needed_. Hence we check our label
1248 * against the currently defined one, and do our own
1249 * resolution of it if we have to.
1251 if (i == TOKEN_BASE) {
1252 label_seg = seg;
1253 label_ofs = 0;
1254 } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
1255 label_seg = seg;
1256 label_ofs = ofs;
1257 } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
1258 if (critical == 2) {
1259 error (ERR_NONFATAL, "symbol `%s' undefined",
1260 tokval.t_charptr);
1261 return NULL;
1262 } else if (critical == 1) {
1263 error (ERR_NONFATAL, "symbol `%s' not defined before use",
1264 tokval.t_charptr);
1265 return NULL;
1266 } else {
1267 forward = TRUE;
1268 label_seg = seg;
1269 label_ofs = ofs;
1272 e->type = EXPR_SIMPLE;
1273 e->value = label_ofs;
1274 if (label_seg!=NO_SEG) {
1275 tempptr++;
1276 tempptr->type = EXPR_SEGBASE + label_seg;
1277 tempptr->value = 1;
1279 break;
1281 tempptr++;
1282 tempptr->type = 0;
1283 tempptr++;
1284 i = nexttoken();
1285 return e;
1286 } else {
1287 error(ERR_NONFATAL, "expression syntax error");
1288 return NULL;
1292 static expr *evaluate (int critical) {
1293 expr *e;
1294 expr *f = NULL;
1296 e = expr0 (critical);
1297 if (!e)
1298 return NULL;
1300 if (i == TOKEN_WRT) {
1301 if (!is_reloc(e)) {
1302 error(ERR_NONFATAL, "invalid left-hand operand to WRT");
1303 return NULL;
1305 i = nexttoken(); /* eat the WRT */
1306 f = expr6 (critical);
1307 if (!f)
1308 return NULL;
1310 e = scalar_mult (e, 1L); /* strip far-absolute segment part */
1311 if (f) {
1312 expr *g = tempptr++;
1313 tempptr++->type = 0;
1314 g->type = EXPR_WRT;
1315 if (!is_reloc(f)) {
1316 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1317 return NULL;
1319 g->value = reloc_seg(f);
1320 if (g->value == NO_SEG)
1321 g->value = reloc_value(f) | SEG_ABS;
1322 else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
1323 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1324 return NULL;
1326 e = add_vectors (e, g);
1328 return e;