1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
25 static long reg_flags
[] = { /* sizes and special flags */
26 0, REG8
, REG_AL
, REG_AX
, REG8
, REG8
, REG16
, REG16
, REG8
, REG_CL
,
27 REG_CREG
, REG_CREG
, REG_CREG
, REG_CR4
, REG_CS
, REG_CX
, REG8
,
28 REG16
, REG8
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
,
29 REG_DREG
, REG_DESS
, REG_DX
, REG_EAX
, REG32
, REG32
, REG_ECX
,
30 REG32
, REG32
, REG_DESS
, REG32
, REG32
, REG_FSGS
, REG_FSGS
,
31 MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
,
32 REG16
, REG16
, REG_DESS
, FPU0
, FPUREG
, FPUREG
, FPUREG
, FPUREG
,
33 FPUREG
, FPUREG
, FPUREG
, REG_TREG
, REG_TREG
, REG_TREG
, REG_TREG
,
37 enum { /* special tokens */
38 S_BYTE
, S_DWORD
, S_FAR
, S_LONG
, S_NEAR
, S_QWORD
, S_SHORT
, S_TO
,
42 static char *special_names
[] = { /* and the actual text */
43 "byte", "dword", "far", "long", "near", "qword", "short", "to",
47 static char *prefix_names
[] = {
48 "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
49 "repnz", "repz", "times"
53 * Evaluator datatype. Expressions, within the evaluator, are
54 * stored as an array of these beasts, terminated by a record with
55 * type==0. Mostly, it's a vector type: each type denotes some kind
56 * of a component, and the value denotes the multiple of that
57 * component present in the expression. The exception is the WRT
58 * type, whose `value' field denotes the segment to which the
59 * expression is relative. These segments will be segment-base
60 * types, i.e. either odd segment values or SEG_ABS types. So it is
61 * still valid to assume that anything with a `value' field of zero
65 long type
; /* a register, or EXPR_xxx */
66 long value
; /* must be >= 32 bits */
/* Resets the evaluator's temporary expr storage pointer to its start. */
69 static void eval_reset(void);
/*
 * Top-level expression evaluator. The int argument is the `critical'
 * flag handed down to the expr0..expr6 levels; the result is a pointer
 * to an expr vector, and may be NULL when evaluation failed.
 */
70 static expr
*evaluate(int);
73 * ASSUMPTION MADE HERE. The number of distinct register names
74 * (i.e. possible "type" fields for an expr structure) does not
77 #define EXPR_SIMPLE 126
79 #define EXPR_SEGBASE 128
/*
 * Predicates and accessors over expr vectors.
 *
 * is_reloc: true if the vector is relocatable, i.e. a simple scalar plus
 * at most one segment-base, plus possibly a WRT.
 */
81 static int is_reloc(expr
*);
/* is_simple: true for a simple scalar; a far-absolute also counts. */
82 static int is_simple(expr
*);
/* is_really_simple: true for a simple scalar only, not a far-absolute. */
83 static int is_really_simple (expr
*);
/* reloc_value: the scalar part of a relocatable (or simple) vector. */
84 static long reloc_value(expr
*);
/* reloc_seg: the segment number of a relocatable vector, or NO_SEG. */
85 static long reloc_seg(expr
*);
/* reloc_wrt: the WRT segment number, or NO_SEG if no WRT part exists. */
86 static long reloc_wrt(expr
*);
88 enum { /* token types, other than chars */
89 TOKEN_ID
= 256, TOKEN_NUM
, TOKEN_REG
, TOKEN_INSN
, TOKEN_ERRNUM
,
90 TOKEN_HERE
, TOKEN_BASE
, TOKEN_SPECIAL
, TOKEN_PREFIX
, TOKEN_SHL
,
91 TOKEN_SHR
, TOKEN_SDIV
, TOKEN_SMOD
, TOKEN_SEG
, TOKEN_WRT
,
96 long t_integer
, t_inttwo
;
100 static char tempstorage
[1024], *q
;
/*
 * bsi: looks `string' up in `array' (which has `size' entries) and
 * returns its index, or -1 if there is no match.
 */
101 static int bsi (char *string
, char **array
, int size
);/* binary search */
/*
 * nexttoken: the tokeniser. Returns 0 at the end of the buffer, a TOKEN_xxx
 * code, or the character itself for an ordinary single-character token.
 */
103 static int nexttoken (void);
/*
 * is_comma_next: lookahead test that is true when the next non-space
 * character is ',' or ';' or the end of the string.
 */
104 static int is_comma_next (void);
108 static struct tokenval tokval
;
109 static lfunc labelfunc
;
112 static struct ofmt
*outfmt
;
114 static long seg
, ofs
;
118 insn
*parse_line (long segment
, long offset
, lfunc lookup_label
, int pass
,
119 char *buffer
, insn
*result
, struct ofmt
*output
,
124 forward
= result
->forw_ref
= FALSE
;
127 labelfunc
= lookup_label
;
136 result
->eops
= NULL
; /* must do this, whatever happens */
138 if (i
==0) { /* blank line - ignore */
139 result
->label
= NULL
; /* so, no label on it */
140 result
->opcode
= -1; /* and no instruction either */
143 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
144 (i
!=TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
145 error (ERR_NONFATAL
, "label or instruction expected"
146 " at start of line");
147 result
->label
= NULL
;
152 if (i
== TOKEN_ID
) { /* there's a label here */
153 label
= result
->label
= tokval
.t_charptr
;
155 if (i
== ':') { /* skip over the optional colon */
158 } else /* no label; so, moving swiftly on */
159 result
->label
= NULL
;
162 result
->opcode
= -1; /* this line contains just a label */
169 while (i
== TOKEN_PREFIX
||
170 (i
==TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
172 * Handle special case: the TIMES prefix.
174 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
179 value
= evaluate (pass
);
180 if (!value
) { /* but, error in evaluator */
181 result
->opcode
= -1; /* unrecoverable parse error: */
182 return result
; /* ignore this instruction */
184 if (!is_simple (value
)) {
186 "non-constant argument supplied to TIMES");
189 result
->times
= value
->value
;
191 if (result
->nprefix
== MAXPREFIX
)
193 "instruction has more than %d prefixes", MAXPREFIX
);
195 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
200 if (i
!= TOKEN_INSN
) {
201 error (ERR_NONFATAL
, "parser: instruction expected");
206 result
->opcode
= tokval
.t_integer
;
207 result
->condition
= tokval
.t_inttwo
;
210 * RESB, RESW and RESD cannot be satisfied with incorrectly
211 * evaluated operands, since the correct values _must_ be known
212 * on the first pass. Hence, even in pass one, we set the
213 * `critical' flag on calling evaluate(), so that it will bomb
214 * out on undefined symbols. Nasty, but there's nothing we can
217 * For the moment, EQU has the same difficulty, so we'll
220 if (result
->opcode
== I_RESB
||
221 result
->opcode
== I_RESW
||
222 result
->opcode
== I_RESD
||
223 result
->opcode
== I_RESQ
||
224 result
->opcode
== I_REST
||
225 result
->opcode
== I_EQU
)
228 critical
= (pass
==2 ? 2 : 0);
230 if (result
->opcode
== I_DB
||
231 result
->opcode
== I_DW
||
232 result
->opcode
== I_DD
||
233 result
->opcode
== I_DQ
||
234 result
->opcode
== I_DT
) {
235 extop
*eop
, **tail
= &result
->eops
;
239 * Begin to read the DB/DW/DD/DQ/DT operands.
245 eop
= *tail
= nasm_malloc(sizeof(extop
));
248 eop
->type
= EOT_NOTHING
;
251 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
252 eop
->type
= EOT_DB_STRING
;
253 eop
->stringval
= tokval
.t_charptr
;
254 eop
->stringlen
= tokval
.t_inttwo
;
255 i
= nexttoken(); /* eat the comma */
259 if (i
== TOKEN_FLOAT
|| i
== '-') {
266 if (i
!= TOKEN_FLOAT
) {
272 if (i
== TOKEN_FLOAT
) {
273 eop
->type
= EOT_DB_STRING
;
275 if (result
->opcode
== I_DD
)
277 else if (result
->opcode
== I_DQ
)
279 else if (result
->opcode
== I_DT
)
282 error(ERR_NONFATAL
, "floating-point constant"
283 " encountered in `D%c' instruction",
284 result
->opcode
== I_DW
? 'W' : 'B');
285 eop
->type
= EOT_NOTHING
;
288 if (!float_const (tokval
.t_charptr
, sign
,
289 (unsigned char *)eop
->stringval
,
290 eop
->stringlen
, error
))
291 eop
->type
= EOT_NOTHING
;
292 i
= nexttoken(); /* eat the comma */
297 /* anything else */ {
300 value
= evaluate (critical
);
301 if (!value
) { /* but, error in evaluator */
302 result
->opcode
= -1;/* unrecoverable parse error: */
303 return result
; /* ignore this instruction */
305 if (is_reloc(value
)) {
306 eop
->type
= EOT_DB_NUMBER
;
307 eop
->offset
= reloc_value(value
);
308 eop
->segment
= reloc_seg(value
);
309 eop
->wrt
= reloc_wrt(value
);
312 "`%s' operand %d: expression is not simple"
314 insn_names
[result
->opcode
], oper_num
);
321 /* right. Now we begin to parse the operands. There may be up to three
322 * of these, separated by commas, and terminated by a zero token. */
324 for (operand
= 0; operand
< 3; operand
++) {
325 expr
*seg
, *value
; /* used most of the time */
326 int mref
; /* is this going to be a memory ref? */
328 result
->oprs
[operand
].addr_size
= 0;/* have to zero this whatever */
330 if (i
== 0) break; /* end of operands: get out of here */
331 result
->oprs
[operand
].type
= 0; /* so far, no override */
332 while (i
== TOKEN_SPECIAL
) {/* size specifiers */
333 switch ((int)tokval
.t_integer
) {
335 result
->oprs
[operand
].type
|= BITS8
;
338 result
->oprs
[operand
].type
|= BITS16
;
342 result
->oprs
[operand
].type
|= BITS32
;
345 result
->oprs
[operand
].type
|= BITS64
;
348 result
->oprs
[operand
].type
|= BITS80
;
351 result
->oprs
[operand
].type
|= TO
;
354 result
->oprs
[operand
].type
|= FAR
;
357 result
->oprs
[operand
].type
|= NEAR
;
360 result
->oprs
[operand
].type
|= SHORT
;
366 if (i
== '[') { /* memory reference */
369 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
370 switch ((int)tokval
.t_integer
) {
372 result
->oprs
[operand
].addr_size
= 16;
376 result
->oprs
[operand
].addr_size
= 32;
379 error (ERR_NONFATAL
, "invalid size specification in"
380 " effective address");
384 } else /* immediate operand, or register */
389 value
= evaluate (critical
);
391 result
->forw_ref
= TRUE
;
392 if (!value
) { /* error in evaluator */
393 result
->opcode
= -1; /* unrecoverable parse error: */
394 return result
; /* ignore this instruction */
396 if (i
== ':' && mref
) { /* it was seg:offset */
397 seg
= value
; /* so shift this into the segment */
398 i
= nexttoken(); /* then skip the colon */
399 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
400 switch ((int)tokval
.t_integer
) {
402 result
->oprs
[operand
].addr_size
= 16;
406 result
->oprs
[operand
].addr_size
= 32;
409 error (ERR_NONFATAL
, "invalid size specification in"
410 " effective address");
414 value
= evaluate (critical
);
416 result
->forw_ref
= TRUE
;
417 /* and get the offset */
418 if (!value
) { /* but, error in evaluator */
419 result
->opcode
= -1; /* unrecoverable parse error: */
420 return result
; /* ignore this instruction */
423 if (mref
) { /* find ] at the end */
425 error (ERR_NONFATAL
, "parser: expecting ]");
426 do { /* error recovery again */
428 } while (i
!= 0 && i
!= ',');
429 } else /* we got the required ] */
431 } else { /* immediate operand */
432 if (i
!= 0 && i
!= ',' && i
!= ':') {
433 error (ERR_NONFATAL
, "comma or end of line expected");
434 do { /* error recovery */
436 } while (i
!= 0 && i
!= ',');
437 } else if (i
== ':') {
438 result
->oprs
[operand
].type
|= COLON
;
442 /* now convert the exprs returned from evaluate() into operand
445 if (mref
) { /* it's a memory reference */
447 int b
, i
, s
; /* basereg, indexreg, scale */
450 if (seg
) { /* segment override */
451 if (seg
[1].type
!=0 || seg
->value
!=1 ||
452 REG_SREG
& ~reg_flags
[seg
->type
])
453 error (ERR_NONFATAL
, "invalid segment override");
454 else if (result
->nprefix
== MAXPREFIX
)
456 "instruction has more than %d prefixes",
459 result
->prefixes
[result
->nprefix
++] = seg
->type
;
462 b
= i
= -1, o
= s
= 0;
464 if (e
->type
< EXPR_SIMPLE
) { /* this bit's a register */
465 if (e
->value
== 1) /* in fact it can be basereg */
467 else /* no, it has to be indexreg */
468 i
= e
->type
, s
= e
->value
;
471 if (e
->type
&& e
->type
< EXPR_SIMPLE
) {/* it's a second register */
472 if (e
->value
!= 1) { /* it has to be indexreg */
473 if (i
!= -1) { /* but it can't be */
474 error(ERR_NONFATAL
, "invalid effective address");
478 i
= e
->type
, s
= e
->value
;
479 } else { /* it can be basereg */
480 if (b
!= -1) /* or can it? */
487 if (e
->type
!= 0) { /* is there an offset? */
488 if (e
->type
< EXPR_SIMPLE
) {/* in fact, is there an error? */
489 error (ERR_NONFATAL
, "invalid effective address");
493 if (e
->type
== EXPR_SIMPLE
) {
497 if (e
->type
== EXPR_WRT
) {
498 result
->oprs
[operand
].wrt
= e
->value
;
501 result
->oprs
[operand
].wrt
= NO_SEG
;
503 * Look for a segment base type.
505 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
506 error (ERR_NONFATAL
, "invalid effective address");
510 while (e
->type
&& e
->value
== 0)
512 if (e
->type
&& e
->value
!= 1) {
513 error (ERR_NONFATAL
, "invalid effective address");
518 result
->oprs
[operand
].segment
= e
->type
-EXPR_SEGBASE
;
521 result
->oprs
[operand
].segment
= NO_SEG
;
522 while (e
->type
&& e
->value
== 0)
525 error (ERR_NONFATAL
, "invalid effective address");
532 result
->oprs
[operand
].wrt
= NO_SEG
;
533 result
->oprs
[operand
].segment
= NO_SEG
;
536 if (e
->type
!= 0) { /* there'd better be nothing left! */
537 error (ERR_NONFATAL
, "invalid effective address");
542 result
->oprs
[operand
].type
|= MEMORY
;
543 if (b
==-1 && (i
==-1 || s
==0))
544 result
->oprs
[operand
].type
|= MEM_OFFS
;
545 result
->oprs
[operand
].basereg
= b
;
546 result
->oprs
[operand
].indexreg
= i
;
547 result
->oprs
[operand
].scale
= s
;
548 result
->oprs
[operand
].offset
= o
;
549 } else { /* it's not a memory reference */
550 if (is_reloc(value
)) { /* it's immediate */
551 result
->oprs
[operand
].type
|= IMMEDIATE
;
552 result
->oprs
[operand
].offset
= reloc_value(value
);
553 result
->oprs
[operand
].segment
= reloc_seg(value
);
554 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
555 if (is_simple(value
) && reloc_value(value
)==1)
556 result
->oprs
[operand
].type
|= UNITY
;
557 } else { /* it's a register */
558 if (value
->type
>=EXPR_SIMPLE
|| value
->value
!=1) {
559 error (ERR_NONFATAL
, "invalid operand type");
563 /* clear overrides, except TO which applies to FPU regs */
564 result
->oprs
[operand
].type
&= TO
;
565 result
->oprs
[operand
].type
|= REGISTER
;
566 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
567 result
->oprs
[operand
].basereg
= value
->type
;
572 result
->operands
= operand
; /* set operand count */
574 while (operand
<3) /* clear remaining operands */
575 result
->oprs
[operand
++].type
= 0;
578 * Transform RESW, RESD, RESQ, REST into RESB.
580 switch (result
->opcode
) {
581 case I_RESW
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=2; break;
582 case I_RESD
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=4; break;
583 case I_RESQ
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=8; break;
584 case I_REST
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=10; break;
/*
 * Lookahead helper: skips white space starting at `p' and returns non-zero
 * when the next significant character is a comma, a semicolon (which the
 * tokeniser treats as the start of a comment) or the end of the string.
 *
 * NOTE(review): the declaration and initialisation of `p' are not visible
 * in this view - presumably it starts at the current tokeniser position;
 * confirm against the full file.
 * NOTE(review): isspace() is handed a plain char here; a negative value
 * would be undefined behaviour, so a cast to unsigned char may be wanted.
 */
590 static int is_comma_next (void) {
594 while (isspace(*p
)) p
++;
595 return (*p
== ',' || *p
== ';' || !*p
);
/*
 * isidstart matches any character that may start an identifier, and
 * isidchar matches any character that may appear anywhere inside one.
 * Everything accepted by isidstart (letters, '_', '.', '?') is also
 * accepted by isidchar; in addition a digit, '$', '#', '@' or '~' may
 * continue an identifier but may not begin it.
 *
 * The argument is converted to unsigned char before it reaches the
 * <ctype.h> classifiers: handing them a negative plain-char value (any
 * byte >= 0x80 where char is signed) is undefined behaviour. Note that
 * the argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || \
                       (c)=='.' || (c)=='?' )
#define isidchar(c)  ( isidstart(c) || isdigit((unsigned char)(c)) || \
                       (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )
/*
 * Ditto for numeric constants: a number starts with a decimal digit or
 * '$', and continues with any alphanumeric character.
 *
 * As above, the argument is converted to unsigned char before being
 * passed to <ctype.h>, because a negative plain-char value there is
 * undefined behaviour.
 */
#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
#define isnumchar(c)  ( isalnum((unsigned char)(c)) )
/*
 * Numeric value of a single 'digit' character. The character is measured
 * against a base chosen by range: lower-case letters count up from 10
 * ('a' -> 10), upper-case letters likewise ('A' -> 10), and anything
 * below 'A' is taken relative to '0'. No validation is performed.
 */
#define numvalue(c) ( (c) - ( (c)>='a' ? 'a'-10 : (c)>='A' ? 'A'-10 : '0' ) )
618 * This tokeniser routine has only one side effect, that of
619 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
623 static int nexttoken (void) {
624 char ourcopy
[256], *r
, *s
;
626 while (isspace(*bufptr
)) bufptr
++;
627 if (!*bufptr
) return 0;
629 /* we have a token; either an id, a number or a char */
630 if (isidstart(*bufptr
) ||
631 (*bufptr
== '$' && isidstart(bufptr
[1]))) {
632 /* now we've got an identifier */
636 if (*bufptr
== '$') {
641 tokval
.t_charptr
= q
;
643 while (isidchar(*bufptr
)) *q
++ = *bufptr
++;
645 for (s
=tokval
.t_charptr
, r
=ourcopy
; *s
; s
++)
649 return TOKEN_ID
; /* bypass all other checks */
650 /* right, so we have an identifier sitting in temp storage. now,
651 * is it actually a register or instruction name, or what? */
652 if ((tokval
.t_integer
=bsi(ourcopy
, reg_names
,
653 elements(reg_names
)))>=0)
655 if ((tokval
.t_integer
=bsi(ourcopy
, insn_names
,
656 elements(insn_names
)))>=0)
658 for (i
=0; i
<elements(icn
); i
++)
659 if (!strncmp(ourcopy
, icn
[i
], strlen(icn
[i
]))) {
660 char *p
= ourcopy
+ strlen(icn
[i
]);
661 tokval
.t_integer
= ico
[i
];
662 if ((tokval
.t_inttwo
=bsi(p
, conditions
,
663 elements(conditions
)))>=0)
666 if ((tokval
.t_integer
=bsi(ourcopy
, prefix_names
,
667 elements(prefix_names
)))>=0) {
668 tokval
.t_integer
+= PREFIX_ENUM_START
;
671 if ((tokval
.t_integer
=bsi(ourcopy
, special_names
,
672 elements(special_names
)))>=0)
673 return TOKEN_SPECIAL
;
674 if (!strcmp(ourcopy
, "seg"))
676 if (!strcmp(ourcopy
, "wrt"))
679 } else if (*bufptr
== '$' && !isnumchar(bufptr
[1])) {
681 * It's a $ sign with no following hex number; this must
682 * mean it's a Here token ($), evaluating to the current
683 * assembly location, or a Base token ($$), evaluating to
684 * the base of the current segment.
687 if (*bufptr
== '$') {
692 } else if (isnumstart(*bufptr
)) { /* now we've got a number */
697 while (isnumchar(*bufptr
)) {
700 if (*bufptr
== '.') {
702 * a floating point constant
705 while (isnumchar(*bufptr
)) {
709 tokval
.t_charptr
= r
;
713 tokval
.t_integer
= readnum(r
, &rn_error
);
715 return TOKEN_ERRNUM
; /* some malformation occurred */
716 tokval
.t_charptr
= NULL
;
718 } else if (*bufptr
== '\'' || *bufptr
== '"') {/* a char constant */
719 char quote
= *bufptr
++, *r
;
720 r
= tokval
.t_charptr
= bufptr
;
721 while (*bufptr
&& *bufptr
!= quote
) bufptr
++;
722 tokval
.t_inttwo
= bufptr
- r
; /* store full version */
724 return TOKEN_ERRNUM
; /* unmatched quotes */
725 tokval
.t_integer
= 0;
726 r
= bufptr
++; /* skip over final quote */
727 while (quote
!= *--r
) {
728 tokval
.t_integer
= (tokval
.t_integer
<<8) + (unsigned char) *r
;
731 } else if (*bufptr
== ';') { /* a comment has happened - stay */
733 } else if ((*bufptr
== '>' || *bufptr
== '<' ||
734 *bufptr
== '/' || *bufptr
== '%') && bufptr
[1] == *bufptr
) {
736 return (bufptr
[-2] == '>' ? TOKEN_SHR
:
737 bufptr
[-2] == '<' ? TOKEN_SHL
:
738 bufptr
[-2] == '/' ? TOKEN_SDIV
:
740 } else /* just an ordinary char */
741 return (unsigned char) (*bufptr
++);
744 /* return index of "string" in "array", or -1 if no match. */
745 static int bsi (char *string
, char **array
, int size
) {
746 int i
= -1, j
= size
; /* always, i < index < j */
749 int l
= strcmp(string
, array
[k
]);
750 if (l
<0) /* it's in the first half */
752 else if (l
>0) /* it's in the second half */
754 else /* we've got it :) */
757 return -1; /* we haven't got it :( */
760 void cleanup_insn (insn
*i
) {
765 i
->eops
= i
->eops
->next
;
770 /* ------------- Evaluator begins here ------------------ */
772 static expr exprtempstorage
[1024], *tempptr
; /* store exprs in here */
775 * Add two vector datatypes. We have some bizarre behaviour on far-
776 * absolute segment types: we preserve them during addition _only_
777 * if one of the segments is a truly pure scalar.
779 static expr
*add_vectors(expr
*p
, expr
*q
) {
783 preserve
= is_really_simple(p
) || is_really_simple(q
);
785 while (p
->type
&& q
->type
&&
786 p
->type
< EXPR_SEGBASE
+SEG_ABS
&&
787 q
->type
< EXPR_SEGBASE
+SEG_ABS
)
788 if (p
->type
> q
->type
) {
789 tempptr
->type
= q
->type
;
790 tempptr
->value
= q
->value
;
792 } else if (p
->type
< q
->type
) {
793 tempptr
->type
= p
->type
;
794 tempptr
->value
= p
->value
;
796 } else { /* *p and *q have same type */
797 tempptr
->type
= p
->type
;
798 tempptr
->value
= p
->value
+ q
->value
;
802 (preserve
|| p
->type
< EXPR_SEGBASE
+SEG_ABS
)) {
803 tempptr
->type
= p
->type
;
804 tempptr
->value
= p
->value
;
808 (preserve
|| q
->type
< EXPR_SEGBASE
+SEG_ABS
)) {
809 tempptr
->type
= q
->type
;
810 tempptr
->value
= q
->value
;
813 (tempptr
++)->type
= 0;
819 * Multiply a vector by a scalar. Strip far-absolute segment part
822 static expr
*scalar_mult(expr
*vect
, long scalar
) {
825 while (p
->type
&& p
->type
< EXPR_SEGBASE
+SEG_ABS
) {
826 p
->value
= scalar
* (p
->value
);
834 static expr
*scalarvect (long scalar
) {
836 tempptr
->type
= EXPR_SIMPLE
;
837 tempptr
->value
= scalar
;
845 * Return TRUE if the argument is a simple scalar. (Or a far-
846 * absolute, which counts.)
848 static int is_simple (expr
*vect
) {
849 while (vect
->type
&& !vect
->value
)
853 if (vect
->type
!= EXPR_SIMPLE
)
857 } while (vect
->type
&& !vect
->value
);
858 if (vect
->type
&& vect
->type
< EXPR_SEGBASE
+SEG_ABS
) return 0;
863 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
866 static int is_really_simple (expr
*vect
) {
867 while (vect
->type
&& !vect
->value
)
871 if (vect
->type
!= EXPR_SIMPLE
)
875 } while (vect
->type
&& !vect
->value
);
876 if (vect
->type
) return 0;
881 * Return TRUE if the argument is relocatable (i.e. a simple
882 * scalar, plus at most one segment-base, plus possibly a WRT).
884 static int is_reloc (expr
*vect
) {
885 while (vect
->type
&& !vect
->value
)
889 if (vect
->type
< EXPR_SIMPLE
)
891 if (vect
->type
== EXPR_SIMPLE
) {
894 } while (vect
->type
&& !vect
->value
);
900 } while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
));
907 * Return the scalar part of a relocatable vector. (Including
908 * simple scalar vectors - those qualify as relocatable.)
910 static long reloc_value (expr
*vect
) {
911 while (vect
->type
&& !vect
->value
)
913 if (!vect
->type
) return 0;
914 if (vect
->type
== EXPR_SIMPLE
)
921 * Return the segment number of a relocatable vector, or NO_SEG for
924 static long reloc_seg (expr
*vect
) {
925 while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
))
927 if (vect
->type
== EXPR_SIMPLE
) {
930 } while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
));
935 return vect
->type
- EXPR_SEGBASE
;
939 * Return the WRT segment number of a relocatable vector, or NO_SEG
940 * if no WRT part is present.
942 static long reloc_wrt (expr
*vect
) {
943 while (vect
->type
&& vect
->type
< EXPR_WRT
)
945 if (vect
->type
== EXPR_WRT
) {
/*
 * Rewinds the evaluator's scratch allocator: `tempptr' is pointed back at
 * the first element of the static `exprtempstorage' array, so expr
 * records handed out from that array afterwards start again from the
 * beginning of it.
 */
951 static void eval_reset(void) {
952 tempptr
= exprtempstorage
; /* initialise temporary storage */
956 * The SEG operator: calculate the segment part of a relocatable
957 * value. Return NULL, as usual, if an error occurs. Report the
960 static expr
*segment_part (expr
*e
) {
964 error(ERR_NONFATAL
, "cannot apply SEG to a non-relocatable value");
970 error(ERR_NONFATAL
, "cannot apply SEG to a non-relocatable value");
972 } else if (seg
& SEG_ABS
)
973 return scalarvect(seg
& ~SEG_ABS
);
977 f
->type
= EXPR_SEGBASE
+outfmt
->segbase(seg
+1);
984 * Recursive-descent parser. Called with a single boolean operand,
985 * which is TRUE if the evaluation is critical (i.e. unresolved
986 * symbols are an error condition). Must update the global `i' to
987 * reflect the token after the parsed string. May return NULL.
989 * evaluate() should report its own errors: on return it is assumed
990 * that if NULL has been returned, the error has already been
997 * expr : expr0 [ WRT expr6 ]
998 * expr0 : expr1 [ {|} expr1]
999 * expr1 : expr2 [ {^} expr2]
1000 * expr2 : expr3 [ {&} expr3]
1001 * expr3 : expr4 [ {<<,>>} expr4...]
1002 * expr4 : expr5 [ {+,-} expr5...]
1003 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
1004 * expr6 : { ~,+,-,SEG } expr6
/*
 * Forward declarations for the precedence levels of the evaluator. They
 * are needed because the levels are mutually recursive: each level calls
 * the next one down, and expr6 re-enters expr0 for a parenthesised
 * sub-expression. Every level takes the `critical' flag as its argument.
 */
1011 static expr
*expr0(int), *expr1(int), *expr2(int), *expr3(int);
1012 static expr
*expr4(int), *expr5(int), *expr6(int);
1014 static expr
*expr0(int critical
) {
1017 e
= expr1(critical
);
1022 f
= expr1(critical
);
1025 if (!is_simple(e
) || !is_simple(f
)) {
1026 error(ERR_NONFATAL
, "`|' operator may only be applied to"
1029 e
= scalarvect (reloc_value(e
) | reloc_value(f
));
1034 static expr
*expr1(int critical
) {
1037 e
= expr2(critical
);
1042 f
= expr2(critical
);
1045 if (!is_simple(e
) || !is_simple(f
)) {
1046 error(ERR_NONFATAL
, "`^' operator may only be applied to"
1049 e
= scalarvect (reloc_value(e
) ^ reloc_value(f
));
1054 static expr
*expr2(int critical
) {
1057 e
= expr3(critical
);
1062 f
= expr3(critical
);
1065 if (!is_simple(e
) || !is_simple(f
)) {
1066 error(ERR_NONFATAL
, "`&' operator may only be applied to"
1069 e
= scalarvect (reloc_value(e
) & reloc_value(f
));
1074 static expr
*expr3(int critical
) {
1077 e
= expr4(critical
);
1080 while (i
== TOKEN_SHL
|| i
== TOKEN_SHR
) {
1083 f
= expr4(critical
);
1086 if (!is_simple(e
) || !is_simple(f
)) {
1087 error(ERR_NONFATAL
, "shift operator may only be applied to"
1092 e
= scalarvect (reloc_value(e
) << reloc_value(f
));
1095 e
= scalarvect (((unsigned long)reloc_value(e
)) >>
1103 static expr
*expr4(int critical
) {
1106 e
= expr5(critical
);
1109 while (i
== '+' || i
== '-') {
1112 f
= expr5(critical
);
1117 e
= add_vectors (e
, f
);
1120 e
= add_vectors (e
, scalar_mult(f
, -1L));
1127 static expr
*expr5(int critical
) {
1130 e
= expr6(critical
);
1133 while (i
== '*' || i
== '/' || i
== '*' ||
1134 i
== TOKEN_SDIV
|| i
== TOKEN_SMOD
) {
1137 f
= expr6(critical
);
1140 if (j
!= '*' && (!is_simple(e
) || !is_simple(f
))) {
1141 error(ERR_NONFATAL
, "division operator may only be applied to"
1145 if (j
!= '*' && reloc_value(f
) == 0) {
1146 error(ERR_NONFATAL
, "division by zero");
1152 e
= scalar_mult (f
, reloc_value(e
));
1153 else if (is_simple(f
))
1154 e
= scalar_mult (e
, reloc_value(f
));
1156 error(ERR_NONFATAL
, "unable to multiply two "
1157 "non-scalar objects");
1162 e
= scalarvect (((unsigned long)reloc_value(e
)) /
1163 ((unsigned long)reloc_value(f
)));
1166 e
= scalarvect (((unsigned long)reloc_value(e
)) %
1167 ((unsigned long)reloc_value(f
)));
1170 e
= scalarvect (((signed long)reloc_value(e
)) /
1171 ((signed long)reloc_value(f
)));
1174 e
= scalarvect (((signed long)reloc_value(e
)) %
1175 ((signed long)reloc_value(f
)));
1182 static expr
*expr6(int critical
) {
1184 long label_seg
, label_ofs
;
1188 e
= expr6(critical
);
1191 return scalar_mult (e
, -1L);
1192 } else if (i
== '+') {
1194 return expr6(critical
);
1195 } else if (i
== '~') {
1197 e
= expr6(critical
);
1200 if (!is_simple(e
)) {
1201 error(ERR_NONFATAL
, "`~' operator may only be applied to"
1205 return scalarvect(~reloc_value(e
));
1206 } else if (i
== TOKEN_SEG
) {
1208 e
= expr6(critical
);
1211 return segment_part(e
);
1212 } else if (i
== '(') {
1214 e
= expr0(critical
);
1218 error(ERR_NONFATAL
, "expecting `)'");
1223 } else if (i
== TOKEN_NUM
|| i
== TOKEN_REG
|| i
== TOKEN_ID
||
1224 i
== TOKEN_HERE
|| i
== TOKEN_BASE
) {
1228 e
->type
= EXPR_SIMPLE
;
1229 e
->value
= tokval
.t_integer
;
1232 e
->type
= tokval
.t_integer
;
1239 * Since the whole line is parsed before the label it
1240 * defines is given to the label manager, we have
1241 * problems with lines such as
1243 * end: TIMES 512-(end-start) DB 0
1245 * where `end' is not known on pass one, despite not
1246 * really being a forward reference, and due to
1247 * criticality it is _needed_. Hence we check our label
1248 * against the currently defined one, and do our own
1249 * resolution of it if we have to.
1251 if (i
== TOKEN_BASE
) {
1254 } else if (i
== TOKEN_HERE
|| !strcmp(tokval
.t_charptr
, label
)) {
1257 } else if (!labelfunc(tokval
.t_charptr
, &label_seg
, &label_ofs
)) {
1258 if (critical
== 2) {
1259 error (ERR_NONFATAL
, "symbol `%s' undefined",
1262 } else if (critical
== 1) {
1263 error (ERR_NONFATAL
, "symbol `%s' not defined before use",
1272 e
->type
= EXPR_SIMPLE
;
1273 e
->value
= label_ofs
;
1274 if (label_seg
!=NO_SEG
) {
1276 tempptr
->type
= EXPR_SEGBASE
+ label_seg
;
1287 error(ERR_NONFATAL
, "expression syntax error");
1292 static expr
*evaluate (int critical
) {
1296 e
= expr0 (critical
);
1300 if (i
== TOKEN_WRT
) {
1302 error(ERR_NONFATAL
, "invalid left-hand operand to WRT");
1305 i
= nexttoken(); /* eat the WRT */
1306 f
= expr6 (critical
);
1310 e
= scalar_mult (e
, 1L); /* strip far-absolute segment part */
1312 expr
*g
= tempptr
++;
1313 tempptr
++->type
= 0;
1316 error(ERR_NONFATAL
, "invalid right-hand operand to WRT");
1319 g
->value
= reloc_seg(f
);
1320 if (g
->value
== NO_SEG
)
1321 g
->value
= reloc_value(f
) | SEG_ABS
;
1322 else if (!(g
->value
& SEG_ABS
) && !(g
->value
% 2) && critical
) {
1323 error(ERR_NONFATAL
, "invalid right-hand operand to WRT");
1326 e
= add_vectors (e
, g
);