1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
24 static long reg_flags
[] = { /* sizes and special flags */
25 0, REG8
, REG_AL
, REG_AX
, REG8
, REG8
, REG16
, REG16
, REG8
, REG_CL
,
26 REG_CREG
, REG_CREG
, REG_CREG
, REG_CR4
, REG_CS
, REG_CX
, REG8
,
27 REG16
, REG8
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
,
28 REG_DREG
, REG_DESS
, REG_DX
, REG_EAX
, REG32
, REG32
, REG_ECX
,
29 REG32
, REG32
, REG_DESS
, REG32
, REG32
, REG_FSGS
, REG_FSGS
,
30 MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
,
31 REG16
, REG16
, REG_DESS
, FPU0
, FPUREG
, FPUREG
, FPUREG
, FPUREG
,
32 FPUREG
, FPUREG
, FPUREG
, REG_TREG
, REG_TREG
, REG_TREG
, REG_TREG
,
36 enum { /* special tokens */
37 S_BYTE
, S_DWORD
, S_FAR
, S_LONG
, S_NEAR
, S_QWORD
, S_SHORT
, S_TO
,
41 static char *special_names
[] = { /* and the actual text */
42 "byte", "dword", "far", "long", "near", "qword", "short", "to",
46 static char *prefix_names
[] = {
47 "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
48 "repnz", "repz", "times"
52 * Evaluator datatype. Expressions, within the evaluator, are
53 * stored as an array of these beasts, terminated by a record with
54 * type==0. Mostly, it's a vector type: each type denotes some kind
55 * of a component, and the value denotes the multiple of that
56 * component present in the expression. The exception is the WRT
57 * type, whose `value' field denotes the segment to which the
58 * expression is relative. These segments will be segment-base
59 * types, i.e. either odd segment values or SEG_ABS types. So it is
60 * still valid to assume that anything with a `value' field of zero
64 long type
; /* a register, or EXPR_xxx */
65 long value
; /* must be >= 32 bits */
/* eval_reset: points `tempptr' back at the start of `exprtempstorage',
 * the array in which the evaluator stores its expr records. */
68 static void eval_reset(void);
/* evaluate: entry point of the expression evaluator.  The int argument
 * is the `critical' flag that is passed on to expr0().  May return NULL;
 * in that case the error has already been reported by the evaluator. */
69 static expr
*evaluate(int);
72 * ASSUMPTION MADE HERE. The number of distinct register names
73 * (i.e. possible "type" fields for an expr structure) does not
76 #define EXPR_SIMPLE 126
78 #define EXPR_SEGBASE 128
/* Forward declarations of the helpers that classify and take apart an
 * expression vector (an array of expr records). */
/* is_reloc: TRUE if the vector is relocatable, i.e. a simple scalar,
 * plus at most one segment-base, plus possibly a WRT. */
80 static int is_reloc(expr
*);
/* is_simple: TRUE if the vector is a simple scalar; a far-absolute
 * also counts. */
81 static int is_simple(expr
*);
/* is_really_simple: TRUE if the vector is a simple scalar; unlike
 * is_simple(), a far-absolute does not count. */
82 static int is_really_simple (expr
*);
/* reloc_value: the scalar part of a relocatable vector (simple scalar
 * vectors qualify as relocatable). */
83 static long reloc_value(expr
*);
/* reloc_seg: the segment number of a relocatable vector, or NO_SEG
 * (presumably when the vector has no segment part -- confirm at the
 * definition). */
84 static long reloc_seg(expr
*);
/* reloc_wrt: the WRT segment number of a relocatable vector, or NO_SEG
 * if no WRT part is present. */
85 static long reloc_wrt(expr
*);
87 enum { /* token types, other than chars */
88 TOKEN_ID
= 256, TOKEN_NUM
, TOKEN_REG
, TOKEN_INSN
, TOKEN_ERRNUM
,
89 TOKEN_HERE
, TOKEN_BASE
, TOKEN_SPECIAL
, TOKEN_PREFIX
, TOKEN_SHL
,
90 TOKEN_SHR
, TOKEN_SDIV
, TOKEN_SMOD
, TOKEN_SEG
, TOKEN_WRT
,
95 long t_integer
, t_inttwo
;
99 static char tempstorage
[1024], *q
;
100 static int bsi (char *string
, char **array
, int size
);/* binary search */
102 static int nexttoken (void);
103 static int is_comma_next (void);
107 static struct tokenval tokval
;
108 static lfunc labelfunc
;
111 static struct ofmt
*outfmt
;
113 static long seg
, ofs
;
117 insn
*parse_line (long segment
, long offset
, lfunc lookup_label
, int pass
,
118 char *buffer
, insn
*result
, struct ofmt
*output
,
123 forward
= result
->forw_ref
= FALSE
;
126 labelfunc
= lookup_label
;
135 result
->eops
= NULL
; /* must do this, whatever happens */
137 if (i
==0) { /* blank line - ignore */
138 result
->label
= NULL
; /* so, no label on it */
139 result
->opcode
= -1; /* and no instruction either */
142 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
143 (i
!=TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
144 error (ERR_NONFATAL
, "label or instruction expected"
145 " at start of line");
146 result
->label
= NULL
;
151 if (i
== TOKEN_ID
) { /* there's a label here */
152 label
= result
->label
= tokval
.t_charptr
;
154 if (i
== ':') { /* skip over the optional colon */
157 } else /* no label; so, moving swiftly on */
158 result
->label
= NULL
;
161 result
->opcode
= -1; /* this line contains just a label */
168 while (i
== TOKEN_PREFIX
||
169 (i
==TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
171 * Handle special case: the TIMES prefix.
173 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
178 value
= evaluate (pass
);
179 if (!value
) { /* but, error in evaluator */
180 result
->opcode
= -1; /* unrecoverable parse error: */
181 return result
; /* ignore this instruction */
183 if (!is_simple (value
)) {
185 "non-constant argument supplied to TIMES");
188 result
->times
= value
->value
;
189 if (value
->value
< 0)
190 error(ERR_WARNING
, "TIMES value %d is negative",
194 if (result
->nprefix
== MAXPREFIX
)
196 "instruction has more than %d prefixes", MAXPREFIX
);
198 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
203 if (i
!= TOKEN_INSN
) {
204 error (ERR_NONFATAL
, "parser: instruction expected");
209 result
->opcode
= tokval
.t_integer
;
210 result
->condition
= tokval
.t_inttwo
;
213 * RESB, RESW and RESD cannot be satisfied with incorrectly
214 * evaluated operands, since the correct values _must_ be known
215 * on the first pass. Hence, even in pass one, we set the
216 * `critical' flag on calling evaluate(), so that it will bomb
217 * out on undefined symbols. Nasty, but there's nothing we can
220 * For the moment, EQU has the same difficulty, so we'll
223 if (result
->opcode
== I_RESB
||
224 result
->opcode
== I_RESW
||
225 result
->opcode
== I_RESD
||
226 result
->opcode
== I_RESQ
||
227 result
->opcode
== I_REST
||
228 result
->opcode
== I_EQU
)
231 critical
= (pass
==2 ? 2 : 0);
233 if (result
->opcode
== I_DB
||
234 result
->opcode
== I_DW
||
235 result
->opcode
== I_DD
||
236 result
->opcode
== I_DQ
||
237 result
->opcode
== I_DT
||
238 result
->opcode
== I_INCBIN
) {
239 extop
*eop
, **tail
= &result
->eops
;
243 * Begin to read the DB/DW/DD/DQ/DT operands.
249 eop
= *tail
= nasm_malloc(sizeof(extop
));
252 eop
->type
= EOT_NOTHING
;
255 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
256 eop
->type
= EOT_DB_STRING
;
257 eop
->stringval
= tokval
.t_charptr
;
258 eop
->stringlen
= tokval
.t_inttwo
;
259 i
= nexttoken(); /* eat the comma */
263 if (i
== TOKEN_FLOAT
|| i
== '-') {
270 if (i
!= TOKEN_FLOAT
) {
276 if (i
== TOKEN_FLOAT
) {
277 eop
->type
= EOT_DB_STRING
;
279 if (result
->opcode
== I_DD
)
281 else if (result
->opcode
== I_DQ
)
283 else if (result
->opcode
== I_DT
)
286 error(ERR_NONFATAL
, "floating-point constant"
287 " encountered in `D%c' instruction",
288 result
->opcode
== I_DW
? 'W' : 'B');
289 eop
->type
= EOT_NOTHING
;
292 if (!float_const (tokval
.t_charptr
, sign
,
293 (unsigned char *)eop
->stringval
,
294 eop
->stringlen
, error
))
295 eop
->type
= EOT_NOTHING
;
296 i
= nexttoken(); /* eat the comma */
301 /* anything else */ {
304 value
= evaluate (critical
);
305 if (!value
) { /* but, error in evaluator */
306 result
->opcode
= -1;/* unrecoverable parse error: */
307 return result
; /* ignore this instruction */
309 if (is_reloc(value
)) {
310 eop
->type
= EOT_DB_NUMBER
;
311 eop
->offset
= reloc_value(value
);
312 eop
->segment
= reloc_seg(value
);
313 eop
->wrt
= reloc_wrt(value
);
316 "`%s' operand %d: expression is not simple"
318 insn_names
[result
->opcode
], oper_num
);
323 if (result
->opcode
== I_INCBIN
) {
325 * Correct syntax for INCBIN is that there should be
326 * one string operand, followed by one or two numeric
329 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
330 error (ERR_NONFATAL
, "`incbin' expects a file name");
331 else if (result
->eops
->next
&&
332 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
333 error (ERR_NONFATAL
, "`incbin': second parameter is",
335 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
336 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
337 error (ERR_NONFATAL
, "`incbin': third parameter is",
339 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
340 result
->eops
->next
->next
->next
)
341 error (ERR_NONFATAL
, "`incbin': more than three parameters");
345 * If we reach here, one of the above errors happened.
346 * Throw the instruction away.
355 /* right. Now we begin to parse the operands. There may be up to three
356 * of these, separated by commas, and terminated by a zero token. */
358 for (operand
= 0; operand
< 3; operand
++) {
359 expr
*seg
, *value
; /* used most of the time */
360 int mref
; /* is this going to be a memory ref? */
362 result
->oprs
[operand
].addr_size
= 0;/* have to zero this whatever */
364 if (i
== 0) break; /* end of operands: get out of here */
365 result
->oprs
[operand
].type
= 0; /* so far, no override */
366 while (i
== TOKEN_SPECIAL
) {/* size specifiers */
367 switch ((int)tokval
.t_integer
) {
369 result
->oprs
[operand
].type
|= BITS8
;
372 result
->oprs
[operand
].type
|= BITS16
;
376 result
->oprs
[operand
].type
|= BITS32
;
379 result
->oprs
[operand
].type
|= BITS64
;
382 result
->oprs
[operand
].type
|= BITS80
;
385 result
->oprs
[operand
].type
|= TO
;
388 result
->oprs
[operand
].type
|= FAR
;
391 result
->oprs
[operand
].type
|= NEAR
;
394 result
->oprs
[operand
].type
|= SHORT
;
400 if (i
== '[') { /* memory reference */
403 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
404 switch ((int)tokval
.t_integer
) {
406 result
->oprs
[operand
].addr_size
= 16;
410 result
->oprs
[operand
].addr_size
= 32;
413 error (ERR_NONFATAL
, "invalid size specification in"
414 " effective address");
418 } else /* immediate operand, or register */
423 value
= evaluate (critical
);
425 result
->forw_ref
= TRUE
;
426 if (!value
) { /* error in evaluator */
427 result
->opcode
= -1; /* unrecoverable parse error: */
428 return result
; /* ignore this instruction */
430 if (i
== ':' && mref
) { /* it was seg:offset */
431 seg
= value
; /* so shift this into the segment */
432 i
= nexttoken(); /* then skip the colon */
433 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
434 switch ((int)tokval
.t_integer
) {
436 result
->oprs
[operand
].addr_size
= 16;
440 result
->oprs
[operand
].addr_size
= 32;
443 error (ERR_NONFATAL
, "invalid size specification in"
444 " effective address");
448 value
= evaluate (critical
);
450 result
->forw_ref
= TRUE
;
451 /* and get the offset */
452 if (!value
) { /* but, error in evaluator */
453 result
->opcode
= -1; /* unrecoverable parse error: */
454 return result
; /* ignore this instruction */
457 if (mref
) { /* find ] at the end */
459 error (ERR_NONFATAL
, "parser: expecting ]");
460 do { /* error recovery again */
462 } while (i
!= 0 && i
!= ',');
463 } else /* we got the required ] */
465 } else { /* immediate operand */
466 if (i
!= 0 && i
!= ',' && i
!= ':') {
467 error (ERR_NONFATAL
, "comma or end of line expected");
468 do { /* error recovery */
470 } while (i
!= 0 && i
!= ',');
471 } else if (i
== ':') {
472 result
->oprs
[operand
].type
|= COLON
;
476 /* now convert the exprs returned from evaluate() into operand
479 if (mref
) { /* it's a memory reference */
481 int b
, i
, s
; /* basereg, indexreg, scale */
484 if (seg
) { /* segment override */
485 if (seg
[1].type
!=0 || seg
->value
!=1 ||
486 REG_SREG
& ~reg_flags
[seg
->type
])
487 error (ERR_NONFATAL
, "invalid segment override");
488 else if (result
->nprefix
== MAXPREFIX
)
490 "instruction has more than %d prefixes",
493 result
->prefixes
[result
->nprefix
++] = seg
->type
;
496 b
= i
= -1, o
= s
= 0;
498 if (e
->type
< EXPR_SIMPLE
) { /* this bit's a register */
499 if (e
->value
== 1) /* in fact it can be basereg */
501 else /* no, it has to be indexreg */
502 i
= e
->type
, s
= e
->value
;
505 if (e
->type
&& e
->type
< EXPR_SIMPLE
) {/* it's a second register */
506 if (e
->value
!= 1) { /* it has to be indexreg */
507 if (i
!= -1) { /* but it can't be */
508 error(ERR_NONFATAL
, "invalid effective address");
512 i
= e
->type
, s
= e
->value
;
513 } else { /* it can be basereg */
514 if (b
!= -1) /* or can it? */
521 if (e
->type
!= 0) { /* is there an offset? */
522 if (e
->type
< EXPR_SIMPLE
) {/* in fact, is there an error? */
523 error (ERR_NONFATAL
, "invalid effective address");
527 if (e
->type
== EXPR_SIMPLE
) {
531 if (e
->type
== EXPR_WRT
) {
532 result
->oprs
[operand
].wrt
= e
->value
;
535 result
->oprs
[operand
].wrt
= NO_SEG
;
537 * Look for a segment base type.
539 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
540 error (ERR_NONFATAL
, "invalid effective address");
544 while (e
->type
&& e
->value
== 0)
546 if (e
->type
&& e
->value
!= 1) {
547 error (ERR_NONFATAL
, "invalid effective address");
552 result
->oprs
[operand
].segment
= e
->type
-EXPR_SEGBASE
;
555 result
->oprs
[operand
].segment
= NO_SEG
;
556 while (e
->type
&& e
->value
== 0)
559 error (ERR_NONFATAL
, "invalid effective address");
566 result
->oprs
[operand
].wrt
= NO_SEG
;
567 result
->oprs
[operand
].segment
= NO_SEG
;
570 if (e
->type
!= 0) { /* there'd better be nothing left! */
571 error (ERR_NONFATAL
, "invalid effective address");
576 result
->oprs
[operand
].type
|= MEMORY
;
577 if (b
==-1 && (i
==-1 || s
==0))
578 result
->oprs
[operand
].type
|= MEM_OFFS
;
579 result
->oprs
[operand
].basereg
= b
;
580 result
->oprs
[operand
].indexreg
= i
;
581 result
->oprs
[operand
].scale
= s
;
582 result
->oprs
[operand
].offset
= o
;
583 } else { /* it's not a memory reference */
584 if (is_reloc(value
)) { /* it's immediate */
585 result
->oprs
[operand
].type
|= IMMEDIATE
;
586 result
->oprs
[operand
].offset
= reloc_value(value
);
587 result
->oprs
[operand
].segment
= reloc_seg(value
);
588 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
589 if (is_simple(value
) && reloc_value(value
)==1)
590 result
->oprs
[operand
].type
|= UNITY
;
591 } else { /* it's a register */
592 if (value
->type
>=EXPR_SIMPLE
|| value
->value
!=1) {
593 error (ERR_NONFATAL
, "invalid operand type");
597 /* clear overrides, except TO which applies to FPU regs */
598 result
->oprs
[operand
].type
&= TO
;
599 result
->oprs
[operand
].type
|= REGISTER
;
600 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
601 result
->oprs
[operand
].basereg
= value
->type
;
606 result
->operands
= operand
; /* set operand count */
608 while (operand
<3) /* clear remaining operands */
609 result
->oprs
[operand
++].type
= 0;
612 * Transform RESW, RESD, RESQ, REST into RESB.
614 switch (result
->opcode
) {
615 case I_RESW
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=2; break;
616 case I_RESD
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=4; break;
617 case I_RESQ
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=8; break;
618 case I_REST
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=10; break;
/*
 * is_comma_next: look ahead without consuming anything.  Skips any
 * whitespace at the scan pointer `p' and returns nonzero if the first
 * non-blank character is a comma, a semicolon, or the terminating NUL
 * (so, despite the name, "end of operand" in general counts).
 */
624 static int is_comma_next (void) {
/* The <ctype.h> classifiers require an argument that is representable
 * as unsigned char (or EOF).  Plain char may be signed, so a byte with
 * the top bit set would be passed as a negative value; cast first. */
628 while (isspace((unsigned char)*p
)) p
++;
629 return (*p
== ',' || *p
== ';' || !*p
);
633 * This tokeniser routine has only one side effect, that of
634 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
/*
 * nexttoken: the tokeniser.  Skips whitespace at `bufptr' and returns 0
 * at end of string; otherwise classifies what follows and returns a
 * token code, leaving details in `tokval':
 *   - an identifier (optionally introduced by `$') is copied out through
 *     `q' and then looked up in reg_names, insn_names, the icn/conditions
 *     tables, prefix_names (offset by PREFIX_ENUM_START), special_names
 *     (TOKEN_SPECIAL), and finally compared against "seg" and "wrt";
 *   - `$' not followed by a number character is the Here/Base case;
 *   - a number goes through readnum(), TOKEN_ERRNUM reports a
 *     malformed one;
 *   - a quoted constant sets t_charptr/t_inttwo to its text and length
 *     and packs the bytes into t_integer, TOKEN_ERRNUM reports
 *     unmatched quotes;
 *   - a doubled `>', `<', `/' or `%' is a two-character operator;
 *   - anything else is returned as the character itself.
 */
638 static int nexttoken (void) {
639 char ourcopy
[256], *r
, *s
;
/* <ctype.h> classifiers need a value representable as unsigned char
 * (or EOF); plain char may be negative for bytes >= 0x80, so cast. */
641 while (isspace((unsigned char)*bufptr
)) bufptr
++;
642 if (!*bufptr
) return 0;
644 /* we have a token; either an id, a number or a char */
645 if (isidstart(*bufptr
) ||
646 (*bufptr
== '$' && isidstart(bufptr
[1]))) {
647 /* now we've got an identifier */
651 if (*bufptr
== '$') {
656 tokval
.t_charptr
= q
;
658 while (isidchar(*bufptr
)) *q
++ = *bufptr
++;
660 for (s
=tokval
.t_charptr
, r
=ourcopy
; *s
; s
++)
664 return TOKEN_ID
; /* bypass all other checks */
665 /* right, so we have an identifier sitting in temp storage. now,
666 * is it actually a register or instruction name, or what? */
667 if ((tokval
.t_integer
=bsi(ourcopy
, reg_names
,
668 elements(reg_names
)))>=0)
670 if ((tokval
.t_integer
=bsi(ourcopy
, insn_names
,
671 elements(insn_names
)))>=0)
673 for (i
=0; i
<elements(icn
); i
++)
674 if (!strncmp(ourcopy
, icn
[i
], strlen(icn
[i
]))) {
675 char *p
= ourcopy
+ strlen(icn
[i
]);
676 tokval
.t_integer
= ico
[i
];
677 if ((tokval
.t_inttwo
=bsi(p
, conditions
,
678 elements(conditions
)))>=0)
681 if ((tokval
.t_integer
=bsi(ourcopy
, prefix_names
,
682 elements(prefix_names
)))>=0) {
683 tokval
.t_integer
+= PREFIX_ENUM_START
;
686 if ((tokval
.t_integer
=bsi(ourcopy
, special_names
,
687 elements(special_names
)))>=0)
688 return TOKEN_SPECIAL
;
689 if (!strcmp(ourcopy
, "seg"))
691 if (!strcmp(ourcopy
, "wrt"))
694 } else if (*bufptr
== '$' && !isnumchar(bufptr
[1])) {
696 * It's a $ sign with no following hex number; this must
697 * mean it's a Here token ($), evaluating to the current
698 * assembly location, or a Base token ($$), evaluating to
699 * the base of the current segment.
702 if (*bufptr
== '$') {
707 } else if (isnumstart(*bufptr
)) { /* now we've got a number */
712 while (isnumchar(*bufptr
)) {
715 if (*bufptr
== '.') {
717 * a floating point constant
720 while (isnumchar(*bufptr
)) {
724 tokval
.t_charptr
= r
;
728 tokval
.t_integer
= readnum(r
, &rn_error
);
730 return TOKEN_ERRNUM
; /* some malformation occurred */
731 tokval
.t_charptr
= NULL
;
733 } else if (*bufptr
== '\'' || *bufptr
== '"') {/* a char constant */
734 char quote
= *bufptr
++, *r
;
735 r
= tokval
.t_charptr
= bufptr
;
736 while (*bufptr
&& *bufptr
!= quote
) bufptr
++;
737 tokval
.t_inttwo
= bufptr
- r
; /* store full version */
739 return TOKEN_ERRNUM
; /* unmatched quotes */
740 tokval
.t_integer
= 0;
741 r
= bufptr
++; /* skip over final quote */
742 while (quote
!= *--r
) {
743 tokval
.t_integer
= (tokval
.t_integer
<<8) + (unsigned char) *r
;
746 } else if (*bufptr
== ';') { /* a comment has happened - stay */
748 } else if ((*bufptr
== '>' || *bufptr
== '<' ||
749 *bufptr
== '/' || *bufptr
== '%') && bufptr
[1] == *bufptr
) {
751 return (bufptr
[-2] == '>' ? TOKEN_SHR
:
752 bufptr
[-2] == '<' ? TOKEN_SHL
:
753 bufptr
[-2] == '/' ? TOKEN_SDIV
:
755 } else /* just an ordinary char */
756 return (unsigned char) (*bufptr
++);
759 /* return index of "string" in "array", or -1 if no match. */
/*
 * bsi: binary search for `string' among the `size' entries of `array'.
 * `i' and `j' are exclusive bounds on the candidate index, starting at
 * -1 and `size'; each step compares `string' with the probe entry
 * array[k] using strcmp() and keeps the half that can still hold it.
 * Returns -1 when no entry matches.
 * NOTE(review): this only works if `array' is sorted in strcmp() order
 * -- confirm for every table passed in.
 */
760 static int bsi (char *string
, char **array
, int size
) {
761 int i
= -1, j
= size
; /* always, i < index < j */
764 int l
= strcmp(string
, array
[k
]);
765 if (l
<0) /* it's in the first half */
767 else if (l
>0) /* it's in the second half */
769 else /* we've got it :) */
772 return -1; /* we haven't got it :( */
775 void cleanup_insn (insn
*i
) {
780 i
->eops
= i
->eops
->next
;
785 /* ------------- Evaluator begins here ------------------ */
787 static expr exprtempstorage
[1024], *tempptr
; /* store exprs in here */
790 * Add two vector datatypes. We have some bizarre behaviour on far-
791 * absolute segment types: we preserve them during addition _only_
792 * if one of the segments is a truly pure scalar.
794 static expr
*add_vectors(expr
*p
, expr
*q
) {
798 preserve
= is_really_simple(p
) || is_really_simple(q
);
800 while (p
->type
&& q
->type
&&
801 p
->type
< EXPR_SEGBASE
+SEG_ABS
&&
802 q
->type
< EXPR_SEGBASE
+SEG_ABS
)
803 if (p
->type
> q
->type
) {
804 tempptr
->type
= q
->type
;
805 tempptr
->value
= q
->value
;
807 } else if (p
->type
< q
->type
) {
808 tempptr
->type
= p
->type
;
809 tempptr
->value
= p
->value
;
811 } else { /* *p and *q have same type */
812 tempptr
->type
= p
->type
;
813 tempptr
->value
= p
->value
+ q
->value
;
817 (preserve
|| p
->type
< EXPR_SEGBASE
+SEG_ABS
)) {
818 tempptr
->type
= p
->type
;
819 tempptr
->value
= p
->value
;
823 (preserve
|| q
->type
< EXPR_SEGBASE
+SEG_ABS
)) {
824 tempptr
->type
= q
->type
;
825 tempptr
->value
= q
->value
;
828 (tempptr
++)->type
= 0;
834 * Multiply a vector by a scalar. Strip far-absolute segment part
837 static expr
*scalar_mult(expr
*vect
, long scalar
) {
840 while (p
->type
&& p
->type
< EXPR_SEGBASE
+SEG_ABS
) {
841 p
->value
= scalar
* (p
->value
);
849 static expr
*scalarvect (long scalar
) {
851 tempptr
->type
= EXPR_SIMPLE
;
852 tempptr
->value
= scalar
;
860 * Return TRUE if the argument is a simple scalar. (Or a far-
861 * absolute, which counts.)
863 static int is_simple (expr
*vect
) {
864 while (vect
->type
&& !vect
->value
)
868 if (vect
->type
!= EXPR_SIMPLE
)
872 } while (vect
->type
&& !vect
->value
);
873 if (vect
->type
&& vect
->type
< EXPR_SEGBASE
+SEG_ABS
) return 0;
878 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
881 static int is_really_simple (expr
*vect
) {
882 while (vect
->type
&& !vect
->value
)
886 if (vect
->type
!= EXPR_SIMPLE
)
890 } while (vect
->type
&& !vect
->value
);
891 if (vect
->type
) return 0;
896 * Return TRUE if the argument is relocatable (i.e. a simple
897 * scalar, plus at most one segment-base, plus possibly a WRT).
899 static int is_reloc (expr
*vect
) {
900 while (vect
->type
&& !vect
->value
)
904 if (vect
->type
< EXPR_SIMPLE
)
906 if (vect
->type
== EXPR_SIMPLE
) {
909 } while (vect
->type
&& !vect
->value
);
915 } while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
));
922 * Return the scalar part of a relocatable vector. (Including
923 * simple scalar vectors - those qualify as relocatable.)
925 static long reloc_value (expr
*vect
) {
926 while (vect
->type
&& !vect
->value
)
928 if (!vect
->type
) return 0;
929 if (vect
->type
== EXPR_SIMPLE
)
936 * Return the segment number of a relocatable vector, or NO_SEG for
939 static long reloc_seg (expr
*vect
) {
940 while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
))
942 if (vect
->type
== EXPR_SIMPLE
) {
945 } while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
));
950 return vect
->type
- EXPR_SEGBASE
;
954 * Return the WRT segment number of a relocatable vector, or NO_SEG
955 * if no WRT part is present.
957 static long reloc_wrt (expr
*vect
) {
958 while (vect
->type
&& vect
->type
< EXPR_WRT
)
960 if (vect
->type
== EXPR_WRT
) {
966 static void eval_reset(void) {
967 tempptr
= exprtempstorage
; /* initialise temporary storage */
971 * The SEG operator: calculate the segment part of a relocatable
972 * value. Return NULL, as usual, if an error occurs. Report the
975 static expr
*segment_part (expr
*e
) {
979 error(ERR_NONFATAL
, "cannot apply SEG to a non-relocatable value");
985 error(ERR_NONFATAL
, "cannot apply SEG to a non-relocatable value");
987 } else if (seg
& SEG_ABS
)
988 return scalarvect(seg
& ~SEG_ABS
);
992 f
->type
= EXPR_SEGBASE
+outfmt
->segbase(seg
+1);
999 * Recursive-descent parser. Called with a single boolean operand,
1000 * which is TRUE if the evaluation is critical (i.e. unresolved
1001 * symbols are an error condition). Must update the global `i' to
1002 * reflect the token after the parsed string. May return NULL.
1004 * evaluate() should report its own errors: on return it is assumed
1005 * that if NULL has been returned, the error has already been
1010 * Grammar parsed is:
1012 * expr : expr0 [ WRT expr6 ]
1013 * expr0 : expr1 [ {|} expr1]
1014 * expr1 : expr2 [ {^} expr2]
1015 * expr2 : expr3 [ {&} expr3]
1016 * expr3 : expr4 [ {<<,>>} expr4...]
1017 * expr4 : expr5 [ {+,-} expr5...]
1018 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
1019 * expr6 : { ~,+,-,SEG } expr6
1026 static expr
*expr0(int), *expr1(int), *expr2(int), *expr3(int);
1027 static expr
*expr4(int), *expr5(int), *expr6(int);
1029 static expr
*expr0(int critical
) {
1032 e
= expr1(critical
);
1037 f
= expr1(critical
);
1040 if (!is_simple(e
) || !is_simple(f
)) {
1041 error(ERR_NONFATAL
, "`|' operator may only be applied to"
1044 e
= scalarvect (reloc_value(e
) | reloc_value(f
));
1049 static expr
*expr1(int critical
) {
1052 e
= expr2(critical
);
1057 f
= expr2(critical
);
1060 if (!is_simple(e
) || !is_simple(f
)) {
1061 error(ERR_NONFATAL
, "`^' operator may only be applied to"
1064 e
= scalarvect (reloc_value(e
) ^ reloc_value(f
));
1069 static expr
*expr2(int critical
) {
1072 e
= expr3(critical
);
1077 f
= expr3(critical
);
1080 if (!is_simple(e
) || !is_simple(f
)) {
1081 error(ERR_NONFATAL
, "`&' operator may only be applied to"
1084 e
= scalarvect (reloc_value(e
) & reloc_value(f
));
1089 static expr
*expr3(int critical
) {
1092 e
= expr4(critical
);
1095 while (i
== TOKEN_SHL
|| i
== TOKEN_SHR
) {
1098 f
= expr4(critical
);
1101 if (!is_simple(e
) || !is_simple(f
)) {
1102 error(ERR_NONFATAL
, "shift operator may only be applied to"
1107 e
= scalarvect (reloc_value(e
) << reloc_value(f
));
1110 e
= scalarvect (((unsigned long)reloc_value(e
)) >>
1118 static expr
*expr4(int critical
) {
1121 e
= expr5(critical
);
1124 while (i
== '+' || i
== '-') {
1127 f
= expr5(critical
);
1132 e
= add_vectors (e
, f
);
1135 e
= add_vectors (e
, scalar_mult(f
, -1L));
/*
 * expr5: the multiplicative level of the recursive-descent evaluator,
 *
 *     expr5 : expr6 [ {*,/,%,//,%%} expr6... ]
 *
 * `critical' is passed unchanged to expr6().  Operands are combined
 * left to right while the current token `i' is one of the operators.
 * `j' holds the operator being applied.  Every operator except `*'
 * needs two simple (scalar) operands and a non-zero right-hand side;
 * `*' scales one operand vector by the scalar value of the other and
 * reports an error when neither side is simple.
 */
1142 static expr
*expr5(int critical
) {
1145 e
= expr6(critical
);
/* The operator set must include `%'.  The original tested `*' twice
 * here, so a lone `%' never entered this loop and the unsigned modulo
 * computed further down could not be reached through it. */
1148 while (i
== '*' || i
== '/' || i
== '%' ||
1149 i
== TOKEN_SDIV
|| i
== TOKEN_SMOD
) {
1152 f
= expr6(critical
);
/* everything other than multiplication is scalar-only */
1155 if (j
!= '*' && (!is_simple(e
) || !is_simple(f
))) {
1156 error(ERR_NONFATAL
, "division operator may only be applied to"
/* refuse a zero divisor before any division or modulo is attempted */
1160 if (j
!= '*' && reloc_value(f
) == 0) {
1161 error(ERR_NONFATAL
, "division by zero");
/* multiplication: whichever side is the scalar scales the other */
1167 e
= scalar_mult (f
, reloc_value(e
));
1168 else if (is_simple(f
))
1169 e
= scalar_mult (e
, reloc_value(f
));
1171 error(ERR_NONFATAL
, "unable to multiply two "
1172 "non-scalar objects");
/* unsigned quotient */
1177 e
= scalarvect (((unsigned long)reloc_value(e
)) /
1178 ((unsigned long)reloc_value(f
)));
/* unsigned remainder */
1181 e
= scalarvect (((unsigned long)reloc_value(e
)) %
1182 ((unsigned long)reloc_value(f
)));
/* signed quotient (presumably the TOKEN_SDIV case -- confirm) */
1185 e
= scalarvect (((signed long)reloc_value(e
)) /
1186 ((signed long)reloc_value(f
)));
/* signed remainder (presumably the TOKEN_SMOD case -- confirm) */
1189 e
= scalarvect (((signed long)reloc_value(e
)) %
1190 ((signed long)reloc_value(f
)));
1197 static expr
*expr6(int critical
) {
1199 long label_seg
, label_ofs
;
1203 e
= expr6(critical
);
1206 return scalar_mult (e
, -1L);
1207 } else if (i
== '+') {
1209 return expr6(critical
);
1210 } else if (i
== '~') {
1212 e
= expr6(critical
);
1215 if (!is_simple(e
)) {
1216 error(ERR_NONFATAL
, "`~' operator may only be applied to"
1220 return scalarvect(~reloc_value(e
));
1221 } else if (i
== TOKEN_SEG
) {
1223 e
= expr6(critical
);
1226 return segment_part(e
);
1227 } else if (i
== '(') {
1229 e
= expr0(critical
);
1233 error(ERR_NONFATAL
, "expecting `)'");
1238 } else if (i
== TOKEN_NUM
|| i
== TOKEN_REG
|| i
== TOKEN_ID
||
1239 i
== TOKEN_HERE
|| i
== TOKEN_BASE
) {
1243 e
->type
= EXPR_SIMPLE
;
1244 e
->value
= tokval
.t_integer
;
1247 e
->type
= tokval
.t_integer
;
1254 * Since the whole line is parsed before the label it
1255 * defines is given to the label manager, we have
1256 * problems with lines such as
1258 * end: TIMES 512-(end-start) DB 0
1260 * where `end' is not known on pass one, despite not
1261 * really being a forward reference, and due to
1262 * criticality it is _needed_. Hence we check our label
1263 * against the currently defined one, and do our own
1264 * resolution of it if we have to.
1266 if (i
== TOKEN_BASE
) {
1269 } else if (i
== TOKEN_HERE
|| !strcmp(tokval
.t_charptr
, label
)) {
1272 } else if (!labelfunc(tokval
.t_charptr
, &label_seg
, &label_ofs
)) {
1273 if (critical
== 2) {
1274 error (ERR_NONFATAL
, "symbol `%s' undefined",
1277 } else if (critical
== 1) {
1278 error (ERR_NONFATAL
, "symbol `%s' not defined before use",
1287 e
->type
= EXPR_SIMPLE
;
1288 e
->value
= label_ofs
;
1289 if (label_seg
!=NO_SEG
) {
1291 tempptr
->type
= EXPR_SEGBASE
+ label_seg
;
1302 error(ERR_NONFATAL
, "expression syntax error");
1307 static expr
*evaluate (int critical
) {
1311 e
= expr0 (critical
);
1315 if (i
== TOKEN_WRT
) {
1317 error(ERR_NONFATAL
, "invalid left-hand operand to WRT");
1320 i
= nexttoken(); /* eat the WRT */
1321 f
= expr6 (critical
);
1325 e
= scalar_mult (e
, 1L); /* strip far-absolute segment part */
1327 expr
*g
= tempptr
++;
1328 tempptr
++->type
= 0;
1331 error(ERR_NONFATAL
, "invalid right-hand operand to WRT");
1334 g
->value
= reloc_seg(f
);
1335 if (g
->value
== NO_SEG
)
1336 g
->value
= reloc_value(f
) | SEG_ABS
;
1337 else if (!(g
->value
& SEG_ABS
) && !(g
->value
% 2) && critical
) {
1338 error(ERR_NONFATAL
, "invalid right-hand operand to WRT");
1341 e
= add_vectors (e
, g
);