1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
23 extern int in_abs_seg
; /* ABSOLUTE segment flag */
24 extern long abs_seg
; /* ABSOLUTE segment */
25 extern long abs_offset
; /* ABSOLUTE segment offset */
27 #include "regflags.c" /* List of register flags */
29 enum { /* special tokens */
30 S_BYTE
, S_DWORD
, S_FAR
, S_LONG
, S_NEAR
, S_NOSPLIT
, S_QWORD
,
31 S_SHORT
, S_STRICT
, S_TO
, S_TWORD
, S_WORD
34 static int is_comma_next(void);
37 static struct tokenval tokval
;
39 static struct ofmt
*outfmt
; /* Structure of addresses of output routines */
40 static loc_t
*location
; /* Pointer to current line's segment,offset */
42 void parser_global_info(struct ofmt
*output
, loc_t
* locp
)
48 insn
*parse_line(int pass
, char *buffer
, insn
* result
,
49 efunc errfunc
, evalfunc evaluate
, ldfunc ldef
)
53 struct eval_hints hints
;
55 result
->forw_ref
= FALSE
;
59 stdscan_bufptr
= buffer
;
60 i
= stdscan(NULL
, &tokval
);
62 result
->label
= NULL
; /* Assume no label */
63 result
->eops
= NULL
; /* must do this, whatever happens */
64 result
->operands
= 0; /* must initialise this */
66 if (i
== 0) { /* blank line - ignore */
67 result
->opcode
= -1; /* and no instruction either */
70 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
71 (i
!= TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
72 error(ERR_NONFATAL
, "label or instruction expected"
78 if (i
== TOKEN_ID
) { /* there's a label here */
79 result
->label
= tokval
.t_charptr
;
80 i
= stdscan(NULL
, &tokval
);
81 if (i
== ':') { /* skip over the optional colon */
82 i
= stdscan(NULL
, &tokval
);
84 error(ERR_WARNING
| ERR_WARN_OL
| ERR_PASS1
,
85 "label alone on a line without a colon might be in error");
87 if (i
!= TOKEN_INSN
|| tokval
.t_integer
!= I_EQU
) {
89 * FIXME: location->segment could be NO_SEG, in which case
90 * it is possible we should be passing 'abs_seg'. Look into this.
91 * Work out whether that is *really* what we should be doing.
92 * Generally fix things. I think this is right as it is, but
93 * am still not certain.
95 ldef(result
->label
, in_abs_seg
? abs_seg
: location
->segment
,
96 location
->offset
, NULL
, TRUE
, FALSE
, outfmt
, errfunc
);
101 result
->opcode
= -1; /* this line contains just a label */
108 while (i
== TOKEN_PREFIX
||
109 (i
== TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
])))
112 * Handle special case: the TIMES prefix.
114 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
117 i
= stdscan(NULL
, &tokval
);
119 evaluate(stdscan
, NULL
, &tokval
, NULL
, pass0
, error
, NULL
);
121 if (!value
) { /* but, error in evaluator */
122 result
->opcode
= -1; /* unrecoverable parse error: */
123 return result
; /* ignore this instruction */
125 if (!is_simple(value
)) {
127 "non-constant argument supplied to TIMES");
130 result
->times
= value
->value
;
131 if (value
->value
< 0) {
132 error(ERR_NONFATAL
, "TIMES value %d is negative",
138 if (result
->nprefix
== MAXPREFIX
)
140 "instruction has more than %d prefixes", MAXPREFIX
);
142 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
143 i
= stdscan(NULL
, &tokval
);
147 if (i
!= TOKEN_INSN
) {
148 if (result
->nprefix
> 0 && i
== 0) {
150 * Instruction prefixes are present, but no actual
151 * instruction. This is allowed: at this point we
152 * invent a notional instruction of RESB 0.
154 result
->opcode
= I_RESB
;
155 result
->operands
= 1;
156 result
->oprs
[0].type
= IMMEDIATE
;
157 result
->oprs
[0].offset
= 0L;
158 result
->oprs
[0].segment
= result
->oprs
[0].wrt
= NO_SEG
;
161 error(ERR_NONFATAL
, "parser: instruction expected");
167 result
->opcode
= tokval
.t_integer
;
168 result
->condition
= tokval
.t_inttwo
;
171 * RESB, RESW and RESD cannot be satisfied with incorrectly
172 * evaluated operands, since the correct values _must_ be known
173 * on the first pass. Hence, even in pass one, we set the
174 * `critical' flag on calling evaluate(), so that it will bomb
175 * out on undefined symbols. Nasty, but there's nothing we can
178 * For the moment, EQU has the same difficulty, so we'll
181 if (result
->opcode
== I_RESB
|| result
->opcode
== I_RESW
|| result
->opcode
== I_RESD
|| result
->opcode
== I_RESQ
|| result
->opcode
== I_REST
|| result
->opcode
== I_EQU
|| result
->opcode
== I_INCBIN
) { /* fbk */
184 critical
= (pass
== 2 ? 2 : 0);
186 if (result
->opcode
== I_DB
||
187 result
->opcode
== I_DW
||
188 result
->opcode
== I_DD
||
189 result
->opcode
== I_DQ
||
190 result
->opcode
== I_DT
|| result
->opcode
== I_INCBIN
) {
191 extop
*eop
, **tail
= &result
->eops
, **fixptr
;
194 result
->eops_float
= FALSE
;
197 * Begin to read the DB/DW/DD/DQ/DT/INCBIN operands.
200 i
= stdscan(NULL
, &tokval
);
204 eop
= *tail
= nasm_malloc(sizeof(extop
));
207 eop
->type
= EOT_NOTHING
;
210 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
211 eop
->type
= EOT_DB_STRING
;
212 eop
->stringval
= tokval
.t_charptr
;
213 eop
->stringlen
= tokval
.t_inttwo
;
214 i
= stdscan(NULL
, &tokval
); /* eat the comma */
218 if ((i
== TOKEN_FLOAT
&& is_comma_next()) || i
== '-') {
222 char *save
= stdscan_bufptr
;
223 i
= stdscan(NULL
, &tokval
);
225 if (i
!= TOKEN_FLOAT
|| !is_comma_next()) {
226 stdscan_bufptr
= save
;
227 i
= tokval
.t_type
= '-';
231 if (i
== TOKEN_FLOAT
) {
232 eop
->type
= EOT_DB_STRING
;
233 result
->eops_float
= TRUE
;
234 if (result
->opcode
== I_DD
)
236 else if (result
->opcode
== I_DQ
)
238 else if (result
->opcode
== I_DT
)
241 error(ERR_NONFATAL
, "floating-point constant"
242 " encountered in `D%c' instruction",
243 result
->opcode
== I_DW
? 'W' : 'B');
245 * fix suggested by Pedro Gimeno... original line
247 * eop->type = EOT_NOTHING;
252 nasm_realloc(eop
, sizeof(extop
) + eop
->stringlen
);
255 eop
->stringval
= (char *)eop
+ sizeof(extop
);
256 if (eop
->stringlen
< 4 ||
257 !float_const(tokval
.t_charptr
, sign
,
258 (unsigned char *)eop
->stringval
,
259 eop
->stringlen
, error
))
260 eop
->type
= EOT_NOTHING
;
261 i
= stdscan(NULL
, &tokval
); /* eat the comma */
269 value
= evaluate(stdscan
, NULL
, &tokval
, NULL
,
270 critical
, error
, NULL
);
272 if (!value
) { /* error in evaluator */
273 result
->opcode
= -1; /* unrecoverable parse error: */
274 return result
; /* ignore this instruction */
276 if (is_unknown(value
)) {
277 eop
->type
= EOT_DB_NUMBER
;
278 eop
->offset
= 0; /* doesn't matter what we put */
279 eop
->segment
= eop
->wrt
= NO_SEG
; /* likewise */
280 } else if (is_reloc(value
)) {
281 eop
->type
= EOT_DB_NUMBER
;
282 eop
->offset
= reloc_value(value
);
283 eop
->segment
= reloc_seg(value
);
284 eop
->wrt
= reloc_wrt(value
);
287 "operand %d: expression is not simple"
288 " or relocatable", oper_num
);
293 * We're about to call stdscan(), which will eat the
294 * comma that we're currently sitting on between
295 * arguments. However, we'd better check first that it
298 if (i
== 0) /* also could be EOL */
301 error(ERR_NONFATAL
, "comma expected after operand %d",
303 result
->opcode
= -1; /* unrecoverable parse error: */
304 return result
; /* ignore this instruction */
308 if (result
->opcode
== I_INCBIN
) {
310 * Correct syntax for INCBIN is that there should be
311 * one string operand, followed by one or two numeric
314 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
315 error(ERR_NONFATAL
, "`incbin' expects a file name");
316 else if (result
->eops
->next
&&
317 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
318 error(ERR_NONFATAL
, "`incbin': second parameter is",
320 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
321 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
322 error(ERR_NONFATAL
, "`incbin': third parameter is",
324 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
325 result
->eops
->next
->next
->next
)
327 "`incbin': more than three parameters");
331 * If we reach here, one of the above errors happened.
332 * Throw the instruction away.
336 } else /* DB ... */ if (oper_num
== 0)
337 error(ERR_WARNING
| ERR_PASS1
,
338 "no operand for data declaration");
340 result
->operands
= oper_num
;
345 /* right. Now we begin to parse the operands. There may be up to three
346 * of these, separated by commas, and terminated by a zero token. */
348 for (operand
= 0; operand
< 3; operand
++) {
349 expr
*value
; /* used most of the time */
350 int mref
; /* is this going to be a memory ref? */
351 int bracket
; /* is it a [] mref, or a & mref? */
354 result
->oprs
[operand
].addr_size
= 0; /* have to zero this whatever */
355 result
->oprs
[operand
].eaflags
= 0; /* and this */
356 result
->oprs
[operand
].opflags
= 0;
358 i
= stdscan(NULL
, &tokval
);
360 break; /* end of operands: get out of here */
361 result
->oprs
[operand
].type
= 0; /* so far, no override */
362 while (i
== TOKEN_SPECIAL
) { /* size specifiers */
363 switch ((int)tokval
.t_integer
) {
365 if (!setsize
) /* we want to use only the first */
366 result
->oprs
[operand
].type
|= BITS8
;
371 result
->oprs
[operand
].type
|= BITS16
;
377 result
->oprs
[operand
].type
|= BITS32
;
382 result
->oprs
[operand
].type
|= BITS64
;
387 result
->oprs
[operand
].type
|= BITS80
;
391 result
->oprs
[operand
].type
|= TO
;
394 result
->oprs
[operand
].type
|= STRICT
;
397 result
->oprs
[operand
].type
|= FAR
;
400 result
->oprs
[operand
].type
|= NEAR
;
403 result
->oprs
[operand
].type
|= SHORT
;
406 error(ERR_NONFATAL
, "invalid operand size specification");
408 i
= stdscan(NULL
, &tokval
);
411 if (i
== '[' || i
== '&') { /* memory reference */
413 bracket
= (i
== '[');
414 i
= stdscan(NULL
, &tokval
);
415 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
416 if (tasm_compatible_mode
) {
417 switch ((int)tokval
.t_integer
) {
418 /* For TASM compatibility a size override inside the
419 * brackets changes the size of the operand, not the
420 * address type of the operand as it does in standard
421 * NASM syntax. Hence:
423 * mov eax,[DWORD val]
425 * is valid syntax in TASM compatibility mode. Note that
426 * you lose the ability to override the default address
427 * type for the instruction, but we never use anything
428 * but 32-bit flat model addressing in our code.
431 result
->oprs
[operand
].type
|= BITS8
;
434 result
->oprs
[operand
].type
|= BITS16
;
438 result
->oprs
[operand
].type
|= BITS32
;
441 result
->oprs
[operand
].type
|= BITS64
;
444 result
->oprs
[operand
].type
|= BITS80
;
448 "invalid operand size specification");
451 /* Standard NASM compatible syntax */
452 switch ((int)tokval
.t_integer
) {
454 result
->oprs
[operand
].eaflags
|= EAF_TIMESTWO
;
457 result
->oprs
[operand
].eaflags
|= EAF_BYTEOFFS
;
460 result
->oprs
[operand
].addr_size
= 16;
461 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
465 result
->oprs
[operand
].addr_size
= 32;
466 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
469 error(ERR_NONFATAL
, "invalid size specification in"
470 " effective address");
473 i
= stdscan(NULL
, &tokval
);
475 } else { /* immediate operand, or register */
477 bracket
= FALSE
; /* placate optimisers */
480 if ((result
->oprs
[operand
].type
& FAR
) && !mref
&&
481 result
->opcode
!= I_JMP
&& result
->opcode
!= I_CALL
) {
482 error(ERR_NONFATAL
, "invalid use of FAR operand specifier");
485 value
= evaluate(stdscan
, NULL
, &tokval
,
486 &result
->oprs
[operand
].opflags
,
487 critical
, error
, &hints
);
489 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
490 result
->forw_ref
= TRUE
;
492 if (!value
) { /* error in evaluator */
493 result
->opcode
= -1; /* unrecoverable parse error: */
494 return result
; /* ignore this instruction */
496 if (i
== ':' && mref
) { /* it was seg:offset */
498 * Process the segment override.
500 if (value
[1].type
!= 0 || value
->value
!= 1 ||
501 REG_SREG
& ~reg_flags
[value
->type
])
502 error(ERR_NONFATAL
, "invalid segment override");
503 else if (result
->nprefix
== MAXPREFIX
)
505 "instruction has more than %d prefixes", MAXPREFIX
);
507 result
->prefixes
[result
->nprefix
++] = value
->type
;
509 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
510 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
511 switch ((int)tokval
.t_integer
) {
513 result
->oprs
[operand
].addr_size
= 16;
517 result
->oprs
[operand
].addr_size
= 32;
520 error(ERR_NONFATAL
, "invalid size specification in"
521 " effective address");
523 i
= stdscan(NULL
, &tokval
);
525 value
= evaluate(stdscan
, NULL
, &tokval
,
526 &result
->oprs
[operand
].opflags
,
527 critical
, error
, &hints
);
529 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
530 result
->forw_ref
= TRUE
;
532 /* and get the offset */
533 if (!value
) { /* but, error in evaluator */
534 result
->opcode
= -1; /* unrecoverable parse error: */
535 return result
; /* ignore this instruction */
538 if (mref
&& bracket
) { /* find ] at the end */
540 error(ERR_NONFATAL
, "parser: expecting ]");
541 do { /* error recovery again */
542 i
= stdscan(NULL
, &tokval
);
543 } while (i
!= 0 && i
!= ',');
544 } else /* we got the required ] */
545 i
= stdscan(NULL
, &tokval
);
546 } else { /* immediate operand */
547 if (i
!= 0 && i
!= ',' && i
!= ':') {
548 error(ERR_NONFATAL
, "comma or end of line expected");
549 do { /* error recovery */
550 i
= stdscan(NULL
, &tokval
);
551 } while (i
!= 0 && i
!= ',');
552 } else if (i
== ':') {
553 result
->oprs
[operand
].type
|= COLON
;
557 /* now convert the exprs returned from evaluate() into operand
560 if (mref
) { /* it's a memory reference */
562 int b
, i
, s
; /* basereg, indexreg, scale */
565 b
= i
= -1, o
= s
= 0;
566 result
->oprs
[operand
].hintbase
= hints
.base
;
567 result
->oprs
[operand
].hinttype
= hints
.type
;
569 if (e
->type
&& e
->type
<= EXPR_REG_END
) { /* this bit's a register */
570 if (e
->value
== 1) /* in fact it can be basereg */
572 else /* no, it has to be indexreg */
573 i
= e
->type
, s
= e
->value
;
576 if (e
->type
&& e
->type
<= EXPR_REG_END
) { /* it's a 2nd register */
577 if (b
!= -1) /* If the first was the base, ... */
578 i
= e
->type
, s
= e
->value
; /* second has to be indexreg */
580 else if (e
->value
!= 1) { /* If both want to be index */
582 "beroset-p-592-invalid effective address");
589 if (e
->type
!= 0) { /* is there an offset? */
590 if (e
->type
<= EXPR_REG_END
) { /* in fact, is there an error? */
592 "beroset-p-603-invalid effective address");
596 if (e
->type
== EXPR_UNKNOWN
) {
597 o
= 0; /* doesn't matter what */
598 result
->oprs
[operand
].wrt
= NO_SEG
; /* nor this */
599 result
->oprs
[operand
].segment
= NO_SEG
; /* or this */
601 e
++; /* go to the end of the line */
603 if (e
->type
== EXPR_SIMPLE
) {
607 if (e
->type
== EXPR_WRT
) {
608 result
->oprs
[operand
].wrt
= e
->value
;
611 result
->oprs
[operand
].wrt
= NO_SEG
;
613 * Look for a segment base type.
615 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
617 "beroset-p-630-invalid effective address");
621 while (e
->type
&& e
->value
== 0)
623 if (e
->type
&& e
->value
!= 1) {
625 "beroset-p-637-invalid effective address");
630 result
->oprs
[operand
].segment
=
631 e
->type
- EXPR_SEGBASE
;
634 result
->oprs
[operand
].segment
= NO_SEG
;
635 while (e
->type
&& e
->value
== 0)
639 "beroset-p-650-invalid effective address");
647 result
->oprs
[operand
].wrt
= NO_SEG
;
648 result
->oprs
[operand
].segment
= NO_SEG
;
651 if (e
->type
!= 0) { /* there'd better be nothing left! */
653 "beroset-p-663-invalid effective address");
658 result
->oprs
[operand
].type
|= MEMORY
;
659 if (b
== -1 && (i
== -1 || s
== 0))
660 result
->oprs
[operand
].type
|= MEM_OFFS
;
661 result
->oprs
[operand
].basereg
= b
;
662 result
->oprs
[operand
].indexreg
= i
;
663 result
->oprs
[operand
].scale
= s
;
664 result
->oprs
[operand
].offset
= o
;
665 } else { /* it's not a memory reference */
667 if (is_just_unknown(value
)) { /* it's immediate but unknown */
668 result
->oprs
[operand
].type
|= IMMEDIATE
;
669 result
->oprs
[operand
].offset
= 0; /* don't care */
670 result
->oprs
[operand
].segment
= NO_SEG
; /* don't care again */
671 result
->oprs
[operand
].wrt
= NO_SEG
; /* still don't care */
672 } else if (is_reloc(value
)) { /* it's immediate */
673 result
->oprs
[operand
].type
|= IMMEDIATE
;
674 result
->oprs
[operand
].offset
= reloc_value(value
);
675 result
->oprs
[operand
].segment
= reloc_seg(value
);
676 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
677 if (is_simple(value
)) {
678 if (reloc_value(value
) == 1)
679 result
->oprs
[operand
].type
|= UNITY
;
680 if (optimizing
>= 0 &&
681 !(result
->oprs
[operand
].type
& STRICT
)) {
682 if (reloc_value(value
) >= -128 &&
683 reloc_value(value
) <= 127)
684 result
->oprs
[operand
].type
|= SBYTE
;
687 } else { /* it's a register */
689 if (value
->type
>= EXPR_SIMPLE
|| value
->value
!= 1) {
690 error(ERR_NONFATAL
, "invalid operand type");
696 * check that its only 1 register, not an expression...
698 for (i
= 1; value
[i
].type
; i
++)
699 if (value
[i
].value
) {
700 error(ERR_NONFATAL
, "invalid operand type");
705 /* clear overrides, except TO which applies to FPU regs */
706 if (result
->oprs
[operand
].type
& ~TO
) {
708 * we want to produce a warning iff the specified size
709 * is different from the register size
711 i
= result
->oprs
[operand
].type
& SIZE_MASK
;
715 result
->oprs
[operand
].type
&= TO
;
716 result
->oprs
[operand
].type
|= REGISTER
;
717 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
718 result
->oprs
[operand
].basereg
= value
->type
;
720 if (i
&& (result
->oprs
[operand
].type
& SIZE_MASK
) != i
)
721 error(ERR_WARNING
| ERR_PASS1
,
722 "register size specification ignored");
727 result
->operands
= operand
; /* set operand count */
729 while (operand
< 3) /* clear remaining operands */
730 result
->oprs
[operand
++].type
= 0;
733 * Transform RESW, RESD, RESQ, REST into RESB.
735 switch (result
->opcode
) {
737 result
->opcode
= I_RESB
;
738 result
->oprs
[0].offset
*= 2;
741 result
->opcode
= I_RESB
;
742 result
->oprs
[0].offset
*= 4;
745 result
->opcode
= I_RESB
;
746 result
->oprs
[0].offset
*= 8;
749 result
->opcode
= I_RESB
;
750 result
->oprs
[0].offset
*= 10;
757 static int is_comma_next(void)
764 i
= stdscan(NULL
, &tv
);
766 return (i
== ',' || i
== ';' || !i
);
769 void cleanup_insn(insn
* i
)
775 i
->eops
= i
->eops
->next
;