1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
25 extern int in_abs_seg
; /* ABSOLUTE segment flag */
26 extern int32_t abs_seg
; /* ABSOLUTE segment */
27 extern int32_t abs_offset
; /* ABSOLUTE segment offset */
29 #include "regflags.c" /* List of register flags */
31 static int is_comma_next(void);
34 static struct tokenval tokval
;
36 static struct ofmt
*outfmt
; /* Structure of addresses of output routines */
37 static loc_t
*location
; /* Pointer to current line's segment,offset */
39 void parser_global_info(struct ofmt
*output
, loc_t
* locp
)
45 insn
*parse_line(int pass
, char *buffer
, insn
* result
,
46 efunc errfunc
, evalfunc evaluate
, ldfunc ldef
)
50 struct eval_hints hints
;
52 result
->forw_ref
= FALSE
;
56 stdscan_bufptr
= buffer
;
57 i
= stdscan(NULL
, &tokval
);
59 result
->label
= NULL
; /* Assume no label */
60 result
->eops
= NULL
; /* must do this, whatever happens */
61 result
->operands
= 0; /* must initialize this */
63 if (i
== 0) { /* blank line - ignore */
64 result
->opcode
= -1; /* and no instruction either */
67 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
68 (i
!= TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
69 error(ERR_NONFATAL
, "label or instruction expected"
75 if (i
== TOKEN_ID
) { /* there's a label here */
76 result
->label
= tokval
.t_charptr
;
77 i
= stdscan(NULL
, &tokval
);
78 if (i
== ':') { /* skip over the optional colon */
79 i
= stdscan(NULL
, &tokval
);
81 error(ERR_WARNING
| ERR_WARN_OL
| ERR_PASS1
,
82 "label alone on a line without a colon might be in error");
84 if (i
!= TOKEN_INSN
|| tokval
.t_integer
!= I_EQU
) {
86 * FIXME: location->segment could be NO_SEG, in which case
87 * it is possible we should be passing 'abs_seg'. Look into this.
88 * Work out whether that is *really* what we should be doing.
89 * Generally fix things. I think this is right as it is, but
90 * am still not certain.
92 ldef(result
->label
, in_abs_seg
? abs_seg
: location
->segment
,
93 location
->offset
, NULL
, TRUE
, FALSE
, outfmt
, errfunc
);
98 result
->opcode
= -1; /* this line contains just a label */
105 while (i
== TOKEN_PREFIX
||
106 (i
== TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
])))
109 * Handle special case: the TIMES prefix.
111 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
114 i
= stdscan(NULL
, &tokval
);
116 evaluate(stdscan
, NULL
, &tokval
, NULL
, pass0
, error
, NULL
);
118 if (!value
) { /* but, error in evaluator */
119 result
->opcode
= -1; /* unrecoverable parse error: */
120 return result
; /* ignore this instruction */
122 if (!is_simple(value
)) {
124 "non-constant argument supplied to TIMES");
127 result
->times
= value
->value
;
128 if (value
->value
< 0) {
129 error(ERR_NONFATAL
, "TIMES value %d is negative",
135 if (result
->nprefix
== MAXPREFIX
)
137 "instruction has more than %d prefixes", MAXPREFIX
);
139 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
140 i
= stdscan(NULL
, &tokval
);
144 if (i
!= TOKEN_INSN
) {
145 if (result
->nprefix
> 0 && i
== 0) {
147 * Instruction prefixes are present, but no actual
148 * instruction. This is allowed: at this point we
149 * invent a notional instruction of RESB 0.
151 result
->opcode
= I_RESB
;
152 result
->operands
= 1;
153 result
->oprs
[0].type
= IMMEDIATE
;
154 result
->oprs
[0].offset
= 0L;
155 result
->oprs
[0].segment
= result
->oprs
[0].wrt
= NO_SEG
;
158 error(ERR_NONFATAL
, "parser: instruction expected");
164 result
->opcode
= tokval
.t_integer
;
165 result
->condition
= tokval
.t_inttwo
;
168 * RESB, RESW and RESD cannot be satisfied with incorrectly
169 * evaluated operands, since the correct values _must_ be known
170 * on the first pass. Hence, even in pass one, we set the
171 * `critical' flag on calling evaluate(), so that it will bomb
172 * out on undefined symbols. Nasty, but there's nothing we can
175 * For the moment, EQU has the same difficulty, so we'll
178 if (result
->opcode
== I_RESB
|| result
->opcode
== I_RESW
||
179 result
->opcode
== I_RESD
|| result
->opcode
== I_RESQ
||
180 result
->opcode
== I_REST
|| result
->opcode
== I_RESO
||
181 result
->opcode
== I_EQU
|| result
->opcode
== I_INCBIN
) {
184 critical
= (pass
== 2 ? 2 : 0);
186 if (result
->opcode
== I_DB
|| result
->opcode
== I_DW
||
187 result
->opcode
== I_DD
|| result
->opcode
== I_DQ
||
188 result
->opcode
== I_DT
|| result
->opcode
== I_DO
||
189 result
->opcode
== I_INCBIN
) {
190 extop
*eop
, **tail
= &result
->eops
, **fixptr
;
193 result
->eops_float
= FALSE
;
196 * Begin to read the DB/DW/DD/DQ/DT/DO/INCBIN operands.
199 i
= stdscan(NULL
, &tokval
);
203 eop
= *tail
= nasm_malloc(sizeof(extop
));
206 eop
->type
= EOT_NOTHING
;
209 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
210 eop
->type
= EOT_DB_STRING
;
211 eop
->stringval
= tokval
.t_charptr
;
212 eop
->stringlen
= tokval
.t_inttwo
;
213 i
= stdscan(NULL
, &tokval
); /* eat the comma */
217 if ((i
== TOKEN_FLOAT
&& is_comma_next())
218 || i
== '-' || i
== '+') {
221 if (i
== '+' || i
== '-') {
222 char *save
= stdscan_bufptr
;
224 sign
= (i
== '-') ? -1 : 1;
225 i
= stdscan(NULL
, &tokval
);
226 if (i
!= TOKEN_FLOAT
|| !is_comma_next()) {
227 stdscan_bufptr
= save
;
228 i
= tokval
.t_type
= token
;
232 if (i
== TOKEN_FLOAT
) {
233 eop
->type
= EOT_DB_STRING
;
234 result
->eops_float
= TRUE
;
235 switch (result
->opcode
) {
252 error(ERR_NONFATAL
, "floating-point constant"
253 " encountered in `db' instruction");
255 * fix suggested by Pedro Gimeno... original line
257 * eop->type = EOT_NOTHING;
262 eop
= nasm_realloc(eop
, sizeof(extop
) + eop
->stringlen
);
265 eop
->stringval
= (char *)eop
+ sizeof(extop
);
266 if (!eop
->stringlen
||
267 !float_const(tokval
.t_charptr
, sign
,
268 (uint8_t *)eop
->stringval
,
269 eop
->stringlen
, error
))
270 eop
->type
= EOT_NOTHING
;
271 i
= stdscan(NULL
, &tokval
); /* eat the comma */
279 value
= evaluate(stdscan
, NULL
, &tokval
, NULL
,
280 critical
, error
, NULL
);
282 if (!value
) { /* error in evaluator */
283 result
->opcode
= -1; /* unrecoverable parse error: */
284 return result
; /* ignore this instruction */
286 if (is_unknown(value
)) {
287 eop
->type
= EOT_DB_NUMBER
;
288 eop
->offset
= 0; /* doesn't matter what we put */
289 eop
->segment
= eop
->wrt
= NO_SEG
; /* likewise */
290 } else if (is_reloc(value
)) {
291 eop
->type
= EOT_DB_NUMBER
;
292 eop
->offset
= reloc_value(value
);
293 eop
->segment
= reloc_seg(value
);
294 eop
->wrt
= reloc_wrt(value
);
297 "operand %d: expression is not simple"
298 " or relocatable", oper_num
);
303 * We're about to call stdscan(), which will eat the
304 * comma that we're currently sitting on between
305 * arguments. However, we'd better check first that it
308 if (i
== 0) /* also could be EOL */
311 error(ERR_NONFATAL
, "comma expected after operand %d",
313 result
->opcode
= -1; /* unrecoverable parse error: */
314 return result
; /* ignore this instruction */
318 if (result
->opcode
== I_INCBIN
) {
320 * Correct syntax for INCBIN is that there should be
321 * one string operand, followed by one or two numeric
324 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
325 error(ERR_NONFATAL
, "`incbin' expects a file name");
326 else if (result
->eops
->next
&&
327 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
328 error(ERR_NONFATAL
, "`incbin': second parameter is",
330 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
331 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
332 error(ERR_NONFATAL
, "`incbin': third parameter is",
334 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
335 result
->eops
->next
->next
->next
)
337 "`incbin': more than three parameters");
341 * If we reach here, one of the above errors happened.
342 * Throw the instruction away.
346 } else /* DB ... */ if (oper_num
== 0)
347 error(ERR_WARNING
| ERR_PASS1
,
348 "no operand for data declaration");
350 result
->operands
= oper_num
;
355 /* right. Now we begin to parse the operands. There may be up to four
356 * of these, separated by commas, and terminated by a zero token. */
358 for (operand
= 0; operand
< MAX_OPERANDS
; operand
++) {
359 expr
*value
; /* used most of the time */
360 int mref
; /* is this going to be a memory ref? */
361 int bracket
; /* is it a [] mref, or a & mref? */
364 result
->oprs
[operand
].addr_size
= 0; /* have to zero this whatever */
365 result
->oprs
[operand
].eaflags
= 0; /* and this */
366 result
->oprs
[operand
].opflags
= 0;
368 i
= stdscan(NULL
, &tokval
);
370 break; /* end of operands: get out of here */
371 result
->oprs
[operand
].type
= 0; /* so far, no override */
372 while (i
== TOKEN_SPECIAL
) { /* size specifiers */
373 switch ((int)tokval
.t_integer
) {
375 if (!setsize
) /* we want to use only the first */
376 result
->oprs
[operand
].type
|= BITS8
;
381 result
->oprs
[operand
].type
|= BITS16
;
387 result
->oprs
[operand
].type
|= BITS32
;
392 result
->oprs
[operand
].type
|= BITS64
;
397 result
->oprs
[operand
].type
|= BITS80
;
402 result
->oprs
[operand
].type
|= BITS128
;
406 result
->oprs
[operand
].type
|= TO
;
409 result
->oprs
[operand
].type
|= STRICT
;
412 result
->oprs
[operand
].type
|= FAR
;
415 result
->oprs
[operand
].type
|= NEAR
;
418 result
->oprs
[operand
].type
|= SHORT
;
421 error(ERR_NONFATAL
, "invalid operand size specification");
423 i
= stdscan(NULL
, &tokval
);
426 if (i
== '[' || i
== '&') { /* memory reference */
428 bracket
= (i
== '[');
429 while ((i
= stdscan(NULL
, &tokval
)) == TOKEN_SPECIAL
) {
430 /* check for address directives */
431 if (tasm_compatible_mode
) {
432 switch ((int)tokval
.t_integer
) {
433 /* For TASM compatibility a size override inside the
434 * brackets changes the size of the operand, not the
435 * address type of the operand as it does in standard
436 * NASM syntax. Hence:
438 * mov eax,[DWORD val]
440 * is valid syntax in TASM compatibility mode. Note that
441 * you lose the ability to override the default address
442 * type for the instruction, but we never use anything
443 * but 32-bit flat model addressing in our code.
446 result
->oprs
[operand
].type
|= BITS8
;
449 result
->oprs
[operand
].type
|= BITS16
;
453 result
->oprs
[operand
].type
|= BITS32
;
456 result
->oprs
[operand
].type
|= BITS64
;
459 result
->oprs
[operand
].type
|= BITS80
;
462 result
->oprs
[operand
].type
|= BITS128
;
466 "invalid operand size specification");
469 /* Standard NASM compatible syntax */
470 switch ((int)tokval
.t_integer
) {
472 result
->oprs
[operand
].eaflags
|= EAF_TIMESTWO
;
475 result
->oprs
[operand
].eaflags
|= EAF_REL
;
478 result
->oprs
[operand
].eaflags
|= EAF_ABS
;
481 result
->oprs
[operand
].eaflags
|= EAF_BYTEOFFS
;
484 result
->oprs
[operand
].addr_size
= 16;
485 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
489 result
->oprs
[operand
].addr_size
= 32;
490 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
493 result
->oprs
[operand
].addr_size
= 64;
494 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
497 error(ERR_NONFATAL
, "invalid size specification in"
498 " effective address");
502 } else { /* immediate operand, or register */
504 bracket
= FALSE
; /* placate optimisers */
507 if ((result
->oprs
[operand
].type
& FAR
) && !mref
&&
508 result
->opcode
!= I_JMP
&& result
->opcode
!= I_CALL
) {
509 error(ERR_NONFATAL
, "invalid use of FAR operand specifier");
512 value
= evaluate(stdscan
, NULL
, &tokval
,
513 &result
->oprs
[operand
].opflags
,
514 critical
, error
, &hints
);
516 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
517 result
->forw_ref
= TRUE
;
519 if (!value
) { /* error in evaluator */
520 result
->opcode
= -1; /* unrecoverable parse error: */
521 return result
; /* ignore this instruction */
523 if (i
== ':' && mref
) { /* it was seg:offset */
525 * Process the segment override.
527 if (value
[1].type
!= 0 || value
->value
!= 1 ||
528 REG_SREG
& ~reg_flags
[value
->type
])
529 error(ERR_NONFATAL
, "invalid segment override");
530 else if (result
->nprefix
== MAXPREFIX
)
532 "instruction has more than %d prefixes", MAXPREFIX
);
534 result
->prefixes
[result
->nprefix
++] = value
->type
;
535 if (!(REG_FSGS
& ~reg_flags
[value
->type
]))
536 result
->oprs
[operand
].eaflags
|= EAF_FSGS
;
539 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
540 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
541 switch ((int)tokval
.t_integer
) {
543 result
->oprs
[operand
].addr_size
= 16;
547 result
->oprs
[operand
].addr_size
= 32;
550 result
->oprs
[operand
].addr_size
= 64;
553 error(ERR_NONFATAL
, "invalid size specification in"
554 " effective address");
556 i
= stdscan(NULL
, &tokval
);
558 value
= evaluate(stdscan
, NULL
, &tokval
,
559 &result
->oprs
[operand
].opflags
,
560 critical
, error
, &hints
);
562 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
563 result
->forw_ref
= TRUE
;
565 /* and get the offset */
566 if (!value
) { /* but, error in evaluator */
567 result
->opcode
= -1; /* unrecoverable parse error: */
568 return result
; /* ignore this instruction */
571 if (mref
&& bracket
) { /* find ] at the end */
573 error(ERR_NONFATAL
, "parser: expecting ]");
574 do { /* error recovery again */
575 i
= stdscan(NULL
, &tokval
);
576 } while (i
!= 0 && i
!= ',');
577 } else /* we got the required ] */
578 i
= stdscan(NULL
, &tokval
);
579 } else { /* immediate operand */
580 if (i
!= 0 && i
!= ',' && i
!= ':') {
581 error(ERR_NONFATAL
, "comma or end of line expected");
582 do { /* error recovery */
583 i
= stdscan(NULL
, &tokval
);
584 } while (i
!= 0 && i
!= ',');
585 } else if (i
== ':') {
586 result
->oprs
[operand
].type
|= COLON
;
590 /* now convert the exprs returned from evaluate() into operand
593 if (mref
) { /* it's a memory reference */
595 int b
, i
, s
; /* basereg, indexreg, scale */
596 int64_t o
; /* offset */
598 b
= i
= -1, o
= s
= 0;
599 result
->oprs
[operand
].hintbase
= hints
.base
;
600 result
->oprs
[operand
].hinttype
= hints
.type
;
602 if (e
->type
&& e
->type
<= EXPR_REG_END
) { /* this bit's a register */
603 if (e
->value
== 1) /* in fact it can be basereg */
605 else /* no, it has to be indexreg */
606 i
= e
->type
, s
= e
->value
;
609 if (e
->type
&& e
->type
<= EXPR_REG_END
) { /* it's a 2nd register */
610 if (b
!= -1) /* If the first was the base, ... */
611 i
= e
->type
, s
= e
->value
; /* second has to be indexreg */
613 else if (e
->value
!= 1) { /* If both want to be index */
615 "beroset-p-592-invalid effective address");
622 if (e
->type
!= 0) { /* is there an offset? */
623 if (e
->type
<= EXPR_REG_END
) { /* in fact, is there an error? */
625 "beroset-p-603-invalid effective address");
629 if (e
->type
== EXPR_UNKNOWN
) {
630 o
= 0; /* doesn't matter what */
631 result
->oprs
[operand
].wrt
= NO_SEG
; /* nor this */
632 result
->oprs
[operand
].segment
= NO_SEG
; /* or this */
634 e
++; /* go to the end of the line */
636 if (e
->type
== EXPR_SIMPLE
) {
640 if (e
->type
== EXPR_WRT
) {
641 result
->oprs
[operand
].wrt
= e
->value
;
644 result
->oprs
[operand
].wrt
= NO_SEG
;
646 * Look for a segment base type.
648 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
650 "beroset-p-630-invalid effective address");
654 while (e
->type
&& e
->value
== 0)
656 if (e
->type
&& e
->value
!= 1) {
658 "beroset-p-637-invalid effective address");
663 result
->oprs
[operand
].segment
=
664 e
->type
- EXPR_SEGBASE
;
667 result
->oprs
[operand
].segment
= NO_SEG
;
668 while (e
->type
&& e
->value
== 0)
672 "beroset-p-650-invalid effective address");
680 result
->oprs
[operand
].wrt
= NO_SEG
;
681 result
->oprs
[operand
].segment
= NO_SEG
;
684 if (e
->type
!= 0) { /* there'd better be nothing left! */
686 "beroset-p-663-invalid effective address");
691 /* It is memory, but it can match any r/m operand */
692 result
->oprs
[operand
].type
|= MEMORY_ANY
;
694 if (b
== -1 && (i
== -1 || s
== 0)) {
695 int is_rel
= globalbits
== 64 &&
696 !(result
->oprs
[operand
].eaflags
& EAF_ABS
) &&
698 !(result
->oprs
[operand
].eaflags
& EAF_FSGS
)) ||
699 (result
->oprs
[operand
].eaflags
& EAF_REL
));
701 result
->oprs
[operand
].type
|= is_rel
? IP_REL
: MEM_OFFS
;
703 result
->oprs
[operand
].basereg
= b
;
704 result
->oprs
[operand
].indexreg
= i
;
705 result
->oprs
[operand
].scale
= s
;
706 result
->oprs
[operand
].offset
= o
;
707 } else { /* it's not a memory reference */
709 if (is_just_unknown(value
)) { /* it's immediate but unknown */
710 result
->oprs
[operand
].type
|= IMMEDIATE
;
711 result
->oprs
[operand
].offset
= 0; /* don't care */
712 result
->oprs
[operand
].segment
= NO_SEG
; /* don't care again */
713 result
->oprs
[operand
].wrt
= NO_SEG
; /* still don't care */
714 } else if (is_reloc(value
)) { /* it's immediate */
715 result
->oprs
[operand
].type
|= IMMEDIATE
;
716 result
->oprs
[operand
].offset
= reloc_value(value
);
717 result
->oprs
[operand
].segment
= reloc_seg(value
);
718 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
719 if (is_simple(value
)) {
720 if (reloc_value(value
) == 1)
721 result
->oprs
[operand
].type
|= UNITY
;
722 if (optimizing
>= 0 &&
723 !(result
->oprs
[operand
].type
& STRICT
)) {
724 if (reloc_value(value
) >= -128 &&
725 reloc_value(value
) <= 127)
726 result
->oprs
[operand
].type
|= SBYTE
;
729 } else { /* it's a register */
731 if (value
->type
>= EXPR_SIMPLE
|| value
->value
!= 1) {
732 error(ERR_NONFATAL
, "invalid operand type");
738 * check that it's only 1 register, not an expression...
740 for (i
= 1; value
[i
].type
; i
++)
741 if (value
[i
].value
) {
742 error(ERR_NONFATAL
, "invalid operand type");
747 /* clear overrides, except TO which applies to FPU regs */
748 if (result
->oprs
[operand
].type
& ~TO
) {
750 * we want to produce a warning iff the specified size
751 * is different from the register size
753 i
= result
->oprs
[operand
].type
& SIZE_MASK
;
757 result
->oprs
[operand
].type
&= TO
;
758 result
->oprs
[operand
].type
|= REGISTER
;
759 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
760 result
->oprs
[operand
].basereg
= value
->type
;
762 if (i
&& (result
->oprs
[operand
].type
& SIZE_MASK
) != i
)
763 error(ERR_WARNING
| ERR_PASS1
,
764 "register size specification ignored");
769 result
->operands
= operand
; /* set operand count */
771 while (operand
< 3) /* clear remaining operands */
772 result
->oprs
[operand
++].type
= 0;
775 * Transform RESW, RESD, RESQ, REST, RESO into RESB.
777 switch (result
->opcode
) {
779 result
->opcode
= I_RESB
;
780 result
->oprs
[0].offset
*= 2;
783 result
->opcode
= I_RESB
;
784 result
->oprs
[0].offset
*= 4;
787 result
->opcode
= I_RESB
;
788 result
->oprs
[0].offset
*= 8;
791 result
->opcode
= I_RESB
;
792 result
->oprs
[0].offset
*= 10;
795 result
->opcode
= I_RESB
;
796 result
->oprs
[0].offset
*= 16;
805 static int is_comma_next(void)
812 i
= stdscan(NULL
, &tv
);
814 return (i
== ',' || i
== ';' || !i
);
817 void cleanup_insn(insn
* i
)
823 i
->eops
= i
->eops
->next
;