1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
/*
 * reg_flags: operand-type flag word for each register, looked up by
 * register number. In this file it is indexed with tokval.t_integer
 * when the current token is TOKEN_REG and with value->type for a
 * register expression; the entry is tested against REG_SREG to spot
 * segment registers and is ORed into an operand type field for
 * register operands. Entry 0 is 0.
 *
 * NOTE(review): the line numbers embedded in this listing jump from 30
 * to 32 and no closing brace is visible, so one or more entries may be
 * missing from this view. Confirm positions against the complete file
 * before relying on them.
 */
22 static long reg_flags
[] = { /* sizes and special flags */
23 0, REG8
, REG_AL
, REG_AX
, REG8
, REG8
, REG16
, REG16
, REG8
, REG_CL
,
24 REG_CREG
, REG_CREG
, REG_CREG
, REG_CR4
, REG_CS
, REG_CX
, REG8
,
25 REG16
, REG8
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
,
26 REG_DREG
, REG_DESS
, REG_DX
, REG_EAX
, REG32
, REG32
, REG_ECX
,
27 REG32
, REG32
, REG_DESS
, REG32
, REG32
, REG_FSGS
, REG_FSGS
,
28 MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
,
29 REG16
, REG16
, REG_DESS
, FPU0
, FPUREG
, FPUREG
, FPUREG
, FPUREG
,
30 FPUREG
, FPUREG
, FPUREG
, REG_TREG
, REG_TREG
, REG_TREG
, REG_TREG
,
32 XMMREG
, XMMREG
, XMMREG
, XMMREG
, XMMREG
, XMMREG
, XMMREG
, XMMREG
/*
 * Special tokens. The enumerators are listed in alphabetical order and
 * take consecutive values starting at 0 (S_BYTE == 0 ... S_WORD == 10).
 *
 * NOTE(review): presumably these are the tokval.t_integer values that
 * accompany a TOKEN_SPECIAL token and must stay in step with the
 * scanner's keyword table; confirm before reordering or inserting.
 */
enum {                                 /* special tokens */
    S_BYTE,
    S_DWORD,
    S_FAR,
    S_LONG,
    S_NEAR,
    S_NOSPLIT,
    S_QWORD,
    S_SHORT,
    S_TO,
    S_TWORD,
    S_WORD
};
/* Forward declaration; the definition appears near the end of this file. */
40 static int is_comma_next (void);
/*
 * Token value filled in by each stdscan(NULL, &tokval) call made by the
 * line parser; its t_integer, t_inttwo and t_charptr fields are read
 * after every scan.
 */
43 static struct tokenval tokval
;
/* Passed on to the label-definition callback when a label is defined. */
45 static struct ofmt
*outfmt
; /* Structure of addresses of output routines */
/* Its segment and offset members are passed to the label-definition callback. */
46 static loc_t
*location
; /* Pointer to current line's segment,offset */
/*
 * parser_global_info: receives the output-format descriptor (output)
 * and a pointer to the current location (locp).
 *
 * NOTE(review): the function body is not visible in this listing.
 * Presumably it stores the two pointers in the file-scope outfmt and
 * location variables that parse_line reads; confirm against the full
 * file.
 */
48 void parser_global_info (struct ofmt
*output
, loc_t
*locp
)
/*
 * parse_line: parse one source line into an instruction record.
 *
 * Parameters
 *   pass      assembly pass number; pass 2 selects critical evaluation
 *   buffer    text of the line; becomes the scanner input pointer
 *   result    instruction record to fill in; also the value returned on
 *             the visible early-exit paths
 *   errfunc   error callback, handed on to ldef
 *   evaluate  expression evaluator, called with stdscan as token source
 *   ldef      label-definition callback
 *
 * Visible behaviour, in order:
 *   - Clears forw_ref, label, eops and operands in result, then scans
 *     the first token. A zero token (blank line) gives opcode -1.
 *   - A leading TOKEN_ID is taken as a label and an optional colon is
 *     skipped. Unless the following instruction is EQU, the label is
 *     defined through ldef at location->segment / location->offset.
 *     A line holding only a label gives opcode -1.
 *   - Prefix tokens and segment registers are collected into
 *     result->prefixes (at most MAXPREFIX). TIMES is special: its
 *     argument is evaluated, must be a simple constant and must not be
 *     negative, and is stored in result->times.
 *   - Prefixes followed by end of line become a notional RESB 0.
 *   - The opcode and condition are taken from the instruction token.
 *   - DB, DW, DD, DQ, DT and INCBIN build a list of extop nodes hung
 *     off result->eops (strings, floating-point constants converted by
 *     float_const, and numeric or relocatable expressions). INCBIN
 *     additionally requires a string followed by at most two numbers.
 *   - Any other instruction has up to three operands parsed: optional
 *     size specifiers, then either a memory reference (introduced by
 *     an opening square bracket or an ampersand, optionally with a
 *     segment override and an address-size override) or an immediate
 *     or register. The evaluated expression is converted into the
 *     basereg, indexreg, scale, offset, segment and wrt fields.
 *   - The operand count is stored, unused operand slots get type 0,
 *     and RESW, RESD, RESQ and REST are rewritten as RESB with the
 *     count multiplied by 2, 4, 8 and 10 respectively.
 *
 * An opcode of -1 in result marks a line to be ignored, including the
 * unrecoverable-parse-error exits.
 *
 * NOTE(review): this listing carries embedded line numbers and omits
 * many lines (local declarations, case labels, braces), so the summary
 * above covers only what is visible; confirm details against the
 * complete file.
 */
54 insn
*parse_line (int pass
, char *buffer
, insn
*result
,
55 efunc errfunc
, evalfunc evaluate
, ldfunc ldef
)
59 struct eval_hints hints
;
61 result
->forw_ref
= FALSE
;
65 stdscan_bufptr
= buffer
;
66 i
= stdscan(NULL
, &tokval
);
68 result
->label
= NULL
; /* Assume no label */
69 result
->eops
= NULL
; /* must do this, whatever happens */
70 result
->operands
= 0; /* must initialise this */
72 if (i
==0) { /* blank line - ignore */
73 result
->opcode
= -1; /* and no instruction either */
76 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
77 (i
!=TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
78 error (ERR_NONFATAL
, "label or instruction expected"
84 if (i
== TOKEN_ID
) { /* there's a label here */
85 result
->label
= tokval
.t_charptr
;
86 i
= stdscan(NULL
, &tokval
);
87 if (i
== ':') { /* skip over the optional colon */
88 i
= stdscan(NULL
, &tokval
);
90 error (ERR_WARNING
|ERR_WARN_OL
|ERR_PASS1
,
91 "label alone on a line without a colon might be in error");
93 if (i
!= TOKEN_INSN
|| tokval
.t_integer
!= I_EQU
)
96 * FIXME: location->segment could be NO_SEG, in which case
97 * it is possible we should be passing 'abs_seg'. Look into this.
98 * Work out whether that is *really* what we should be doing.
99 * Generally fix things. I think this is right as it is, but
100 * am still not certain.
102 ldef (result
->label
, location
->segment
,
103 location
->offset
, NULL
, TRUE
, FALSE
, outfmt
, errfunc
);
108 result
->opcode
= -1; /* this line contains just a label */
115 while (i
== TOKEN_PREFIX
||
116 (i
==TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
])))
119 * Handle special case: the TIMES prefix.
121 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
124 i
= stdscan(NULL
, &tokval
);
125 value
= evaluate (stdscan
, NULL
, &tokval
, NULL
, pass
, error
, NULL
);
127 if (!value
) { /* but, error in evaluator */
128 result
->opcode
= -1; /* unrecoverable parse error: */
129 return result
; /* ignore this instruction */
131 if (!is_simple (value
)) {
133 "non-constant argument supplied to TIMES");
136 result
->times
= value
->value
;
137 if (value
->value
< 0) {
138 error(ERR_NONFATAL
, "TIMES value %d is negative",
144 if (result
->nprefix
== MAXPREFIX
)
146 "instruction has more than %d prefixes", MAXPREFIX
);
148 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
149 i
= stdscan(NULL
, &tokval
);
153 if (i
!= TOKEN_INSN
) {
154 if (result
->nprefix
> 0 && i
== 0) {
156 * Instruction prefixes are present, but no actual
157 * instruction. This is allowed: at this point we
158 * invent a notional instruction of RESB 0.
160 result
->opcode
= I_RESB
;
161 result
->operands
= 1;
162 result
->oprs
[0].type
= IMMEDIATE
;
163 result
->oprs
[0].offset
= 0L;
164 result
->oprs
[0].segment
= result
->oprs
[0].wrt
= NO_SEG
;
167 error (ERR_NONFATAL
, "parser: instruction expected");
173 result
->opcode
= tokval
.t_integer
;
174 result
->condition
= tokval
.t_inttwo
;
177 * RESB, RESW and RESD cannot be satisfied with incorrectly
178 * evaluated operands, since the correct values _must_ be known
179 * on the first pass. Hence, even in pass one, we set the
180 * `critical' flag on calling evaluate(), so that it will bomb
181 * out on undefined symbols. Nasty, but there's nothing we can
184 * For the moment, EQU has the same difficulty, so we'll
187 if (result
->opcode
== I_RESB
||
188 result
->opcode
== I_RESW
||
189 result
->opcode
== I_RESD
||
190 result
->opcode
== I_RESQ
||
191 result
->opcode
== I_REST
||
192 result
->opcode
== I_EQU
)
197 critical
= (pass
==2 ? 2 : 0);
199 if (result
->opcode
== I_DB
||
200 result
->opcode
== I_DW
||
201 result
->opcode
== I_DD
||
202 result
->opcode
== I_DQ
||
203 result
->opcode
== I_DT
||
204 result
->opcode
== I_INCBIN
)
206 extop
*eop
, **tail
= &result
->eops
, **fixptr
;
209 result
->eops_float
= FALSE
;
212 * Begin to read the DB/DW/DD/DQ/DT/INCBIN operands.
215 i
= stdscan(NULL
, &tokval
);
219 eop
= *tail
= nasm_malloc(sizeof(extop
));
222 eop
->type
= EOT_NOTHING
;
225 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
226 eop
->type
= EOT_DB_STRING
;
227 eop
->stringval
= tokval
.t_charptr
;
228 eop
->stringlen
= tokval
.t_inttwo
;
229 i
= stdscan(NULL
, &tokval
); /* eat the comma */
233 if ((i
== TOKEN_FLOAT
&& is_comma_next()) || i
== '-') {
237 char *save
= stdscan_bufptr
;
238 i
= stdscan(NULL
, &tokval
);
240 if (i
!= TOKEN_FLOAT
|| !is_comma_next()) {
241 stdscan_bufptr
= save
;
242 i
= tokval
.t_type
= '-';
246 if (i
== TOKEN_FLOAT
) {
247 eop
->type
= EOT_DB_STRING
;
248 result
->eops_float
= TRUE
;
249 if (result
->opcode
== I_DD
)
251 else if (result
->opcode
== I_DQ
)
253 else if (result
->opcode
== I_DT
)
256 error(ERR_NONFATAL
, "floating-point constant"
257 " encountered in `D%c' instruction",
258 result
->opcode
== I_DW
? 'W' : 'B');
260 * fix suggested by Pedro Gimeno... original line
262 * eop->type = EOT_NOTHING;
266 eop
= nasm_realloc(eop
, sizeof(extop
)+eop
->stringlen
);
269 eop
->stringval
= (char *)eop
+ sizeof(extop
);
270 if (eop
->stringlen
< 4 ||
271 !float_const (tokval
.t_charptr
, sign
,
272 (unsigned char *)eop
->stringval
,
273 eop
->stringlen
, error
))
274 eop
->type
= EOT_NOTHING
;
275 i
= stdscan(NULL
, &tokval
); /* eat the comma */
283 value
= evaluate (stdscan
, NULL
, &tokval
, NULL
,
284 critical
, error
, NULL
);
286 if (!value
) { /* error in evaluator */
287 result
->opcode
= -1;/* unrecoverable parse error: */
288 return result
; /* ignore this instruction */
290 if (is_unknown(value
)) {
291 eop
->type
= EOT_DB_NUMBER
;
292 eop
->offset
= 0; /* doesn't matter what we put */
293 eop
->segment
= eop
->wrt
= NO_SEG
; /* likewise */
294 } else if (is_reloc(value
)) {
295 eop
->type
= EOT_DB_NUMBER
;
296 eop
->offset
= reloc_value(value
);
297 eop
->segment
= reloc_seg(value
);
298 eop
->wrt
= reloc_wrt(value
);
301 "operand %d: expression is not simple"
302 " or relocatable", oper_num
);
307 * We're about to call stdscan(), which will eat the
308 * comma that we're currently sitting on between
309 * arguments. However, we'd better check first that it
312 if (i
== 0) /* also could be EOL */
315 error (ERR_NONFATAL
, "comma expected after operand %d",
317 result
->opcode
= -1;/* unrecoverable parse error: */
318 return result
; /* ignore this instruction */
322 if (result
->opcode
== I_INCBIN
) {
324 * Correct syntax for INCBIN is that there should be
325 * one string operand, followed by one or two numeric
328 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
329 error (ERR_NONFATAL
, "`incbin' expects a file name");
330 else if (result
->eops
->next
&&
331 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
332 error (ERR_NONFATAL
, "`incbin': second parameter is",
334 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
335 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
336 error (ERR_NONFATAL
, "`incbin': third parameter is",
338 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
339 result
->eops
->next
->next
->next
)
340 error (ERR_NONFATAL
, "`incbin': more than three parameters");
344 * If we reach here, one of the above errors happened.
345 * Throw the instruction away.
351 error (ERR_WARNING
|ERR_PASS1
,
352 "no operand for data declaration");
354 result
->operands
= oper_num
;
359 /* right. Now we begin to parse the operands. There may be up to three
360 * of these, separated by commas, and terminated by a zero token. */
362 for (operand
= 0; operand
< 3; operand
++) {
363 expr
*value
; /* used most of the time */
364 int mref
; /* is this going to be a memory ref? */
365 int bracket
; /* is it a [] mref, or a & mref? */
368 result
->oprs
[operand
].addr_size
= 0;/* have to zero this whatever */
369 result
->oprs
[operand
].eaflags
= 0; /* and this */
370 result
->oprs
[operand
].opflags
= 0;
372 i
= stdscan(NULL
, &tokval
);
373 if (i
== 0) break; /* end of operands: get out of here */
374 result
->oprs
[operand
].type
= 0; /* so far, no override */
375 while (i
== TOKEN_SPECIAL
) {/* size specifiers */
376 switch ((int)tokval
.t_integer
) {
378 if (!setsize
) /* we want to use only the first */
379 result
->oprs
[operand
].type
|= BITS8
;
384 result
->oprs
[operand
].type
|= BITS16
;
390 result
->oprs
[operand
].type
|= BITS32
;
395 result
->oprs
[operand
].type
|= BITS64
;
400 result
->oprs
[operand
].type
|= BITS80
;
404 result
->oprs
[operand
].type
|= TO
;
407 result
->oprs
[operand
].type
|= FAR
;
410 result
->oprs
[operand
].type
|= NEAR
;
413 result
->oprs
[operand
].type
|= SHORT
;
416 error (ERR_NONFATAL
, "invalid operand size specification");
418 i
= stdscan(NULL
, &tokval
);
421 if (i
== '[' || i
== '&') { /* memory reference */
423 bracket
= (i
== '[');
424 i
= stdscan(NULL
, &tokval
);
425 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
426 switch ((int)tokval
.t_integer
) {
428 result
->oprs
[operand
].eaflags
|= EAF_TIMESTWO
;
431 result
->oprs
[operand
].eaflags
|= EAF_BYTEOFFS
;
434 result
->oprs
[operand
].addr_size
= 16;
435 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
439 result
->oprs
[operand
].addr_size
= 32;
440 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
443 error (ERR_NONFATAL
, "invalid size specification in"
444 " effective address");
446 i
= stdscan(NULL
, &tokval
);
448 } else { /* immediate operand, or register */
450 bracket
= FALSE
; /* placate optimisers */
453 value
= evaluate (stdscan
, NULL
, &tokval
,
454 &result
->oprs
[operand
].opflags
,
455 critical
, error
, &hints
);
457 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
458 result
->forw_ref
= TRUE
;
460 if (!value
) { /* error in evaluator */
461 result
->opcode
= -1; /* unrecoverable parse error: */
462 return result
; /* ignore this instruction */
464 if (i
== ':' && mref
) { /* it was seg:offset */
466 * Process the segment override.
468 if (value
[1].type
!=0 || value
->value
!=1 ||
469 REG_SREG
& ~reg_flags
[value
->type
])
470 error (ERR_NONFATAL
, "invalid segment override");
471 else if (result
->nprefix
== MAXPREFIX
)
473 "instruction has more than %d prefixes",
476 result
->prefixes
[result
->nprefix
++] = value
->type
;
478 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
479 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
480 switch ((int)tokval
.t_integer
) {
482 result
->oprs
[operand
].addr_size
= 16;
486 result
->oprs
[operand
].addr_size
= 32;
489 error (ERR_NONFATAL
, "invalid size specification in"
490 " effective address");
492 i
= stdscan(NULL
, &tokval
);
494 value
= evaluate (stdscan
, NULL
, &tokval
,
495 &result
->oprs
[operand
].opflags
,
496 critical
, error
, &hints
);
498 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
499 result
->forw_ref
= TRUE
;
501 /* and get the offset */
502 if (!value
) { /* but, error in evaluator */
503 result
->opcode
= -1; /* unrecoverable parse error: */
504 return result
; /* ignore this instruction */
507 if (mref
&& bracket
) { /* find ] at the end */
509 error (ERR_NONFATAL
, "parser: expecting ]");
510 do { /* error recovery again */
511 i
= stdscan(NULL
, &tokval
);
512 } while (i
!= 0 && i
!= ',');
513 } else /* we got the required ] */
514 i
= stdscan(NULL
, &tokval
);
515 } else { /* immediate operand */
516 if (i
!= 0 && i
!= ',' && i
!= ':') {
517 error (ERR_NONFATAL
, "comma or end of line expected");
518 do { /* error recovery */
519 i
= stdscan(NULL
, &tokval
);
520 } while (i
!= 0 && i
!= ',');
521 } else if (i
== ':') {
522 result
->oprs
[operand
].type
|= COLON
;
526 /* now convert the exprs returned from evaluate() into operand
529 if (mref
) { /* it's a memory reference */
531 int b
, i
, s
; /* basereg, indexreg, scale */
534 b
= i
= -1, o
= s
= 0;
535 result
->oprs
[operand
].hintbase
= hints
.base
;
536 result
->oprs
[operand
].hinttype
= hints
.type
;
538 if (e
->type
<= EXPR_REG_END
) { /* this bit's a register */
539 if (e
->value
== 1) /* in fact it can be basereg */
541 else /* no, it has to be indexreg */
542 i
= e
->type
, s
= e
->value
;
545 if (e
->type
&& e
->type
<= EXPR_REG_END
) /* it's a 2nd register */
547 if (b
!= -1) /* If the first was the base, ... */
548 i
= e
->type
, s
= e
->value
; /* second has to be indexreg */
550 else if (e
->value
!= 1) /* If both want to be index */
552 error(ERR_NONFATAL
, "invalid effective address");
560 if (e
->type
!= 0) { /* is there an offset? */
561 if (e
->type
<= EXPR_REG_END
) /* in fact, is there an error? */
563 error (ERR_NONFATAL
, "invalid effective address");
569 if (e
->type
== EXPR_UNKNOWN
) {
570 o
= 0; /* doesn't matter what */
571 result
->oprs
[operand
].wrt
= NO_SEG
; /* nor this */
572 result
->oprs
[operand
].segment
= NO_SEG
; /* or this */
573 while (e
->type
) e
++; /* go to the end of the line */
577 if (e
->type
== EXPR_SIMPLE
) {
581 if (e
->type
== EXPR_WRT
) {
582 result
->oprs
[operand
].wrt
= e
->value
;
585 result
->oprs
[operand
].wrt
= NO_SEG
;
587 * Look for a segment base type.
589 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
590 error (ERR_NONFATAL
, "invalid effective address");
594 while (e
->type
&& e
->value
== 0)
596 if (e
->type
&& e
->value
!= 1) {
597 error (ERR_NONFATAL
, "invalid effective address");
602 result
->oprs
[operand
].segment
=
603 e
->type
- EXPR_SEGBASE
;
606 result
->oprs
[operand
].segment
= NO_SEG
;
607 while (e
->type
&& e
->value
== 0)
610 error (ERR_NONFATAL
, "invalid effective address");
618 result
->oprs
[operand
].wrt
= NO_SEG
;
619 result
->oprs
[operand
].segment
= NO_SEG
;
622 if (e
->type
!= 0) { /* there'd better be nothing left! */
623 error (ERR_NONFATAL
, "invalid effective address");
628 result
->oprs
[operand
].type
|= MEMORY
;
629 if (b
==-1 && (i
==-1 || s
==0))
630 result
->oprs
[operand
].type
|= MEM_OFFS
;
631 result
->oprs
[operand
].basereg
= b
;
632 result
->oprs
[operand
].indexreg
= i
;
633 result
->oprs
[operand
].scale
= s
;
634 result
->oprs
[operand
].offset
= o
;
636 else /* it's not a memory reference */
638 if (is_just_unknown(value
)) { /* it's immediate but unknown */
639 result
->oprs
[operand
].type
|= IMMEDIATE
;
640 result
->oprs
[operand
].offset
= 0; /* don't care */
641 result
->oprs
[operand
].segment
= NO_SEG
; /* don't care again */
642 result
->oprs
[operand
].wrt
= NO_SEG
;/* still don't care */
644 else if (is_reloc(value
)) /* it's immediate */
646 result
->oprs
[operand
].type
|= IMMEDIATE
;
647 result
->oprs
[operand
].offset
= reloc_value(value
);
648 result
->oprs
[operand
].segment
= reloc_seg(value
);
649 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
650 if (is_simple(value
)) {
651 if (reloc_value(value
)==1)
652 result
->oprs
[operand
].type
|= UNITY
;
653 if (reloc_value(value
) >= -128 &&
654 reloc_value(value
) <= 127)
655 result
->oprs
[operand
].type
|= SBYTE
;
658 else /* it's a register */
662 if (value
->type
>=EXPR_SIMPLE
|| value
->value
!=1) {
663 error (ERR_NONFATAL
, "invalid operand type");
669 * check that its only 1 register, not an expression...
671 for (i
= 1; value
[i
].type
; i
++)
672 if (value
[i
].value
) {
673 error (ERR_NONFATAL
, "invalid operand type");
678 /* clear overrides, except TO which applies to FPU regs */
679 if (result
->oprs
[operand
].type
& ~TO
) {
681 * we want to produce a warning iff the specified size
682 * is different from the register size
684 i
= result
->oprs
[operand
].type
& SIZE_MASK
;
689 result
->oprs
[operand
].type
&= TO
;
690 result
->oprs
[operand
].type
|= REGISTER
;
691 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
692 result
->oprs
[operand
].basereg
= value
->type
;
694 if (i
&& (result
->oprs
[operand
].type
& SIZE_MASK
) != i
)
695 error (ERR_WARNING
|ERR_PASS1
,
696 "register size specification ignored");
701 result
->operands
= operand
; /* set operand count */
703 while (operand
<3) /* clear remaining operands */
704 result
->oprs
[operand
++].type
= 0;
707 * Transform RESW, RESD, RESQ, REST into RESB.
709 switch (result
->opcode
) {
710 case I_RESW
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=2; break;
711 case I_RESD
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=4; break;
712 case I_RESQ
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=8; break;
713 case I_REST
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=10; break;
/*
 * is_comma_next: scan one token with stdscan into a local token value
 * (tv, so the shared tokval is not overwritten) and return non-zero
 * when that token is a comma, a semicolon, or the zero token that marks
 * end of line.
 *
 * The data-declaration parser calls this to decide whether a string or
 * floating-point token stands alone as an operand.
 *
 * NOTE(review): the local declarations, and any saving and restoring of
 * stdscan_bufptr that would make this a pure look-ahead, are not
 * visible in this listing; confirm against the full file.
 */
719 static int is_comma_next (void)
726 i
= stdscan (NULL
, &tv
);
728 return (i
== ',' || i
== ';' || !i
);
/*
 * cleanup_insn: step through the extended-operand list of instruction i
 * by advancing its head pointer (i->eops = i->eops->next).
 *
 * NOTE(review): the loop header and the rest of the body are not
 * visible in this listing. Presumably each unlinked node, which the
 * line parser allocates with nasm_malloc, is released here; confirm
 * against the full file.
 */
731 void cleanup_insn (insn
*i
)
737 i
->eops
= i
->eops
->next
;