doc: slight cleanup of MASM compatibility package information
[nasm.git] / include / nasm.h
blob046f5fb91840334636893a8c86e51c31fd338028
1 /* ----------------------------------------------------------------------- *
3 * Copyright 1996-2018 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * nasm.h main header file for the Netwide Assembler: inter-module interface
38 #ifndef NASM_NASM_H
39 #define NASM_NASM_H
41 #include "compiler.h"
43 #include <time.h>
45 #include "nasmlib.h"
46 #include "nctype.h"
47 #include "strlist.h"
48 #include "preproc.h"
49 #include "insnsi.h" /* For enum opcode */
50 #include "directiv.h" /* For enum directive */
51 #include "labels.h" /* For enum mangle_index, enum label_type */
52 #include "opflags.h"
53 #include "regs.h"
54 #include "srcfile.h"
55 #include "error.h"
57 /* Program name for error messages etc. */
58 extern const char *_progname;
60 /* Time stamp for the official start of compilation */
61 struct compile_time {
62 time_t t;
63 bool have_local, have_gm, have_posix;
64 int64_t posix;
65 struct tm local;
66 struct tm gm;
68 extern struct compile_time official_compile_time;
70 #define NO_SEG INT32_C(-1) /* null segment value */
71 #define SEG_ABS 0x40000000L /* mask for far-absolute segments */
73 #define IDLEN_MAX 4096
74 #define DECOLEN_MAX 32
77 * Name pollution problems: <time.h> on Digital UNIX pulls in some
78 * strange hardware header file which sees fit to define R_SP. We
79 * undefine it here so as not to break the enum below.
81 #ifdef R_SP
82 #undef R_SP
83 #endif
86 * We must declare the existence of this structure type up here,
87 * since we have to reference it before we define it...
89 struct ofmt;
92 * Values for the `type' parameter to an output function.
94 enum out_type {
95 OUT_RAWDATA, /* Plain bytes */
96 OUT_RESERVE, /* Reserved bytes (RESB et al) */
97 OUT_ZERODATA, /* Initialized data, but all zero */
98 OUT_ADDRESS, /* An address (symbol value) */
99 OUT_RELADDR, /* A relative address */
100 OUT_SEGMENT, /* A segment number */
103 * These values are used by the legacy backend interface only;
104 * see output/legacy.c for more information. These should never
105 * be used otherwise. Once all backends have been migrated to the
106 * new interface they should be removed.
108 OUT_REL1ADR,
109 OUT_REL2ADR,
110 OUT_REL4ADR,
111 OUT_REL8ADR
114 enum out_sign {
115 OUT_WRAP, /* Undefined signedness (wraps) */
116 OUT_SIGNED, /* Value is signed */
117 OUT_UNSIGNED /* Value is unsigned */
121 * The data we send down to the backend.
122 * XXX: We still want to push down the base address symbol if
123 * available, and replace the segment numbers with a structure.
125 struct out_data {
126 int64_t offset; /* Offset within segment */
127 int32_t segment; /* Segment written to */
128 enum out_type type; /* See above */
129 enum out_sign sign; /* See above */
130 int inslen; /* Length of instruction */
131 int insoffs; /* Offset inside instruction */
132 int bits; /* Bits mode of compilation */
133 uint64_t size; /* Size of output */
134 const struct itemplate *itemp; /* Instruction template */
135 const void *data; /* Data for OUT_RAWDATA */
136 uint64_t toffset; /* Target address offset for relocation */
137 int32_t tsegment; /* Target segment for relocation */
138 int32_t twrt; /* Relocation with respect to */
139 int64_t relbase; /* Relative base for OUT_RELADDR */
143 * And a label-definition function. The boolean parameter
144 * `is_norm' states whether the label is a `normal' label (which
145 * should affect the local-label system), or something odder like
146 * an EQU or a segment-base symbol, which shouldn't.
148 typedef void (*ldfunc)(char *label, int32_t segment, int64_t offset,
149 char *special, bool is_norm);
152 * Token types returned by the scanner, in addition to ordinary
153 * ASCII character values, and zero for end-of-string.
155 enum token_type { /* token types, other than chars */
156 TOKEN_INVALID = -1, /* a placeholder value */
157 TOKEN_EOS = 0, /* end of string */
158 TOKEN_QMARK = '?',
159 TOKEN_EQ = '=',
160 TOKEN_GT = '>',
161 TOKEN_LT = '<', /* aliases */
162 TOKEN_ID = 256, /* identifier */
163 TOKEN_NUM, /* numeric constant */
164 TOKEN_ERRNUM, /* malformed numeric constant */
165 TOKEN_STR, /* string constant */
166 TOKEN_ERRSTR, /* unterminated string constant */
167 TOKEN_FLOAT, /* floating-point constant */
168 TOKEN_REG, /* register name */
169 TOKEN_INSN, /* instruction name */
170 TOKEN_HERE, /* $ */
171 TOKEN_BASE, /* $$ */
172 TOKEN_SIZE, /* BYTE, WORD, DWORD, QWORD, etc */
173 TOKEN_SPECIAL, /* REL, FAR, NEAR, STRICT, NOSPLIT, etc */
174 TOKEN_PREFIX, /* A32, O16, LOCK, REPNZ, TIMES, etc */
175 TOKEN_SHL, /* << or <<< */
176 TOKEN_SHR, /* >> */
177 TOKEN_SAR, /* >>> */
178 TOKEN_SDIV, /* // */
179 TOKEN_SMOD, /* %% */
180 TOKEN_GE, /* >= */
181 TOKEN_LE, /* <= */
182 TOKEN_NE, /* <> (!= is same as <>) */
183 TOKEN_LEG, /* <=> */
184 TOKEN_DBL_AND, /* && */
185 TOKEN_DBL_OR, /* || */
186 TOKEN_DBL_XOR, /* ^^ */
187 TOKEN_SEG, /* SEG */
188 TOKEN_WRT, /* WRT */
189 TOKEN_FLOATIZE, /* __?floatX?__ */
190 TOKEN_STRFUNC, /* __utf16*__, __utf32*__ */
191 TOKEN_IFUNC, /* __ilog2*__ */
192 TOKEN_DECORATOR, /* decorators such as {...} */
193 TOKEN_MASM_PTR, /* __?masm_ptr?__ for the masm package */
194 TOKEN_MASM_FLAT, /* __?masm_flat?__ for the masm package */
195 TOKEN_OPMASK /* translated token for opmask registers */
198 enum floatize {
199 FLOAT_8,
200 FLOAT_16,
201 FLOAT_32,
202 FLOAT_64,
203 FLOAT_80M,
204 FLOAT_80E,
205 FLOAT_128L,
206 FLOAT_128H
209 /* Must match the list in string_transform(), in strfunc.c */
210 enum strfunc {
211 STRFUNC_UTF16,
212 STRFUNC_UTF16LE,
213 STRFUNC_UTF16BE,
214 STRFUNC_UTF32,
215 STRFUNC_UTF32LE,
216 STRFUNC_UTF32BE
219 enum ifunc {
220 IFUNC_ILOG2E,
221 IFUNC_ILOG2W,
222 IFUNC_ILOG2F,
223 IFUNC_ILOG2C
226 size_t string_transform(char *, size_t, char **, enum strfunc);
229 * The expression evaluator must be passed a scanner function; a
230 * standard scanner is provided as part of nasmlib.c. The
231 * preprocessor will use a different one. Scanners, and the
232 * token-value structures they return, look like this.
234 * The return value from the scanner is always a copy of the
235 * `t_type' field in the structure.
237 struct tokenval {
238 char *t_charptr;
239 int64_t t_integer;
240 int64_t t_inttwo;
241 enum token_type t_type;
242 int8_t t_flag;
244 typedef int (*scanner)(void *private_data, struct tokenval *tv);
246 struct location {
247 int64_t offset;
248 int32_t segment;
249 int known;
251 extern struct location location;
254 * Expression-evaluator datatype. Expressions, within the
255 * evaluator, are stored as an array of these beasts, terminated by
256 * a record with type==0. Mostly, it's a vector type: each type
257 * denotes some kind of a component, and the value denotes the
258 * multiple of that component present in the expression. The
259 * exception is the WRT type, whose `value' field denotes the
260 * segment to which the expression is relative. These segments will
261 * be segment-base types, i.e. either odd segment values or SEG_ABS
262 * types. So it is still valid to assume that anything with a
263 * `value' field of zero is insignificant.
265 typedef struct {
266 int32_t type; /* a register, or EXPR_xxx */
267 int64_t value; /* must be >= 32 bits */
268 } expr;
271 * Library routines to manipulate expression data types.
273 bool is_reloc(const expr *vect);
274 bool is_simple(const expr *vect);
275 bool is_really_simple(const expr *vect);
276 bool is_unknown(const expr *vect);
277 bool is_just_unknown(const expr *vect);
278 int64_t reloc_value(const expr *vect);
279 int32_t reloc_seg(const expr *vect);
280 int32_t reloc_wrt(const expr *vect);
281 bool is_self_relative(const expr *vect);
282 void dump_expr(const expr *vect);
285 * The evaluator can also return hints about which of two registers
286 * used in an expression should be the base register. See also the
287 * `operand' structure.
289 struct eval_hints {
290 int64_t base;
291 int type;
295 * The actual expression evaluator function looks like this. When
296 * called, it expects the first token of its expression to already
297 * be in `*tv'; if it is not, set tv->t_type to TOKEN_INVALID and
298 * it will start by calling the scanner.
300 * If a forward reference happens during evaluation, the evaluator
301 * must set `*fwref' to true if `fwref' is non-NULL.
303 * `critical' is non-zero if the expression may not contain forward
304 * references. The evaluator will report its own error if this
305 * occurs; if `critical' is 1, the error will be "symbol not
306 * defined before use", whereas if `critical' is 2, the error will
307 * be "symbol undefined".
309 * If `critical' has bit 8 set (in addition to its main value: 0x101
310 * and 0x102 correspond to 1 and 2) then an extended expression
311 * syntax is recognised, in which relational operators such as =, <
312 * and >= are accepted, as well as low-precedence logical operators
313 * &&, ^^ and ||.
315 * If `hints' is non-NULL, it gets filled in with some hints as to
316 * the base register in complex effective addresses.
318 #define CRITICAL 0x100
319 typedef expr *(*evalfunc)(scanner sc, void *scprivate,
320 struct tokenval *tv, int *fwref, int critical,
321 struct eval_hints *hints);
324 * Special values for expr->type.
325 * These come after EXPR_REG_END as defined in regs.h.
326 * Expr types : 0 ~ EXPR_REG_END, EXPR_UNKNOWN, EXPR_...., EXPR_RDSAE,
327 * EXPR_SEGBASE ~ EXPR_SEGBASE + SEG_ABS, ...
329 #define EXPR_UNKNOWN (EXPR_REG_END+1) /* forward references */
330 #define EXPR_SIMPLE (EXPR_REG_END+2)
331 #define EXPR_WRT (EXPR_REG_END+3)
332 #define EXPR_RDSAE (EXPR_REG_END+4)
333 #define EXPR_SEGBASE (EXPR_REG_END+5)
336 * preprocessors ought to look like this:
339 enum preproc_mode {
340 PP_NORMAL, /* Assembly */
341 PP_DEPS, /* Dependencies only */
342 PP_PREPROC /* Preprocessing only */
345 struct preproc_ops {
347 * Called once at the very start of assembly.
349 void (*init)(void);
352 * Called at the start of a pass; given a file name, the number
353 * of the pass, an error reporting function, an evaluator
354 * function, and a listing generator to talk to.
356 void (*reset)(const char *file, enum preproc_mode mode,
357 struct strlist *deplist);
360 * Called to fetch a line of preprocessed source. The line
361 * returned has been malloc'ed, and so should be freed after
362 * use.
364 char *(*getline)(void);
366 /* Called at the end of each pass. */
367 void (*cleanup_pass)(void);
370 * Called at the end of the assembly session,
371 * after cleanup_pass() has been called for the
372 * last pass.
374 void (*cleanup_session)(void);
376 /* Additional macros specific to output format */
377 void (*extra_stdmac)(macros_t *macros);
379 /* Early definitions and undefinitions for macros */
380 void (*pre_define)(char *definition);
381 void (*pre_undefine)(char *definition);
383 /* Include file from command line */
384 void (*pre_include)(char *fname);
386 /* Add a command from the command line */
387 void (*pre_command)(const char *what, char *str);
389 /* Include path from command line */
390 void (*include_path)(struct strlist *ipath);
392 /* Unwind the macro stack when printing an error message */
393 void (*error_list_macros)(errflags severity);
395 /* Return true if an error message should be suppressed */
396 bool (*suppress_error)(errflags severity);
399 extern const struct preproc_ops nasmpp;
400 extern const struct preproc_ops preproc_nop;
402 /* List of dependency files */
403 extern struct strlist *depend_list;
405 /* TASM mode changes some properties */
406 extern bool tasm_compatible_mode;
409 * inline function to skip past an identifier; returns the first character past
410 * the identifier if valid, otherwise NULL.
412 static inline char *nasm_skip_identifier(const char *str)
414 const char *p = str;
416 if (!nasm_isidstart(*p++)) {
417 p = NULL;
418 } else {
419 while (nasm_isidchar(*p++))
422 return (char *)p;
426 * Data-type flags that get passed to listing-file routines.
428 enum {
429 LIST_READ,
430 LIST_MACRO,
431 LIST_MACRO_NOLIST,
432 LIST_INCLUDE,
433 LIST_INCBIN,
434 LIST_TIMES
438 * -----------------------------------------------------------
439 * Format of the `insn' structure returned from `parser.c' and
440 * passed into `assemble.c'
441 * -----------------------------------------------------------
444 /* Verify value to be a valid register */
445 static inline bool is_register(int reg)
447 return reg >= EXPR_REG_START && reg < REG_ENUM_LIMIT;
450 enum ccode { /* condition code names */
451 C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE,
452 C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP,
453 C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z,
454 C_none = -1
458 * token flags
460 #define TFLAG_BRC (1 << 0) /* valid only with braces. {1to8}, {rd-sae}, ...*/
461 #define TFLAG_BRC_OPT (1 << 1) /* may or may not have braces. opmasks {k1} */
462 #define TFLAG_BRC_ANY (TFLAG_BRC | TFLAG_BRC_OPT)
463 #define TFLAG_BRDCAST (1 << 2) /* broadcasting decorator */
464 #define TFLAG_WARN (1 << 3) /* warning only, treat as ID */
465 #define TFLAG_DUP (1 << 4) /* valid ID but also has context-specific use */
467 static inline uint8_t get_cond_opcode(enum ccode c)
469 static const uint8_t ccode_opcodes[] = {
470 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xf, 0xd, 0xc, 0xe, 0x6, 0x2,
471 0x3, 0x7, 0x3, 0x5, 0xe, 0xc, 0xd, 0xf, 0x1, 0xb, 0x9, 0x5,
472 0x0, 0xa, 0xa, 0xb, 0x8, 0x4
475 return ccode_opcodes[(int)c];
479 * REX flags
481 #define REX_MASK 0x4f /* Actual REX prefix bits */
482 #define REX_B 0x01 /* ModRM r/m extension */
483 #define REX_X 0x02 /* SIB index extension */
484 #define REX_R 0x04 /* ModRM reg extension */
485 #define REX_W 0x08 /* 64-bit operand size */
486 #define REX_L 0x20 /* Use LOCK prefix instead of REX.R */
487 #define REX_P 0x40 /* REX prefix present/required */
488 #define REX_H 0x80 /* High register present, REX forbidden */
489 #define REX_V 0x0100 /* Instruction uses VEX/XOP instead of REX */
490 #define REX_NH 0x0200 /* Instruction which doesn't use high regs */
491 #define REX_EV 0x0400 /* Instruction uses EVEX instead of REX */
494 * EVEX bit field
496 #define EVEX_P0MM 0x0f /* EVEX P[3:0] : Opcode map */
497 #define EVEX_P0RP 0x10 /* EVEX P[4] : High-16 reg */
498 #define EVEX_P0X 0x40 /* EVEX P[6] : High-16 rm */
499 #define EVEX_P1PP 0x03 /* EVEX P[9:8] : Legacy prefix */
500 #define EVEX_P1VVVV 0x78 /* EVEX P[14:11] : NDS register */
501 #define EVEX_P1W 0x80 /* EVEX P[15] : Osize extension */
502 #define EVEX_P2AAA 0x07 /* EVEX P[18:16] : Embedded opmask */
503 #define EVEX_P2VP 0x08 /* EVEX P[19] : High-16 NDS reg */
504 #define EVEX_P2B 0x10 /* EVEX P[20] : Broadcast / RC / SAE */
505 #define EVEX_P2LL 0x60 /* EVEX P[22:21] : Vector length */
506 #define EVEX_P2RC EVEX_P2LL /* EVEX P[22:21] : Rounding control */
507 #define EVEX_P2Z 0x80 /* EVEX P[23] : Zeroing/Merging */
510 * REX_V "classes" (prefixes which behave like VEX)
512 enum vex_class {
513 RV_VEX = 0, /* C4/C5 */
514 RV_XOP = 1, /* 8F */
515 RV_EVEX = 2 /* 62 */
519 * Note that because segment registers may be used as instruction
520 * prefixes, we must ensure the enumerations for prefixes and
521 * register names do not overlap.
523 enum prefixes { /* instruction prefixes */
524 P_none = 0,
525 PREFIX_ENUM_START = REG_ENUM_LIMIT,
526 P_A16 = PREFIX_ENUM_START,
527 P_A32,
528 P_A64,
529 P_ASP,
530 P_LOCK,
531 P_O16,
532 P_O32,
533 P_O64,
534 P_OSP,
535 P_REP,
536 P_REPE,
537 P_REPNE,
538 P_REPNZ,
539 P_REPZ,
540 P_TIMES,
541 P_WAIT,
542 P_XACQUIRE,
543 P_XRELEASE,
544 P_BND,
545 P_NOBND,
546 P_EVEX,
547 P_VEX3,
548 P_VEX2,
549 PREFIX_ENUM_LIMIT
552 enum ea_flags { /* special EA flags */
553 EAF_BYTEOFFS = 1, /* force offset part to byte size */
554 EAF_WORDOFFS = 2, /* force offset part to [d]word size */
555 EAF_TIMESTWO = 4, /* really do EAX*2 not EAX+EAX */
556 EAF_REL = 8, /* IP-relative addressing */
557 EAF_ABS = 16, /* non-IP-relative addressing */
558 EAF_FSGS = 32, /* fs/gs segment override present */
559 EAF_MIB = 64 /* mib operand */
562 enum eval_hint { /* values for `hinttype' */
563 EAH_NOHINT = 0, /* no hint at all - our discretion */
564 EAH_MAKEBASE = 1, /* try to make given reg the base */
565 EAH_NOTBASE = 2, /* try _not_ to make reg the base */
566 EAH_SUMMED = 3 /* base and index are summed into index */
569 typedef struct operand { /* operand to an instruction */
570 opflags_t type; /* type of operand */
571 int disp_size; /* 0 means default; 16; 32; 64 */
572 enum reg_enum basereg;
573 enum reg_enum indexreg; /* address registers */
574 int scale; /* index scale */
575 int hintbase;
576 enum eval_hint hinttype; /* hint as to real base register */
577 int32_t segment; /* immediate segment, if needed */
578 int64_t offset; /* any immediate number */
579 int32_t wrt; /* segment base it's relative to */
580 int eaflags; /* special EA flags */
581 int opflags; /* see OPFLAG_* defines below */
582 decoflags_t decoflags; /* decorator flags such as {...} */
583 } operand;
585 #define OPFLAG_FORWARD 1 /* operand is a forward reference */
586 #define OPFLAG_EXTERN 2 /* operand is an external reference */
587 #define OPFLAG_UNKNOWN 4 /* operand is an unknown reference
588 (always a forward reference also) */
589 #define OPFLAG_RELATIVE 8 /* operand is self-relative, e.g. [foo - $]
590 where foo is not in the current segment */
592 enum extop_type { /* extended operand types */
593 EOT_NOTHING = 0,
594 EOT_EXTOP, /* Subexpression */
595 EOT_DB_STRING, /* Byte string */
596 EOT_DB_FLOAT, /* Floating-pointer number (special byte string) */
597 EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd*/
598 EOT_DB_NUMBER, /* Integer */
599 EOT_DB_RESERVE /* ? */
602 typedef struct extop { /* extended operand */
603 struct extop *next; /* linked list */
604 union {
605 struct { /* text or byte string */
606 char *data;
607 size_t len;
608 } string;
609 struct { /* numeric expression */
610 int64_t offset; /* numeric value or address offset */
611 int32_t segment; /* address segment */
612 int32_t wrt; /* address wrt */
613 bool relative; /* self-relative expression */
614 } num;
615 struct extop *subexpr; /* actual expressions */
616 } val;
617 size_t dup; /* duplicated? */
618 enum extop_type type; /* defined above */
619 int elem; /* element size override, if any (bytes) */
620 } extop;
622 enum ea_type {
623 EA_INVALID, /* Not a valid EA at all */
624 EA_SCALAR, /* Scalar EA */
625 EA_XMMVSIB, /* XMM vector EA */
626 EA_YMMVSIB, /* YMM vector EA */
627 EA_ZMMVSIB /* ZMM vector EA */
631 * Prefix positions: each type of prefix goes in a specific slot.
632 * This affects the final ordering of the assembled output, which
633 * shouldn't matter to the processor, but if you have stylistic
634 * preferences, you can change this. REX prefixes are handled
635 * differently for the time being.
637 * LOCK and REP used to be one slot; this is no longer the case since
638 * the introduction of HLE.
640 enum prefix_pos {
641 PPS_WAIT, /* WAIT (technically not a prefix!) */
642 PPS_REP, /* REP/HLE prefix */
643 PPS_LOCK, /* LOCK prefix */
644 PPS_SEG, /* Segment override prefix */
645 PPS_OSIZE, /* Operand size prefix */
646 PPS_ASIZE, /* Address size prefix */
647 PPS_VEX, /* VEX type */
648 MAXPREFIX /* Total number of prefix slots */
652 * Tuple types that are used when determining Disp8*N eligibility
653 * The order must match with a hash %tuple_codes in insns.pl
655 enum ttypes {
656 FV = 001,
657 HV = 002,
658 FVM = 003,
659 T1S8 = 004,
660 T1S16 = 005,
661 T1S = 006,
662 T1F32 = 007,
663 T1F64 = 010,
664 T2 = 011,
665 T4 = 012,
666 T8 = 013,
667 HVM = 014,
668 QVM = 015,
669 OVM = 016,
670 M128 = 017,
671 DUP = 020
674 /* EVEX.L'L : Vector length on vector insns */
675 enum vectlens {
676 VL128 = 0,
677 VL256 = 1,
678 VL512 = 2,
679 VLMAX = 3
682 /* If you need to change this, also change it in insns.pl */
683 #define MAX_OPERANDS 5
685 typedef struct insn { /* an instruction itself */
686 char *label; /* the label defined, or NULL */
687 int prefixes[MAXPREFIX]; /* instruction prefixes, if any */
688 enum opcode opcode; /* the opcode - not just the string */
689 enum ccode condition; /* the condition code, if Jcc/SETcc */
690 int operands; /* how many operands? 0-3 (more if db et al) */
691 int addr_size; /* address size */
692 operand oprs[MAX_OPERANDS]; /* the operands, defined as above */
693 extop *eops; /* extended operands */
694 int eops_float; /* true if DD and floating */
695 int32_t times; /* repeat count (TIMES prefix) */
696 bool forw_ref; /* is there a forward reference? */
697 bool rex_done; /* REX prefix emitted? */
698 int rex; /* Special REX Prefix */
699 int vexreg; /* Register encoded in VEX prefix */
700 int vex_cm; /* Class and M field for VEX prefix */
701 int vex_wlp; /* W, P and L information for VEX prefix */
702 uint8_t evex_p[3]; /* EVEX.P0: [RXB,R',00,mm], P1: [W,vvvv,1,pp] */
703 /* EVEX.P2: [z,L'L,b,V',aaa] */
704 enum ttypes evex_tuple; /* Tuple type for compressed Disp8*N */
705 int evex_rm; /* static rounding mode for AVX512 (EVEX) */
706 int8_t evex_brerop; /* BR/ER/SAE operand position */
707 } insn;
709 /* Instruction flags type: IF_* flags are defined in insns.h */
710 typedef uint64_t iflags_t;
713 * What to return from a directive- or pragma-handling function.
714 * Currently DIRR_OK and DIRR_ERROR are treated the same way;
715 * in both cases the backend is expected to produce the appropriate
716 * error message on its own.
718 * DIRR_BADPARAM causes a generic error message to be printed. Note
719 * that it is an error, not a warning, even in the case of pragmas;
720 * don't use it where forward compatiblity would be compromised
721 * (instead consider adding a DIRR_WARNPARAM.)
723 enum directive_result {
724 DIRR_UNKNOWN, /* Directive not handled by backend */
725 DIRR_OK, /* Directive processed */
726 DIRR_ERROR, /* Directive processed unsuccessfully */
727 DIRR_BADPARAM /* Print bad argument error message */
731 * A pragma facility: this structure is used to request passing a
732 * parsed pragma directive for a specific facility. If the handler is
733 * NULL then this pragma facility is recognized but ignored; pragma
734 * processing stops at that point.
736 * Note that the handler is passed a pointer to the facility structure
737 * as part of the struct pragma.
739 struct pragma;
740 typedef enum directive_result (*pragma_handler)(const struct pragma *);
742 struct pragma_facility {
743 const char *name;
744 pragma_handler handler;
748 * This structure defines how a pragma directive is passed to a
749 * facility. This structure may be augmented in the future.
751 * Any facility MAY, but is not required to, add its operations
752 * keywords or a subset thereof into asm/directiv.dat, in which case
753 * the "opcode" field will be set to the corresponding D_ constant
754 * from directiv.h; otherwise it will be D_unknown.
756 struct pragma {
757 const struct pragma_facility *facility;
758 const char *facility_name; /* Facility name exactly as entered by user */
759 const char *opname; /* First word after the facility name */
760 const char *tail; /* Anything after the operation */
761 enum directive opcode; /* Operation as a D_ directives constant */
765 * These are semi-arbitrary limits to keep the assembler from going
766 * into a black hole on certain kinds of bugs. They can be overridden
767 * by command-line options or %pragma.
769 enum nasm_limit {
770 LIMIT_PASSES,
771 LIMIT_STALLED,
772 LIMIT_MACRO_LEVELS,
773 LIMIT_MACRO_TOKENS,
774 LIMIT_MMACROS,
775 LIMIT_REP,
776 LIMIT_EVAL,
777 LIMIT_LINES
779 #define LIMIT_MAX LIMIT_LINES
780 extern int64_t nasm_limit[LIMIT_MAX+1];
781 extern enum directive_result nasm_set_limit(const char *, const char *);
784 * The data structure defining an output format driver, and the
785 * interfaces to the functions therein.
787 struct ofmt {
789 * This is a short (one-liner) description of the type of
790 * output generated by the driver.
792 const char *fullname;
795 * This is a single keyword used to select the driver.
797 const char *shortname;
800 * Default output filename extension, or a null string
802 const char *extension;
805 * Output format flags.
807 #define OFMT_TEXT 1 /* Text file format */
808 #define OFMT_KEEP_ADDR 2 /* Keep addr; no conversion to data */
810 unsigned int flags;
812 int maxbits; /* Maximum segment bits supported */
815 * this is a pointer to the first element of the debug information
817 const struct dfmt * const *debug_formats;
820 * the default debugging format if -F is not specified
822 const struct dfmt *default_dfmt;
825 * This, if non-NULL, is a NULL-terminated list of `char *'s
826 * pointing to extra standard macros supplied by the object
827 * format (e.g. a sensible initial default value of __?SECT?__,
828 * and user-level equivalents for any format-specific
829 * directives).
831 macros_t *stdmac;
834 * This procedure is called at the start of an output session to set
835 * up internal parameters.
837 void (*init)(void);
840 * This procedure is called at the start of each pass.
842 void (*reset)(void);
845 * This is the modern output function, which gets passed
846 * a struct out_data with much more information. See the
847 * definition of struct out_data.
849 void (*output)(const struct out_data *data);
852 * This procedure is called by assemble() to write actual
853 * generated code or data to the object file. Typically it
854 * doesn't have to actually _write_ it, just store it for
855 * later.
857 * The `type' argument specifies the type of output data, and
858 * usually the size as well: its contents are described below.
860 * This is used for backends which have not yet been ported to
861 * the new interface, and should be NULL on ported backends.
862 * To use this entry point, set the output pointer to
863 * nasm_do_legacy_output.
865 void (*legacy_output)(int32_t segto, const void *data,
866 enum out_type type, uint64_t size,
867 int32_t segment, int32_t wrt);
870 * This procedure is called once for every symbol defined in
871 * the module being assembled. It gives the name and value of
872 * the symbol, in NASM's terms, and indicates whether it has
873 * been declared to be global. Note that the parameter "name",
874 * when passed, will point to a piece of static storage
875 * allocated inside the label manager - it's safe to keep using
876 * that pointer, because the label manager doesn't clean up
877 * until after the output driver has.
879 * Values of `is_global' are: 0 means the symbol is local; 1
880 * means the symbol is global; 2 means the symbol is common (in
881 * which case `offset' holds the _size_ of the variable).
882 * Anything else is available for the output driver to use
883 * internally.
885 * This routine explicitly _is_ allowed to call the label
886 * manager to define further symbols, if it wants to, even
887 * though it's been called _from_ the label manager. That much
888 * re-entrancy is guaranteed in the label manager. However, the
889 * label manager will in turn call this routine, so it should
890 * be prepared to be re-entrant itself.
892 * The `special' parameter contains special information passed
893 * through from the command that defined the label: it may have
894 * been an EXTERN, a COMMON or a GLOBAL. The distinction should
895 * be obvious to the output format from the other parameters.
897 void (*symdef)(char *name, int32_t segment, int64_t offset,
898 int is_global, char *special);
901 * This procedure is called when the source code requests a
902 * segment change. It should return the corresponding segment
903 * _number_ for the name, or NO_SEG if the name is not a valid
904 * segment name.
906 * It may also be called with NULL, in which case it is to
907 * return the _default_ section number for starting assembly in.
909 * It is allowed to modify the string it is given a pointer to.
911 * It is also allowed to specify a default instruction size for
912 * the segment, by setting `*bits' to 16 or 32. Or, if it
913 * doesn't wish to define a default, it can leave `bits' alone.
915 int32_t (*section)(char *name, int *bits);
918 * This function is called when a label is defined
919 * in the source code. It is allowed to change the section
920 * number as a result, but not the bits value.
921 * This is *only* called if the symbol defined is at the
922 * current offset, i.e. "foo:" or "foo equ $".
923 * The offset isn't passed; and may not be stable at this point.
924 * The subsection number is a field available for use by the
925 * backend. It is initialized to NO_SEG.
927 * If "copyoffset" is set by the backend then the offset is
928 * copied from the previous segment, otherwise the new segment
929 * is treated as a new segment the normal way.
931 int32_t (*herelabel)(const char *name, enum label_type type,
932 int32_t seg, int32_t *subsection,
933 bool *copyoffset);
936 * This procedure is called to modify section alignment,
937 * note there is a trick, the alignment can only increase
939 void (*sectalign)(int32_t seg, unsigned int value);
942 * This procedure is called to modify the segment base values
943 * returned from the SEG operator. It is given a segment base
944 * value (i.e. a segment value with the low bit set), and is
945 * required to produce in return a segment value which may be
946 * different. It can map segment bases to absolute numbers by
947 * means of returning SEG_ABS types.
949 * It should return NO_SEG if the segment base cannot be
950 * determined; the evaluator (which calls this routine) is
951 * responsible for throwing an error condition if that occurs
952 * in pass two or in a critical expression.
954 int32_t (*segbase)(int32_t segment);
957 * This procedure is called to allow the output driver to
958 * process its own specific directives. When called, it has the
959 * directive word in `directive' and the parameter string in
960 * `value'.
962 * The following values are (currently) possible for
963 * directive_result:
965 * 0 - DIRR_UNKNOWN - directive not recognized by backend
966 * 1 - DIRR_OK - directive processed ok
967 * 2 - DIRR_ERROR - backend printed its own error message
968 * 3 - DIRR_BADPARAM - print the generic message
969 * "invalid parameter to [*] directive"
971 enum directive_result
972 (*directive)(enum directive directive, char *value);
975 * This procedure is called after assembly finishes, to allow
976 * the output driver to clean itself up and free its memory.
977 * Typically, it will also be the point at which the object
978 * file actually gets _written_.
980 * One thing the cleanup routine should always do is to close
981 * the output file pointer.
983 void (*cleanup)(void);
986 * List of pragma facility names that apply to this backend.
988 const struct pragma_facility *pragmas;
992 * Output format driver alias
994 struct ofmt_alias {
995 const char *shortname;
996 const struct ofmt *ofmt;
999 extern const struct ofmt *ofmt;
1000 extern FILE *ofile;
1003 * ------------------------------------------------------------
1004 * The data structure defining a debug format driver, and the
1005 * interfaces to the functions therein.
1006 * ------------------------------------------------------------
1009 struct dfmt {
1011 * This is a short (one-liner) description of the type of
1012 * output generated by the driver.
1014 const char *fullname;
1017 * This is a single keyword used to select the driver.
1019 const char *shortname;
1022 * init - called initially to set up local pointer to object format.
1024 void (*init)(void);
1027 * linenum - called any time there is output with a change of
1028 * line number or file.
1030 void (*linenum)(const char *filename, int32_t linenumber, int32_t segto);
1033 * debug_deflabel - called whenever a label is defined. Parameters
1034 * are the same as to 'symdef()' in the output format. This function
1035 * is called after the output format version.
1038 void (*debug_deflabel)(char *name, int32_t segment, int64_t offset,
1039 int is_global, char *special);
1041 * debug_directive - called whenever a DEBUG directive other than 'LINE'
1042 * is encountered. 'directive' contains the first parameter to the
1043 * DEBUG directive, and params contains the rest. For example,
1044 * 'DEBUG VAR _somevar:int' would translate to a call to this
1045 * function with 'directive' equal to "VAR" and 'params' equal to
1046 * "_somevar:int".
1048 void (*debug_directive)(const char *directive, const char *params);
1051 * typevalue - called whenever the assembler wishes to register a type
1052 * for the last defined label. This routine MUST detect if a type was
1053 * already registered and not re-register it.
1055 void (*debug_typevalue)(int32_t type);
1058 * debug_output - called whenever output is required
1059 * 'type' is the type of info required, and this is format-specific
1061 void (*debug_output)(int type, void *param);
1064 * cleanup - called after processing of file is complete
1066 void (*cleanup)(void);
1069 * List of pragma facility names that apply to this backend.
1071 const struct pragma_facility *pragmas;
1074 extern const struct dfmt *dfmt;
1077 * The type definition macros
1078 * for debugging
1080 * low 3 bits: reserved
1081 * next 5 bits: type
1082 * next 24 bits: number of elements for arrays (0 for labels)
1085 #define TY_UNKNOWN 0x00
1086 #define TY_LABEL 0x08
1087 #define TY_BYTE 0x10
1088 #define TY_WORD 0x18
1089 #define TY_DWORD 0x20
1090 #define TY_FLOAT 0x28
1091 #define TY_QWORD 0x30
1092 #define TY_TBYTE 0x38
1093 #define TY_OWORD 0x40
1094 #define TY_YWORD 0x48
1095 #define TY_ZWORD 0x50
1096 #define TY_COMMON 0xE0
1097 #define TY_SEG 0xE8
1098 #define TY_EXTERN 0xF0
1099 #define TY_EQU 0xF8
1101 #define TYM_TYPE(x) ((x) & 0xF8)
1102 #define TYM_ELEMENTS(x) (((x) & 0xFFFFFF00) >> 8)
1104 #define TYS_ELEMENTS(x) ((x) << 8)
1106 /* Sizes corresponding to various tokens */
1107 enum byte_sizes {
1108 SIZE_BYTE = 1,
1109 SIZE_WORD = 2,
1110 SIZE_DWORD = 4,
1111 SIZE_QWORD = 8,
1112 SIZE_TWORD = 10,
1113 SIZE_OWORD = 16,
1114 SIZE_YWORD = 32,
1115 SIZE_ZWORD = 64
1118 enum special_tokens {
1119 SIZE_ENUM_START = PREFIX_ENUM_LIMIT,
1120 S_BYTE = SIZE_ENUM_START,
1121 S_WORD,
1122 S_DWORD,
1123 S_QWORD,
1124 S_TWORD,
1125 S_OWORD,
1126 S_YWORD,
1127 S_ZWORD,
1128 SIZE_ENUM_LIMIT,
1130 SPECIAL_ENUM_START = SIZE_ENUM_LIMIT,
1131 S_ABS = SPECIAL_ENUM_START,
1132 S_FAR,
1133 S_LONG,
1134 S_NEAR,
1135 S_NOSPLIT,
1136 S_REL,
1137 S_SHORT,
1138 S_STRICT,
1139 S_TO,
1140 SPECIAL_ENUM_LIMIT
1143 enum decorator_tokens {
1144 DECORATOR_ENUM_START = SPECIAL_ENUM_LIMIT,
1145 BRC_1TO2 = DECORATOR_ENUM_START,
1146 BRC_1TO4,
1147 BRC_1TO8,
1148 BRC_1TO16,
1149 BRC_RN,
1150 BRC_RD,
1151 BRC_RU,
1152 BRC_RZ,
1153 BRC_SAE,
1154 BRC_Z,
1155 DECORATOR_ENUM_LIMIT
1159 * AVX512 Decorator (decoflags_t) bits distribution (counted from 0)
1160 * 3 2 1
1161 * 10987654321098765432109876543210
1163 * | word boundary
1164 * ............................1111 opmask
1165 * ...........................1.... zeroing / merging
1166 * ..........................1..... broadcast
1167 * .........................1...... static rounding
1168 * ........................1....... SAE
1169 * ......................11........ broadcast element size
1170 * ....................11.......... number of broadcast elements
1172 #define OP_GENVAL(val, bits, shift) (((val) & ((UINT64_C(1) << (bits)) - 1)) << (shift))
1175 * Opmask register number
1176 * identical to EVEX.aaa
1178 * Bits: 0 - 3
1180 #define OPMASK_SHIFT (0)
1181 #define OPMASK_BITS (4)
1182 #define OPMASK_MASK OP_GENMASK(OPMASK_BITS, OPMASK_SHIFT)
1183 #define GEN_OPMASK(bit) OP_GENBIT(bit, OPMASK_SHIFT)
1184 #define VAL_OPMASK(val) OP_GENVAL(val, OPMASK_BITS, OPMASK_SHIFT)
1187 * zeroing / merging control available
1188 * matching to EVEX.z
1190 * Bits: 4
1192 #define Z_SHIFT (4)
1193 #define Z_BITS (1)
1194 #define Z_MASK OP_GENMASK(Z_BITS, Z_SHIFT)
1195 #define GEN_Z(bit) OP_GENBIT(bit, Z_SHIFT)
1198 * broadcast - Whether this operand can be broadcasted
1200 * Bits: 5
1202 #define BRDCAST_SHIFT (5)
1203 #define BRDCAST_BITS (1)
1204 #define BRDCAST_MASK OP_GENMASK(BRDCAST_BITS, BRDCAST_SHIFT)
1205 #define GEN_BRDCAST(bit) OP_GENBIT(bit, BRDCAST_SHIFT)
1208 * Whether this instruction can have a static rounding mode.
1209 * It goes with the last simd operand because the static rounding mode
1210 * decorator is located between the last simd operand and imm8 (if any).
1212 * Bits: 6
1214 #define STATICRND_SHIFT (6)
1215 #define STATICRND_BITS (1)
1216 #define STATICRND_MASK OP_GENMASK(STATICRND_BITS, STATICRND_SHIFT)
1217 #define GEN_STATICRND(bit) OP_GENBIT(bit, STATICRND_SHIFT)
1220 * SAE(Suppress all exception) available
1222 * Bits: 7
1224 #define SAE_SHIFT (7)
1225 #define SAE_BITS (1)
1226 #define SAE_MASK OP_GENMASK(SAE_BITS, SAE_SHIFT)
1227 #define GEN_SAE(bit) OP_GENBIT(bit, SAE_SHIFT)
1230 * Broadcasting element size.
1232 * Bits: 8 - 9
1234 #define BRSIZE_SHIFT (8)
1235 #define BRSIZE_BITS (2)
1236 #define BRSIZE_MASK OP_GENMASK(BRSIZE_BITS, BRSIZE_SHIFT)
1237 #define GEN_BRSIZE(bit) OP_GENBIT(bit, BRSIZE_SHIFT)
1239 #define BR_BITS32 GEN_BRSIZE(0)
1240 #define BR_BITS64 GEN_BRSIZE(1)
1243 * Number of broadcasting elements
1245 * Bits: 10 - 11
1247 #define BRNUM_SHIFT (10)
1248 #define BRNUM_BITS (2)
1249 #define BRNUM_MASK OP_GENMASK(BRNUM_BITS, BRNUM_SHIFT)
1250 #define VAL_BRNUM(val) OP_GENVAL(val, BRNUM_BITS, BRNUM_SHIFT)
1252 #define BR_1TO2 VAL_BRNUM(0)
1253 #define BR_1TO4 VAL_BRNUM(1)
1254 #define BR_1TO8 VAL_BRNUM(2)
1255 #define BR_1TO16 VAL_BRNUM(3)
1257 #define MASK OPMASK_MASK /* Opmask (k1 ~ 7) can be used */
1258 #define Z Z_MASK
1259 #define B32 (BRDCAST_MASK|BR_BITS32) /* {1to16} : broadcast 32b * 16 to zmm(512b) */
1260 #define B64 (BRDCAST_MASK|BR_BITS64) /* {1to8} : broadcast 64b * 8 to zmm(512b) */
1261 #define ER STATICRND_MASK /* ER(Embedded Rounding) == Static rounding mode */
1262 #define SAE SAE_MASK /* SAE(Suppress All Exception) */
1265 * Global modes
1269 * flag to disable optimizations selectively
1270 * this is useful to turn-off certain optimizations
1272 enum optimization_disable_flag {
1273 OPTIM_ALL_ENABLED = 0,
1274 OPTIM_DISABLE_JMP_MATCH = 1
1277 struct optimization {
1278 int level;
1279 int flag;
1283 * Various types of compiler passes we may execute.
1285 enum pass_type {
1286 PASS_INIT, /* Initialization, not doing anything yet */
1287 PASS_FIRST, /* The very first pass over the code */
1288 PASS_OPT, /* Optimization pass */
1289 PASS_STAB, /* Stabilization pass (original pass 1) */
1290 PASS_FINAL /* Code generation pass (original pass 2) */
1292 extern const char * const _pass_types[];
1293 extern enum pass_type _pass_type;
1294 static inline enum pass_type pass_type(void)
1296 return _pass_type;
1298 static inline const char *pass_type_name(void)
1300 return _pass_types[_pass_type];
1302 /* True during initialization, no code read yet */
1303 static inline bool not_started(void)
1305 return pass_type() == PASS_INIT;
1307 /* True for the initial pass and setup (old "pass2 < 2") */
1308 static inline bool pass_first(void)
1310 return pass_type() <= PASS_FIRST;
1312 /* At this point we better have stable definitions */
1313 static inline bool pass_stable(void)
1315 return pass_type() >= PASS_STAB;
1317 /* True for the code generation pass only, (old "pass1 >= 2") */
1318 static inline bool pass_final(void)
1320 return pass_type() >= PASS_FINAL;
1324 * The actual pass number. 0 is used during initialization, the very
1325 * first pass is 1, and then it is simply increasing numbers until we are
1326 * done.
1328 extern int64_t _passn; /* Actual pass number */
1329 static inline int64_t pass_count(void)
1331 return _passn;
1334 extern struct optimization optimizing;
1335 extern int globalbits; /* 16, 32 or 64-bit mode */
1336 extern int globalrel; /* default to relative addressing? */
1337 extern int globalbnd; /* default to using bnd prefix? */
1339 extern const char *inname; /* primary input filename */
1340 extern const char *outname; /* output filename */
1343 * Switch to a different segment and return the current offset
1345 int64_t switch_segment(int32_t segment);
1347 #endif /* NASM_NASM_H */