/* tc-i386.c -- Assemble code for the Intel 80386
   Copyright (C) 1989-2024 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Intel 80386 machine specific gas.
   Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
   x86_64 support by Jan Hubicka (jh@suse.cz)
   VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
   Bugs & suggestions are completely welcome.  This is free software.
   Please help us make it better.  */
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "scfi.h"
34 #include "gen-sframe.h"
35 #include "sframe.h"
36 #include "elf/x86-64.h"
37 #include "opcodes/i386-init.h"
38 #include "opcodes/i386-mnem.h"
39 #include <limits.h>
41 #ifndef INFER_ADDR_PREFIX
42 #define INFER_ADDR_PREFIX 1
43 #endif
45 #ifndef DEFAULT_ARCH
46 #define DEFAULT_ARCH "i386"
47 #endif
49 #ifndef INLINE
50 #if __GNUC__ >= 2
51 #define INLINE __inline__
52 #else
53 #define INLINE
54 #endif
55 #endif
/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT really is an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6	/* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */
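/* Illustrative example (not from the original source): in
   "lock addl %eax, %fs:(%edx)" the segment override occupies the
   SEG_PREFIX slot (1) and the LOCK byte the LOCK_PREFIX slot (5), so
   the override is emitted before LOCK regardless of the order in
   which the prefixes were supplied.  */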
/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX 'q'

#define END_OF_INSN '\0'

#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding
/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;
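/* A worked example (illustrative): for "movl %eax, (%ebx)" (opcode 0x89)
   the ModRM byte is built from mode 0 (indirect, no displacement), reg 0
   (%eax) and regmem 3 (%ebx), i.e. (0 << 6) | (0 << 3) | 3 == 0x03, so
   the instruction assembles to the two bytes 89 03.  */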
/* x86 arch names, types and features */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len:8;		/* arch string length */
  bool skip:1;			/* show_arch should skip this. */
  enum processor_type type;	/* arch type */
  enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
  i386_cpu_flags enable;	/* cpu feature enable flags */
  i386_cpu_flags disable;	/* cpu feature disable flags */
}
arch_entry;

/* Modes for parse_insn() to operate in.  */
enum parse_mode {
  parse_all,
  parse_prefix,
  parse_pseudo_prefix,
};
static void update_code_flag (int, int);
static void s_insn (int);
static void s_noopt (int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (const char *, char **);
static const char *parse_insn (const char *, char *, enum parse_mode);
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum i386_flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (const insn_template *);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (const struct last_insn *);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifdef OBJ_AOUT
static void s_bss (int);
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif
static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;

/* VEX prefix.  */
typedef struct
{
  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;
/* 'md_assemble ()' gathers together information and puts it into an
   i386_insn.  */

union i386_op
{
  expressionS *disps;
  expressionS *imms;
  const reg_entry *regs;
};

enum i386_error
{
  no_error, /* Must be first.  */
  operand_size_mismatch,
  operand_type_mismatch,
  register_type_mismatch,
  number_of_operands_mismatch,
  invalid_instruction_suffix,
  bad_imm4,
  unsupported_with_intel_mnemonic,
  unsupported_syntax,
  unsupported_EGPR_for_addressing,
  unsupported_nf,
  unsupported,
  unsupported_on_arch,
  unsupported_64bit,
  no_vex_encoding,
  no_evex_encoding,
  invalid_sib_address,
  invalid_vsib_address,
  invalid_vector_register_set,
  invalid_tmm_register_set,
  invalid_dest_and_src_register_set,
  invalid_dest_register_set,
  invalid_pseudo_prefix,
  unsupported_vector_index_register,
  unsupported_broadcast,
  broadcast_needed,
  unsupported_masking,
  mask_not_on_destination,
  no_default_mask,
  unsupported_rc_sae,
  unsupported_vector_size,
  unsupported_rsp_register,
  internal_error,
};
struct _i386_insn
{
  /* TM holds the template for the insn we're currently assembling.  */
  insn_template tm;

  /* SUFFIX holds the instruction size suffix for byte, word, dword
     or qword, if given.  */
  char suffix;

  /* OPCODE_LENGTH holds the number of base opcode bytes.  */
  unsigned char opcode_length;

  /* OPERANDS gives the number of given operands.  */
  unsigned int operands;

  /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
     of given register, displacement, memory operands and immediate
     operands.  */
  unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

  /* TYPES [i] is the type (see above #defines) which tells us how to
     use OP[i] for the corresponding operand.  */
  i386_operand_type types[MAX_OPERANDS];

  /* Displacement expression, immediate expression, or register for each
     operand.  */
  union i386_op op[MAX_OPERANDS];

  /* Flags for operands.  */
  unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2
#define Operand_Signed 4 /* .insn only */

  /* Relocation type for operand */
  enum bfd_reloc_code_real reloc[MAX_OPERANDS];

  /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
     the base index byte below.  */
  const reg_entry *base_reg;
  const reg_entry *index_reg;
  unsigned int log2_scale_factor;

  /* SEG gives the seg_entries of this insn.  They are zero unless
     explicit segment overrides are given.  */
  const reg_entry *seg[2];

  /* PREFIX holds all the given prefix opcodes (usually null).
     PREFIXES is the number of prefix opcodes.  */
  unsigned int prefixes;
  unsigned char prefix[MAX_PREFIXES];

  /* .insn allows for reserved opcode spaces.  */
  unsigned char insn_opcode_space;

  /* .insn also allows (requires) specifying immediate size.  */
  unsigned char imm_bits[MAX_OPERANDS];

  /* Register is in low 3 bits of opcode.  */
  bool short_form;

  /* The operand to a branch insn indicates an absolute branch.  */
  bool jumpabsolute;

  /* The operand to a branch insn indicates a far branch.  */
  bool far_branch;

  /* There is a memory operand of (%dx) which should be only used
     with input/output instructions.  */
  bool input_output_operand;

  /* Extended states.  */
  enum
    {
      /* Use MMX state.  */
      xstate_mmx = 1 << 0,
      /* Use XMM state.  */
      xstate_xmm = 1 << 1,
      /* Use YMM state.  */
      xstate_ymm = 1 << 2 | xstate_xmm,
      /* Use ZMM state.  */
      xstate_zmm = 1 << 3 | xstate_ymm,
      /* Use TMM state.  */
      xstate_tmm = 1 << 4,
      /* Use MASK state.  */
      xstate_mask = 1 << 5
    } xstate;

  /* Has GOTPC or TLS relocation.  */
  bool has_gotpc_tls_reloc;

  /* RM and SIB are the modrm byte and the sib byte where the
     addressing modes of this insn are encoded.  */
  modrm_byte rm;
  rex_byte rex;
  rex_byte vrex;
  rex_byte rex2;
  sib_byte sib;
  vex_prefix vex;

  /* Masking attributes.

     The struct describes masking, applied to OPERAND in the instruction.
     REG is a pointer to the corresponding mask register.  ZEROING tells
     whether merging or zeroing mask is used.  */
  struct Mask_Operation
    {
      const reg_entry *reg;
      unsigned int zeroing;
      /* The operand where this operation is associated.  */
      unsigned int operand;
    } mask;

  /* Rounding control and SAE attributes.  */
  struct RC_Operation
    {
      enum rc_type
	{
	  rc_none = -1,
	  rne,
	  rd,
	  ru,
	  rz,
	  saeonly
	} type;
      /* In Intel syntax the operand modifier form is supposed to be used, but
	 we continue to accept the immediate forms as well.  */
      bool modifier;
    } rounding;

  /* Broadcasting attributes.

     The struct describes broadcasting, applied to OPERAND.  TYPE
     expresses the broadcast factor.  */
  struct Broadcast_Operation
    {
      /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
      unsigned int type;

      /* Index of broadcasted operand.  */
      unsigned int operand;

      /* Number of bytes to broadcast.  */
      unsigned int bytes;
    } broadcast;

  /* Compressed disp8*N attribute.  */
  unsigned int memshift;

  /* SCC = EVEX.[SC3,SC2,SC1,SC0].  */
  unsigned int scc;

  /* Store 4 bits of EVEX.[OF,SF,ZF,CF].  */
#define OSZC_CF 1
#define OSZC_ZF 2
#define OSZC_SF 4
#define OSZC_OF 8
  unsigned int oszc_flags;

  /* Invert the condition encoded in a base opcode.  */
  bool invert_cond;

  /* REP prefix.  */
  const char *rep_prefix;

  /* HLE prefix.  */
  const char *hle_prefix;

  /* Have BND prefix.  */
  const char *bnd_prefix;

  /* Have NOTRACK prefix.  */
  const char *notrack_prefix;

  /* Error message.  */
  enum i386_error error;
};

typedef struct _i386_insn i386_insn;
/* Pseudo-prefix recording state, separate from i386_insn.  */
static struct pseudo_prefixes {
  /* How to encode instructions.  */
  enum {
    encoding_default = 0,
    encoding_vex,
    encoding_vex3,
    encoding_egpr, /* REX2 or EVEX.  */
    encoding_evex,
    encoding_evex512,
    encoding_error
  } encoding;

  /* Prefer load or store in encoding.  */
  enum {
    dir_encoding_default = 0,
    dir_encoding_load,
    dir_encoding_store,
    dir_encoding_swap
  } dir_encoding;

  /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
  enum {
    disp_encoding_default = 0,
    disp_encoding_8bit,
    disp_encoding_16bit,
    disp_encoding_32bit
  } disp_encoding;

  /* Prefer the REX byte in encoding.  */
  bool rex_encoding;

  /* Prefer the REX2 prefix in encoding.  */
  bool rex2_encoding;

  /* No CSPAZO flags update.  */
  bool has_nf;

  /* Disable instruction size optimization.  */
  bool no_optimize;
} pp;
/* Link RC type with corresponding string, that'll be looked for in
   asm.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

static const struct RC_name RC_NamesTable[] =
{
  { rne, STRING_COMMA_LEN ("rn-sae") },
  { rd,  STRING_COMMA_LEN ("rd-sae") },
  { ru,  STRING_COMMA_LEN ("ru-sae") },
  { rz,  STRING_COMMA_LEN ("rz-sae") },
  { saeonly, STRING_COMMA_LEN ("sae") },
};

/* To be indexed by segment register number.  */
static const unsigned char i386_seg_prefixes[] = {
  ES_PREFIX_OPCODE,
  CS_PREFIX_OPCODE,
  SS_PREFIX_OPCODE,
  DS_PREFIX_OPCODE,
  FS_PREFIX_OPCODE,
  GS_PREFIX_OPCODE
};
/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-(["
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;

#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))	\
     && !defined (TE_GNU)				\
     && !defined (TE_LINUX)				\
     && !defined (TE_Haiku)				\
     && !defined (TE_FreeBSD)				\
     && !defined (TE_DragonFly)				\
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or    0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";
/* Tables for lexical analysis.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];

/* Lexical macros.  */
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')

/* All non-digit non-letter characters that may occur in an operand and
   which aren't already in extra_symbol_chars[].  */
static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]{}";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
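/* Illustrative usage sketch (find_end_of_operand is a hypothetical
   helper, not part of this file):

     char *end = find_end_of_operand (str);
     END_STRING_AND_SAVE (end);     // *end saved to the stack, *end = '\0'
     process_field (str);           // sees a NUL-terminated sub-field
     RESTORE_END_STRING (end);      // original character put back

   Saves and restores must pair up in LIFO order for save_stack to stay
   consistent.  */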
/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static templates current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on.  */
static int this_operand = -1;

/* Are we processing a .insn directive?  */
#define dot_insn() (i.tm.mnem_off == MN__insn)

enum i386_flag_code i386_flag_code;
#define flag_code i386_flag_code /* Permit to continue using original name.  */
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 if generating code for a shared library.  */
static int shared = 0;

unsigned int x86_sframe_cfa_sp_reg;
/* The other CFA base register for SFrame stack trace info.  */
unsigned int x86_sframe_cfa_fp_reg;

static ginsnS *x86_ginsn_new (const symbolS *, enum ginsn_gen_mode);
#endif

/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;
/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;
/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
sse_check, operand_check = check_warning;

/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);
/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };

/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and,  /* test/cmp */
    mf_cmp_alu_cmp,   /* add/sub/cmp */
    mf_cmp_incdec     /* inc/dec */
  };

/* The maximum padding size for fused jcc.  CMP like instruction can
   be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;
/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* ISA extensions available in 64-bit mode only.  */
static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   larger than a byte offset.  */
static bool no_cond_jump_promotion = false;
/* This will be set from an expression parser hook if there's any
   applicable operator involved in an expression.  */
static enum {
  expr_operator_none,
  expr_operator_present,
  expr_large_value,
} expr_mode;

/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode aligned vector move as unaligned vector move.  */
static unsigned int use_unaligned_vector_move;

/* Maximum permitted vector size.  */
#define VSZ128 0
#define VSZ256 1
#define VSZ512 2
#define VSZ_DEFAULT VSZ512
static unsigned int vector_size = VSZ_DEFAULT;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
  {
    vex128 = 0,
    vex256
  } avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
  {
    vexw0 = 0,
    vexw1
  } vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
  {
    evexl128 = 0,
    evexl256,
    evexl512
  } evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
  {
    evexw0 = 0,
    evexw1
  } evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;
/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16	1
#define SMALL	0
#define SMALL16 (SMALL | CODE16)
#define BIG	2
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif
#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
    ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
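/* A worked example (illustrative): ENCODE_RELAX_STATE (COND_JUMP, SMALL)
   is (1 << 2) | 0 == 4; TYPE_FROM_RELAX_STATE (4) recovers COND_JUMP, and
   DISP_SIZE_FROM_RELAX_STATE (4) yields 1, the byte-sized displacement of
   a short conditional jump.  With BIG instead of SMALL the size macro
   yields 4, matching a rel32 displacement.  */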
/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (The top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};
#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }
#define VECARCH(n, e, d, v) \
  { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
    CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (i686, I686, 686, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (znver5, ZNVER, ZNVER5, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  VECARCH (avx, AVX, ANY_AVX, reset),
  VECARCH (avx2, AVX2, ANY_AVX2, reset),
  VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
  VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
  VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
  VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
  VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
  VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
  VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
  SUBARCH (monitor, MONITOR, MONITOR, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
  SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
  VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
  VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
  VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
  VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
  VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
  VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
  VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
  VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  VECARCH (vaes, VAES, ANY_VAES, reset),
  VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
  VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
	   ANY_AVX512_VP2INTERSECT, reset),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
  VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
  SUBARCH (fred, FRED, ANY_FRED, false),
  SUBARCH (lkgs, LKGS, ANY_LKGS, false),
  VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
  VECARCH (sha512, SHA512, ANY_SHA512, reset),
  VECARCH (sm3, SM3, ANY_SM3, reset),
  VECARCH (sm4, SM4, ANY_SM4, reset),
  SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
  VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
  SUBARCH (user_msr, USER_MSR, USER_MSR, false),
  SUBARCH (apx_f, APX_F, APX_F, false),
};

#undef SUBARCH
#undef ARCH
#ifdef I386COFF
/* Like s_lcomm_internal in gas/read.c but the alignment string
   is allowed to be optional.  */

static symbolS *
pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
{
  addressT align = 0;

  SKIP_WHITESPACE ();

  if (needs_align
      && *input_line_pointer == ',')
    {
      align = parse_align (needs_align - 1);

      if (align == (addressT) -1)
	return NULL;
    }
  else
    {
      if (size >= 8)
	align = 3;
      else if (size >= 4)
	align = 2;
      else if (size >= 2)
	align = 1;
      else
	align = 0;
    }

  bss_alloc (symbolP, size, align);
  return symbolP;
}

static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
#endif
const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifdef OBJ_AOUT
  {"bss", s_bss, 0},
#endif
#ifdef I386COFF
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"insn", s_insn, 0},
  {"noopt", s_noopt, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};
/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;
/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};				/* nop			*/
static const unsigned char f32_2[] =
  {0x66,0x90};				/* xchg %ax,%ax		*/
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
#define f32_4 (f32_5 + 1)		/* leal 0(%esi,%eiz),%esi */
static const unsigned char f32_5[] =
  {0x2e,0x8d,0x74,0x26,0x00};		/* leal %cs:0(%esi,%eiz),%esi */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
#define f32_7 (f32_8 + 1)		/* leal 0L(%esi,%eiz),%esi */
static const unsigned char f32_8[] =
  {0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal %cs:0L(%esi,%eiz),%esi */
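/* Note (illustrative): the f32_4/f32_7 style #defines alias into the
   next-longer pattern rather than defining a fresh array.  f32_5 begins
   with a 0x2e CS segment-override prefix, so f32_5 + 1 skips that byte
   and leaves the 4-byte "leal 0(%esi,%eiz),%esi" encoding.  The same
   trick is used by the f64_*, f16_*, alt_* and alt64_* defines below.  */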
static const unsigned char f64_3[] =
  {0x48,0x89,0xf6};			/* mov %rsi,%rsi	*/
static const unsigned char f64_4[] =
  {0x48,0x8d,0x76,0x00};		/* lea 0(%rsi),%rsi	*/
#define f64_5 (f64_6 + 1)		/* lea 0(%rsi,%riz),%rsi */
static const unsigned char f64_6[] =
  {0x2e,0x48,0x8d,0x74,0x26,0x00};	/* lea %cs:0(%rsi,%riz),%rsi */
static const unsigned char f64_7[] =
  {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00};	/* lea 0L(%rsi),%rsi	*/
#define f64_8 (f64_9 + 1)		/* lea 0L(%rsi,%riz),%rsi */
static const unsigned char f64_9[] =
  {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
#define f16_2 (f64_3 + 1)		/* mov %si,%si	*/
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
#define f16_4 (f16_5 + 1)		/* lea 0W(%si),%si	*/
static const unsigned char f16_5[] =
  {0x2e,0x8d,0xb4,0x00,0x00};		/* lea %cs:0W(%si),%si	*/
static const unsigned char jump_disp8[] =
  {0xeb};				/* jmp disp8	*/
static const unsigned char jump32_disp32[] =
  {0xe9};				/* jmp disp32	*/
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};				/* jmp disp32	*/
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8
};
/* 64-bit NOPs patterns.  */
static const unsigned char *const f64_patt[] = {
  f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f16_2, f16_3, f16_4, f16_5
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
#define alt_5 (alt_6 + 1)
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
#define alt_8 (alt_9 + 1)
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
#define alt_10 (alt_11 + 1)
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
#define alt64_9 (alt64_15 + 6)		/* nopq 0L(%rax,%rax,1)	*/
#define alt64_10 (alt64_15 + 5)		/* cs nopq 0L(%rax,%rax,1) */
/* data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_11 (alt64_15 + 4)
/* data16 data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_12 (alt64_15 + 3)
/* data16 data16 data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_13 (alt64_15 + 2)
/* data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_14 (alt64_15 + 1)
/* data16 data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1) */
static const unsigned char alt64_15[] =
  {0x66,0x66,0x66,0x66,0x66,0x2e,0x48,
   0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* Long 64-bit NOPs patterns.  */
static const unsigned char *const alt64_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt64_9, alt64_10, alt64_11, alt64_12, alt64_13, alt64_14, alt64_15
};
/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  /* Place the longer NOP first.  */
  int last;
  int offset;
  const unsigned char *nops;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  nops = patt[max_single_nop_size - 1];
  last = count % max_single_nop_size;

  count -= last;
  for (offset = 0; offset < count; offset += max_single_nop_size)
    memcpy (where + offset, nops, max_single_nop_size);

  if (last)
    {
      nops = patt[last - 1];
      memcpy (where + offset, nops, last);
    }
}
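/* A worked example (illustrative): with count == 7 and
   max_single_nop_size == 4, the loop emits one 4-byte NOP from patt[3]
   and the tail emits one 3-byte NOP from patt[2], filling the 7 bytes
   with just two instructions.  */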
static INLINE int
fits_in_imm7 (offsetT num)
{
  return (num & 0x7f) == num;
}

static INLINE int
fits_in_imm31 (offsetT num)
{
  return (num & 0x7fffffff) == num;
}
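/* E.g. fits_in_imm7 (127) is true while fits_in_imm7 (128) and
   fits_in_imm7 (-1) are false: masking only reproduces NUM for
   non-negative values within the field, which is exactly what the
   forward-only "jmp disp8"/"jmp disp32" padding below needs.  */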
/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit.  When -mtune= is used:

     1. For PROCESSOR_I?86, PROCESSOR_PENTIUM, PROCESSOR_IAMCU, and
     PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt/f64_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.  */

  if (fragP->tc_frag_data.code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used, unless
	     explicitly disabled.  */
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isanop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	      if (fragP->tc_frag_data.cpunop)
		{
		  if (fragP->tc_frag_data.code == CODE_64BIT)
		    patt = alt64_patt;
		  else
		    patt = alt_patt;
		}
	      break;

	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      if (fragP->tc_frag_data.cpunop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_I686:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    default:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isanop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_NONE:
	      abort ();
	    }
	}

      if (patt != alt_patt && patt != alt64_patt)
	{
	  max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
						 : ARRAY_SIZE (f64_patt);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = patt == alt_patt
				? ARRAY_SIZE (alt_patt)
				: ARRAY_SIZE (alt64_patt);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  /* Emit a plain NOP first when the last thing we saw may not have been
     a proper instruction (e.g. a stand-alone prefix or .byte).  */
  if (!fragP->tc_frag_data.last_insn_normal)
    {
      *where++ = 0x90;
      --count;
    }

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}
static INLINE int
operand_type_all_zero (const union i386_operand_type *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE void
operand_type_set (union i386_operand_type *x, unsigned int v)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      x->array[2] = v;
      /* Fall through.  */
    case 2:
      x->array[1] = v;
      /* Fall through.  */
    case 1:
      x->array[0] = v;
      break;
    default:
      abort ();
    }

  x->bitfield.class = ClassNone;
  x->bitfield.instance = InstanceNone;
}
static INLINE int
operand_type_equal (const union i386_operand_type *x,
		    const union i386_operand_type *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE bool
_is_cpu (const i386_cpu_attr *a, enum i386_cpu cpu)
{
  switch (cpu)
    {
    case Cpu287:      return a->bitfield.cpu287;
    case Cpu387:      return a->bitfield.cpu387;
    case Cpu3dnow:    return a->bitfield.cpu3dnow;
    case Cpu3dnowA:   return a->bitfield.cpu3dnowa;
    case CpuAVX:      return a->bitfield.cpuavx;
    case CpuHLE:      return a->bitfield.cpuhle;
    case CpuAVX512F:  return a->bitfield.cpuavx512f;
    case CpuAVX512VL: return a->bitfield.cpuavx512vl;
    case CpuAPX_F:    return a->bitfield.cpuapx_f;
    case Cpu64:       return a->bitfield.cpu64;
    case CpuNo64:     return a->bitfield.cpuno64;
    default:
      gas_assert (cpu < CpuAttrEnums);
    }
  return a->bitfield.isa == cpu + 1u;
}

static INLINE bool
is_cpu (const insn_template *t, enum i386_cpu cpu)
{
  return _is_cpu(&t->cpu, cpu);
}

static INLINE bool
maybe_cpu (const insn_template *t, enum i386_cpu cpu)
{
  return _is_cpu(&t->cpu_any, cpu);
}
static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
{
  const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
  i386_cpu_flags f = { .array[0] = 0 };

  switch (ARRAY_SIZE (a.array))
    {
    case 1:
      f.array[CpuAttrEnums / bps]
#ifndef WORDS_BIGENDIAN
	|= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
#else
	|= (a.array[0] << CpuIsaBits) >> (CpuAttrEnums % bps);
#endif
      if (CpuMax / bps > CpuAttrEnums / bps)
	f.array[CpuAttrEnums / bps + 1]
#ifndef WORDS_BIGENDIAN
	  = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
#else
	  = (a.array[0] << CpuIsaBits) << (bps - CpuAttrEnums % bps);
#endif
      break;

    default:
      abort ();
    }

  if (a.bitfield.isa)
#ifndef WORDS_BIGENDIAN
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
#else
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << (~(a.bitfield.isa - 1) % bps);
#endif

  return f;
}
static INLINE int
cpu_flags_all_zero (const union i386_cpu_flags *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_equal (const union i386_cpu_flags *x,
		 const union i386_cpu_flags *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4] != y->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3] != y->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}
1850 static INLINE int
1851 cpu_flags_check_cpu64 (const insn_template *t)
1853 return flag_code == CODE_64BIT
1854 ? !t->cpu.bitfield.cpuno64
1855 : !t->cpu.bitfield.cpu64;
1858 static INLINE i386_cpu_flags
1859 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1861 switch (ARRAY_SIZE (x.array))
1863 case 5:
1864 x.array [4] &= y.array [4];
1865 /* Fall through. */
1866 case 4:
1867 x.array [3] &= y.array [3];
1868 /* Fall through. */
1869 case 3:
1870 x.array [2] &= y.array [2];
1871 /* Fall through. */
1872 case 2:
1873 x.array [1] &= y.array [1];
1874 /* Fall through. */
1875 case 1:
1876 x.array [0] &= y.array [0];
1877 break;
1878 default:
1879 abort ();
1881 return x;
1884 static INLINE i386_cpu_flags
1885 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1887 switch (ARRAY_SIZE (x.array))
1889 case 5:
1890 x.array [4] |= y.array [4];
1891 /* Fall through. */
1892 case 4:
1893 x.array [3] |= y.array [3];
1894 /* Fall through. */
1895 case 3:
1896 x.array [2] |= y.array [2];
1897 /* Fall through. */
1898 case 2:
1899 x.array [1] |= y.array [1];
1900 /* Fall through. */
1901 case 1:
1902 x.array [0] |= y.array [0];
1903 break;
1904 default:
1905 abort ();
1907 return x;
1910 static INLINE i386_cpu_flags
1911 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1913 switch (ARRAY_SIZE (x.array))
1915 case 5:
1916 x.array [4] &= ~y.array [4];
1917 /* Fall through. */
1918 case 4:
1919 x.array [3] &= ~y.array [3];
1920 /* Fall through. */
1921 case 3:
1922 x.array [2] &= ~y.array [2];
1923 /* Fall through. */
1924 case 2:
1925 x.array [1] &= ~y.array [1];
1926 /* Fall through. */
1927 case 1:
1928 x.array [0] &= ~y.array [0];
1929 break;
1930 default:
1931 abort ();
1933 return x;
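/* Illustrative sketch only (nothing in the assembler uses this helper):
   the three routines above are plain set operations over the flag
   words, so combining them behaves like ordinary bitwise algebra.  */
#if 0
static i386_cpu_flags
example_cpu_flag_sets (i386_cpu_flags x, i386_cpu_flags y)
{
  i386_cpu_flags common = cpu_flags_and (x, y);      /* x & y */
  i386_cpu_flags merged = cpu_flags_or (x, y);       /* x | y */
  i386_cpu_flags x_only = cpu_flags_and_not (x, y);  /* x & ~y */

  /* Arbitrary result, purely so the sketch has a return value.  */
  return cpu_flags_all_zero (&common) ? merged : x_only;
}
#endif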
1936 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1938 static INLINE bool need_evex_encoding (const insn_template *t)
1940 return pp.encoding == encoding_evex
1941 || pp.encoding == encoding_evex512
1942 || pp.has_nf
1943 || (t->opcode_modifier.vex && pp.encoding == encoding_egpr)
1944 || i.mask.reg;
1947 #define CPU_FLAGS_ARCH_MATCH 0x1
1948 #define CPU_FLAGS_64BIT_MATCH 0x2
1950 #define CPU_FLAGS_PERFECT_MATCH \
1951 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1953 static INLINE bool set_oszc_flags (unsigned int oszc_shift)
1955 if (i.oszc_flags & oszc_shift)
1957 as_bad (_("same oszc flag used twice"));
1958 return false;
1960 i.oszc_flags |= oszc_shift;
1961 return true;
1964 /* Handle SCC OSZC flags. */
1966 static int
1967 check_Scc_OszcOperations (const char *l)
1969 const char *suffix_string = l;
1971 while (is_space_char (*suffix_string))
1972 suffix_string++;
1974 /* If {oszc flags} is absent, just return. */
1975 if (*suffix_string != '{')
1976 return 0;
1978 /* Skip '{'. */
1979 suffix_string++;
1981 /* Parse 'dfv='. */
1982 while (is_space_char (*suffix_string))
1983 suffix_string++;
1985 if (strncasecmp (suffix_string, "dfv", 3) == 0)
1986 suffix_string += 3;
1987 else
1989 as_bad (_("unrecognized pseudo-suffix"));
1990 return -1;
1993 while (is_space_char (*suffix_string))
1994 suffix_string++;
1996 if (*suffix_string == '=')
1997 suffix_string++;
1998 else
2000 as_bad (_("unrecognized pseudo-suffix"));
2001 return -1;
2004 /* Parse 'of, sf, zf, cf}'. */
2005 while (*suffix_string)
2007 while (is_space_char (*suffix_string))
2008 suffix_string++;
2010 /* Return for '{dfv=}'. */
2011 if (*suffix_string == '}')
2012 return suffix_string + 1 - l;
2014 if (strncasecmp (suffix_string, "of", 2) == 0)
2016 if (!set_oszc_flags (OSZC_OF))
2017 return -1;
2019 else if (strncasecmp (suffix_string, "sf", 2) == 0)
2021 if (!set_oszc_flags (OSZC_SF))
2022 return -1;
2024 else if (strncasecmp (suffix_string, "zf", 2) == 0)
2026 if (!set_oszc_flags (OSZC_ZF))
2027 return -1;
2029 else if (strncasecmp (suffix_string, "cf", 2) == 0)
2031 if (!set_oszc_flags (OSZC_CF))
2032 return -1;
2034 else
2036 as_bad (_("unrecognized oszc flags or illegal `,' in pseudo-suffix"));
2037 return -1;
2040 suffix_string += 2;
2042 while (is_space_char (*suffix_string))
2043 suffix_string++;
2045 if (*suffix_string == '}')
2046 return ++suffix_string - l;
2048 if (*suffix_string != ',')
2049 break;
2050 suffix_string++;
2053 as_bad (_("missing `}' or `,' in pseudo-suffix"));
2054 return -1;
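/* Example inputs (hypothetical source lines, for illustration only):

     ccmpeq {dfv=of,sf,zf,cf} %rax, %rbx    // full default flag set
     ctestne {dfv=}           %ecx, %edx    // empty flag set

   The parser returns the number of characters consumed up to and
   including the closing '}', 0 when no '{' follows at all, and -1
   (after emitting a diagnostic) for malformed input such as a
   repeated flag, a missing '=', or a missing '}'.  */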
2057 /* Return CPU flags match bits. */
2059 static int
2060 cpu_flags_match (const insn_template *t)
2062 i386_cpu_flags cpu, active, all = cpu_flags_from_attr (t->cpu);
2063 i386_cpu_flags any = cpu_flags_from_attr (t->cpu_any);
2064 int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;
2066 all.bitfield.cpu64 = 0;
2067 all.bitfield.cpuno64 = 0;
2068 gas_assert (!any.bitfield.cpu64);
2069 gas_assert (!any.bitfield.cpuno64);
2071 if (cpu_flags_all_zero (&all) && cpu_flags_all_zero (&any))
2073 /* This instruction is available on all archs. */
2074 return match | CPU_FLAGS_ARCH_MATCH;
2077 /* This instruction is available only on some archs. */
2079 /* Dual VEX/EVEX templates may need stripping of one of the flags. */
2080 if (t->opcode_modifier.vex && t->opcode_modifier.evex)
2082 /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
2083 know that EVEX encoding will be needed. */
2084 if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
2085 && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
2087 if (need_evex_encoding (t))
2089 any.bitfield.cpuavx = 0;
2090 any.bitfield.cpuavx2 = 0;
2091 any.bitfield.cpufma = 0;
2093 /* need_evex_encoding(t) isn't reliable before operands were
2094 parsed. */
2095 else if (i.operands)
2097 any.bitfield.cpuavx512f = 0;
2098 any.bitfield.cpuavx512vl = 0;
2102 /* Dual non-APX/APX templates need massaging from what APX_F() in the
2103 opcode table has produced. While the direct transformation of the
2104 incoming cpuid&(cpuid|APX_F) would be to cpuid&(cpuid) / cpuid&(APX_F)
2105 respectively, it's cheaper to move to just cpuid / cpuid&APX_F
2106 instead. */
2107 if (any.bitfield.cpuapx_f
2108 && (any.bitfield.cpubmi || any.bitfield.cpubmi2
2109 || any.bitfield.cpuavx512f || any.bitfield.cpuavx512bw
2110 || any.bitfield.cpuavx512dq || any.bitfield.cpuamx_tile
2111 || any.bitfield.cpucmpccxadd || any.bitfield.cpuuser_msr))
2113 /* These checks (verifying that APX_F() was properly used in the
2114 opcode table entry) make sure there's no need for an "else" to
2115 the "if()" below. */
2116 gas_assert (!cpu_flags_all_zero (&all));
2117 cpu = cpu_flags_and (all, any);
2118 gas_assert (cpu_flags_equal (&cpu, &all));
2120 if (need_evex_encoding (t))
2121 all = any;
2123 memset (&any, 0, sizeof (any));
2127 if (flag_code != CODE_64BIT)
2128 active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
2129 else
2130 active = cpu_arch_flags;
2131 cpu = cpu_flags_and (all, active);
2132 if (cpu_flags_equal (&cpu, &all))
2134 /* AVX and AVX2 present at the same time express an operand size
2135 dependency - strip AVX2 for the purposes here. The operand size
2136 dependent check occurs in check_vecOperands(). */
2137 if (any.bitfield.cpuavx && any.bitfield.cpuavx2)
2138 any.bitfield.cpuavx2 = 0;
2140 cpu = cpu_flags_and (any, active);
2141 if (cpu_flags_all_zero (&any) || !cpu_flags_all_zero (&cpu))
2142 match |= CPU_FLAGS_ARCH_MATCH;
2144 return match;
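/* Sketch of how a caller might interpret the result (illustrative
   only, with a hypothetical template pointer T):  */
#if 0
  switch (cpu_flags_match (t))
    {
    case CPU_FLAGS_PERFECT_MATCH:
      /* Usable: required ISA enabled and bitness compatible.  */
      break;
    case CPU_FLAGS_ARCH_MATCH:
      /* ISA is fine, but the template doesn't fit the current
	 16/32/64-bit mode.  */
      break;
    case CPU_FLAGS_64BIT_MATCH:
      /* Bitness is fine, but a required ISA extension is off.  */
      break;
    case 0:
      /* Neither constraint is satisfied.  */
      break;
    }
#endif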
2147 static INLINE i386_operand_type
2148 operand_type_and (i386_operand_type x, i386_operand_type y)
2150 if (x.bitfield.class != y.bitfield.class)
2151 x.bitfield.class = ClassNone;
2152 if (x.bitfield.instance != y.bitfield.instance)
2153 x.bitfield.instance = InstanceNone;
2155 switch (ARRAY_SIZE (x.array))
2157 case 3:
2158 x.array [2] &= y.array [2];
2159 /* Fall through. */
2160 case 2:
2161 x.array [1] &= y.array [1];
2162 /* Fall through. */
2163 case 1:
2164 x.array [0] &= y.array [0];
2165 break;
2166 default:
2167 abort ();
2169 return x;
2172 static INLINE i386_operand_type
2173 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2175 gas_assert (y.bitfield.class == ClassNone);
2176 gas_assert (y.bitfield.instance == InstanceNone);
2178 switch (ARRAY_SIZE (x.array))
2180 case 3:
2181 x.array [2] &= ~y.array [2];
2182 /* Fall through. */
2183 case 2:
2184 x.array [1] &= ~y.array [1];
2185 /* Fall through. */
2186 case 1:
2187 x.array [0] &= ~y.array [0];
2188 break;
2189 default:
2190 abort ();
2192 return x;
2195 static INLINE i386_operand_type
2196 operand_type_or (i386_operand_type x, i386_operand_type y)
2198 gas_assert (x.bitfield.class == ClassNone ||
2199 y.bitfield.class == ClassNone ||
2200 x.bitfield.class == y.bitfield.class);
2201 gas_assert (x.bitfield.instance == InstanceNone ||
2202 y.bitfield.instance == InstanceNone ||
2203 x.bitfield.instance == y.bitfield.instance);
2205 switch (ARRAY_SIZE (x.array))
2207 case 3:
2208 x.array [2] |= y.array [2];
2209 /* Fall through. */
2210 case 2:
2211 x.array [1] |= y.array [1];
2212 /* Fall through. */
2213 case 1:
2214 x.array [0] |= y.array [0];
2215 break;
2216 default:
2217 abort ();
2219 return x;
2222 static INLINE i386_operand_type
2223 operand_type_xor (i386_operand_type x, i386_operand_type y)
2225 gas_assert (y.bitfield.class == ClassNone);
2226 gas_assert (y.bitfield.instance == InstanceNone);
2228 switch (ARRAY_SIZE (x.array))
2230 case 3:
2231 x.array [2] ^= y.array [2];
2232 /* Fall through. */
2233 case 2:
2234 x.array [1] ^= y.array [1];
2235 /* Fall through. */
2236 case 1:
2237 x.array [0] ^= y.array [0];
2238 break;
2239 default:
2240 abort ();
2242 return x;
2245 static const i386_operand_type anydisp = {
2246 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
2249 enum operand_type
2251 reg,
2252 imm,
2253 disp,
2254 anymem
2257 static INLINE int
2258 operand_type_check (i386_operand_type t, enum operand_type c)
2260 switch (c)
2262 case reg:
2263 return t.bitfield.class == Reg;
2265 case imm:
2266 return (t.bitfield.imm8
2267 || t.bitfield.imm8s
2268 || t.bitfield.imm16
2269 || t.bitfield.imm32
2270 || t.bitfield.imm32s
2271 || t.bitfield.imm64);
2273 case disp:
2274 return (t.bitfield.disp8
2275 || t.bitfield.disp16
2276 || t.bitfield.disp32
2277 || t.bitfield.disp64);
2279 case anymem:
2280 return (t.bitfield.disp8
2281 || t.bitfield.disp16
2282 || t.bitfield.disp32
2283 || t.bitfield.disp64
2284 || t.bitfield.baseindex);
2286 default:
2287 abort ();
2290 return 0;
2293 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2294 between operand GIVEN and operand WANTED for instruction template T. */
2296 static INLINE int
2297 match_operand_size (const insn_template *t, unsigned int wanted,
2298 unsigned int given)
2300 return !((i.types[given].bitfield.byte
2301 && !t->operand_types[wanted].bitfield.byte)
2302 || (i.types[given].bitfield.word
2303 && !t->operand_types[wanted].bitfield.word)
2304 || (i.types[given].bitfield.dword
2305 && !t->operand_types[wanted].bitfield.dword)
2306 || (i.types[given].bitfield.qword
2307 && (!t->operand_types[wanted].bitfield.qword
2308 /* Don't allow 64-bit (memory) operands outside of 64-bit
2309 mode, when they're used where a 64-bit GPR could also
2310 be used. Checking is needed for Intel Syntax only. */
2311 || (intel_syntax
2312 && flag_code != CODE_64BIT
2313 && (t->operand_types[wanted].bitfield.class == Reg
2314 || t->operand_types[wanted].bitfield.class == Accum
2315 || t->opcode_modifier.isstring))))
2316 || (i.types[given].bitfield.tbyte
2317 && !t->operand_types[wanted].bitfield.tbyte));
2320 /* Return 1 if there is no conflict in SIMD register between operand
2321 GIVEN and operand WANTED for instruction template T. */
2323 static INLINE int
2324 match_simd_size (const insn_template *t, unsigned int wanted,
2325 unsigned int given)
2327 return !((i.types[given].bitfield.xmmword
2328 && !t->operand_types[wanted].bitfield.xmmword)
2329 || (i.types[given].bitfield.ymmword
2330 && !t->operand_types[wanted].bitfield.ymmword)
2331 || (i.types[given].bitfield.zmmword
2332 && !t->operand_types[wanted].bitfield.zmmword)
2333 || (i.types[given].bitfield.tmmword
2334 && !t->operand_types[wanted].bitfield.tmmword));
2337 /* Return 1 if there is no conflict in any size between operand GIVEN
2338 and operand WANTED for instruction template T. */
2340 static INLINE int
2341 match_mem_size (const insn_template *t, unsigned int wanted,
2342 unsigned int given)
2344 return (match_operand_size (t, wanted, given)
2345 && !((i.types[given].bitfield.unspecified
2346 && !i.broadcast.type
2347 && !i.broadcast.bytes
2348 && !t->operand_types[wanted].bitfield.unspecified)
2349 || (i.types[given].bitfield.fword
2350 && !t->operand_types[wanted].bitfield.fword)
2351 /* For scalar opcode templates to allow register and memory
2352 operands at the same time, some special casing is needed
2353 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2354 down-conversion vpmov*. */
2355 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2356 && t->operand_types[wanted].bitfield.byte
2357 + t->operand_types[wanted].bitfield.word
2358 + t->operand_types[wanted].bitfield.dword
2359 + t->operand_types[wanted].bitfield.qword
2360 > !!t->opcode_modifier.broadcast)
2361 ? (i.types[given].bitfield.xmmword
2362 || i.types[given].bitfield.ymmword
2363 || i.types[given].bitfield.zmmword)
2364 : !match_simd_size(t, wanted, given))));
2367 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2368 operands for instruction template T, and it has MATCH_REVERSE set if there
2369 is no size conflict on any operands for the template with operands reversed
2370 (and the template allows for reversing in the first place). */
2372 #define MATCH_STRAIGHT 1
2373 #define MATCH_REVERSE 2
2375 static INLINE unsigned int
2376 operand_size_match (const insn_template *t)
2378 unsigned int j, match = MATCH_STRAIGHT;
2380 /* Don't check non-absolute jump instructions. */
2381 if (t->opcode_modifier.jump
2382 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2383 return match;
2385 /* Check memory and accumulator operand size. */
2386 for (j = 0; j < i.operands; j++)
2388 if (i.types[j].bitfield.class != Reg
2389 && i.types[j].bitfield.class != RegSIMD
2390 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2391 continue;
2393 if (t->operand_types[j].bitfield.class == Reg
2394 && !match_operand_size (t, j, j))
2396 match = 0;
2397 break;
2400 if (t->operand_types[j].bitfield.class == RegSIMD
2401 && !match_simd_size (t, j, j))
2403 match = 0;
2404 break;
2407 if (t->operand_types[j].bitfield.instance == Accum
2408 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2410 match = 0;
2411 break;
2414 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2416 match = 0;
2417 break;
2421 if (!t->opcode_modifier.d)
2422 return match;
2424 /* Check reverse. */
2425 gas_assert (i.operands >= 2);
2427 for (j = 0; j < i.operands; j++)
2429 unsigned int given = i.operands - j - 1;
2431 /* For FMA4 and XOP insns VEX.W controls just the first two
2432 register operands. And APX_F insns just swap the two source operands,
2433 with the 3rd one being the destination. */
2434 if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP)
2435 || is_cpu (t, CpuAPX_F))
2436 given = j < 2 ? 1 - j : j;
2438 if (t->operand_types[j].bitfield.class == Reg
2439 && !match_operand_size (t, j, given))
2440 return match;
2442 if (t->operand_types[j].bitfield.class == RegSIMD
2443 && !match_simd_size (t, j, given))
2444 return match;
2446 if (t->operand_types[j].bitfield.instance == Accum
2447 && (!match_operand_size (t, j, given)
2448 || !match_simd_size (t, j, given)))
2449 return match;
2451 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2452 return match;
2455 return match | MATCH_REVERSE;
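/* Consumption sketch (illustrative, hypothetical template T):  */
#if 0
  unsigned int sizes = operand_size_match (t);

  if (sizes & MATCH_STRAIGHT)
    ;	/* Operand sizes fit the template as written.  */
  if (sizes & MATCH_REVERSE)
    ;	/* They also fit with the operands reversed (D templates).  */
  if (!sizes)
    ;	/* Size conflict either way - move on to the next template.  */
#endif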
2458 static INLINE int
2459 operand_type_match (i386_operand_type overlap,
2460 i386_operand_type given)
2462 i386_operand_type temp = overlap;
2464 temp.bitfield.unspecified = 0;
2465 temp.bitfield.byte = 0;
2466 temp.bitfield.word = 0;
2467 temp.bitfield.dword = 0;
2468 temp.bitfield.fword = 0;
2469 temp.bitfield.qword = 0;
2470 temp.bitfield.tbyte = 0;
2471 temp.bitfield.xmmword = 0;
2472 temp.bitfield.ymmword = 0;
2473 temp.bitfield.zmmword = 0;
2474 temp.bitfield.tmmword = 0;
2475 if (operand_type_all_zero (&temp))
2476 goto mismatch;
2478 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2479 return 1;
2481 mismatch:
2482 i.error = operand_type_mismatch;
2483 return 0;
2486 /* If given types g0 and g1 are registers they must be of the same type
2487 unless the expected operand type register overlap is null.
2488 Intel syntax sized memory operands are also checked here. */
2490 static INLINE int
2491 operand_type_register_match (i386_operand_type g0,
2492 i386_operand_type t0,
2493 i386_operand_type g1,
2494 i386_operand_type t1)
2496 if (g0.bitfield.class != Reg
2497 && g0.bitfield.class != RegSIMD
2498 && (g0.bitfield.unspecified
2499 || !operand_type_check (g0, anymem)))
2500 return 1;
2502 if (g1.bitfield.class != Reg
2503 && g1.bitfield.class != RegSIMD
2504 && (g1.bitfield.unspecified
2505 || !operand_type_check (g1, anymem)))
2506 return 1;
2508 if (g0.bitfield.byte == g1.bitfield.byte
2509 && g0.bitfield.word == g1.bitfield.word
2510 && g0.bitfield.dword == g1.bitfield.dword
2511 && g0.bitfield.qword == g1.bitfield.qword
2512 && g0.bitfield.xmmword == g1.bitfield.xmmword
2513 && g0.bitfield.ymmword == g1.bitfield.ymmword
2514 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2515 return 1;
2517 /* If expectations overlap in no more than a single size, all is fine. */
2518 g0 = operand_type_and (t0, t1);
2519 if (g0.bitfield.byte
2520 + g0.bitfield.word
2521 + g0.bitfield.dword
2522 + g0.bitfield.qword
2523 + g0.bitfield.xmmword
2524 + g0.bitfield.ymmword
2525 + g0.bitfield.zmmword <= 1)
2526 return 1;
2528 i.error = register_type_mismatch;
2530 return 0;
2533 static INLINE unsigned int
2534 register_number (const reg_entry *r)
2536 unsigned int nr = r->reg_num;
2538 if (r->reg_flags & RegRex)
2539 nr += 8;
2541 if (r->reg_flags & (RegVRex | RegRex2))
2542 nr += 16;
2544 return nr;
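/* Worked examples (values as in the register table; illustrative):
   %eax has reg_num 0 and no extension flag, giving 0; %r8 is reg_num 0
   plus RegRex, giving 8; %xmm16 is reg_num 0 plus RegVRex, giving 16;
   an APX register such as %r16 is reg_num 0 plus RegRex2, also
   giving 16.  */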
2547 static INLINE unsigned int
2548 mode_from_disp_size (i386_operand_type t)
2550 if (t.bitfield.disp8)
2551 return 1;
2552 else if (t.bitfield.disp16
2553 || t.bitfield.disp32)
2554 return 2;
2555 else
2556 return 0;
2559 static INLINE int
2560 fits_in_signed_byte (addressT num)
2562 return num + 0x80 <= 0xff;
2565 static INLINE int
2566 fits_in_unsigned_byte (addressT num)
2568 return num <= 0xff;
2571 static INLINE int
2572 fits_in_unsigned_word (addressT num)
2574 return num <= 0xffff;
2577 static INLINE int
2578 fits_in_signed_word (addressT num)
2580 return num + 0x8000 <= 0xffff;
2583 static INLINE int
2584 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2586 #ifndef BFD64
2587 return 1;
2588 #else
2589 return num + 0x80000000 <= 0xffffffff;
2590 #endif
2591 } /* fits_in_signed_long() */
2593 static INLINE int
2594 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2596 #ifndef BFD64
2597 return 1;
2598 #else
2599 return num <= 0xffffffff;
2600 #endif
2601 } /* fits_in_unsigned_long() */
2603 static INLINE valueT extend_to_32bit_address (addressT num)
2605 #ifdef BFD64
2606 if (fits_in_unsigned_long(num))
2607 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2609 if (!fits_in_signed_long (num))
2610 return num & 0xffffffff;
2611 #endif
2613 return num;
2616 static INLINE int
2617 fits_in_disp8 (offsetT num)
2619 int shift = i.memshift;
2620 unsigned int mask;
2622 if (shift == -1)
2623 abort ();
2625 mask = (1 << shift) - 1;
2627 /* Return 0 if NUM isn't properly aligned. */
2628 if ((num & mask))
2629 return 0;
2631 /* Check if NUM will fit in 8bit after shift. */
2632 return fits_in_signed_byte (num >> shift);
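/* Worked example (illustrative values): with i.memshift == 6, i.e.
   64-byte disp8*N granularity as for a full ZMM memory operand,
   fits_in_disp8 (0x1000) succeeds - the value is 64-byte aligned and
   0x1000 >> 6 == 0x40 fits in a signed byte - while 0x2020 fails the
   alignment test and 0x4000 fails the range test (0x4000 >> 6 ==
   0x100).  */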
2635 static INLINE int
2636 fits_in_imm4 (offsetT num)
2638 /* Despite the name, check for imm3 if we're dealing with EVEX. */
2639 return (num & (pp.encoding != encoding_evex
2640 && pp.encoding != encoding_egpr ? 0xf : 7)) == num;
2643 static i386_operand_type
2644 smallest_imm_type (offsetT num)
2646 i386_operand_type t;
2648 operand_type_set (&t, 0);
2649 t.bitfield.imm64 = 1;
2651 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2653 /* This code is disabled on the 486 because all the Imm1 forms
2654 in the opcode table are slower on the i486. They're the
2655 versions with the implicitly specified single-position
2656 displacement, which has another syntax if you really want to
2657 use that form. */
2658 t.bitfield.imm1 = 1;
2659 t.bitfield.imm8 = 1;
2660 t.bitfield.imm8s = 1;
2661 t.bitfield.imm16 = 1;
2662 t.bitfield.imm32 = 1;
2663 t.bitfield.imm32s = 1;
2665 else if (fits_in_signed_byte (num))
2667 if (fits_in_unsigned_byte (num))
2668 t.bitfield.imm8 = 1;
2669 t.bitfield.imm8s = 1;
2670 t.bitfield.imm16 = 1;
2671 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2672 t.bitfield.imm32 = 1;
2673 t.bitfield.imm32s = 1;
2675 else if (fits_in_unsigned_byte (num))
2677 t.bitfield.imm8 = 1;
2678 t.bitfield.imm16 = 1;
2679 t.bitfield.imm32 = 1;
2680 t.bitfield.imm32s = 1;
2682 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2684 t.bitfield.imm16 = 1;
2685 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2686 t.bitfield.imm32 = 1;
2687 t.bitfield.imm32s = 1;
2689 else if (fits_in_signed_long (num))
2691 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2692 t.bitfield.imm32 = 1;
2693 t.bitfield.imm32s = 1;
2695 else if (fits_in_unsigned_long (num))
2696 t.bitfield.imm32 = 1;
2698 return t;
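/* Worked examples (illustrative; imm64 is set unconditionally first):
   smallest_imm_type (1) also sets imm1 (withheld when tuning for the
   i486) together with imm8 through imm32s; smallest_imm_type (-1)
   sets imm8s, imm16 and imm32s - plus imm32 outside 64-bit mode - but
   not imm8, since -1 fits a signed yet not an unsigned byte;
   smallest_imm_type (0x90) sets imm8 but not imm8s, for the converse
   reason.  */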
2701 static offsetT
2702 offset_in_range (offsetT val, int size)
2704 addressT mask;
2706 switch (size)
2708 case 1: mask = ((addressT) 1 << 8) - 1; break;
2709 case 2: mask = ((addressT) 1 << 16) - 1; break;
2710 #ifdef BFD64
2711 case 4: mask = ((addressT) 1 << 32) - 1; break;
2712 #endif
2713 case sizeof (val): return val;
2714 default: abort ();
2717 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2718 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2719 (uint64_t) val, (uint64_t) (val & mask));
2721 return val & mask;
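/* Worked examples (illustrative): offset_in_range (0x1234, 1) warns
   about the truncation and returns 0x34, while offset_in_range (-1, 2)
   silently returns 0xffff - upper bits that are all ones or all zeroes
   are accepted as unsurprising sign/zero extension.  */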
2724 static INLINE const char *insn_name (const insn_template *t)
2726 return &i386_mnemonics[t->mnem_off];
2729 enum PREFIX_GROUP
2731 PREFIX_EXIST = 0,
2732 PREFIX_LOCK,
2733 PREFIX_REP,
2734 PREFIX_DS,
2735 PREFIX_OTHER
2738 /* Returns
2739 a. PREFIX_EXIST if attempting to add a prefix where one from the
2740 same class already exists.
2741 b. PREFIX_LOCK if lock prefix is added.
2742 c. PREFIX_REP if rep/repne prefix is added.
2743 d. PREFIX_DS if ds prefix is added.
2744 e. PREFIX_OTHER if other prefix is added.
2747 static enum PREFIX_GROUP
2748 add_prefix (unsigned int prefix)
2750 enum PREFIX_GROUP ret = PREFIX_OTHER;
2751 unsigned int q;
2753 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2754 && flag_code == CODE_64BIT)
2756 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2757 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2758 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2759 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2760 ret = PREFIX_EXIST;
2761 q = REX_PREFIX;
2763 else
2765 switch (prefix)
2767 default:
2768 abort ();
2770 case DS_PREFIX_OPCODE:
2771 ret = PREFIX_DS;
2772 /* Fall through. */
2773 case CS_PREFIX_OPCODE:
2774 case ES_PREFIX_OPCODE:
2775 case FS_PREFIX_OPCODE:
2776 case GS_PREFIX_OPCODE:
2777 case SS_PREFIX_OPCODE:
2778 q = SEG_PREFIX;
2779 break;
2781 case REPNE_PREFIX_OPCODE:
2782 case REPE_PREFIX_OPCODE:
2783 q = REP_PREFIX;
2784 ret = PREFIX_REP;
2785 break;
2787 case LOCK_PREFIX_OPCODE:
2788 q = LOCK_PREFIX;
2789 ret = PREFIX_LOCK;
2790 break;
2792 case FWAIT_OPCODE:
2793 q = WAIT_PREFIX;
2794 break;
2796 case ADDR_PREFIX_OPCODE:
2797 q = ADDR_PREFIX;
2798 break;
2800 case DATA_PREFIX_OPCODE:
2801 q = DATA_PREFIX;
2802 break;
2804 if (i.prefix[q] != 0)
2805 ret = PREFIX_EXIST;
2808 if (ret)
2810 if (!i.prefix[q])
2811 ++i.prefixes;
2812 i.prefix[q] |= prefix;
2814 else
2815 as_bad (_("same type of prefix used twice"));
2817 return ret;
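/* Hypothetical call sequence (illustrative only; these calls appear
   nowhere in the assembler in this form):  */
#if 0
  /* A first LOCK is accepted and classified...  */
  gas_assert (add_prefix (LOCK_PREFIX_OPCODE) == PREFIX_LOCK);
  /* ...while a second one collides with the occupied LOCK slot.  */
  gas_assert (add_prefix (LOCK_PREFIX_OPCODE) == PREFIX_EXIST);
  /* In 64-bit mode distinct REX bits merge into the single REX slot.  */
  gas_assert (add_prefix (REX_OPCODE | REX_W) == PREFIX_OTHER);
#endif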
2820 static void
2821 update_code_flag (int value, int check)
2823 PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
2825 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64)
2827 as_error (_("64bit mode not supported on `%s'."),
2828 cpu_arch_name ? cpu_arch_name : default_arch);
2829 return;
2832 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2834 as_error (_("32bit mode not supported on `%s'."),
2835 cpu_arch_name ? cpu_arch_name : default_arch);
2836 return;
2839 flag_code = (enum flag_code) value;
2841 stackop_size = '\0';
2844 static void
2845 set_code_flag (int value)
2847 update_code_flag (value, 0);
2850 static void
2851 set_16bit_gcc_code_flag (int new_code_flag)
2853 flag_code = (enum flag_code) new_code_flag;
2854 if (flag_code != CODE_16BIT)
2855 abort ();
2856 stackop_size = LONG_MNEM_SUFFIX;
2859 static void
2860 _set_intel_syntax (int syntax_flag)
2862 intel_syntax = syntax_flag;
2864 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2866 register_prefix = allow_naked_reg ? "" : "%";
2869 static void
2870 set_intel_syntax (int syntax_flag)
2872 /* Find out if register prefixing is specified. */
2873 int ask_naked_reg = 0;
2875 SKIP_WHITESPACE ();
2876 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2878 char *string;
2879 int e = get_symbol_name (&string);
2881 if (strcmp (string, "prefix") == 0)
2882 ask_naked_reg = 1;
2883 else if (strcmp (string, "noprefix") == 0)
2884 ask_naked_reg = -1;
2885 else
2886 as_bad (_("bad argument to syntax directive."));
2887 (void) restore_line_pointer (e);
2889 demand_empty_rest_of_line ();
2891 if (ask_naked_reg == 0)
2892 allow_naked_reg = (syntax_flag
2893 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2894 else
2895 allow_naked_reg = (ask_naked_reg < 0);
2897 _set_intel_syntax (syntax_flag);
2900 static void
2901 set_intel_mnemonic (int mnemonic_flag)
2903 intel_mnemonic = mnemonic_flag;
2906 static void
2907 set_allow_index_reg (int flag)
2909 allow_index_reg = flag;
2912 static void
2913 set_check (int what)
2915 enum check_kind *kind;
2916 const char *str;
2918 if (what)
2920 kind = &operand_check;
2921 str = "operand";
2923 else
2925 kind = &sse_check;
2926 str = "sse";
2929 SKIP_WHITESPACE ();
2931 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2933 char *string;
2934 int e = get_symbol_name (&string);
2936 if (strcmp (string, "none") == 0)
2937 *kind = check_none;
2938 else if (strcmp (string, "warning") == 0)
2939 *kind = check_warning;
2940 else if (strcmp (string, "error") == 0)
2941 *kind = check_error;
2942 else
2943 as_bad (_("bad argument to %s_check directive."), str);
2944 (void) restore_line_pointer (e);
2946 else
2947 as_bad (_("missing argument for %s_check directive"), str);
2949 demand_empty_rest_of_line ();
2952 static void
2953 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2954 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2956 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2957 static const char *arch;
2959 /* Intel MCU is only supported on ELF. */
2960 if (!IS_ELF)
2961 return;
2963 if (!arch)
2965 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2966 use default_arch. */
2967 arch = cpu_arch_name;
2968 if (!arch)
2969 arch = default_arch;
2972 /* If we are targeting Intel MCU, we must enable it. */
2973 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2974 == new_flag.bitfield.cpuiamcu)
2975 return;
2977 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2978 #endif
2981 static void
2982 extend_cpu_sub_arch_name (const char *pfx, const char *name)
2984 if (cpu_sub_arch_name)
2985 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2986 pfx, name, (const char *) NULL);
2987 else
2988 cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
2991 static void isa_enable (unsigned int idx)
2993 i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
2995 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2997 extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
2998 cpu_arch_flags = flags;
3001 cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
3004 static void isa_disable (unsigned int idx)
3006 i386_cpu_flags flags
3007 = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
3009 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
3011 extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
3012 cpu_arch_flags = flags;
3015 cpu_arch_isa_flags
3016 = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
3019 static void
3020 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
3022 typedef struct arch_stack_entry
3024 const struct arch_stack_entry *prev;
3025 const char *name;
3026 char *sub_name;
3027 i386_cpu_flags flags;
3028 i386_cpu_flags isa_flags;
3029 enum processor_type isa;
3030 enum flag_code flag_code;
3031 unsigned int vector_size;
3032 char stackop_size;
3033 bool no_cond_jump_promotion;
3034 } arch_stack_entry;
3035 static const arch_stack_entry *arch_stack_top;
3036 char *s;
3037 int e;
3038 const char *string;
3039 unsigned int j = 0;
3041 SKIP_WHITESPACE ();
3043 if (is_end_of_line[(unsigned char) *input_line_pointer])
3045 as_bad (_("missing cpu architecture"));
3046 input_line_pointer++;
3047 return;
3050 e = get_symbol_name (&s);
3051 string = s;
3053 if (strcmp (string, "push") == 0)
3055 arch_stack_entry *top = XNEW (arch_stack_entry);
3057 top->name = cpu_arch_name;
3058 if (cpu_sub_arch_name)
3059 top->sub_name = xstrdup (cpu_sub_arch_name);
3060 else
3061 top->sub_name = NULL;
3062 top->flags = cpu_arch_flags;
3063 top->isa = cpu_arch_isa;
3064 top->isa_flags = cpu_arch_isa_flags;
3065 top->flag_code = flag_code;
3066 top->vector_size = vector_size;
3067 top->stackop_size = stackop_size;
3068 top->no_cond_jump_promotion = no_cond_jump_promotion;
3070 top->prev = arch_stack_top;
3071 arch_stack_top = top;
3073 (void) restore_line_pointer (e);
3074 demand_empty_rest_of_line ();
3075 return;
3078 if (strcmp (string, "pop") == 0)
3080 const arch_stack_entry *top = arch_stack_top;
3082 if (!top)
3083 as_bad (_(".arch stack is empty"));
3084 else if (top->flag_code != flag_code
3085 || top->stackop_size != stackop_size)
3087 static const unsigned int bits[] = {
3088 [CODE_16BIT] = 16,
3089 [CODE_32BIT] = 32,
3090 [CODE_64BIT] = 64,
3093 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
3094 bits[top->flag_code],
3095 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
3097 else
3099 arch_stack_top = top->prev;
3101 cpu_arch_name = top->name;
3102 free (cpu_sub_arch_name);
3103 cpu_sub_arch_name = top->sub_name;
3104 cpu_arch_flags = top->flags;
3105 cpu_arch_isa = top->isa;
3106 cpu_arch_isa_flags = top->isa_flags;
3107 vector_size = top->vector_size;
3108 no_cond_jump_promotion = top->no_cond_jump_promotion;
3110 XDELETE (top);
3113 (void) restore_line_pointer (e);
3114 demand_empty_rest_of_line ();
3115 return;
3118 if (strcmp (string, "default") == 0)
3120 if (strcmp (default_arch, "iamcu") == 0)
3121 string = default_arch;
3122 else
3124 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
3126 cpu_arch_name = NULL;
3127 free (cpu_sub_arch_name);
3128 cpu_sub_arch_name = NULL;
3129 cpu_arch_flags = cpu_unknown_flags;
3130 cpu_arch_isa = PROCESSOR_UNKNOWN;
3131 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
3132 if (!cpu_arch_tune_set)
3133 cpu_arch_tune = PROCESSOR_UNKNOWN;
3135 vector_size = VSZ_DEFAULT;
3137 j = ARRAY_SIZE (cpu_arch) + 1;
3141 for (; j < ARRAY_SIZE (cpu_arch); j++)
3143 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
3144 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
3146 if (*string != '.')
3148 check_cpu_arch_compatible (string, cpu_arch[j].enable);
3150 if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64)
3152 as_bad (_("64bit mode not supported on `%s'."),
3153 cpu_arch[j].name);
3154 (void) restore_line_pointer (e);
3155 ignore_rest_of_line ();
3156 return;
3159 if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
3161 as_bad (_("32bit mode not supported on `%s'."),
3162 cpu_arch[j].name);
3163 (void) restore_line_pointer (e);
3164 ignore_rest_of_line ();
3165 return;
3168 cpu_arch_name = cpu_arch[j].name;
3169 free (cpu_sub_arch_name);
3170 cpu_sub_arch_name = NULL;
3171 cpu_arch_flags = cpu_arch[j].enable;
3172 cpu_arch_isa = cpu_arch[j].type;
3173 cpu_arch_isa_flags = cpu_arch[j].enable;
3174 if (!cpu_arch_tune_set)
3175 cpu_arch_tune = cpu_arch_isa;
3177 vector_size = VSZ_DEFAULT;
3179 pre_386_16bit_warned = false;
3180 break;
3183 if (cpu_flags_all_zero (&cpu_arch[j].enable))
3184 continue;
3186 isa_enable (j);
3188 (void) restore_line_pointer (e);
3190 switch (cpu_arch[j].vsz)
3192 default:
3193 break;
3195 case vsz_set:
3196 #ifdef SVR4_COMMENT_CHARS
3197 if (*input_line_pointer == ':' || *input_line_pointer == '/')
3198 #else
3199 if (*input_line_pointer == '/')
3200 #endif
3202 ++input_line_pointer;
3203 switch (get_absolute_expression ())
3205 case 512: vector_size = VSZ512; break;
3206 case 256: vector_size = VSZ256; break;
3207 case 128: vector_size = VSZ128; break;
3208 default:
3209 as_bad (_("Unrecognized vector size specifier"));
3210 ignore_rest_of_line ();
3211 return;
3213 break;
3215 /* Fall through. */
3216 case vsz_reset:
3217 vector_size = VSZ_DEFAULT;
3218 break;
3221 demand_empty_rest_of_line ();
3222 return;
3226 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
3228 /* Disable an ISA extension. */
3229 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
3230 if (cpu_arch[j].type == PROCESSOR_NONE
3231 && strcmp (string + 3, cpu_arch[j].name) == 0)
3233 isa_disable (j);
3235 if (cpu_arch[j].vsz == vsz_set)
3236 vector_size = VSZ_DEFAULT;
3238 (void) restore_line_pointer (e);
3239 demand_empty_rest_of_line ();
3240 return;
3244 if (j == ARRAY_SIZE (cpu_arch))
3245 as_bad (_("no such architecture: `%s'"), string);
3247 *input_line_pointer = e;
3249 no_cond_jump_promotion = 0;
3250 if (*input_line_pointer == ','
3251 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
3253 ++input_line_pointer;
3254 e = get_symbol_name (&s);
3255 string = s;
3257 if (strcmp (string, "nojumps") == 0)
3258 no_cond_jump_promotion = 1;
3259 else if (strcmp (string, "jumps") == 0)
3260 ;
3261 else
3262 as_bad (_("no such architecture modifier: `%s'"), string);
3264 (void) restore_line_pointer (e);
3267 demand_empty_rest_of_line ();
3270 enum bfd_architecture
3271 i386_arch (void)
3273 if (cpu_arch_isa == PROCESSOR_IAMCU)
3275 if (!IS_ELF || flag_code == CODE_64BIT)
3276 as_fatal (_("Intel MCU is 32bit ELF only"));
3277 return bfd_arch_iamcu;
3279 else
3280 return bfd_arch_i386;
3283 unsigned long
3284 i386_mach (void)
3286 if (startswith (default_arch, "x86_64"))
3288 if (default_arch[6] == '\0')
3289 return bfd_mach_x86_64;
3290 else
3291 return bfd_mach_x64_32;
3293 else if (!strcmp (default_arch, "i386")
3294 || !strcmp (default_arch, "iamcu"))
3296 if (cpu_arch_isa == PROCESSOR_IAMCU)
3298 if (!IS_ELF)
3299 as_fatal (_("Intel MCU is 32bit ELF only"));
3300 return bfd_mach_i386_iamcu;
3302 else
3303 return bfd_mach_i386_i386;
3305 else
3306 as_fatal (_("unknown architecture"));
3309 #include "opcodes/i386-tbl.h"
3311 static void
3312 op_lookup (const char *mnemonic)
3314 i386_op_off_t *pos = str_hash_find (op_hash, mnemonic);
3316 if (pos != NULL)
3318 current_templates.start = &i386_optab[pos[0]];
3319 current_templates.end = &i386_optab[pos[1]];
3321 else
3322 current_templates.end = current_templates.start = NULL;
3325 void
3326 md_begin (void)
3328 /* Make sure possible padding space is clear. */
3329 memset (&pp, 0, sizeof (pp));
3331 /* Initialize op_hash hash table. */
3332 op_hash = str_htab_create ();
3335 const i386_op_off_t *cur = i386_op_sets;
3336 const i386_op_off_t *end = cur + ARRAY_SIZE (i386_op_sets) - 1;
3338 for (; cur < end; ++cur)
3339 if (str_hash_insert (op_hash, insn_name (&i386_optab[*cur]), cur, 0))
3340 as_fatal (_("duplicate %s"), insn_name (&i386_optab[*cur]));
3343 /* Initialize reg_hash hash table. */
3344 reg_hash = str_htab_create ();
3346 const reg_entry *regtab;
3347 unsigned int regtab_size = i386_regtab_size;
3349 for (regtab = i386_regtab; regtab_size--; regtab++)
3351 switch (regtab->reg_type.bitfield.class)
3353 case Reg:
3354 if (regtab->reg_type.bitfield.dword)
3356 if (regtab->reg_type.bitfield.instance == Accum)
3357 reg_eax = regtab;
3359 else if (regtab->reg_type.bitfield.tbyte)
3361 /* There's no point inserting st(<N>) in the hash table, as
3362 parentheses aren't included in register_chars[] anyway. */
3363 if (regtab->reg_type.bitfield.instance != Accum)
3364 continue;
3365 reg_st0 = regtab;
3367 break;
3369 case SReg:
3370 switch (regtab->reg_num)
3372 case 0: reg_es = regtab; break;
3373 case 2: reg_ss = regtab; break;
3374 case 3: reg_ds = regtab; break;
3376 break;
3378 case RegMask:
3379 if (!regtab->reg_num)
3380 reg_k0 = regtab;
3381 break;
3384 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3385 as_fatal (_("duplicate %s"), regtab->reg_name);
3389 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3391 int c;
3392 const char *p;
3394 for (c = 0; c < 256; c++)
3396 if (ISDIGIT (c) || ISLOWER (c))
3398 mnemonic_chars[c] = c;
3399 register_chars[c] = c;
3400 operand_chars[c] = c;
3402 else if (ISUPPER (c))
3404 mnemonic_chars[c] = TOLOWER (c);
3405 register_chars[c] = mnemonic_chars[c];
3406 operand_chars[c] = c;
3408 #ifdef SVR4_COMMENT_CHARS
3409 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3410 operand_chars[c] = c;
3411 #endif
3413 if (c >= 128)
3414 operand_chars[c] = c;
3417 mnemonic_chars['_'] = '_';
3418 mnemonic_chars['-'] = '-';
3419 mnemonic_chars['.'] = '.';
3421 for (p = extra_symbol_chars; *p != '\0'; p++)
3422 operand_chars[(unsigned char) *p] = *p;
3423 for (p = operand_special_chars; *p != '\0'; p++)
3424 operand_chars[(unsigned char) *p] = *p;
3427 if (object_64bit)
3429 #if defined (OBJ_COFF) && defined (TE_PE)
3430 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3431 ? 32 : 16);
3432 #else
3433 x86_dwarf2_return_column = 16;
3434 #endif
3435 x86_cie_data_alignment = -8;
3436 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3437 x86_sframe_cfa_sp_reg = REG_SP;
3438 x86_sframe_cfa_fp_reg = REG_FP;
3439 #endif
3441 else
3443 x86_dwarf2_return_column = 8;
3444 x86_cie_data_alignment = -4;
3447 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3448 can be turned into BRANCH_PREFIX frag. */
3449 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3450 abort ();
3453 void
3454 i386_print_statistics (FILE *file)
3456 htab_print_statistics (file, "i386 opcode", op_hash);
3457 htab_print_statistics (file, "i386 register", reg_hash);
3460 void
3461 i386_md_end (void)
3463 htab_delete (op_hash);
3464 htab_delete (reg_hash);
3467 #ifdef DEBUG386
3469 /* Debugging routines for md_assemble. */
3470 static void pte (insn_template *);
3471 static void pt (i386_operand_type);
3472 static void pe (expressionS *);
3473 static void ps (symbolS *);
3475 static void
3476 pi (const char *line, i386_insn *x)
3478 unsigned int j;
3480 fprintf (stdout, "%s: template ", line);
3481 pte (&x->tm);
3482 fprintf (stdout, " address: base %s index %s scale %x\n",
3483 x->base_reg ? x->base_reg->reg_name : "none",
3484 x->index_reg ? x->index_reg->reg_name : "none",
3485 x->log2_scale_factor);
3486 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3487 x->rm.mode, x->rm.reg, x->rm.regmem);
3488 fprintf (stdout, " sib: base %x index %x scale %x\n",
3489 x->sib.base, x->sib.index, x->sib.scale);
3490 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3491 (x->rex & REX_W) != 0,
3492 (x->rex & REX_R) != 0,
3493 (x->rex & REX_X) != 0,
3494 (x->rex & REX_B) != 0);
3495 for (j = 0; j < x->operands; j++)
3497 fprintf (stdout, " #%d: ", j + 1);
3498 pt (x->types[j]);
3499 fprintf (stdout, "\n");
3500 if (x->types[j].bitfield.class == Reg
3501 || x->types[j].bitfield.class == RegMMX
3502 || x->types[j].bitfield.class == RegSIMD
3503 || x->types[j].bitfield.class == RegMask
3504 || x->types[j].bitfield.class == SReg
3505 || x->types[j].bitfield.class == RegCR
3506 || x->types[j].bitfield.class == RegDR
3507 || x->types[j].bitfield.class == RegTR
3508 || x->types[j].bitfield.class == RegBND)
3509 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3510 if (operand_type_check (x->types[j], imm))
3511 pe (x->op[j].imms);
3512 if (operand_type_check (x->types[j], disp))
3513 pe (x->op[j].disps);
3517 static void
3518 pte (insn_template *t)
3520 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3521 static const char *const opc_spc[] = {
3522 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3523 "XOP08", "XOP09", "XOP0A",
3525 unsigned int j;
3527 fprintf (stdout, " %d operands ", t->operands);
3528 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3529 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3530 if (opc_spc[t->opcode_space])
3531 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3532 fprintf (stdout, "opcode %x ", t->base_opcode);
3533 if (t->extension_opcode != None)
3534 fprintf (stdout, "ext %x ", t->extension_opcode);
3535 if (t->opcode_modifier.d)
3536 fprintf (stdout, "D");
3537 if (t->opcode_modifier.w)
3538 fprintf (stdout, "W");
3539 fprintf (stdout, "\n");
3540 for (j = 0; j < t->operands; j++)
3542 fprintf (stdout, " #%d type ", j + 1);
3543 pt (t->operand_types[j]);
3544 fprintf (stdout, "\n");
3548 static void
3549 pe (expressionS *e)
3551 fprintf (stdout, " operation %d\n", e->X_op);
3552 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3553 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3554 if (e->X_add_symbol)
3556 fprintf (stdout, " add_symbol ");
3557 ps (e->X_add_symbol);
3558 fprintf (stdout, "\n");
3560 if (e->X_op_symbol)
3562 fprintf (stdout, " op_symbol ");
3563 ps (e->X_op_symbol);
3564 fprintf (stdout, "\n");
3568 static void
3569 ps (symbolS *s)
3571 fprintf (stdout, "%s type %s%s",
3572 S_GET_NAME (s),
3573 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3574 segment_name (S_GET_SEGMENT (s)));
3577 static struct type_name
3579 i386_operand_type mask;
3580 const char *name;
3582 const type_names[] =
3584 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3585 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3586 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3587 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3588 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3589 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3590 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3591 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3592 { { .bitfield = { .imm8 = 1 } }, "i8" },
3593 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3594 { { .bitfield = { .imm16 = 1 } }, "i16" },
3595 { { .bitfield = { .imm32 = 1 } }, "i32" },
3596 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3597 { { .bitfield = { .imm64 = 1 } }, "i64" },
3598 { { .bitfield = { .imm1 = 1 } }, "i1" },
3599 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3600 { { .bitfield = { .disp8 = 1 } }, "d8" },
3601 { { .bitfield = { .disp16 = 1 } }, "d16" },
3602 { { .bitfield = { .disp32 = 1 } }, "d32" },
3603 { { .bitfield = { .disp64 = 1 } }, "d64" },
3604 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3605 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3606 { { .bitfield = { .class = RegCR } }, "control reg" },
3607 { { .bitfield = { .class = RegTR } }, "test reg" },
3608 { { .bitfield = { .class = RegDR } }, "debug reg" },
3609 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3610 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3611 { { .bitfield = { .class = SReg } }, "SReg" },
3612 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3613 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3614 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3615 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3616 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3617 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3620 static void
3621 pt (i386_operand_type t)
3623 unsigned int j;
3624 i386_operand_type a;
3626 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3628 a = operand_type_and (t, type_names[j].mask);
3629 if (operand_type_equal (&a, &type_names[j].mask))
3630 fprintf (stdout, "%s, ", type_names[j].name);
3632 fflush (stdout);
3635 #endif /* DEBUG386 */
3637 static bfd_reloc_code_real_type
3638 reloc (unsigned int size,
3639 int pcrel,
3640 int sign,
3641 bfd_reloc_code_real_type other)
3643 if (other != NO_RELOC)
3645 reloc_howto_type *rel;
3647 if (size == 8)
3648 switch (other)
3650 case BFD_RELOC_X86_64_GOT32:
3651 return BFD_RELOC_X86_64_GOT64;
3653 case BFD_RELOC_X86_64_GOTPLT64:
3654 return BFD_RELOC_X86_64_GOTPLT64;
3656 case BFD_RELOC_X86_64_PLTOFF64:
3657 return BFD_RELOC_X86_64_PLTOFF64;
3659 case BFD_RELOC_X86_64_GOTPC32:
3660 other = BFD_RELOC_X86_64_GOTPC64;
3661 break;
3662 case BFD_RELOC_X86_64_GOTPCREL:
3663 other = BFD_RELOC_X86_64_GOTPCREL64;
3664 break;
3665 case BFD_RELOC_X86_64_TPOFF32:
3666 other = BFD_RELOC_X86_64_TPOFF64;
3667 break;
3668 case BFD_RELOC_X86_64_DTPOFF32:
3669 other = BFD_RELOC_X86_64_DTPOFF64;
3670 break;
3671 default:
3672 break;
3675 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3676 if (other == BFD_RELOC_SIZE32)
3678 if (size == 8)
3679 other = BFD_RELOC_SIZE64;
3680 if (pcrel)
3682 as_bad (_("there are no pc-relative size relocations"));
3683 return NO_RELOC;
3686 #endif
3688 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3689 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3690 sign = -1;
3692 rel = bfd_reloc_type_lookup (stdoutput, other);
3693 if (!rel)
3694 as_bad (_("unknown relocation (%u)"), other);
3695 else if (size != bfd_get_reloc_size (rel))
3696 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3697 bfd_get_reloc_size (rel),
3698 size);
3699 else if (pcrel && !rel->pc_relative)
3700 as_bad (_("non-pc-relative relocation for pc-relative field"));
3701 else if ((rel->complain_on_overflow == complain_overflow_signed
3702 && !sign)
3703 || (rel->complain_on_overflow == complain_overflow_unsigned
3704 && sign > 0))
3705 as_bad (_("relocated field and relocation type differ in signedness"));
3706 else
3707 return other;
3708 return NO_RELOC;
3711 if (pcrel)
3713 if (!sign)
3714 as_bad (_("there are no unsigned pc-relative relocations"));
3715 switch (size)
3717 case 1: return BFD_RELOC_8_PCREL;
3718 case 2: return BFD_RELOC_16_PCREL;
3719 case 4: return BFD_RELOC_32_PCREL;
3720 case 8: return BFD_RELOC_64_PCREL;
3722 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3724 else
3726 if (sign > 0)
3727 switch (size)
3729 case 4: return BFD_RELOC_X86_64_32S;
3731 else
3732 switch (size)
3734 case 1: return BFD_RELOC_8;
3735 case 2: return BFD_RELOC_16;
3736 case 4: return BFD_RELOC_32;
3737 case 8: return BFD_RELOC_64;
3739 as_bad (_("cannot do %s %u byte relocation"),
3740 sign > 0 ? "signed" : "unsigned", size);
3743 return NO_RELOC;
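/* Worked examples (illustrative): reloc (4, 1, 1, NO_RELOC) yields
   BFD_RELOC_32_PCREL, reloc (4, 0, 1, NO_RELOC) yields
   BFD_RELOC_X86_64_32S, and reloc (8, 0, 0, BFD_RELOC_X86_64_GOTPCREL)
   first widens the incoming relocation to BFD_RELOC_X86_64_GOTPCREL64
   before running the size and signedness cross-checks.  */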
3746 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3747 /* Here we decide which fixups can be adjusted to make them relative to
3748 the beginning of the section instead of the symbol. Basically we need
3749 to make sure that the dynamic relocations are done correctly, so in
3750 some cases we force the original symbol to be used. */
3753 tc_i386_fix_adjustable (fixS *fixP)
3755 if (!IS_ELF)
3756 return 1;
3758 /* Don't adjust pc-relative references to merge sections in 64-bit
3759 mode. */
3760 if (use_rela_relocations
3761 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3762 && fixP->fx_pcrel)
3763 return 0;
3765 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3766 and changed later by validate_fix. */
3767 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3768 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3769 return 0;
3771 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3772 for size relocations. */
3773 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3774 || fixP->fx_r_type == BFD_RELOC_SIZE64
3775 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3776 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3777 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3778 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3779 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3780 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3781 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3782 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3783 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3784 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3785 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3786 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3787 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3788 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3789 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3790 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3791 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3792 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPCRELX
3793 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3794 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3795 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3796 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3797 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3798 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTTPOFF
3799 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTTPOFF
3800 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3801 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3802 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3803 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3804 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
3805 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3806 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3807 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3808 return 0;
3809 return 1;
3811 #endif
3813 static INLINE bool
3814 want_disp32 (const insn_template *t)
3816 return flag_code != CODE_64BIT
3817 || i.prefix[ADDR_PREFIX]
3818 || ((t->mnem_off == MN_lea
3819 || (i.tm.base_opcode == 0x8d && i.tm.opcode_space == SPACE_BASE))
3820 && (!i.types[1].bitfield.qword
3821 || t->opcode_modifier.size == SIZE32));
3824 static int
3825 intel_float_operand (const char *mnemonic)
3827 /* Note that the value returned is meaningful only for opcodes with (memory)
3828 operands, hence the code here is free to improperly handle opcodes that
3829 have no operands (for better performance and smaller code). */
3831 if (mnemonic[0] != 'f')
3832 return 0; /* non-math */
3834 switch (mnemonic[1])
3836 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3837 the fs segment override prefix are not currently handled, because
3838 no call path can make opcodes without operands get here */
3839 case 'i':
3840 return 2 /* integer op */;
3841 case 'l':
3842 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3843 return 3; /* fldcw/fldenv */
3844 break;
3845 case 'n':
3846 if (mnemonic[2] != 'o' /* fnop */)
3847 return 3; /* non-waiting control op */
3848 break;
3849 case 'r':
3850 if (mnemonic[2] == 's')
3851 return 3; /* frstor/frstpm */
3852 break;
3853 case 's':
3854 if (mnemonic[2] == 'a')
3855 return 3; /* fsave */
3856 if (mnemonic[2] == 't')
3858 switch (mnemonic[3])
3860 case 'c': /* fstcw */
3861 case 'd': /* fstdw */
3862 case 'e': /* fstenv */
3863 case 's': /* fsts[gw] */
3864 return 3;
3867 break;
3868 case 'x':
3869 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3870 return 0; /* fxsave/fxrstor are not really math ops */
3871 break;
3874 return 1;
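/* Classification examples (illustrative): "fadd" -> 1 (FP math),
   "fiadd" -> 2 (integer operand), "fldcw" and "fnsave" -> 3
   (control operations), "fxsave" -> 0 (not really a math op), and
   anything not starting with 'f', e.g. "mov", -> 0.  */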
3877 static INLINE void
3878 install_template (const insn_template *t)
3880 unsigned int l;
3882 i.tm = *t;
3884 /* Dual VEX/EVEX templates need stripping one of the possible variants. */
3885 if (t->opcode_modifier.vex && t->opcode_modifier.evex)
3887 if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
3888 || maybe_cpu (t, CpuFMA))
3889 && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
3891 if (need_evex_encoding (t))
3893 i.tm.opcode_modifier.vex = 0;
3894 i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
3895 i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
3897 else
3899 i.tm.opcode_modifier.evex = 0;
3900 if (i.tm.cpu_any.bitfield.cpuavx)
3901 i.tm.cpu.bitfield.cpuavx = 1;
3902 else if (!i.tm.cpu.bitfield.isa)
3903 i.tm.cpu.bitfield.isa = i.tm.cpu_any.bitfield.isa;
3904 else
3905 gas_assert (i.tm.cpu.bitfield.isa == i.tm.cpu_any.bitfield.isa);
3909 if ((maybe_cpu (t, CpuCMPCCXADD) || maybe_cpu (t, CpuAMX_TILE)
3910 || maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512DQ)
3911 || maybe_cpu (t, CpuAVX512BW) || maybe_cpu (t, CpuBMI)
3912 || maybe_cpu (t, CpuBMI2) || maybe_cpu (t, CpuUSER_MSR))
3913 && maybe_cpu (t, CpuAPX_F))
3915 if (need_evex_encoding (t))
3916 i.tm.opcode_modifier.vex = 0;
3917 else
3918 i.tm.opcode_modifier.evex = 0;
3922 /* For CCMP and CTEST the template has EVEX.SCC in base_opcode. Extract it
3923 from there, then adjust base_opcode to recover its normal meaning. */
3924 if (i.tm.opcode_modifier.operandconstraint == SCC)
3926 /* Get EVEX.SCC value from the lower 4 bits of base_opcode. */
3927 i.scc = i.tm.base_opcode & 0xf;
3928 i.tm.base_opcode >>= 8;
3931 /* For CMOVcc having undergone NDD-to-legacy optimization with its source
3932 operands being swapped, we need to invert the encoded condition. */
3933 if (i.invert_cond)
3934 i.tm.base_opcode ^= 1;
3936 /* Note that for pseudo prefixes this produces a length of 1. But for them
3937 the length isn't interesting at all. */
3938 for (l = 1; l < 4; ++l)
3939 if (!(i.tm.base_opcode >> (8 * l)))
3940 break;
3942 i.opcode_length = l;
3945 /* Build the VEX prefix. */
3947 static void
3948 build_vex_prefix (const insn_template *t)
3950 unsigned int register_specifier;
3951 unsigned int vector_length;
3952 unsigned int w;
3954 /* Check register specifier. */
3955 if (i.vex.register_specifier)
3957 register_specifier =
3958 ~register_number (i.vex.register_specifier) & 0xf;
3959 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3961 else
3962 register_specifier = 0xf;
3964 /* Use the 2-byte VEX prefix by swapping destination and source
3965 operands if there is more than one register operand. */
3966 if (i.reg_operands > 1
3967 && pp.encoding != encoding_vex3
3968 && pp.dir_encoding == dir_encoding_default
3969 && i.operands == i.reg_operands
3970 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3971 && i.tm.opcode_space == SPACE_0F
3972 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3973 && i.rex == REX_B)
3975 unsigned int xchg;
3977 swap_2_operands (0, i.operands - 1);
3979 gas_assert (i.rm.mode == 3);
3981 i.rex = REX_R;
3982 xchg = i.rm.regmem;
3983 i.rm.regmem = i.rm.reg;
3984 i.rm.reg = xchg;
3986 if (i.tm.opcode_modifier.d)
3987 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3988 ? Opcode_ExtD : Opcode_SIMD_IntD;
3989 else /* Use the next insn. */
3990 install_template (&t[1]);
3993 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3994 are no memory operands and at least 3 register ones. */
3995 if (i.reg_operands >= 3
3996 && pp.encoding != encoding_vex3
3997 && i.reg_operands == i.operands - i.imm_operands
3998 && i.tm.opcode_modifier.vex
3999 && i.tm.opcode_modifier.commutative
4000 /* .commutative aliases .staticrounding; disambiguate. */
4001 && !i.tm.opcode_modifier.sae
4002 && (i.tm.opcode_modifier.sse2avx
4003 || (optimize > 1 && !pp.no_optimize))
4004 && i.rex == REX_B
4005 && i.vex.register_specifier
4006 && !(i.vex.register_specifier->reg_flags & RegRex))
4008 unsigned int xchg = i.operands - i.reg_operands;
4010 gas_assert (i.tm.opcode_space == SPACE_0F);
4011 gas_assert (!i.tm.opcode_modifier.sae);
4012 gas_assert (operand_type_equal (&i.types[i.operands - 2],
4013 &i.types[i.operands - 3]));
4014 gas_assert (i.rm.mode == 3);
4016 swap_2_operands (xchg, xchg + 1);
4018 i.rex = 0;
4019 xchg = i.rm.regmem | 8;
4020 i.rm.regmem = ~register_specifier & 0xf;
4021 gas_assert (!(i.rm.regmem & 8));
4022 i.vex.register_specifier += xchg - i.rm.regmem;
4023 register_specifier = ~xchg & 0xf;
4026 if (i.tm.opcode_modifier.vex == VEXScalar)
4027 vector_length = avxscalar;
4028 else if (i.tm.opcode_modifier.vex == VEX256)
4029 vector_length = 1;
4030 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
4031 vector_length = 0;
4032 else
4034 unsigned int op;
4036 /* Determine vector length from the last multi-length vector
4037 operand. */
4038 vector_length = 0;
4039 for (op = t->operands; op--;)
4040 if (t->operand_types[op].bitfield.xmmword
4041 && t->operand_types[op].bitfield.ymmword
4042 && i.types[op].bitfield.ymmword)
4044 vector_length = 1;
4045 break;
4049 /* Check the REX.W bit and VEXW. */
4050 if (i.tm.opcode_modifier.vexw == VEXWIG)
4051 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
4052 else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
4053 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
4054 else
4055 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
4057 /* Use 2-byte VEX prefix if possible. */
4058 if (w == 0
4059 && pp.encoding != encoding_vex3
4060 && i.tm.opcode_space == SPACE_0F
4061 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
4063 /* 2-byte VEX prefix. */
4064 unsigned int r;
4066 i.vex.length = 2;
4067 i.vex.bytes[0] = 0xc5;
4069 /* Check the REX.R bit. */
4070 r = (i.rex & REX_R) ? 0 : 1;
4071 i.vex.bytes[1] = (r << 7
4072 | register_specifier << 3
4073 | vector_length << 2
4074 | i.tm.opcode_modifier.opcodeprefix);
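/* For instance `vaddps %xmm2, %xmm1, %xmm0' ends up here and is emitted
   as C5 F0 58 C2: `R=1 (%xmm0 < 8), `vvvv=1110 (%xmm1), L=0, pp=00.  */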
4076 else
4078 /* 3-byte VEX prefix. */
4079 i.vex.length = 3;
4081 switch (i.tm.opcode_space)
4083 case SPACE_0F:
4084 case SPACE_0F38:
4085 case SPACE_0F3A:
4086 case SPACE_VEXMAP7:
4087 i.vex.bytes[0] = 0xc4;
4088 break;
4089 case SPACE_XOP08:
4090 case SPACE_XOP09:
4091 case SPACE_XOP0A:
4092 i.vex.bytes[0] = 0x8f;
4093 break;
4094 default:
4095 abort ();
4098 /* The high 3 bits of the second VEX byte are 1's complement
4099 of RXB bits from REX. */
4100 i.vex.bytes[1] = ((~i.rex & 7) << 5)
4101 | (!dot_insn () ? i.tm.opcode_space
4102 : i.insn_opcode_space);
4104 i.vex.bytes[2] = (w << 7
4105 | register_specifier << 3
4106 | vector_length << 2
4107 | i.tm.opcode_modifier.opcodeprefix);
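/* For instance `vaddps %ymm10, %ymm1, %ymm0' needs this form because of
   REX_B (%ymm10) and is emitted as C4 C1 74 58 C2, with mmmmm=00001
   selecting the 0F map and L=1 selecting 256-bit vector length.  */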
4111 static INLINE bool
4112 is_any_vex_encoding (const insn_template *t)
4114 return t->opcode_modifier.vex || t->opcode_modifier.evex;
4117 /* We can use this function only when the current encoding is EVEX. */
4118 static INLINE bool
4119 is_apx_evex_encoding (void)
4121 return i.rex2 || i.tm.opcode_space == SPACE_EVEXMAP4 || pp.has_nf
4122 || (i.vex.register_specifier
4123 && (i.vex.register_specifier->reg_flags & RegRex2));
4126 static INLINE bool
4127 is_apx_rex2_encoding (void)
4129 return i.rex2 || pp.rex2_encoding
4130 || i.tm.opcode_modifier.rex2;
4133 static unsigned int
4134 get_broadcast_bytes (const insn_template *t, bool diag)
4136 unsigned int op, bytes;
4137 const i386_operand_type *types;
4139 if (i.broadcast.type)
4140 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
4142 gas_assert (intel_syntax);
4144 for (op = 0; op < t->operands; ++op)
4145 if (t->operand_types[op].bitfield.baseindex)
4146 break;
4148 gas_assert (op < t->operands);
4150 if (t->opcode_modifier.evex != EVEXDYN)
4151 switch (i.broadcast.bytes)
4153 case 1:
4154 if (t->operand_types[op].bitfield.word)
4155 return 2;
4156 /* Fall through. */
4157 case 2:
4158 if (t->operand_types[op].bitfield.dword)
4159 return 4;
4160 /* Fall through. */
4161 case 4:
4162 if (t->operand_types[op].bitfield.qword)
4163 return 8;
4164 /* Fall through. */
4165 case 8:
4166 if (t->operand_types[op].bitfield.xmmword)
4167 return 16;
4168 if (t->operand_types[op].bitfield.ymmword)
4169 return 32;
4170 if (t->operand_types[op].bitfield.zmmword)
4171 return 64;
4172 /* Fall through. */
4173 default:
4174 abort ();
4177 gas_assert (op + 1 < t->operands);
4179 if (t->operand_types[op + 1].bitfield.xmmword
4180 + t->operand_types[op + 1].bitfield.ymmword
4181 + t->operand_types[op + 1].bitfield.zmmword > 1)
4183 types = &i.types[op + 1];
4184 diag = false;
4186 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
4187 types = &t->operand_types[op];
4189 if (types->bitfield.zmmword)
4190 bytes = 64;
4191 else if (types->bitfield.ymmword)
4192 bytes = 32;
4193 else
4194 bytes = 16;
4196 if (diag)
4197 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
4198 insn_name (t), bytes * 8);
4200 return bytes;
4203 /* Build the EVEX prefix. */
4205 static void
4206 build_evex_prefix (void)
4208 unsigned int register_specifier, w;
4209 rex_byte vrex_used = 0;
4211 /* Check register specifier. */
4212 if (i.vex.register_specifier)
4214 gas_assert ((i.vrex & REX_X) == 0);
4216 register_specifier = i.vex.register_specifier->reg_num;
4217 if ((i.vex.register_specifier->reg_flags & RegRex))
4218 register_specifier += 8;
4219 /* The upper 16 registers are encoded in the fourth byte of the
4220 EVEX prefix. */
4221 if (!(i.vex.register_specifier->reg_flags & RegVRex))
4222 i.vex.bytes[3] = 0x8;
4223 register_specifier = ~register_specifier & 0xf;
4225 else
4227 register_specifier = 0xf;
4229 /* Encode upper 16 vector index register in the fourth byte of
4230 the EVEX prefix. */
4231 if (!(i.vrex & REX_X))
4232 i.vex.bytes[3] = 0x8;
4233 else
4234 vrex_used |= REX_X;
4237 /* 4 byte EVEX prefix. */
4238 i.vex.length = 4;
4239 i.vex.bytes[0] = 0x62;
4241 /* The high 3 bits of the second EVEX byte are 1's complement of RXB
4242 bits from REX. */
4243 gas_assert (i.tm.opcode_space >= SPACE_0F);
4244 gas_assert (i.tm.opcode_space <= SPACE_VEXMAP7);
4245 i.vex.bytes[1] = ((~i.rex & 7) << 5)
4246 | (!dot_insn () ? i.tm.opcode_space
4247 : i.insn_opcode_space);
4249 /* The fifth bit of the second EVEX byte is 1's complement of the
4250 REX_R bit in VREX. */
4251 if (!(i.vrex & REX_R))
4252 i.vex.bytes[1] |= 0x10;
4253 else
4254 vrex_used |= REX_R;
4256 if ((i.reg_operands + i.imm_operands) == i.operands)
4258 /* When all operands are registers, the REX_X bit in REX is not
4259 used. We reuse it to encode the upper 16 registers, which is
4260 indicated by the REX_B bit in VREX. The REX_X bit is encoded
4261 as 1's complement. */
4262 if ((i.vrex & REX_B))
4264 vrex_used |= REX_B;
4265 i.vex.bytes[1] &= ~0x40;
4269 /* EVEX instructions shouldn't need the REX prefix. */
4270 i.vrex &= ~vrex_used;
4271 gas_assert (i.vrex == 0);
4273 /* Check the REX.W bit and VEXW. */
4274 if (i.tm.opcode_modifier.vexw == VEXWIG)
4275 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
4276 else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
4277 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
4278 else
4279 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
4281 /* The third byte of the EVEX prefix. */
4282 i.vex.bytes[2] = ((w << 7)
4283 | (register_specifier << 3)
4284 | 4 /* Encode the U bit. */
4285 | i.tm.opcode_modifier.opcodeprefix);
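/* I.e. this byte is W | `v`v`v`v | U | pp, with U (bit 2) hard-wired to
   1 here; APX reuses `U as X4, cleared in build_apx_evex_prefix().  */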
4287 /* The fourth byte of the EVEX prefix. */
4288 /* The zeroing-masking bit. */
4289 if (i.mask.reg && i.mask.zeroing)
4290 i.vex.bytes[3] |= 0x80;
4292 /* Vector length and broadcast are encoded here only when there is no RC; the RC/SAE paths below reuse the L'L and b bits. */
4293 if (i.rounding.type == rc_none)
4295 /* Encode the vector length. */
4296 unsigned int vec_length;
4298 if (i.tm.opcode_modifier.evex == EVEXDYN)
4300 unsigned int op;
4302 /* Determine vector length from the last multi-length vector
4303 operand. */
4304 for (op = i.operands; op--;)
4305 if (i.tm.operand_types[op].bitfield.xmmword
4306 + i.tm.operand_types[op].bitfield.ymmword
4307 + i.tm.operand_types[op].bitfield.zmmword > 1)
4309 if (i.types[op].bitfield.zmmword)
4311 i.tm.opcode_modifier.evex = EVEX512;
4312 break;
4314 else if (i.types[op].bitfield.ymmword)
4316 i.tm.opcode_modifier.evex = EVEX256;
4317 break;
4319 else if (i.types[op].bitfield.xmmword)
4321 i.tm.opcode_modifier.evex = EVEX128;
4322 break;
4324 else if ((i.broadcast.type || i.broadcast.bytes)
4325 && op == i.broadcast.operand)
4327 switch (get_broadcast_bytes (&i.tm, true))
4329 case 64:
4330 i.tm.opcode_modifier.evex = EVEX512;
4331 break;
4332 case 32:
4333 i.tm.opcode_modifier.evex = EVEX256;
4334 break;
4335 case 16:
4336 i.tm.opcode_modifier.evex = EVEX128;
4337 break;
4338 default:
4339 abort ();
4341 break;
4345 if (op >= MAX_OPERANDS)
4346 abort ();
4349 switch (i.tm.opcode_modifier.evex)
4351 case EVEXLIG: /* LL' is ignored */
4352 vec_length = evexlig << 5;
4353 break;
4354 case EVEX128:
4355 vec_length = 0 << 5;
4356 break;
4357 case EVEX256:
4358 vec_length = 1 << 5;
4359 break;
4360 case EVEX512:
4361 vec_length = 2 << 5;
4362 break;
4363 case EVEX_L3:
4364 if (dot_insn ())
4366 vec_length = 3 << 5;
4367 break;
4369 /* Fall through. */
4370 default:
4371 abort ();
4372 break;
4374 i.vex.bytes[3] |= vec_length;
4375 /* Encode the broadcast bit. */
4376 if (i.broadcast.type || i.broadcast.bytes)
4377 i.vex.bytes[3] |= 0x10;
4379 else if (i.rounding.type != saeonly)
4380 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
4381 else
4382 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
4384 if (i.mask.reg)
4385 i.vex.bytes[3] |= i.mask.reg->reg_num;
4388 /* Build the 2-byte REX2 prefix.
4389 | D5h |
4390 | m | R4 X4 B4 | W R X B |
4392 Rex2 reuses i.vex as they both encode i.tm.opcode_space in their prefixes.
4394 static void
4395 build_rex2_prefix (void)
4397 i.vex.length = 2;
4398 i.vex.bytes[0] = 0xd5;
4399 /* For the W R X B bits, the REX prefix variables are reused. */
4400 i.vex.bytes[1] = ((i.tm.opcode_space << 7)
4401 | (i.rex2 << 4) | i.rex);
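/* NB: with its single map bit, REX2 can only reach opcode maps 0 and 0F,
   while R4/X4/B4 extend the GPR file to %r16-%r31 (see the Intel APX
   specification for the authoritative layout).  */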
4404 /* Build the EVEX prefix (4 bytes) for an EVEX insn
4405 | 62h |
4406 | `R`X`B`R' | B'mmm |
4407 | W | v`v`v`v | `x' | pp |
4408 | z| L'L | b | `v | aaa |
4410 static bool
4411 build_apx_evex_prefix (void)
4413 /* To mimic behavior for legacy insns, transform use of DATA16 and REX64 into
4414 their embedded-prefix representations. */
4415 if (i.tm.opcode_space == SPACE_EVEXMAP4)
4417 if (i.prefix[DATA_PREFIX])
4419 if (i.tm.opcode_modifier.opcodeprefix)
4421 as_bad (i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66
4422 ? _("same type of prefix used twice")
4423 : _("conflicting use of `data16' prefix"));
4424 return false;
4426 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
4427 i.prefix[DATA_PREFIX] = 0;
4429 if (i.prefix[REX_PREFIX] & REX_W)
4431 if (i.suffix == QWORD_MNEM_SUFFIX)
4433 as_bad (_("same type of prefix used twice"));
4434 return false;
4436 i.tm.opcode_modifier.vexw = VEXW1;
4437 i.prefix[REX_PREFIX] = 0;
4441 build_evex_prefix ();
4442 if (i.rex2 & REX_R)
4443 i.vex.bytes[1] &= ~0x10;
4444 if (i.rex2 & REX_B)
4445 i.vex.bytes[1] |= 0x08;
4446 if (i.rex2 & REX_X)
4448 gas_assert (i.rm.mode != 3);
4449 i.vex.bytes[2] &= ~0x04;
4451 if (i.vex.register_specifier
4452 && i.vex.register_specifier->reg_flags & RegRex2)
4453 i.vex.bytes[3] &= ~0x08;
4455 /* Encode the NDD bit of the instruction promoted from the legacy
4456 space. ZU shares the same bit with NDD. */
4457 if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
4458 || i.tm.opcode_modifier.operandconstraint == ZERO_UPPER)
4459 i.vex.bytes[3] |= 0x10;
4461 /* Encode SCC and oszc flags bits. */
4462 if (i.tm.opcode_modifier.operandconstraint == SCC)
4464 /* The default value of vvvv is 1111 and needs to be cleared. */
4465 i.vex.bytes[2] &= ~0x78;
4466 i.vex.bytes[2] |= (i.oszc_flags << 3);
4467 /* ND and aaa bits should be 0. */
4468 know (!(i.vex.bytes[3] & 0x17));
4469 /* The default value of V' is 1 and needs to be cleared. */
4470 i.vex.bytes[3] = (i.vex.bytes[3] & ~0x08) | i.scc;
4473 /* Encode the NF bit. */
4474 if (pp.has_nf || i.tm.opcode_modifier.operandconstraint == EVEX_NF)
4475 i.vex.bytes[3] |= 0x04;
4477 return true;
4480 static void establish_rex (void)
4482 /* Note that legacy encodings have at most 2 non-immediate operands. */
4483 unsigned int first = i.imm_operands;
4484 unsigned int last = i.operands > first ? i.operands - first - 1 : first;
4486 /* Respect a user-specified REX prefix. */
4487 i.rex |= i.prefix[REX_PREFIX] & REX_OPCODE;
4489 /* For 8-bit RegRex64 registers without a prefix, we need an empty REX prefix. */
4490 if (((i.types[first].bitfield.class == Reg && i.types[first].bitfield.byte
4491 && (i.op[first].regs->reg_flags & RegRex64) != 0)
4492 || (i.types[last].bitfield.class == Reg && i.types[last].bitfield.byte
4493 && (i.op[last].regs->reg_flags & RegRex64) != 0))
4494 && !is_apx_rex2_encoding () && !is_any_vex_encoding (&i.tm))
4495 i.rex |= REX_OPCODE;
4497 /* For REX/REX2/EVEX prefix instructions, we need to convert old registers
4498 (AL, CL, DL and BL) to new ones (AXL, CXL, DXL and BXL) and reject AH,
4499 CH, DH and BH. */
4500 if (i.rex || i.rex2 || i.tm.opcode_modifier.evex)
4502 for (unsigned int x = first; x <= last; x++)
4504 /* Look for 8 bit operand that uses old registers. */
4505 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4506 && (i.op[x].regs->reg_flags & RegRex64) == 0)
4508 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4509 /* In case it is a "hi" register, give up. */
4510 if (i.op[x].regs->reg_num > 3)
4511 as_bad (_("can't encode register '%s%s' in an "
4512 "instruction requiring %s prefix"),
4513 register_prefix, i.op[x].regs->reg_name,
4514 i.tm.opcode_modifier.evex ? "EVEX" : "REX/REX2");
4516 /* Otherwise it is equivalent to the extended register.
4517 Since the encoding doesn't change this is merely
4518 cosmetic cleanup for debug output. */
4519 i.op[x].regs += 8;
4524 if (i.rex == 0 && i.rex2 == 0 && (pp.rex_encoding || pp.rex2_encoding))
4526 /* Check if we can add a REX_OPCODE byte. Look for an 8-bit operand
4527 that uses a legacy register. If it is a "hi" register, don't add
4528 a REX or REX2 prefix. */
4529 unsigned int x;
4531 for (x = first; x <= last; x++)
4532 if (i.types[x].bitfield.class == Reg
4533 && i.types[x].bitfield.byte
4534 && (i.op[x].regs->reg_flags & RegRex64) == 0
4535 && i.op[x].regs->reg_num > 3)
4537 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4538 pp.rex_encoding = false;
4539 pp.rex2_encoding = false;
4540 break;
4543 if (pp.rex_encoding)
4544 i.rex = REX_OPCODE;
4547 if (is_apx_rex2_encoding ())
4549 build_rex2_prefix ();
4550 /* The individual REX.RXBW bits got consumed. */
4551 i.rex &= REX_OPCODE;
4553 else if (i.rex != 0)
4554 add_prefix (REX_OPCODE | i.rex);
4557 static void
4558 process_immext (void)
4560 expressionS *exp;
4562 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4563 which is coded in the same place as an 8-bit immediate field
4564 would be. Here we fake an 8-bit immediate operand from the
4565 opcode suffix stored in tm.extension_opcode.
4567 AVX instructions also use this encoding, for some of
4568 their 3-operand forms. */
4570 gas_assert (i.imm_operands <= 1
4571 && (i.operands <= 2
4572 || (is_any_vex_encoding (&i.tm)
4573 && i.operands <= 4)));
4575 exp = &im_expressions[i.imm_operands++];
4576 i.op[i.operands].imms = exp;
4577 i.types[i.operands].bitfield.imm8 = 1;
4578 i.operands++;
4579 exp->X_op = O_constant;
4580 exp->X_add_number = i.tm.extension_opcode;
4581 i.tm.extension_opcode = None;
4585 static int
4586 check_hle (void)
4588 switch (i.tm.opcode_modifier.prefixok)
4590 default:
4591 abort ();
4592 case PrefixLock:
4593 case PrefixNone:
4594 case PrefixNoTrack:
4595 case PrefixRep:
4596 as_bad (_("invalid instruction `%s' after `%s'"),
4597 insn_name (&i.tm), i.hle_prefix);
4598 return 0;
4599 case PrefixHLELock:
4600 if (i.prefix[LOCK_PREFIX])
4601 return 1;
4602 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4603 return 0;
4604 case PrefixHLEAny:
4605 return 1;
4606 case PrefixHLERelease:
4607 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4609 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4610 insn_name (&i.tm));
4611 return 0;
4613 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4615 as_bad (_("memory destination needed for instruction `%s'"
4616 " after `xrelease'"), insn_name (&i.tm));
4617 return 0;
4619 return 1;
4623 /* Helper for optimization (running ahead of process_suffix()), to make sure we
4624 convert only well-formed insns. @OP is the sized operand to cross check
4625 against (typically a register). Checking against a single operand typically
4626 suffices, as match_template() has already honored CheckOperandSize. */
4628 static bool is_plausible_suffix (unsigned int op)
4630 return !i.suffix
4631 || (i.suffix == BYTE_MNEM_SUFFIX && i.types[op].bitfield.byte)
4632 || (i.suffix == WORD_MNEM_SUFFIX && i.types[op].bitfield.word)
4633 || (i.suffix == LONG_MNEM_SUFFIX && i.types[op].bitfield.dword)
4634 || (i.suffix == QWORD_MNEM_SUFFIX && i.types[op].bitfield.qword);
4637 /* Encode aligned vector move as unaligned vector move. */
4639 static void
4640 encode_with_unaligned_vector_move (void)
4642 switch (i.tm.base_opcode)
4644 case 0x28: /* Load instructions. */
4645 case 0x29: /* Store instructions. */
4646 /* movaps/movapd/vmovaps/vmovapd. */
4647 if (i.tm.opcode_space == SPACE_0F
4648 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4649 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4650 break;
4651 case 0x6f: /* Load instructions. */
4652 case 0x7f: /* Store instructions. */
4653 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4654 if (i.tm.opcode_space == SPACE_0F
4655 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4656 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4657 break;
4658 default:
4659 break;
4663 /* Try the shortest encoding by shortening operand size. */
4665 static void
4666 optimize_encoding (void)
4668 unsigned int j;
4670 if (i.tm.mnem_off == MN_lea)
4672 /* Optimize: -O:
4673 lea symbol, %rN -> mov $symbol, %rN
4674 lea (%rM), %rN -> mov %rM, %rN
4675 lea (,%rM,1), %rN -> mov %rM, %rN
4677 and in 32-bit mode for 16-bit addressing
4679 lea (%rM), %rN -> movzx %rM, %rN
4681 and in 64-bit mode zap 32-bit addressing in favor of using a
4682 32-bit (or less) destination.
4684 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4686 if (!i.op[1].regs->reg_type.bitfield.word)
4687 i.tm.opcode_modifier.size = SIZE32;
4688 i.prefix[ADDR_PREFIX] = 0;
4691 if (!i.index_reg && !i.base_reg)
4693 /* Handle:
4694 lea symbol, %rN -> mov $symbol, %rN
4696 if (flag_code == CODE_64BIT)
4698 /* Don't transform a relocation to a 16-bit one. */
4699 if (i.op[0].disps
4700 && i.op[0].disps->X_op != O_constant
4701 && i.op[1].regs->reg_type.bitfield.word)
4702 return;
4704 if (!i.op[1].regs->reg_type.bitfield.qword
4705 || i.tm.opcode_modifier.size == SIZE32)
4707 i.tm.base_opcode = 0xb8;
4708 i.tm.opcode_modifier.modrm = 0;
4709 if (!i.op[1].regs->reg_type.bitfield.word)
4710 i.types[0].bitfield.imm32 = 1;
4711 else
4713 i.tm.opcode_modifier.size = SIZE16;
4714 i.types[0].bitfield.imm16 = 1;
4717 else
4719 /* Subject to further optimization below. */
4720 i.tm.base_opcode = 0xc7;
4721 i.tm.extension_opcode = 0;
4722 i.types[0].bitfield.imm32s = 1;
4723 i.types[0].bitfield.baseindex = 0;
4726 /* Outside of 64-bit mode address and operand sizes have to match if
4727 a relocation is involved, as otherwise we wouldn't (currently) or
4728 even couldn't express the relocation correctly. */
4729 else if (i.op[0].disps
4730 && i.op[0].disps->X_op != O_constant
4731 && ((!i.prefix[ADDR_PREFIX])
4732 != (flag_code == CODE_32BIT
4733 ? i.op[1].regs->reg_type.bitfield.dword
4734 : i.op[1].regs->reg_type.bitfield.word)))
4735 return;
4736 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4737 destination is going to grow encoding size. */
4738 else if (flag_code == CODE_16BIT
4739 && (optimize <= 1 || optimize_for_space)
4740 && !i.prefix[ADDR_PREFIX]
4741 && i.op[1].regs->reg_type.bitfield.dword)
4742 return;
4743 else
4745 i.tm.base_opcode = 0xb8;
4746 i.tm.opcode_modifier.modrm = 0;
4747 if (i.op[1].regs->reg_type.bitfield.dword)
4748 i.types[0].bitfield.imm32 = 1;
4749 else
4750 i.types[0].bitfield.imm16 = 1;
4752 if (i.op[0].disps
4753 && i.op[0].disps->X_op == O_constant
4754 && i.op[1].regs->reg_type.bitfield.dword
4755 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4756 GCC 5. */
4757 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4758 i.op[0].disps->X_add_number &= 0xffff;
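/* Truncating the immediate mirrors the 16-bit address-size wraparound
   that the original LEA would have applied to its result.  */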
4761 i.tm.operand_types[0] = i.types[0];
4762 i.imm_operands = 1;
4763 if (!i.op[0].imms)
4765 i.op[0].imms = &im_expressions[0];
4766 i.op[0].imms->X_op = O_absent;
4769 else if (i.op[0].disps
4770 && (i.op[0].disps->X_op != O_constant
4771 || i.op[0].disps->X_add_number))
4772 return;
4773 else
4775 /* Handle:
4776 lea (%rM), %rN -> mov %rM, %rN
4777 lea (,%rM,1), %rN -> mov %rM, %rN
4778 lea (%rM), %rN -> movzx %rM, %rN
4780 const reg_entry *addr_reg;
4782 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4783 addr_reg = i.base_reg;
4784 else if (!i.base_reg
4785 && i.index_reg->reg_num != RegIZ
4786 && !i.log2_scale_factor)
4787 addr_reg = i.index_reg;
4788 else
4789 return;
4791 if (addr_reg->reg_type.bitfield.word
4792 && i.op[1].regs->reg_type.bitfield.dword)
4794 if (flag_code != CODE_32BIT)
4795 return;
4796 i.tm.opcode_space = SPACE_0F;
4797 i.tm.base_opcode = 0xb7;
4799 else
4800 i.tm.base_opcode = 0x8b;
4802 if (addr_reg->reg_type.bitfield.dword
4803 && i.op[1].regs->reg_type.bitfield.qword)
4804 i.tm.opcode_modifier.size = SIZE32;
4806 i.op[0].regs = addr_reg;
4807 i.reg_operands = 2;
4810 i.mem_operands = 0;
4811 i.disp_operands = 0;
4812 i.prefix[ADDR_PREFIX] = 0;
4813 i.prefix[SEG_PREFIX] = 0;
4814 i.seg[0] = NULL;
4817 if (optimize_for_space
4818 && (i.tm.mnem_off == MN_test
4819 || (i.tm.base_opcode == 0xf6
4820 && i.tm.opcode_space == SPACE_EVEXMAP4))
4821 && i.reg_operands == 1
4822 && i.imm_operands == 1
4823 && !i.types[1].bitfield.byte
4824 && is_plausible_suffix (1)
4825 && i.op[0].imms->X_op == O_constant
4826 && fits_in_imm7 (i.op[0].imms->X_add_number))
4828 /* Optimize: -Os:
4829 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4830 ctest<cc> $imm7, %r64/%r32/%r16 -> ctest<cc> $imm7, %r8
4832 unsigned int base_regnum = i.op[1].regs->reg_num;
4834 gas_assert (!i.tm.opcode_modifier.modrm || i.tm.extension_opcode == 0);
4836 if (flag_code == CODE_64BIT || base_regnum < 4)
4838 i.types[1].bitfield.byte = 1;
4839 /* Squash the suffix. */
4840 i.suffix = 0;
4841 /* Convert to byte registers. 8-bit registers are special,
4842 RegRex64 and non-RegRex64 each have 8 registers. */
4843 if (i.types[1].bitfield.word)
4844 /* 32 (or 40) 8-bit registers. */
4845 j = 32;
4846 else if (i.types[1].bitfield.dword)
4847 /* 32 (or 40) 8-bit registers + 32 16-bit registers. */
4848 j = 64;
4849 else
4850 /* 32 (or 40) 8-bit registers + 32 16-bit registers
4851 + 32 32-bit registers. */
4852 j = 96;
4854 /* In 64-bit mode, the following byte registers cannot be accessed
4855 when a REX or REX2 prefix is used: AH, BH, CH, DH. */
4856 if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
4857 j += 8;
4858 i.op[1].regs -= j;
4861 else if (flag_code == CODE_64BIT
4862 && i.tm.opcode_space == SPACE_BASE
4863 && i.types[i.operands - 1].bitfield.qword
4864 && ((i.reg_operands == 1
4865 && i.imm_operands == 1
4866 && i.op[0].imms->X_op == O_constant
4867 && ((i.tm.base_opcode == 0xb8
4868 && i.tm.extension_opcode == None
4869 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4870 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4871 && (i.tm.base_opcode == 0x24
4872 || (((i.tm.base_opcode == 0x80
4873 && i.tm.extension_opcode == 0x4)
4874 || i.tm.mnem_off == MN_test)
4875 && !(i.op[1].regs->reg_flags
4876 & (RegRex | RegRex2)))
4877 || ((i.tm.base_opcode | 1) == 0xc7
4878 && i.tm.extension_opcode == 0x0)))
4879 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4880 && i.tm.base_opcode == 0x83
4881 && i.tm.extension_opcode == 0x4
4882 && !(i.op[1].regs->reg_flags & (RegRex | RegRex2)))))
4883 || ((i.reg_operands == 2
4884 && i.op[0].regs == i.op[1].regs
4885 && (i.tm.mnem_off == MN_xor
4886 || i.tm.mnem_off == MN_sub))
4887 || i.tm.mnem_off == MN_clr)))
4889 /* Optimize: -O:
4890 andq $imm31, %r64 -> andl $imm31, %r32
4891 andq $imm7, %r64 -> andl $imm7, %r32
4892 testq $imm31, %r64 -> testl $imm31, %r32
4893 xorq %r64, %r64 -> xorl %r32, %r32
4894 clrq %r64 -> clrl %r32
4895 subq %r64, %r64 -> subl %r32, %r32
4896 movq $imm31, %r64 -> movl $imm31, %r32
4897 movq $imm32, %r64 -> movl $imm32, %r32
4899 i.tm.opcode_modifier.size = SIZE32;
4900 if (i.imm_operands)
4902 i.types[0].bitfield.imm32 = 1;
4903 i.types[0].bitfield.imm32s = 0;
4904 i.types[0].bitfield.imm64 = 0;
4906 else
4908 i.types[0].bitfield.dword = 1;
4909 i.types[0].bitfield.qword = 0;
4911 i.types[1].bitfield.dword = 1;
4912 i.types[1].bitfield.qword = 0;
4913 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4915 /* Handle
4916 movq $imm31, %r64 -> movl $imm31, %r32
4917 movq $imm32, %r64 -> movl $imm32, %r32
4919 i.tm.operand_types[0].bitfield.imm32 = 1;
4920 i.tm.operand_types[0].bitfield.imm32s = 0;
4921 i.tm.operand_types[0].bitfield.imm64 = 0;
4922 if ((i.tm.base_opcode | 1) == 0xc7)
4924 /* Handle
4925 movq $imm31, %r64 -> movl $imm31, %r32
4927 i.tm.base_opcode = 0xb8;
4928 i.tm.extension_opcode = None;
4929 i.tm.opcode_modifier.w = 0;
4930 i.tm.opcode_modifier.modrm = 0;
4934 else if (i.reg_operands == 3
4935 && i.op[0].regs == i.op[1].regs
4936 && pp.encoding != encoding_evex
4937 && (i.tm.mnem_off == MN_xor
4938 || i.tm.mnem_off == MN_sub))
4940 /* Optimize: -O:
4941 xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd
4942 xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd
4943 xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd
4944 xorq %rN, %rN, %rM -> xorl %rMd, %rMd
4945 subb %rNb, %rNb, %rMb -> subl %rMd, %rMd
4946 subw %rNw, %rNw, %rMw -> subl %rMd, %rMd
4947 subl %rNd, %rNd, %rMd -> subl %rMd, %rMd
4948 subq %rN, %rN, %rM -> subl %rMd, %rMd
4950 i.tm.opcode_space = SPACE_BASE;
4951 i.tm.opcode_modifier.evex = 0;
4952 i.tm.opcode_modifier.size = SIZE32;
4953 i.types[0].bitfield.byte = 0;
4954 i.types[0].bitfield.word = 0;
4955 i.types[0].bitfield.dword = 1;
4956 i.types[0].bitfield.qword = 0;
4957 i.op[0].regs = i.op[2].regs;
4958 i.types[1] = i.types[0];
4959 i.op[1].regs = i.op[2].regs;
4960 i.reg_operands = 2;
4962 else if (optimize > 1
4963 && !optimize_for_space
4964 && i.reg_operands == 2
4965 && i.op[0].regs == i.op[1].regs
4966 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4967 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4969 /* Optimize: -O2:
4970 andb %rN, %rN -> testb %rN, %rN
4971 andw %rN, %rN -> testw %rN, %rN
4972 andq %rN, %rN -> testq %rN, %rN
4973 orb %rN, %rN -> testb %rN, %rN
4974 orw %rN, %rN -> testw %rN, %rN
4975 orq %rN, %rN -> testq %rN, %rN
4977 and outside of 64-bit mode
4979 andl %rN, %rN -> testl %rN, %rN
4980 orl %rN, %rN -> testl %rN, %rN
4982 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
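/* NB: TEST sets the same flags without writing the register.  The
   32-bit forms are excluded in 64-bit mode above because there AND/OR
   also zero-extend into the full 64-bit register, which TEST does not.  */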
4984 else if (!optimize_for_space
4985 && i.tm.base_opcode == 0xd0
4986 && i.tm.extension_opcode == 4
4987 && (i.tm.opcode_space == SPACE_BASE
4988 || i.tm.opcode_space == SPACE_EVEXMAP4)
4989 && !i.mem_operands)
4991 /* Optimize: -O:
4992 shlb $1, %rN -> addb %rN, %rN
4993 shlw $1, %rN -> addw %rN, %rN
4994 shll $1, %rN -> addl %rN, %rN
4995 shlq $1, %rN -> addq %rN, %rN
4997 shlb $1, %rN, %rM -> addb %rN, %rN, %rM
4998 shlw $1, %rN, %rM -> addw %rN, %rN, %rM
4999 shll $1, %rN, %rM -> addl %rN, %rN, %rM
5000 shlq $1, %rN, %rM -> addq %rN, %rN, %rM
5002 i.tm.base_opcode = 0x00;
5003 i.tm.extension_opcode = None;
5004 if (i.operands >= 2)
5006 i.tm.operand_types[0] = i.tm.operand_types[1];
5007 i.op[0].regs = i.op[1].regs;
5008 i.types[0] = i.types[1];
5010 else
5012 /* Legacy form with omitted shift count operand. */
5013 i.tm.operand_types[1] = i.tm.operand_types[0];
5014 i.op[1].regs = i.op[0].regs;
5015 i.types[1] = i.types[0];
5016 i.operands = 2;
5018 i.reg_operands++;
5019 i.imm_operands = 0;
5021 else if (i.tm.base_opcode == 0xba
5022 && i.tm.opcode_space == SPACE_0F
5023 && i.reg_operands == 1
5024 && i.op[0].imms->X_op == O_constant
5025 && i.op[0].imms->X_add_number >= 0)
5027 /* Optimize: -O:
5028 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
5029 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
5030 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
5032 With <BT> one of bts, btr, and btc also:
5033 <BT>w $n, %rN -> btl $n, %rN (in 32-bit mode, n < 16)
5034 <BT>l $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
5036 switch (flag_code)
5038 case CODE_64BIT:
5039 if (i.tm.extension_opcode != 4)
5040 break;
5041 if (i.types[1].bitfield.qword
5042 && i.op[0].imms->X_add_number < 32
5043 && !(i.op[1].regs->reg_flags & RegRex))
5044 i.tm.opcode_modifier.size = SIZE32;
5045 /* Fall through. */
5046 case CODE_32BIT:
5047 if (i.types[1].bitfield.word
5048 && i.op[0].imms->X_add_number < 16)
5049 i.tm.opcode_modifier.size = SIZE32;
5050 break;
5051 case CODE_16BIT:
5052 if (i.op[0].imms->X_add_number < 16)
5053 i.tm.opcode_modifier.size = SIZE16;
5054 break;
5057 else if (i.reg_operands == 3
5058 && i.op[0].regs == i.op[1].regs
5059 && !i.types[2].bitfield.xmmword
5060 && (i.tm.opcode_modifier.vex
5061 || ((!i.mask.reg || i.mask.zeroing)
5062 && i.tm.opcode_modifier.evex
5063 && (pp.encoding != encoding_evex
5064 || cpu_arch_isa_flags.bitfield.cpuavx512vl
5065 || is_cpu (&i.tm, CpuAVX512VL)
5066 || (i.tm.operand_types[2].bitfield.zmmword
5067 && i.types[2].bitfield.ymmword))))
5068 && i.tm.opcode_space == SPACE_0F
5069 && ((i.tm.base_opcode | 2) == 0x57
5070 || i.tm.base_opcode == 0xdf
5071 || i.tm.base_opcode == 0xef
5072 || (i.tm.base_opcode | 3) == 0xfb
5073 || i.tm.base_opcode == 0x42
5074 || i.tm.base_opcode == 0x47))
5076 /* Optimize: -O1:
5077 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
5078 vpsubq and vpsubw:
5079 EVEX VOP %zmmM, %zmmM, %zmmN
5080 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
5081 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5082 EVEX VOP %ymmM, %ymmM, %ymmN
5083 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
5084 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5085 VEX VOP %ymmM, %ymmM, %ymmN
5086 -> VEX VOP %xmmM, %xmmM, %xmmN
5087 VOP, one of vpandn and vpxor:
5088 VEX VOP %ymmM, %ymmM, %ymmN
5089 -> VEX VOP %xmmM, %xmmM, %xmmN
5090 VOP, one of vpandnd and vpandnq:
5091 EVEX VOP %zmmM, %zmmM, %zmmN
5092 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
5093 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5094 EVEX VOP %ymmM, %ymmM, %ymmN
5095 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
5096 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5097 VOP, one of vpxord and vpxorq:
5098 EVEX VOP %zmmM, %zmmM, %zmmN
5099 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
5100 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5101 EVEX VOP %ymmM, %ymmM, %ymmN
5102 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
5103 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5104 VOP, one of kxord and kxorq:
5105 VEX VOP %kM, %kM, %kN
5106 -> VEX kxorw %kM, %kM, %kN
5107 VOP, one of kandnd and kandnq:
5108 VEX VOP %kM, %kM, %kN
5109 -> VEX kandnw %kM, %kM, %kN
5111 if (i.tm.opcode_modifier.evex)
5113 if (pp.encoding != encoding_evex)
5115 i.tm.opcode_modifier.vex = VEX128;
5116 i.tm.opcode_modifier.vexw = VEXW0;
5117 i.tm.opcode_modifier.evex = 0;
5118 pp.encoding = encoding_vex;
5119 i.mask.reg = NULL;
5121 else if (optimize > 1)
5122 i.tm.opcode_modifier.evex = EVEX128;
5123 else
5124 return;
5126 else if (i.tm.operand_types[0].bitfield.class == RegMask)
5128 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
5129 i.tm.opcode_modifier.vexw = VEXW0;
5131 else
5132 i.tm.opcode_modifier.vex = VEX128;
5134 if (i.tm.opcode_modifier.vex)
5135 for (j = 0; j < 3; j++)
5137 i.types[j].bitfield.xmmword = 1;
5138 i.types[j].bitfield.ymmword = 0;
5141 else if (pp.encoding != encoding_evex
5142 && pp.encoding != encoding_egpr
5143 && !i.types[0].bitfield.zmmword
5144 && !i.types[1].bitfield.zmmword
5145 && !i.mask.reg
5146 && !i.broadcast.type
5147 && !i.broadcast.bytes
5148 && i.tm.opcode_modifier.evex
5149 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
5150 || (i.tm.base_opcode & ~4) == 0xdb
5151 || (i.tm.base_opcode & ~4) == 0xeb)
5152 && i.tm.extension_opcode == None)
5154 /* Optimize: -O1:
5155 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
5156 vmovdqu32 and vmovdqu64:
5157 EVEX VOP %xmmM, %xmmN
5158 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
5159 EVEX VOP %ymmM, %ymmN
5160 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
5161 EVEX VOP %xmmM, mem
5162 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
5163 EVEX VOP %ymmM, mem
5164 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
5165 EVEX VOP mem, %xmmN
5166 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
5167 EVEX VOP mem, %ymmN
5168 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
5169 VOP, one of vpand, vpandn, vpor, vpxor:
5170 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
5171 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
5172 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
5173 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
5174 EVEX VOP{d,q} mem, %xmmM, %xmmN
5175 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
5176 EVEX VOP{d,q} mem, %ymmM, %ymmN
5177 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
5179 for (j = 0; j < i.operands; j++)
5180 if (operand_type_check (i.types[j], disp)
5181 && i.op[j].disps->X_op == O_constant)
5183 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
5184 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
5185 bytes, we choose EVEX Disp8 over VEX Disp32. */
5186 int evex_disp8, vex_disp8;
5187 unsigned int memshift = i.memshift;
5188 offsetT n = i.op[j].disps->X_add_number;
5190 evex_disp8 = fits_in_disp8 (n);
5191 i.memshift = 0;
5192 vex_disp8 = fits_in_disp8 (n);
5193 if (evex_disp8 != vex_disp8)
5195 i.memshift = memshift;
5196 return;
5199 i.types[j].bitfield.disp8 = vex_disp8;
5200 break;
5202 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
5203 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
5204 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
5205 i.tm.opcode_modifier.vex
5206 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
5207 i.tm.opcode_modifier.vexw = VEXW0;
5208 /* VPAND, VPOR, and VPXOR are commutative. */
5209 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
5210 i.tm.opcode_modifier.commutative = 1;
5211 i.tm.opcode_modifier.evex = 0;
5212 i.tm.opcode_modifier.masking = 0;
5213 i.tm.opcode_modifier.broadcast = 0;
5214 i.tm.opcode_modifier.disp8memshift = 0;
5215 i.memshift = 0;
5216 if (j < i.operands)
5217 i.types[j].bitfield.disp8
5218 = fits_in_disp8 (i.op[j].disps->X_add_number);
5220 else if (optimize_for_space
5221 && i.tm.base_opcode == 0x29
5222 && i.tm.opcode_space == SPACE_0F38
5223 && i.operands == i.reg_operands
5224 && i.op[0].regs == i.op[1].regs
5225 && (!i.tm.opcode_modifier.vex
5226 || !(i.op[0].regs->reg_flags & RegRex))
5227 && !i.tm.opcode_modifier.evex)
5229 /* Optimize: -Os:
5230 pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
5231 vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
5232 vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
5234 i.tm.opcode_space = SPACE_0F;
5235 i.tm.base_opcode = 0x76;
5237 else if (((i.tm.base_opcode >= 0x64
5238 && i.tm.base_opcode <= 0x66
5239 && i.tm.opcode_space == SPACE_0F)
5240 || (i.tm.base_opcode == 0x37
5241 && i.tm.opcode_space == SPACE_0F38))
5242 && i.operands == i.reg_operands
5243 && i.op[0].regs == i.op[1].regs
5244 && !i.tm.opcode_modifier.evex)
5246 /* Optimize: -O:
5247 pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
5248 pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
5249 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
5250 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
5251 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
5252 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
5254 i.tm.opcode_space = SPACE_0F;
5255 i.tm.base_opcode = 0xef;
5256 if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
5258 if (i.operands == 2)
5260 gas_assert (i.tm.opcode_modifier.sse2avx);
5262 i.operands = 3;
5263 i.reg_operands = 3;
5264 i.tm.operands = 3;
5266 i.op[2].regs = i.op[0].regs;
5267 i.types[2] = i.types[0];
5268 i.flags[2] = i.flags[0];
5269 i.tm.operand_types[2] = i.tm.operand_types[0];
5271 i.tm.opcode_modifier.sse2avx = 0;
5273 i.op[0].regs -= i.op[0].regs->reg_num + 8;
5274 i.op[1].regs = i.op[0].regs;
5277 else if (i.tm.extension_opcode == 6
5278 && i.tm.base_opcode >= 0x71
5279 && i.tm.base_opcode <= 0x73
5280 && i.tm.opcode_space == SPACE_0F
5281 && i.op[0].imms->X_op == O_constant
5282 && i.op[0].imms->X_add_number == 1
5283 && !i.mem_operands)
5285 /* Optimize: -O:
5286 psllw $1, %mmxN -> paddw %mmxN, %mmxN
5287 psllw $1, %xmmN -> paddw %xmmN, %xmmN
5288 vpsllw $1, %xmmN, %xmmM -> vpaddw %xmmN, %xmmN, %xmmM
5289 vpsllw $1, %ymmN, %ymmM -> vpaddw %ymmN, %ymmN, %ymmM
5290 vpsllw $1, %zmmN, %zmmM -> vpaddw %zmmN, %zmmN, %zmmM
5292 pslld $1, %mmxN -> paddd %mmxN, %mmxN
5293 pslld $1, %xmmN -> paddd %xmmN, %xmmN
5294 vpslld $1, %xmmN, %xmmM -> vpaddd %xmmN, %xmmN, %xmmM
5295 vpslld $1, %ymmN, %ymmM -> vpaddd %ymmN, %ymmN, %ymmM
5296 vpslld $1, %zmmN, %zmmM -> vpaddd %zmmN, %zmmN, %zmmM
5298 psllq $1, %xmmN -> paddq %xmmN, %xmmN
5299 vpsllq $1, %xmmN, %xmmM -> vpaddq %xmmN, %xmmN, %xmmM
5300 vpsllq $1, %ymmN, %ymmM -> vpaddq %ymmN, %ymmN, %ymmM
5301 vpsllq $1, %zmmN, %zmmM -> vpaddq %zmmN, %zmmN, %zmmM
5303 if (i.tm.base_opcode != 0x73)
5304 i.tm.base_opcode |= 0xfc; /* {,v}padd{w,d} */
5305 else
5307 gas_assert (i.tm.operand_types[1].bitfield.class != RegMMX);
5308 i.tm.base_opcode = 0xd4; /* {,v}paddq */
5310 i.tm.extension_opcode = None;
5311 if (i.tm.opcode_modifier.vexvvvv)
5312 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
5313 i.tm.operand_types[0] = i.tm.operand_types[1];
5314 i.op[0].regs = i.op[1].regs;
5315 i.types[0] = i.types[1];
5316 i.reg_operands++;
5317 i.imm_operands = 0;
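/* NB: x << 1 == x + x, and the {,v}padd* forms save the imm8 byte.  */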
5319 else if (optimize_for_space
5320 && i.tm.base_opcode == 0x59
5321 && i.tm.opcode_space == SPACE_0F38
5322 && i.operands == i.reg_operands
5323 && i.tm.opcode_modifier.vex
5324 && !(i.op[0].regs->reg_flags & RegRex)
5325 && i.op[0].regs->reg_type.bitfield.xmmword
5326 && pp.encoding != encoding_vex3)
5328 /* Optimize: -Os:
5329 vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
5331 i.tm.opcode_space = SPACE_0F;
5332 i.tm.base_opcode = 0x6c;
5333 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
5335 ++i.operands;
5336 ++i.reg_operands;
5337 ++i.tm.operands;
5339 i.op[2].regs = i.op[0].regs;
5340 i.types[2] = i.types[0];
5341 i.flags[2] = i.flags[0];
5342 i.tm.operand_types[2] = i.tm.operand_types[0];
5344 swap_2_operands (1, 2);
5346 else if (i.tm.base_opcode == 0x16
5347 && i.tm.opcode_space == SPACE_0F3A
5348 && i.op[0].imms->X_op == O_constant
5349 && i.op[0].imms->X_add_number == 0)
5351 /* Optimize: -O:
5352 pextrd $0, %xmmN, ... -> movd %xmmN, ...
5353 pextrq $0, %xmmN, ... -> movq %xmmN, ...
5354 vpextrd $0, %xmmN, ... -> vmovd %xmmN, ...
5355 vpextrq $0, %xmmN, ... -> vmovq %xmmN, ...
5357 i.tm.opcode_space = SPACE_0F;
5358 if (!i.mem_operands
5359 || i.tm.opcode_modifier.evex
5360 || (i.tm.opcode_modifier.vexw != VEXW1
5361 && i.tm.opcode_modifier.size != SIZE64))
5362 i.tm.base_opcode = 0x7e;
5363 else
5365 i.tm.base_opcode = 0xd6;
5366 i.tm.opcode_modifier.size = 0;
5367 i.tm.opcode_modifier.vexw
5368 = i.tm.opcode_modifier.sse2avx ? VEXW0 : VEXWIG;
5371 i.op[0].regs = i.op[1].regs;
5372 i.types[0] = i.types[1];
5373 i.flags[0] = i.flags[1];
5374 i.tm.operand_types[0] = i.tm.operand_types[1];
5376 i.op[1].regs = i.op[2].regs;
5377 i.types[1] = i.types[2];
5378 i.flags[1] = i.flags[2];
5379 i.tm.operand_types[1] = i.tm.operand_types[2];
5381 i.operands = 2;
5382 i.imm_operands = 0;
5386 /* Check whether the promoted (to address size) register is usable as index
5387 register in ModR/M SIB addressing. */
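/* NB: the pointer arithmetic below relies on the register table layout
   also described in optimize_encoding() above: the next-wider name of a
   GPR sits 32 entries further on, with the REX byte-register names 8
   entries past the legacy ones.  */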
5389 static bool is_index (const reg_entry *r)
5391 gas_assert (flag_code == CODE_64BIT);
5393 if (r->reg_type.bitfield.byte)
5395 if (!(r->reg_flags & RegRex64))
5397 if (r->reg_num >= 4)
5398 return false;
5399 r += 8;
5401 r += 32;
5403 if (r->reg_type.bitfield.word)
5404 r += 32;
5405 /* No need to further check .dword here. */
5407 return r->reg_type.bitfield.baseindex;
5410 /* Try to shorten {nf} encodings, by shortening operand size or switching to
5411 functionally identical encodings. */
5413 static void
5414 optimize_nf_encoding (void)
5416 if (i.tm.base_opcode == 0x80
5417 && (i.tm.extension_opcode == 0 || i.tm.extension_opcode == 5)
5418 && i.suffix != BYTE_MNEM_SUFFIX
5419 && !i.types[1].bitfield.byte
5420 && !i.types[2].bitfield.byte
5421 && i.op[0].imms->X_op == O_constant
5422 && i.op[0].imms->X_add_number == 0x80)
5424 /* Optimize: -O:
5425 {nf} addw $0x80, ... -> {nf} subw $-0x80, ...
5426 {nf} addl $0x80, ... -> {nf} subl $-0x80, ...
5427 {nf} addq $0x80, ... -> {nf} subq $-0x80, ...
5429 {nf} subw $0x80, ... -> {nf} addw $-0x80, ...
5430 {nf} subl $0x80, ... -> {nf} addl $-0x80, ...
5431 {nf} subq $0x80, ... -> {nf} addq $-0x80, ...
5433 i.tm.base_opcode |= 3;
5434 i.tm.extension_opcode ^= 5;
5435 i.tm.opcode_modifier.w = 0;
5436 i.op[0].imms->X_add_number = -i.op[0].imms->X_add_number;
5438 i.tm.operand_types[0].bitfield.imm8 = 0;
5439 i.tm.operand_types[0].bitfield.imm8s = 1;
5440 i.tm.operand_types[0].bitfield.imm16 = 0;
5441 i.tm.operand_types[0].bitfield.imm32 = 0;
5442 i.tm.operand_types[0].bitfield.imm32s = 0;
5444 i.types[0] = i.tm.operand_types[0];
5446 else if ((i.tm.base_opcode | 3) == 0x83
5447 && (i.tm.extension_opcode == 0 || i.tm.extension_opcode == 5)
5448 && i.op[0].imms->X_op == O_constant
5449 && (i.op[0].imms->X_add_number == 1
5450 || i.op[0].imms->X_add_number == -1
5451 /* While immediates for wider-than-byte operations were suitably
5452 adjusted earlier on, 0xff in the byte case needs covering
5453 explicitly. */
5454 || (i.op[0].imms->X_add_number == 0xff
5455 && (i.suffix == BYTE_MNEM_SUFFIX
5456 || i.types[i.operands - 1].bitfield.byte))))
5458 /* Optimize: -O:
5459 {nf} add $1, ... -> {nf} inc ...
5460 {nf} add $-1, ... -> {nf} dec ...
5461 {nf} add $0xf...f, ... -> {nf} dec ...
5463 {nf} sub $1, ... -> {nf} dec ...
5464 {nf} sub $-1, ... -> {nf} inc ...
5465 {nf} sub $0xf...f, ... -> {nf} inc ...
5467 i.tm.base_opcode = 0xfe;
5468 i.tm.extension_opcode
5469 = (i.op[0].imms->X_add_number == 1) != (i.tm.extension_opcode == 0);
5470 i.tm.opcode_modifier.w = 1;
5472 i.types[0] = i.types[1];
5473 i.types[1] = i.types[2];
5474 i.tm.operand_types[0] = i.tm.operand_types[1];
5475 i.tm.operand_types[1] = i.tm.operand_types[2];
5476 i.op[0] = i.op[1];
5477 i.op[1] = i.op[2];
5478 i.flags[0] = i.flags[1];
5479 i.flags[1] = i.flags[2];
5480 i.reloc[0] = i.reloc[1];
5481 i.reloc[1] = NO_RELOC;
5483 i.imm_operands = 0;
5484 --i.operands;
5486 else if (i.tm.base_opcode == 0xc0
5487 && i.op[0].imms->X_op == O_constant
5488 && i.op[0].imms->X_add_number
5489 == (i.types[i.operands - 1].bitfield.byte
5490 || i.suffix == BYTE_MNEM_SUFFIX
5491 ? 7 : i.types[i.operands - 1].bitfield.word
5492 || i.suffix == WORD_MNEM_SUFFIX
5493 ? 15 : 63 >> (i.types[i.operands - 1].bitfield.dword
5494 || i.suffix == LONG_MNEM_SUFFIX)))
5496 /* Optimize: -O:
5497 {nf} rol $osz-1, ... -> {nf} ror $1, ...
5498 {nf} ror $osz-1, ... -> {nf} rol $1, ...
5500 gas_assert (i.tm.extension_opcode <= 1);
5501 i.tm.extension_opcode ^= 1;
5502 i.tm.base_opcode = 0xd0;
5503 i.tm.operand_types[0].bitfield.imm1 = 1;
5504 i.imm_operands = 0;
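/* This works because rotating by osz-1 one way equals rotating by 1 the
   other way, and the shift-by-1 forms (D0/D1) carry no immediate.  */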
5506 else if ((i.tm.base_opcode | 2) == 0x6b
5507 && i.op[0].imms->X_op == O_constant
5508 && (i.op[0].imms->X_add_number > 0
5509 ? !(i.op[0].imms->X_add_number & (i.op[0].imms->X_add_number - 1))
5510 /* optimize_imm() converts to sign-extended representation where
5511 possible (and input can also come with these specific numbers). */
5512 : (i.types[i.operands - 1].bitfield.word
5513 && i.op[0].imms->X_add_number == -0x8000)
5514 || (i.types[i.operands - 1].bitfield.dword
5515 && i.op[0].imms->X_add_number + 1 == -0x7fffffff))
5516 /* 16-bit 3-operand non-ZU forms need leaving alone, to prevent
5517 zero-extension of the result. Unless, of course, both non-
5518 immediate operands match (which can be converted to the non-NDD
5519 form). */
5520 && (i.operands < 3
5521 || !i.types[2].bitfield.word
5522 || i.tm.mnem_off == MN_imulzu
5523 || i.op[2].regs == i.op[1].regs)
5524 /* When merely optimizing for size, exclude cases where we'd convert
5525 from Imm8S to Imm8 encoding, thus not actually reducing size. */
5526 && (!optimize_for_space
5527 || i.tm.base_opcode == 0x69
5528 || !(i.op[0].imms->X_add_number & 0x7d)))
5530 /* Optimize: -O:
5531 {nf} imul $1<<N, ... -> {nf} shl $N, ...
5532 {nf} imulzu $1<<N, ... -> {nf} shl $N, ...
5534 if (i.op[0].imms->X_add_number != 2)
5536 i.tm.base_opcode = 0xc0;
5537 i.op[0].imms->X_add_number = ffs (i.op[0].imms->X_add_number) - 1;
5538 i.tm.operand_types[0].bitfield.imm8 = 1;
5539 i.tm.operand_types[0].bitfield.imm16 = 0;
5540 i.tm.operand_types[0].bitfield.imm32 = 0;
5541 i.tm.operand_types[0].bitfield.imm32s = 0;
5543 else
5545 i.tm.base_opcode = 0xd0;
5546 i.tm.operand_types[0].bitfield.imm1 = 1;
5548 i.types[0] = i.tm.operand_types[0];
5549 i.tm.extension_opcode = 4;
5550 i.tm.opcode_modifier.w = 1;
5551 i.tm.opcode_modifier.operandconstraint = 0;
5552 if (i.operands == 3)
5554 if (i.op[2].regs == i.op[1].regs && i.tm.mnem_off != MN_imulzu)
5556 /* Convert to non-NDD form. This is required for 16-bit insns
5557 (to prevent zero-extension) and benign for others. */
5558 i.operands = 2;
5559 i.reg_operands = 1;
5561 else
5562 i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
5564 else if (i.tm.mnem_off == MN_imulzu)
5566 /* Convert to NDD form, to effect zero-extension of the result. */
5567 i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
5568 i.operands = 3;
5569 i.reg_operands = 2;
5570 i.op[2].regs = i.op[1].regs;
5571 i.tm.operand_types[2] = i.tm.operand_types[1];
5572 i.types[2] = i.types[1];
5576 if (optimize_for_space
5577 && pp.encoding != encoding_evex
5578 && (i.tm.base_opcode == 0x00
5579 || (i.tm.base_opcode == 0xd0 && i.tm.extension_opcode == 4))
5580 && !i.mem_operands
5581 && !i.types[1].bitfield.byte
5582 /* 16-bit operand size has extra restrictions: If REX2 was needed,
5583 no size reduction would be possible. Plus 3-operand forms zero-
5584 extend the result, which can't be expressed with LEA. */
5585 && (!i.types[1].bitfield.word
5586 || (i.operands == 2 && pp.encoding != encoding_egpr))
5587 && is_plausible_suffix (1)
5588 /* %rsp can't be the index. */
5589 && (is_index (i.op[1].regs)
5590 || (i.imm_operands == 0 && is_index (i.op[0].regs)))
5591 /* While %rbp, %r13, %r21, and %r29 can be made the index in order to
5592 avoid the otherwise necessary Disp8, if the other operand is also
5593 from that set and REX2 would be required to encode the insn, the
5594 resulting encoding would be no smaller than the EVEX one. */
5595 && (i.op[1].regs->reg_num != 5
5596 || pp.encoding != encoding_egpr
5597 || i.imm_operands > 0
5598 || i.op[0].regs->reg_num != 5))
5600 /* Optimize: -Os:
5601 {nf} addw %N, %M -> leaw (%rM,%rN), %M
5602 {nf} addl %eN, %eM -> leal (%rM,%rN), %eM
5603 {nf} addq %rN, %rM -> leaq (%rM,%rN), %rM
5605 {nf} shlw $1, %N -> leaw (%rN,%rN), %N
5606 {nf} shll $1, %eN -> leal (%rN,%rN), %eN
5607 {nf} shlq $1, %rN -> leaq (%rN,%rN), %rN
5609 {nf} addl %eK, %eN, %eM -> leal (%rN,%rK), %eM
5610 {nf} addq %rK, %rN, %rM -> leaq (%rN,%rK), %rM
5612 {nf} shll $1, %eN, %eM -> leal (%rN,%rN), %eM
5613 {nf} shlq $1, %rN, %rM -> leaq (%rN,%rN), %rM
5615 i.tm.opcode_space = SPACE_BASE;
5616 i.tm.base_opcode = 0x8d;
5617 i.tm.extension_opcode = None;
5618 i.tm.opcode_modifier.evex = 0;
5619 i.tm.opcode_modifier.vexvvvv = 0;
5620 if (i.imm_operands != 0)
5621 i.index_reg = i.base_reg = i.op[1].regs;
5622 else if (!is_index (i.op[0].regs)
5623 || (i.op[1].regs->reg_num == 5
5624 && i.op[0].regs->reg_num != 5))
5626 i.base_reg = i.op[0].regs;
5627 i.index_reg = i.op[1].regs;
5629 else
5631 i.base_reg = i.op[1].regs;
5632 i.index_reg = i.op[0].regs;
5634 if (i.types[1].bitfield.word)
5636 /* NB: No similar adjustment is needed when operand size is 32-bit. */
5637 i.base_reg += 64;
5638 i.index_reg += 64;
5640 i.op[1].regs = i.op[i.operands - 1].regs;
5642 operand_type_set (&i.types[0], 0);
5643 i.types[0].bitfield.baseindex = 1;
5644 i.tm.operand_types[0] = i.types[0];
5645 i.op[0].disps = NULL;
5646 i.flags[0] = Operand_Mem;
5648 i.operands = 2;
5649 i.mem_operands = i.reg_operands = 1;
5650 i.imm_operands = 0;
5651 pp.has_nf = false;
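/* LEA is a valid replacement here because it never writes the flags,
   which is exactly the behavior the {nf} pseudo-prefix asks for.  */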
5653 else if (optimize_for_space
5654 && pp.encoding != encoding_evex
5655 && (i.tm.base_opcode == 0x80 || i.tm.base_opcode == 0x83)
5656 && (i.tm.extension_opcode == 0
5657 || (i.tm.extension_opcode == 5
5658 && i.op[0].imms->X_op == O_constant
5659 /* Subtraction of -0x80 will end up smaller only if neither
5660 operand size nor REX/REX2 prefixes are needed. */
5661 && (i.op[0].imms->X_add_number != -0x80
5662 || (i.types[1].bitfield.dword
5663 && !(i.op[1].regs->reg_flags & RegRex)
5664 && !(i.op[i.operands - 1].regs->reg_flags & RegRex)
5665 && pp.encoding != encoding_egpr))))
5666 && !i.mem_operands
5667 && !i.types[1].bitfield.byte
5668 /* 16-bit operand size has extra restrictions: If REX2 was needed,
5669 no size reduction would be possible. Plus 3-operand forms zero-
5670 extend the result, which can't be expressed with LEA. */
5671 && (!i.types[1].bitfield.word
5672 || (i.operands == 2 && pp.encoding != encoding_egpr))
5673 && is_plausible_suffix (1))
5675 /* Optimize: -Os:
5676 {nf} addw $N, %M -> leaw N(%rM), %M
5677 {nf} addl $N, %eM -> leal N(%rM), %eM
5678 {nf} addq $N, %rM -> leaq N(%rM), %rM
5680 {nf} subw $N, %M -> leaw -N(%rM), %M
5681 {nf} subl $N, %eM -> leal -N(%rM), %eM
5682 {nf} subq $N, %rM -> leaq -N(%rM), %rM
5684 {nf} addl $N, %eK, %eM -> leal N(%rK), %eM
5685 {nf} addq $N, %rK, %rM -> leaq N(%rK), %rM
5687 {nf} subl $N, %eK, %eM -> leal -N(%rK), %eM
5688 {nf} subq $N, %rK, %rM -> leaq -N(%rK), %rM
5690 i.tm.opcode_space = SPACE_BASE;
5691 i.tm.base_opcode = 0x8d;
5692 if (i.tm.extension_opcode == 5)
5693 i.op[0].imms->X_add_number = -i.op[0].imms->X_add_number;
5694 i.tm.extension_opcode = None;
5695 i.tm.opcode_modifier.evex = 0;
5696 i.tm.opcode_modifier.vexvvvv = 0;
5697 i.base_reg = i.op[1].regs;
5698 if (i.types[1].bitfield.word)
5700 /* NB: No similar adjustment is needed when operand size is 32-bit. */
5701 i.base_reg += 64;
5703 i.op[1].regs = i.op[i.operands - 1].regs;
5705 operand_type_set (&i.types[0], 0);
5706 i.types[0].bitfield.baseindex = 1;
5707 i.types[0].bitfield.disp32 = 1;
5708 i.op[0].disps = i.op[0].imms;
5709 i.flags[0] = Operand_Mem;
5710 optimize_disp (&i.tm);
5711 i.tm.operand_types[0] = i.types[0];
5713 i.operands = 2;
5714 i.disp_operands = i.mem_operands = i.reg_operands = 1;
5715 i.imm_operands = 0;
5716 pp.has_nf = false;
5718 else if (i.tm.base_opcode == 0x6b
5719 && !i.mem_operands
5720 && pp.encoding != encoding_evex
5721 && i.tm.mnem_off != MN_imulzu
5722 && is_plausible_suffix (1)
5723 /* %rsp can't be the index. */
5724 && is_index (i.op[1].regs)
5725 /* There's no reduction in size for 16-bit forms requiring Disp8 and
5726 REX2. */
5727 && (!optimize_for_space
5728 || !i.types[1].bitfield.word
5729 || i.op[1].regs->reg_num != 5
5730 || pp.encoding != encoding_egpr)
5731 && i.op[0].imms->X_op == O_constant
5732 && (i.op[0].imms->X_add_number == 3
5733 || i.op[0].imms->X_add_number == 5
5734 || i.op[0].imms->X_add_number == 9))
5736 /* Optimize: -O:
5737 For n one of 3, 5, or 9
5738 {nf} imulw $n, %N, %M -> leaw (%rN,%rN,n-1), %M
5739 {nf} imull $n, %eN, %eM -> leal (%rN,%rN,n-1), %eM
5740 {nf} imulq $n, %rN, %rM -> leaq (%rN,%rN,n-1), %rM
5742 {nf} imulw $n, %N -> leaw (%rN,%rN,n-1), %N
5743 {nf} imull $n, %eN -> leal (%rN,%rN,n-1), %eN
5744 {nf} imulq $n, %rN -> leaq (%rN,%rN,n-1), %rN
5746 i.tm.opcode_space = SPACE_BASE;
5747 i.tm.base_opcode = 0x8d;
5748 i.tm.extension_opcode = None;
5749 i.tm.opcode_modifier.evex = 0;
5750 i.base_reg = i.op[1].regs;
5751 /* NB: No similar adjustment is needed when operand size is 32 bits. */
5752 if (i.types[1].bitfield.word)
5753 i.base_reg += 64;
5754 i.index_reg = i.base_reg;
5755 i.log2_scale_factor = i.op[0].imms->X_add_number == 9
5756 ? 3 : i.op[0].imms->X_add_number >> 1;
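/* Scale factors 2, 4, and 8 make (%rN,%rN,scale) compute 3*rN, 5*rN,
   and 9*rN respectively, hence the log2 values 1, 2, and 3 above.  */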
5758 operand_type_set (&i.types[0], 0);
5759 i.types[0].bitfield.baseindex = 1;
5760 i.tm.operand_types[0] = i.types[0];
5761 i.op[0].disps = NULL;
5762 i.flags[0] = Operand_Mem;
5764 i.tm.operand_types[1] = i.tm.operand_types[i.operands - 1];
5765 i.op[1].regs = i.op[i.operands - 1].regs;
5766 i.types[1] = i.types[i.operands - 1];
5768 i.operands = 2;
5769 i.mem_operands = i.reg_operands = 1;
5770 i.imm_operands = 0;
5771 pp.has_nf = false;
5773 else if (cpu_arch_isa_flags.bitfield.cpubmi2
5774 && pp.encoding == encoding_default
5775 && (i.operands > 2 || !i.mem_operands)
5776 && (i.types[i.operands - 1].bitfield.dword
5777 || i.types[i.operands - 1].bitfield.qword))
5779 if (i.tm.base_opcode == 0xd2)
5781 /* Optimize: -O:
5782 <OP> one of sal, sar, shl, shr:
5783 {nf} <OP> %cl, %rN -> <OP>x %{e,r}cx, %rN, %rN (N < 16)
5784 {nf} <OP> %cl, ..., %rN -> <OP>x %{e,r}cx, ..., %rN (no eGPR used)
5786 gas_assert (i.tm.extension_opcode & 4);
5787 i.tm.operand_types[0] = i.tm.operand_types[i.operands - 1];
5788 /* NB: i.op[0].regs specifying %cl is good enough. */
5789 i.types[0] = i.types[i.operands - 1];
5790 if (i.operands == 2)
5792 i.tm.operand_types[0].bitfield.baseindex = 0;
5793 i.tm.operand_types[2] = i.tm.operand_types[0];
5794 i.op[2].regs = i.op[1].regs;
5795 i.types[2] = i.types[1];
5796 i.reg_operands = i.operands = 3;
5798 pp.has_nf = false;
5799 i.tm.opcode_modifier.w = 0;
5800 i.tm.opcode_modifier.evex = 0;
5801 i.tm.opcode_modifier.vex = VEX128;
5802 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC2;
5803 i.tm.opcode_space = SPACE_0F38;
5804 i.tm.base_opcode = 0xf7;
5805 i.tm.opcode_modifier.opcodeprefix
5806 = !(i.tm.extension_opcode & 1)
5807 ? PREFIX_0X66 /* shlx */
5808 : i.tm.extension_opcode & 2
5809 ? PREFIX_0XF3 /* sarx */
5810 : PREFIX_0XF2 /* shrx */;
5811 i.tm.extension_opcode = None;
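/* SHLX/SHRX/SARX read the count from a register and leave the flags
   untouched, so they match the {nf} semantics of the original insn.  */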
5813 else if (i.tm.base_opcode == 0xc0
5814 && i.tm.extension_opcode <= 1
5815 && i.op[0].imms->X_op == O_constant)
5817 /* Optimize: -O:
5818 {nf} rol $I, %rN -> rorx $osz-I, %rN, %rN (I != osz-1, N < 16)
5819 {nf} rol $I, ..., %rN -> rorx $osz-I, ..., %rN (I != osz-1, no eGPR used)
5820 {nf} ror $I, %rN -> rorx $I, %rN, %rN (I != 1, N < 16)
5821 {nf} ror $I, ..., %rN -> rorx $I,..., %rN (I != 1, no eGPR used)
5822 NB: rol -> ror transformation for I == osz-1 was already handled above.
5823 NB2: ror with an immediate of 1 uses a different base opcode.
5825 if (i.operands == 2)
5827 i.tm.operand_types[2] = i.tm.operand_types[1];
5828 i.tm.operand_types[2].bitfield.baseindex = 0;
5829 i.op[2].regs = i.op[1].regs;
5830 i.types[2] = i.types[1];
5831 i.reg_operands = 2;
5832 i.operands = 3;
5834 pp.has_nf = false;
5835 i.tm.opcode_modifier.w = 0;
5836 i.tm.opcode_modifier.evex = 0;
5837 i.tm.opcode_modifier.vex = VEX128;
5838 i.tm.opcode_modifier.vexvvvv = 0;
5839 i.tm.opcode_space = SPACE_0F3A;
5840 i.tm.base_opcode = 0xf0;
5841 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
5842 if (!i.tm.extension_opcode)
5843 i.op[0].imms->X_add_number =
5844 (i.types[i.operands - 1].bitfield.byte
5845 ? 8 : i.types[i.operands - 1].bitfield.word
5846 ? 16 : 64 >> i.types[i.operands - 1].bitfield.dword)
5847 - i.op[0].imms->X_add_number;
5848 i.tm.extension_opcode = None;
5850 else if (i.tm.base_opcode == 0xf6
5851 && i.tm.extension_opcode == 4
5852 && !i.mem_operands
5853 && i.op[0].regs->reg_num == 2
5854 && !(i.op[0].regs->reg_flags & RegRex))
5856 /* Optimize: -O:
5857 {nf} mul %edx -> mulx %eax, %eax, %edx
5858 {nf} mul %rdx -> mulx %rax, %rax, %rdx
5860 i.tm.operand_types[1] = i.tm.operand_types[0];
5861 i.tm.operand_types[1].bitfield.baseindex = 0;
5862 i.tm.operand_types[2] = i.tm.operand_types[1];
5863 i.op[2].regs = i.op[0].regs;
5864 /* NB: %eax is good enough also for 64-bit operand size. */
5865 i.op[1].regs = i.op[0].regs = reg_eax;
5866 i.types[2] = i.types[1] = i.types[0];
5867 i.reg_operands = i.operands = 3;
5869 pp.has_nf = false;
5870 i.tm.opcode_modifier.w = 0;
5871 i.tm.opcode_modifier.evex = 0;
5872 i.tm.opcode_modifier.vex = VEX128;
5873 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
5874 i.tm.opcode_space = SPACE_0F38;
5875 i.tm.base_opcode = 0xf6;
5876 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
5877 i.tm.extension_opcode = None;
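/* MULX multiplies by %edx/%rdx implicitly and writes no flags; with the
   operands chosen above the low half lands in %eax/%rax and the high
   half in %edx/%rdx, matching plain MUL while honoring {nf}.  */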
5882 static void
5883 s_noopt (int dummy ATTRIBUTE_UNUSED)
5885 if (!is_it_end_of_statement ())
5886 as_warn (_("`.noopt' arguments ignored"));
5888 optimize = 0;
5889 optimize_for_space = 0;
5891 ignore_rest_of_line ();
5894 /* Return non-zero for load instruction. */
5896 static int
5897 load_insn_p (void)
5899 unsigned int dest;
5900 int any_vex_p = is_any_vex_encoding (&i.tm);
5901 unsigned int base_opcode = i.tm.base_opcode | 1;
5903 if (!any_vex_p)
5905 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
5906 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
5907 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
5908 return 0;
5910 /* pop. */
5911 if (i.tm.mnem_off == MN_pop)
5912 return 1;
5915 if (i.tm.opcode_space == SPACE_BASE)
5917 /* popf, popa. */
5918 if (i.tm.base_opcode == 0x9d
5919 || i.tm.base_opcode == 0x61)
5920 return 1;
5922 /* movs, cmps, lods, scas. */
5923 if ((i.tm.base_opcode | 0xb) == 0xaf)
5924 return 1;
5926 /* outs, xlatb. */
5927 if (base_opcode == 0x6f
5928 || i.tm.base_opcode == 0xd7)
5929 return 1;
5930 /* NB: AMD-specific insns with implicit memory operands are
5931 intentionally not covered. */
5934 /* No memory operand. */
5935 if (!i.mem_operands)
5936 return 0;
5938 if (any_vex_p)
5940 if (i.tm.mnem_off == MN_vldmxcsr)
5941 return 1;
5943 else if (i.tm.opcode_space == SPACE_BASE)
5945 /* test, not, neg, mul, imul, div, idiv. */
5946 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
5947 return 1;
5949 /* inc, dec. */
5950 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
5951 return 1;
5953 /* add, or, adc, sbb, and, sub, xor, cmp. */
5954 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
5955 return 1;
5957 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
5958 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
5959 && i.tm.extension_opcode != 6)
5960 return 1;
5962 /* Check for x87 instructions. */
5963 if ((base_opcode | 6) == 0xdf)
5965 /* Skip fst, fstp, fstenv, fstcw. */
5966 if (i.tm.base_opcode == 0xd9
5967 && (i.tm.extension_opcode == 2
5968 || i.tm.extension_opcode == 3
5969 || i.tm.extension_opcode == 6
5970 || i.tm.extension_opcode == 7))
5971 return 0;
5973 /* Skip fisttp, fist, fistp, fstp. */
5974 if (i.tm.base_opcode == 0xdb
5975 && (i.tm.extension_opcode == 1
5976 || i.tm.extension_opcode == 2
5977 || i.tm.extension_opcode == 3
5978 || i.tm.extension_opcode == 7))
5979 return 0;
5981 /* Skip fisttp, fst, fstp, fsave, fstsw. */
5982 if (i.tm.base_opcode == 0xdd
5983 && (i.tm.extension_opcode == 1
5984 || i.tm.extension_opcode == 2
5985 || i.tm.extension_opcode == 3
5986 || i.tm.extension_opcode == 6
5987 || i.tm.extension_opcode == 7))
5988 return 0;
5990 /* Skip fisttp, fist, fistp, fbstp, fistp. */
5991 if (i.tm.base_opcode == 0xdf
5992 && (i.tm.extension_opcode == 1
5993 || i.tm.extension_opcode == 2
5994 || i.tm.extension_opcode == 3
5995 || i.tm.extension_opcode == 6
5996 || i.tm.extension_opcode == 7))
5997 return 0;
5999 return 1;
6002 else if (i.tm.opcode_space == SPACE_0F)
6004 /* bt, bts, btr, btc. */
6005 if (i.tm.base_opcode == 0xba
6006 && (i.tm.extension_opcode | 3) == 7)
6007 return 1;
6009 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
6010 if (i.tm.base_opcode == 0xc7
6011 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
6012 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
6013 || i.tm.extension_opcode == 6))
6014 return 1;
6016 /* fxrstor, ldmxcsr, xrstor. */
6017 if (i.tm.base_opcode == 0xae
6018 && (i.tm.extension_opcode == 1
6019 || i.tm.extension_opcode == 2
6020 || i.tm.extension_opcode == 5))
6021 return 1;
6023 /* lgdt, lidt, lmsw. */
6024 if (i.tm.base_opcode == 0x01
6025 && (i.tm.extension_opcode == 2
6026 || i.tm.extension_opcode == 3
6027 || i.tm.extension_opcode == 6))
6028 return 1;
6031 dest = i.operands - 1;
6033 /* Check fake imm8 operand and 3 source operands. */
6034 if ((i.tm.opcode_modifier.immext
6035 || i.reg_operands + i.mem_operands == 4)
6036 && i.types[dest].bitfield.imm8)
6037 dest--;
6039 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
6040 if (i.tm.opcode_space == SPACE_BASE
6041 && ((base_opcode | 0x38) == 0x39
6042 || (base_opcode | 2) == 0x87))
6043 return 1;
6045 if (i.tm.mnem_off == MN_xadd)
6046 return 1;
6048 /* Check for load instruction. */
6049 return (i.types[dest].bitfield.class != ClassNone
6050 || i.types[dest].bitfield.instance == Accum);
6053 /* Output lfence (0x0f 0xae 0xe8) after instruction. */
6055 static void
6056 insert_lfence_after (void)
6058 if (lfence_after_load && load_insn_p ())
6060 /* There are also two REP string instructions that require
6061 special treatment. Specifically, the compare string (CMPS)
6062 and scan string (SCAS) instructions set EFLAGS in a manner
6063 that depends on the data being compared/scanned. When used
6064 with a REP prefix, the number of iterations may therefore
6065 vary depending on this data. If the data is a program secret
6066 chosen by the adversary using an LVI method,
6067 then this data-dependent behavior may leak some aspect
6068 of the secret. */
6069 if (((i.tm.base_opcode | 0x9) == 0xaf)
6070 && i.prefix[REP_PREFIX])
6072 as_warn (_("`%s` changes flags which would affect control flow behavior"),
6073 insn_name (&i.tm));
6075 char *p = frag_more (3);
6076 *p++ = 0xf;
6077 *p++ = 0xae;
6078 *p = 0xe8;
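/* E.g. (illustrative) with -mlfence-after-load=yes:
	movl (%rdx), %eax	-> 8b 02  0f ae e8
   i.e. the load is immediately followed by an lfence.  */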
6082 /* Output lfence (0x0f 0xae 0xe8) before instruction. */
6084 static void
6085 insert_lfence_before (const struct last_insn *last_insn)
6087 char *p;
6089 if (i.tm.opcode_space != SPACE_BASE)
6090 return;
6092 if (i.tm.base_opcode == 0xff
6093 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
6095 /* Insert lfence before indirect branch if needed. */
6097 if (lfence_before_indirect_branch == lfence_branch_none)
6098 return;
6100 if (i.operands != 1)
6101 abort ();
6103 if (i.reg_operands == 1)
6105 /* Indirect branch via register. Don't insert lfence with
6106 -mlfence-after-load=yes. */
6107 if (lfence_after_load
6108 || lfence_before_indirect_branch == lfence_branch_memory)
6109 return;
6111 else if (i.mem_operands == 1
6112 && lfence_before_indirect_branch != lfence_branch_register)
6114 as_warn (_("indirect `%s` with memory operand should be avoided"),
6115 insn_name (&i.tm));
6116 return;
6118 else
6119 return;
6121 if (last_insn->kind != last_insn_other)
6123 as_warn_where (last_insn->file, last_insn->line,
6124 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
6125 last_insn->name, insn_name (&i.tm));
6126 return;
6129 p = frag_more (3);
6130 *p++ = 0xf;
6131 *p++ = 0xae;
6132 *p = 0xe8;
6133 return;
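/* E.g. (illustrative) with -mlfence-before-indirect-branch=all:
	jmp *%rax	-> 0f ae e8  ff e0	(lfence; jmp *%rax)
   whereas the memory form "jmp *(%rax)" is merely warned about (see
   above) and emitted unchanged.  */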
6136 /* Output or/not/shl and lfence before near ret. */
6137 if (lfence_before_ret != lfence_before_ret_none
6138 && (i.tm.base_opcode | 1) == 0xc3)
6140 if (last_insn->kind != last_insn_other)
6142 as_warn_where (last_insn->file, last_insn->line,
6143 _("`%s` skips -mlfence-before-ret on `%s`"),
6144 last_insn->name, insn_name (&i.tm));
6145 return;
6148 /* In 64-bit code a near ret ignores any operand size override. */
6149 char prefix = flag_code == CODE_64BIT
6150 ? 0x48
6151 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
6153 if (lfence_before_ret == lfence_before_ret_not)
6155 /* not: 0xf7 0x14 0x24; may be preceded by a prefix
6156 (operand size override, or REX.W in 64-bit code). */
6157 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
6158 if (prefix)
6159 *p++ = prefix;
6160 *p++ = 0xf7;
6161 *p++ = 0x14;
6162 *p++ = 0x24;
6163 if (prefix)
6164 *p++ = prefix;
6165 *p++ = 0xf7;
6166 *p++ = 0x14;
6167 *p++ = 0x24;
6169 else
6171 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
6172 if (prefix)
6173 *p++ = prefix;
6174 if (lfence_before_ret == lfence_before_ret_or)
6176 /* or: 0x83 0x0c 0x24 0x00; may be preceded by a prefix
6177 (operand size override, or REX.W in 64-bit code). */
6178 *p++ = 0x83;
6179 *p++ = 0x0c;
6181 else
6183 /* shl: 0xc1 0x24 0x24 0x00; may be preceded by a prefix
6184 (operand size override, or REX.W in 64-bit code). */
6185 *p++ = 0xc1;
6186 *p++ = 0x24;
6189 *p++ = 0x24;
6190 *p++ = 0x0;
6193 *p++ = 0xf;
6194 *p++ = 0xae;
6195 *p = 0xe8;
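/* E.g. (illustrative): with -mlfence-before-ret=not in 64-bit code, a
   plain "ret" is emitted as
	48 f7 14 24	notq (%rsp)
	48 f7 14 24	notq (%rsp)
	0f ae e8	lfence
	c3		ret
   with the double "not" leaving the return address unchanged.  */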
6199 /* Shared helper for md_assemble() and s_insn(). */
6200 static void init_globals (void)
6202 unsigned int j;
6204 memset (&i, '\0', sizeof (i));
6205 i.rounding.type = rc_none;
6206 for (j = 0; j < MAX_OPERANDS; j++)
6207 i.reloc[j] = NO_RELOC;
6208 memset (disp_expressions, '\0', sizeof (disp_expressions));
6209 memset (im_expressions, '\0', sizeof (im_expressions));
6210 save_stack_p = save_stack;
6213 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
6214 parsing pass. Instead of introducing a rarely used new insn attribute this
6215 utilizes a common pattern between affected templates. It is deemed
6216 acceptable that this will lead to unnecessary pass 2 preparations in a
6217 limited set of cases. */
6218 static INLINE bool may_need_pass2 (const insn_template *t)
6220 return t->opcode_modifier.sse2avx
6221 /* Note that all SSE2AVX templates have at least one operand. */
6222 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
6223 : (t->opcode_space == SPACE_0F
6224 && (t->base_opcode | 1) == 0xbf)
6225 || (t->opcode_space == SPACE_BASE
6226 && t->base_opcode == 0x63)
6227 || (intel_syntax /* shld / shrd may mean suffixed shl / shr. */
6228 && t->opcode_space == SPACE_EVEXMAP4
6229 && (t->base_opcode | 8) == 0x2c);
6232 /* This is the guts of the machine-dependent assembler. LINE points to a
6233 machine dependent instruction. This function is supposed to emit
6234 the frags/bytes it assembles to. */
6236 static void
6237 i386_assemble (char *line)
6239 unsigned int j;
6240 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
6241 char *xstrdup_copy = NULL;
6242 const char *end, *pass1_mnem = NULL;
6243 enum i386_error pass1_err = 0;
6244 struct pseudo_prefixes orig_pp = pp;
6245 const insn_template *t;
6246 struct last_insn *last_insn
6247 = &seg_info(now_seg)->tc_segment_info_data.last_insn;
6249 /* Initialize globals. */
6250 current_templates.end = current_templates.start = NULL;
6251 retry:
6252 init_globals ();
6254 /* Suppress optimization when the last thing we saw may not have been
6255 a proper instruction (e.g. a stand-alone prefix or .byte). */
6256 if (last_insn->kind != last_insn_other)
6257 pp.no_optimize = true;
6259 /* First parse an instruction mnemonic & call i386_operand for the operands.
6260 We assume that the scrubber has arranged it so that line[0] is the valid
6261 start of a (possibly prefixed) mnemonic. */
6263 end = parse_insn (line, mnemonic, parse_all);
6264 if (end == NULL)
6266 if (pass1_mnem != NULL)
6267 goto match_error;
6268 if (i.error != no_error)
6270 gas_assert (current_templates.start != NULL);
6271 if (may_need_pass2 (current_templates.start) && !i.suffix)
6272 goto no_match;
6273 /* No point in trying a 2nd pass - it'll only find the same suffix
6274 again. */
6275 mnem_suffix = i.suffix;
6276 goto match_error;
6278 return;
6280 t = current_templates.start;
6281 /* NB: LINE may be changed to be the same as XSTRDUP_COPY. */
6282 if (xstrdup_copy != line && may_need_pass2 (t))
6284 /* Make a copy of the full line in case we need to retry. */
6285 xstrdup_copy = xstrdup (line);
6286 copy = xstrdup_copy;
6288 line += end - line;
6289 mnem_suffix = i.suffix;
6291 line = parse_operands (line, mnemonic);
6292 this_operand = -1;
6293 if (line == NULL)
6295 free (xstrdup_copy);
6296 return;
6299 /* Now we've parsed the mnemonic into a set of templates, and have the
6300 operands at hand. */
6302 /* All Intel opcodes have reversed operands except for "bound", "enter",
6303 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
6304 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
6305 intersegment "jmp" and "call" instructions with 2 immediate operands so
6306 that the immediate segment precedes the offset consistently in Intel and
6307 AT&T modes. */
6308 if (intel_syntax
6309 && i.operands > 1
6310 && (t->mnem_off != MN_bound)
6311 && !startswith (mnemonic, "invlpg")
6312 && !startswith (mnemonic, "monitor")
6313 && !startswith (mnemonic, "mwait")
6314 && (t->mnem_off != MN_pvalidate)
6315 && !startswith (mnemonic, "rmp")
6316 && (t->mnem_off != MN_tpause)
6317 && (t->mnem_off != MN_umwait)
6318 && !(i.operands == 2
6319 && operand_type_check (i.types[0], imm)
6320 && operand_type_check (i.types[1], imm)))
6321 swap_operands ();
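/* E.g. (illustrative): Intel "mov eax, ebx" arrives as [eax, ebx] and
   is stored as [ebx, eax], matching the AT&T-ordered templates
   ("movl %ebx, %eax").  */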
6323 /* The order of the immediates should be reversed for 2-immediates EXTRQ
6324 and INSERTQ instructions. Also UWRMSR wants its immediate to be in the
6325 "canonical" place (first), despite it appearing last (in AT&T syntax, or
6326 because of the swapping above) in the incoming set of operands. */
6327 if ((i.imm_operands == 2
6328 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
6329 || (t->mnem_off == MN_uwrmsr && i.imm_operands
6330 && i.operands > i.imm_operands))
6331 swap_2_operands (0, 1);
6333 if (i.imm_operands)
6335 /* For USER_MSR instructions, imm32 stands for the name of a model-specific
6336 register (MSR). That's an unsigned quantity, whereas all other insns with
6337 32-bit immediate and 64-bit operand size use sign-extended
6338 immediates (imm32s). Therefore these insns are special-cased, bypassing
6339 the normal handling of immediates here. */
6340 if (is_cpu(current_templates.start, CpuUSER_MSR))
6342 for (j = 0; j < i.operands; j++)
6344 if (operand_type_check(i.types[j], imm))
6345 i.types[j] = smallest_imm_type (i.op[j].imms->X_add_number);
6348 else
6349 optimize_imm ();
6352 if (i.disp_operands && !optimize_disp (t))
6353 return;
6355 /* Next, we find a template that matches the given insn,
6356 making sure the overlap of the given operands types is consistent
6357 with the template operand types. */
6359 if (!(t = match_template (mnem_suffix)))
6361 const char *err_msg;
6363 if (copy && !mnem_suffix)
6365 line = copy;
6366 copy = NULL;
6367 no_match:
6368 pass1_err = i.error;
6369 pass1_mnem = insn_name (current_templates.start);
6370 pp = orig_pp;
6371 goto retry;
6374 /* If a non-/only-64bit template (group) was found in pass 1, and if
6375 _some_ template (group) was found in pass 2, squash pass 1's
6376 error. */
6377 if (pass1_err == unsupported_64bit)
6378 pass1_mnem = NULL;
6380 match_error:
6381 free (xstrdup_copy);
6383 switch (pass1_mnem ? pass1_err : i.error)
6385 default:
6386 abort ();
6387 case operand_size_mismatch:
6388 err_msg = _("operand size mismatch");
6389 break;
6390 case operand_type_mismatch:
6391 err_msg = _("operand type mismatch");
6392 break;
6393 case register_type_mismatch:
6394 err_msg = _("register type mismatch");
6395 break;
6396 case number_of_operands_mismatch:
6397 err_msg = _("number of operands mismatch");
6398 break;
6399 case invalid_instruction_suffix:
6400 err_msg = _("invalid instruction suffix");
6401 break;
6402 case bad_imm4:
6403 err_msg = _("constant doesn't fit in 4 bits");
6404 break;
6405 case unsupported_with_intel_mnemonic:
6406 err_msg = _("unsupported with Intel mnemonic");
6407 break;
6408 case unsupported_syntax:
6409 err_msg = _("unsupported syntax");
6410 break;
6411 case unsupported_EGPR_for_addressing:
6412 err_msg = _("extended GPR cannot be used as base/index");
6413 break;
6414 case unsupported_nf:
6415 err_msg = _("{nf} unsupported");
6416 break;
6417 case unsupported:
6418 as_bad (_("unsupported instruction `%s'"),
6419 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6420 return;
6421 case unsupported_on_arch:
6422 as_bad (_("`%s' is not supported on `%s%s'"),
6423 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6424 cpu_arch_name ? cpu_arch_name : default_arch,
6425 cpu_sub_arch_name ? cpu_sub_arch_name : "");
6426 return;
6427 case unsupported_64bit:
6428 if (ISLOWER (mnem_suffix))
6430 if (flag_code == CODE_64BIT)
6431 as_bad (_("`%s%c' is not supported in 64-bit mode"),
6432 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6433 mnem_suffix);
6434 else
6435 as_bad (_("`%s%c' is only supported in 64-bit mode"),
6436 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6437 mnem_suffix);
6439 else
6441 if (flag_code == CODE_64BIT)
6442 as_bad (_("`%s' is not supported in 64-bit mode"),
6443 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6444 else
6445 as_bad (_("`%s' is only supported in 64-bit mode"),
6446 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6448 return;
6449 case no_vex_encoding:
6450 err_msg = _("no VEX/XOP encoding");
6451 break;
6452 case no_evex_encoding:
6453 err_msg = _("no EVEX encoding");
6454 break;
6455 case invalid_sib_address:
6456 err_msg = _("invalid SIB address");
6457 break;
6458 case invalid_vsib_address:
6459 err_msg = _("invalid VSIB address");
6460 break;
6461 case invalid_vector_register_set:
6462 err_msg = _("mask, index, and destination registers must be distinct");
6463 break;
6464 case invalid_tmm_register_set:
6465 err_msg = _("all tmm registers must be distinct");
6466 break;
6467 case invalid_dest_and_src_register_set:
6468 err_msg = _("destination and source registers must be distinct");
6469 break;
6470 case invalid_dest_register_set:
6471 err_msg = _("two dest registers must be distinct");
6472 break;
6473 case invalid_pseudo_prefix:
6474 err_msg = _("rex2 pseudo prefix cannot be used");
6475 break;
6476 case unsupported_vector_index_register:
6477 err_msg = _("unsupported vector index register");
6478 break;
6479 case unsupported_broadcast:
6480 err_msg = _("unsupported broadcast");
6481 break;
6482 case broadcast_needed:
6483 err_msg = _("broadcast is needed for operand of such type");
6484 break;
6485 case unsupported_masking:
6486 err_msg = _("unsupported masking");
6487 break;
6488 case mask_not_on_destination:
6489 err_msg = _("mask not on destination operand");
6490 break;
6491 case no_default_mask:
6492 err_msg = _("default mask isn't allowed");
6493 break;
6494 case unsupported_rc_sae:
6495 err_msg = _("unsupported static rounding/sae");
6496 break;
6497 case unsupported_vector_size:
6498 as_bad (_("vector size above %u required for `%s'"), 128u << vector_size,
6499 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6500 return;
6501 case unsupported_rsp_register:
6502 err_msg = _("'rsp' register cannot be used");
6503 break;
6504 case internal_error:
6505 err_msg = _("internal error");
6506 break;
6508 as_bad (_("%s for `%s'"), err_msg,
6509 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6510 return;
6513 free (xstrdup_copy);
6515 if (sse_check != check_none
6516 /* The opcode space check isn't strictly needed; it's there only to
6517 bypass the logic below when easily possible. */
6518 && t->opcode_space >= SPACE_0F
6519 && t->opcode_space <= SPACE_0F3A
6520 && !is_cpu (&i.tm, CpuSSE4a)
6521 && !is_any_vex_encoding (t))
6523 /* Some KL and all WideKL insns have only implicit %xmm operands. */
6524 bool simd = is_cpu (t, CpuKL) || is_cpu (t, CpuWideKL);
6526 for (j = 0; j < t->operands; ++j)
6528 if (t->operand_types[j].bitfield.class == RegMMX)
6529 break;
6530 if (t->operand_types[j].bitfield.class == RegSIMD)
6531 simd = true;
6534 if (j >= t->operands && simd)
6535 (sse_check == check_warning
6536 ? as_warn
6537 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
6540 if (i.tm.opcode_modifier.fwait)
6541 if (!add_prefix (FWAIT_OPCODE))
6542 return;
6544 /* Check if REP prefix is OK. */
6545 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
6547 as_bad (_("invalid instruction `%s' after `%s'"),
6548 insn_name (&i.tm), i.rep_prefix);
6549 return;
6552 /* Check for lock without a lockable instruction. Destination operand
6553 must be memory unless it is xchg (0x86). */
6554 if (i.prefix[LOCK_PREFIX])
6556 if (i.tm.opcode_modifier.prefixok < PrefixLock
6557 || i.mem_operands == 0
6558 || (i.tm.base_opcode != 0x86
6559 && !(i.flags[i.operands - 1] & Operand_Mem)))
6561 as_bad (_("expecting lockable instruction after `lock'"));
6562 return;
6565 /* Zap the redundant prefix from XCHG when optimizing. */
6566 if (i.tm.base_opcode == 0x86 && optimize && !pp.no_optimize)
6567 i.prefix[LOCK_PREFIX] = 0;
6570 if ((is_any_vex_encoding (&i.tm) && i.tm.opcode_space != SPACE_EVEXMAP4)
6571 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
6572 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
6574 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
6575 if (i.prefix[DATA_PREFIX])
6577 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
6578 return;
6581 /* Don't allow e.g. KMOV in TLS code sequences. */
6582 for (j = i.imm_operands; j < i.operands; ++j)
6583 switch (i.reloc[j])
6585 case BFD_RELOC_X86_64_GOTTPOFF:
6586 case BFD_RELOC_386_TLS_GOTIE:
6587 case BFD_RELOC_386_TLS_LE_32:
6588 case BFD_RELOC_X86_64_TLSLD:
6589 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
6590 return;
6591 default:
6592 break;
6596 /* Check if HLE prefix is OK. */
6597 if (i.hle_prefix && !check_hle ())
6598 return;
6600 /* Check BND prefix. */
6601 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
6602 as_bad (_("expecting valid branch instruction after `bnd'"));
6604 /* Check NOTRACK prefix. */
6605 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
6606 as_bad (_("expecting indirect branch instruction after `notrack'"));
6608 if (is_cpu (&i.tm, CpuMPX))
6610 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
6611 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
6612 else if (flag_code != CODE_16BIT
6613 ? i.prefix[ADDR_PREFIX]
6614 : i.mem_operands && !i.prefix[ADDR_PREFIX])
6615 as_bad (_("16-bit address isn't allowed in MPX instructions"));
6618 /* Insert BND prefix. */
6619 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
6621 if (!i.prefix[BND_PREFIX])
6622 add_prefix (BND_PREFIX_OPCODE);
6623 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
6625 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
6626 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
6630 /* Check string instruction segment overrides. */
6631 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
6633 gas_assert (i.mem_operands);
6634 if (!check_string ())
6635 return;
6636 i.disp_operands = 0;
6639 /* A memory operand of (%dx) may only be used with input/output
6640 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
6641 if (i.input_output_operand
6642 && ((i.tm.base_opcode | 0x82) != 0xee
6643 || i.tm.opcode_space != SPACE_BASE))
6645 as_bad (_("input/output port address isn't allowed with `%s'"),
6646 insn_name (&i.tm));
6647 return;
6650 if (optimize && !pp.no_optimize && i.tm.opcode_modifier.optimize)
6652 if (pp.has_nf)
6653 optimize_nf_encoding ();
6654 optimize_encoding ();
6657 /* Past optimization there's no need to distinguish encoding_evex,
6658 encoding_evex512, and encoding_egpr anymore. */
6659 if (pp.encoding == encoding_evex512)
6660 pp.encoding = encoding_evex;
6661 else if (pp.encoding == encoding_egpr)
6662 pp.encoding = is_any_vex_encoding (&i.tm) ? encoding_evex
6663 : encoding_default;
6665 /* Similarly {nf} can now be taken to imply {evex}. */
6666 if (pp.has_nf && pp.encoding == encoding_default)
6667 pp.encoding = encoding_evex;
6669 if (use_unaligned_vector_move)
6670 encode_with_unaligned_vector_move ();
6672 if (!process_suffix (t))
6673 return;
6675 /* Check if IP-relative addressing requirements can be satisfied. */
6676 if (is_cpu (&i.tm, CpuPREFETCHI)
6677 && !(i.base_reg && i.base_reg->reg_num == RegIP))
6678 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
6680 /* Update operand types and check extended states. */
6681 for (j = 0; j < i.operands; j++)
6683 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
6684 switch (i.tm.operand_types[j].bitfield.class)
6686 default:
6687 break;
6688 case RegMMX:
6689 i.xstate |= xstate_mmx;
6690 break;
6691 case RegMask:
6692 i.xstate |= xstate_mask;
6693 break;
6694 case RegSIMD:
6695 if (i.tm.operand_types[j].bitfield.tmmword)
6696 i.xstate |= xstate_tmm;
6697 else if (i.tm.operand_types[j].bitfield.zmmword
6698 && !i.tm.opcode_modifier.vex
6699 && vector_size >= VSZ512)
6700 i.xstate |= xstate_zmm;
6701 else if (i.tm.operand_types[j].bitfield.ymmword
6702 && vector_size >= VSZ256)
6703 i.xstate |= xstate_ymm;
6704 else if (i.tm.operand_types[j].bitfield.xmmword)
6705 i.xstate |= xstate_xmm;
6706 break;
6710 /* Make still-unresolved immediate matches conform to the size of the
6711 immediate given in i.suffix. */
6712 if (!finalize_imm ())
6713 return;
6715 if (i.types[0].bitfield.imm1)
6716 i.imm_operands = 0; /* kludge for shift insns. */
6718 /* For insns with operands there are more diddles to do to the opcode. */
6719 if (i.operands)
6721 if (!process_operands ())
6722 return;
6724 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
6726 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
6727 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
6730 if (is_any_vex_encoding (&i.tm))
6732 if (!cpu_arch_flags.bitfield.cpui286)
6734 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
6735 insn_name (&i.tm));
6736 return;
6739 /* Check for explicit REX prefix. */
6740 if ((i.prefix[REX_PREFIX]
6741 && (i.tm.opcode_space != SPACE_EVEXMAP4
6742 /* To mimic behavior for legacy insns, permit use of REX64 for promoted
6743 legacy instructions. */
6744 || i.prefix[REX_PREFIX] != (REX_OPCODE | REX_W)))
6745 || pp.rex_encoding)
6747 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
6748 return;
6751 /* Check for explicit REX2 prefix. */
6752 if (pp.rex2_encoding)
6754 as_bad (_("{rex2} prefix invalid with `%s'"), insn_name (&i.tm));
6755 return;
6758 if (is_apx_evex_encoding ())
6760 if (!build_apx_evex_prefix ())
6761 return;
6763 else if (i.tm.opcode_modifier.vex)
6764 build_vex_prefix (t);
6765 else
6766 build_evex_prefix ();
6768 /* The individual REX.RXBW bits got consumed. */
6769 i.rex &= REX_OPCODE;
6771 /* The rex2 bits got consumed. */
6772 i.rex2 = 0;
6775 /* Handle conversion of 'int $3' --> special int3 insn. */
6776 if (i.tm.mnem_off == MN_int
6777 && i.op[0].imms->X_add_number == 3)
6779 i.tm.base_opcode = INT3_OPCODE;
6780 i.imm_operands = 0;
6783 if ((i.tm.opcode_modifier.jump == JUMP
6784 || i.tm.opcode_modifier.jump == JUMP_BYTE
6785 || i.tm.opcode_modifier.jump == JUMP_DWORD)
6786 && i.op[0].disps->X_op == O_constant)
6788 /* Convert "jmp constant" (and "call constant") to a jump (call) to
6789 the absolute address given by the constant. Since ix86 jumps and
6790 calls are pc relative, we need to generate a reloc. */
6791 i.op[0].disps->X_add_symbol = &abs_symbol;
6792 i.op[0].disps->X_op = O_symbol;
6795 establish_rex ();
6797 insert_lfence_before (last_insn);
6799 /* We are ready to output the insn. */
6800 output_insn (last_insn);
6802 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
6803 /* PS: SCFI is enabled only for System V AMD64 ABI. The ABI check has been
6804 performed in i386_target_format. */
6805 if (IS_ELF && flag_synth_cfi)
6807 ginsnS *ginsn;
6808 ginsn = x86_ginsn_new (symbol_temp_new_now (), frch_ginsn_gen_mode ());
6809 frch_ginsn_data_append (ginsn);
6811 #endif
6813 insert_lfence_after ();
6815 if (i.tm.opcode_modifier.isprefix)
6817 last_insn->kind = last_insn_prefix;
6818 last_insn->name = insn_name (&i.tm);
6819 last_insn->file = as_where (&last_insn->line);
6821 else
6822 last_insn->kind = last_insn_other;
6825 void
6826 md_assemble (char *line)
6828 i386_assemble (line);
6829 current_templates.start = NULL;
6830 memset (&pp, 0, sizeof (pp));
6833 /* The Q suffix is generally valid only in 64-bit mode, with very few
6834 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
6835 and fisttp only one of their two templates is matched below: That's
6836 sufficient since other relevant attributes are the same between both
6837 respective templates. */
6838 static INLINE bool q_suffix_allowed(const insn_template *t)
6840 return flag_code == CODE_64BIT
6841 || (t->opcode_space == SPACE_BASE
6842 && t->base_opcode == 0xdf
6843 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
6844 || t->mnem_off == MN_cmpxchg8b;
6847 static const char *
6848 parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
6850 const char *l = line, *token_start = l;
6851 char *mnem_p;
6852 bool pass1 = !current_templates.start;
6853 int supported;
6854 const insn_template *t;
6855 char *dot_p = NULL;
6857 while (1)
6859 const char *split;
6861 mnem_p = mnemonic;
6862 /* Pseudo-prefixes start with an opening figure brace. */
6863 if ((*mnem_p = *l) == '{')
6865 ++mnem_p;
6866 ++l;
6867 if (is_space_char (*l))
6868 ++l;
6870 else if (mode == parse_pseudo_prefix)
6871 break;
6872 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
6874 if (*mnem_p == '.')
6875 dot_p = mnem_p;
6876 mnem_p++;
6877 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
6879 too_long:
6880 as_bad (_("no such instruction: `%s'"), token_start);
6881 return NULL;
6883 l++;
6885 split = l;
6886 if (is_space_char (*l))
6887 ++l;
6888 /* Pseudo-prefixes end with a closing figure brace. */
6889 if (*mnemonic == '{' && *l == '}')
6891 *mnem_p++ = *l++;
6892 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
6893 goto too_long;
6894 *mnem_p = '\0';
6896 if (is_space_char (*l))
6897 ++l;
6899 else if (l == split
6900 && *l != END_OF_INSN
6901 && (intel_syntax
6902 || (*l != PREFIX_SEPARATOR && *l != ',')))
6904 if (mode != parse_all)
6905 break;
6906 as_bad (_("invalid character %s in mnemonic"),
6907 output_invalid (*split));
6908 return NULL;
6910 if (token_start == l)
6912 if (!intel_syntax && *l == PREFIX_SEPARATOR)
6913 as_bad (_("expecting prefix; got nothing"));
6914 else
6915 as_bad (_("expecting mnemonic; got nothing"));
6916 return NULL;
6919 /* Look up instruction (or prefix) via hash table. */
6920 op_lookup (mnemonic);
6922 if (*l != END_OF_INSN
6923 && current_templates.start
6924 && current_templates.start->opcode_modifier.isprefix)
6926 supported = cpu_flags_match (current_templates.start);
6927 if (!(supported & CPU_FLAGS_64BIT_MATCH))
6929 as_bad ((flag_code != CODE_64BIT
6930 ? _("`%s' is only supported in 64-bit mode")
6931 : _("`%s' is not supported in 64-bit mode")),
6932 insn_name (current_templates.start));
6933 return NULL;
6935 if (supported != CPU_FLAGS_PERFECT_MATCH)
6937 as_bad (_("`%s' is not supported on `%s%s'"),
6938 insn_name (current_templates.start),
6939 cpu_arch_name ? cpu_arch_name : default_arch,
6940 cpu_sub_arch_name ? cpu_sub_arch_name : "");
6941 return NULL;
6943 /* If we are in 16-bit mode, do not allow addr16 or data16.
6944 Similarly, in 32-bit mode, do not allow addr32 or data32. */
6945 if ((current_templates.start->opcode_modifier.size == SIZE16
6946 || current_templates.start->opcode_modifier.size == SIZE32)
6947 && flag_code != CODE_64BIT
6948 && ((current_templates.start->opcode_modifier.size == SIZE32)
6949 ^ (flag_code == CODE_16BIT)))
6951 as_bad (_("redundant %s prefix"),
6952 insn_name (current_templates.start));
6953 return NULL;
6956 if (current_templates.start->base_opcode == PSEUDO_PREFIX)
6958 /* Handle pseudo prefixes. */
6959 switch (current_templates.start->extension_opcode)
6961 case Prefix_Disp8:
6962 /* {disp8} */
6963 pp.disp_encoding = disp_encoding_8bit;
6964 break;
6965 case Prefix_Disp16:
6966 /* {disp16} */
6967 pp.disp_encoding = disp_encoding_16bit;
6968 break;
6969 case Prefix_Disp32:
6970 /* {disp32} */
6971 pp.disp_encoding = disp_encoding_32bit;
6972 break;
6973 case Prefix_Load:
6974 /* {load} */
6975 pp.dir_encoding = dir_encoding_load;
6976 break;
6977 case Prefix_Store:
6978 /* {store} */
6979 pp.dir_encoding = dir_encoding_store;
6980 break;
6981 case Prefix_VEX:
6982 /* {vex} */
6983 pp.encoding = encoding_vex;
6984 break;
6985 case Prefix_VEX3:
6986 /* {vex3} */
6987 pp.encoding = encoding_vex3;
6988 break;
6989 case Prefix_EVEX:
6990 /* {evex} */
6991 pp.encoding = encoding_evex;
6992 break;
6993 case Prefix_REX:
6994 /* {rex} */
6995 pp.rex_encoding = true;
6996 break;
6997 case Prefix_REX2:
6998 /* {rex2} */
6999 pp.rex2_encoding = true;
7000 break;
7001 case Prefix_NF:
7002 /* {nf} */
7003 pp.has_nf = true;
7004 break;
7005 case Prefix_NoOptimize:
7006 /* {nooptimize} */
7007 pp.no_optimize = true;
7008 break;
7009 default:
7010 abort ();
7012 if (pp.has_nf
7013 && pp.encoding != encoding_default
7014 && pp.encoding != encoding_evex)
7016 as_bad (_("{nf} cannot be combined with {vex}/{vex3}"));
7017 return NULL;
7020 else
7022 /* Add prefix, checking for repeated prefixes. */
7023 switch (add_prefix (current_templates.start->base_opcode))
7025 case PREFIX_EXIST:
7026 return NULL;
7027 case PREFIX_DS:
7028 if (is_cpu (current_templates.start, CpuIBT))
7029 i.notrack_prefix = insn_name (current_templates.start);
7030 break;
7031 case PREFIX_REP:
7032 if (is_cpu (current_templates.start, CpuHLE))
7033 i.hle_prefix = insn_name (current_templates.start);
7034 else if (is_cpu (current_templates.start, CpuMPX))
7035 i.bnd_prefix = insn_name (current_templates.start);
7036 else
7037 i.rep_prefix = insn_name (current_templates.start);
7038 break;
7039 default:
7040 break;
7043 /* Skip past PREFIX_SEPARATOR and reset token_start. */
7044 l += (!intel_syntax && *l == PREFIX_SEPARATOR);
7045 if (is_space_char (*l))
7046 ++l;
7047 token_start = l;
7049 else
7050 break;
7053 if (mode != parse_all)
7054 return token_start;
7056 if (!current_templates.start)
7058 /* Deprecated functionality (new code should use pseudo-prefixes instead):
7059 Check whether we should swap operands or force a 32-bit displacement in
7060 the encoding. */
7061 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
7063 if (pp.dir_encoding == dir_encoding_default)
7064 pp.dir_encoding = dir_encoding_swap;
7065 else
7066 as_warn (_("ignoring `.s' suffix due to earlier `{%s}'"),
7067 pp.dir_encoding == dir_encoding_load ? "load" : "store");
7069 else if (mnem_p - 3 == dot_p
7070 && dot_p[1] == 'd'
7071 && dot_p[2] == '8')
7073 if (pp.disp_encoding == disp_encoding_default)
7074 pp.disp_encoding = disp_encoding_8bit;
7075 else if (pp.disp_encoding != disp_encoding_8bit)
7076 as_warn (_("ignoring `.d8' suffix due to earlier `{disp<N>}'"));
7078 else if (mnem_p - 4 == dot_p
7079 && dot_p[1] == 'd'
7080 && dot_p[2] == '3'
7081 && dot_p[3] == '2')
7083 if (pp.disp_encoding == disp_encoding_default)
7084 pp.disp_encoding = disp_encoding_32bit;
7085 else if (pp.disp_encoding != disp_encoding_32bit)
7086 as_warn (_("ignoring `.d32' suffix due to earlier `{disp<N>}'"));
7088 else
7089 goto check_suffix;
7090 mnem_p = dot_p;
7091 *dot_p = '\0';
7092 op_lookup (mnemonic);
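/* E.g. (illustrative): "mov.d32 %eax, 4(%ebp)" forces a 32-bit
   displacement even though 4 fits in 8 bits, and "mov.s" selects the
   direction-swapped opcode form; the {disp32} / {load} / {store}
   pseudo-prefixes are the preferred spellings nowadays.  */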
7095 if (!current_templates.start || !pass1)
7097 current_templates.start = NULL;
7099 check_suffix:
7100 if (mnem_p > mnemonic)
7102 /* See if we can get a match by trimming off a suffix. */
7103 switch (mnem_p[-1])
7105 case WORD_MNEM_SUFFIX:
7106 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
7107 i.suffix = SHORT_MNEM_SUFFIX;
7108 else
7109 /* Fall through. */
7110 case BYTE_MNEM_SUFFIX:
7111 case QWORD_MNEM_SUFFIX:
7112 i.suffix = mnem_p[-1];
7113 mnem_p[-1] = '\0';
7114 op_lookup (mnemonic);
7115 break;
7116 case SHORT_MNEM_SUFFIX:
7117 case LONG_MNEM_SUFFIX:
7118 if (!intel_syntax)
7120 i.suffix = mnem_p[-1];
7121 mnem_p[-1] = '\0';
7122 op_lookup (mnemonic);
7124 break;
7126 /* Intel Syntax. */
7127 case 'd':
7128 if (intel_syntax)
7130 if (intel_float_operand (mnemonic) == 1)
7131 i.suffix = SHORT_MNEM_SUFFIX;
7132 else
7133 i.suffix = LONG_MNEM_SUFFIX;
7134 mnem_p[-1] = '\0';
7135 op_lookup (mnemonic);
7137 /* For compatibility reasons accept MOVSD and CMPSD without
7138 operands even in AT&T mode. */
7139 else if (*l == END_OF_INSN)
7141 mnem_p[-1] = '\0';
7142 op_lookup (mnemonic);
7143 if (current_templates.start != NULL
7144 /* MOVS or CMPS */
7145 && (current_templates.start->base_opcode | 2) == 0xa6
7146 && current_templates.start->opcode_space
7147 == SPACE_BASE
7148 && mnem_p[-2] == 's')
7150 as_warn (_("found `%sd'; assuming `%sl' was meant"),
7151 mnemonic, mnemonic);
7152 i.suffix = LONG_MNEM_SUFFIX;
7154 else
7156 current_templates.start = NULL;
7157 mnem_p[-1] = 'd';
7160 break;
7164 if (!current_templates.start)
7166 if (pass1)
7167 as_bad (_("no such instruction: `%s'"), token_start);
7168 return NULL;
7172 /* Handle SCC OSZC flags. */
7173 if (current_templates.start->opcode_modifier.operandconstraint == SCC)
7175 int length = check_Scc_OszcOperations (l);
7176 if (length < 0)
7177 return NULL;
7178 l += length;
7181 if ((current_templates.start->opcode_modifier.jump == JUMP
7182 || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
7183 && *l == ',')
7185 /* Check for a branch hint. We allow ",pt" and ",pn" for
7186 predict taken and predict not taken respectively.
7187 I'm not sure that branch hints actually do anything on loop
7188 and jcxz insns (JumpByte) for current Pentium4 chips. They
7189 may work in the future and it doesn't hurt to accept them
7190 now. */
7191 token_start = l++;
7192 if (is_space_char (*l))
7193 ++l;
7194 if (TOLOWER (*l) == 'p' && ISALPHA (l[1])
7195 && (l[2] == END_OF_INSN || is_space_char (l[2])))
7197 if (TOLOWER (l[1]) == 't')
7199 if (!add_prefix (DS_PREFIX_OPCODE))
7200 return NULL;
7201 l += 2;
7203 else if (TOLOWER (l[1]) == 'n')
7205 if (!add_prefix (CS_PREFIX_OPCODE))
7206 return NULL;
7207 l += 2;
7209 else
7210 l = token_start;
7212 else
7213 l = token_start;
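/* E.g. (illustrative): "jne,pt .Lfoo" puts a 0x3e (DS) prefix and
   "jne,pn .Lfoo" a 0x2e (CS) prefix in front of the jcc.  */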
7215 /* Any other comma loses. */
7216 if (*l == ',')
7218 as_bad (_("invalid character %s in mnemonic"),
7219 output_invalid (*l));
7220 return NULL;
7223 /* Check if instruction is supported on specified architecture. */
7224 supported = 0;
7225 for (t = current_templates.start; t < current_templates.end; ++t)
7227 supported |= cpu_flags_match (t);
7229 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
7230 supported &= ~CPU_FLAGS_64BIT_MATCH;
7232 if (supported == CPU_FLAGS_PERFECT_MATCH)
7233 return l;
7236 if (pass1)
7238 if (supported & CPU_FLAGS_64BIT_MATCH)
7239 i.error = unsupported_on_arch;
7240 else
7241 i.error = unsupported_64bit;
7244 return NULL;
7247 static char *
7248 parse_operands (char *l, const char *mnemonic)
7250 char *token_start;
7252 /* 1 if operand is pending after ','. */
7253 unsigned int expecting_operand = 0;
7255 while (*l != END_OF_INSN)
7257 /* Non-zero if operand parens not balanced. */
7258 unsigned int paren_not_balanced = 0;
7259 /* True if inside double quotes. */
7260 bool in_quotes = false;
7262 /* Skip optional white space before operand. */
7263 if (is_space_char (*l))
7264 ++l;
7265 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
7267 as_bad (_("invalid character %s before operand %d"),
7268 output_invalid (*l),
7269 i.operands + 1);
7270 return NULL;
7272 token_start = l; /* After white space. */
7273 while (in_quotes || paren_not_balanced || *l != ',')
7275 if (*l == END_OF_INSN)
7277 if (in_quotes)
7279 as_bad (_("unbalanced double quotes in operand %d."),
7280 i.operands + 1);
7281 return NULL;
7283 if (paren_not_balanced)
7285 know (!intel_syntax);
7286 as_bad (_("unbalanced parenthesis in operand %d."),
7287 i.operands + 1);
7288 return NULL;
7290 else
7291 break; /* we are done */
7293 else if (*l == '\\' && l[1] == '"')
7294 ++l;
7295 else if (*l == '"')
7296 in_quotes = !in_quotes;
7297 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
7299 as_bad (_("invalid character %s in operand %d"),
7300 output_invalid (*l),
7301 i.operands + 1);
7302 return NULL;
7304 if (!intel_syntax && !in_quotes)
7306 if (*l == '(')
7307 ++paren_not_balanced;
7308 if (*l == ')')
7309 --paren_not_balanced;
7311 l++;
7313 if (l != token_start)
7314 { /* Yes, we've read in another operand. */
7315 unsigned int operand_ok;
7316 this_operand = i.operands++;
7317 if (i.operands > MAX_OPERANDS)
7319 as_bad (_("spurious operands; (%d operands/instruction max)"),
7320 MAX_OPERANDS);
7321 return NULL;
7323 i.types[this_operand].bitfield.unspecified = 1;
7324 /* Now parse operand adding info to 'i' as we go along. */
7325 END_STRING_AND_SAVE (l);
7327 if (i.mem_operands > 1)
7329 as_bad (_("too many memory references for `%s'"),
7330 mnemonic);
7331 return 0;
7334 if (intel_syntax)
7335 operand_ok =
7336 i386_intel_operand (token_start,
7337 intel_float_operand (mnemonic));
7338 else
7339 operand_ok = i386_att_operand (token_start);
7341 RESTORE_END_STRING (l);
7342 if (!operand_ok)
7343 return NULL;
7345 else
7347 if (expecting_operand)
7349 expecting_operand_after_comma:
7350 as_bad (_("expecting operand after ','; got nothing"));
7351 return NULL;
7353 if (*l == ',')
7355 as_bad (_("expecting operand before ','; got nothing"));
7356 return NULL;
7360 /* Now *l must be either ',' or END_OF_INSN. */
7361 if (*l == ',')
7363 if (*++l == END_OF_INSN)
7365 /* The comma has been consumed; if nothing follows it, complain. */
7366 goto expecting_operand_after_comma;
7368 expecting_operand = 1;
7371 return l;
7374 static void
7375 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
7377 union i386_op temp_op;
7378 i386_operand_type temp_type;
7379 unsigned int temp_flags;
7380 enum bfd_reloc_code_real temp_reloc;
7382 temp_type = i.types[xchg2];
7383 i.types[xchg2] = i.types[xchg1];
7384 i.types[xchg1] = temp_type;
7386 temp_flags = i.flags[xchg2];
7387 i.flags[xchg2] = i.flags[xchg1];
7388 i.flags[xchg1] = temp_flags;
7390 temp_op = i.op[xchg2];
7391 i.op[xchg2] = i.op[xchg1];
7392 i.op[xchg1] = temp_op;
7394 temp_reloc = i.reloc[xchg2];
7395 i.reloc[xchg2] = i.reloc[xchg1];
7396 i.reloc[xchg1] = temp_reloc;
7398 temp_flags = i.imm_bits[xchg2];
7399 i.imm_bits[xchg2] = i.imm_bits[xchg1];
7400 i.imm_bits[xchg1] = temp_flags;
7402 if (i.mask.reg)
7404 if (i.mask.operand == xchg1)
7405 i.mask.operand = xchg2;
7406 else if (i.mask.operand == xchg2)
7407 i.mask.operand = xchg1;
7409 if (i.broadcast.type || i.broadcast.bytes)
7411 if (i.broadcast.operand == xchg1)
7412 i.broadcast.operand = xchg2;
7413 else if (i.broadcast.operand == xchg2)
7414 i.broadcast.operand = xchg1;
7418 static void
7419 swap_operands (void)
7421 switch (i.operands)
7423 case 5:
7424 case 4:
7425 swap_2_operands (1, i.operands - 2);
7426 /* Fall through. */
7427 case 3:
7428 case 2:
7429 swap_2_operands (0, i.operands - 1);
7430 break;
7431 default:
7432 abort ();
7435 if (i.mem_operands == 2)
7437 const reg_entry *temp_seg;
7438 temp_seg = i.seg[0];
7439 i.seg[0] = i.seg[1];
7440 i.seg[1] = temp_seg;
7444 /* Try to ensure constant immediates are represented in the smallest
7445 opcode possible. */
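/* E.g. (illustrative): "add $1, %eax" can then match the 3-byte imm8
   form 83 c0 01 instead of the 5-byte imm32 form 05 01 00 00 00.  */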
7446 static void
7447 optimize_imm (void)
7449 char guess_suffix = 0;
7450 int op;
7452 if (i.suffix)
7453 guess_suffix = i.suffix;
7454 else if (i.reg_operands)
7456 /* Figure out a suffix from the last register operand specified.
7457 We can't do this properly yet, i.e. excluding special register
7458 instances, but the following works for instructions with
7459 immediates. In any case, we can't set i.suffix yet. */
7460 for (op = i.operands; --op >= 0;)
7461 if (i.types[op].bitfield.class != Reg)
7462 continue;
7463 else if (i.types[op].bitfield.byte)
7465 guess_suffix = BYTE_MNEM_SUFFIX;
7466 break;
7468 else if (i.types[op].bitfield.word)
7470 guess_suffix = WORD_MNEM_SUFFIX;
7471 break;
7473 else if (i.types[op].bitfield.dword)
7475 guess_suffix = LONG_MNEM_SUFFIX;
7476 break;
7478 else if (i.types[op].bitfield.qword)
7480 guess_suffix = QWORD_MNEM_SUFFIX;
7481 break;
7484 else if ((flag_code == CODE_16BIT)
7485 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
7486 guess_suffix = WORD_MNEM_SUFFIX;
7487 else if (flag_code != CODE_64BIT
7488 || (!(i.prefix[REX_PREFIX] & REX_W)
7489 /* A more generic (but also more involved) way of dealing
7490 with the special case(s) would be to go look for
7491 DefaultSize attributes on any of the templates. */
7492 && current_templates.start->mnem_off != MN_push))
7493 guess_suffix = LONG_MNEM_SUFFIX;
7495 for (op = i.operands; --op >= 0;)
7496 if (operand_type_check (i.types[op], imm))
7498 switch (i.op[op].imms->X_op)
7500 case O_constant:
7501 /* If a suffix is given, this operand may be shortened. */
7502 switch (guess_suffix)
7504 case LONG_MNEM_SUFFIX:
7505 i.types[op].bitfield.imm32 = 1;
7506 i.types[op].bitfield.imm64 = 1;
7507 break;
7508 case WORD_MNEM_SUFFIX:
7509 i.types[op].bitfield.imm16 = 1;
7510 i.types[op].bitfield.imm32 = 1;
7511 i.types[op].bitfield.imm32s = 1;
7512 i.types[op].bitfield.imm64 = 1;
7513 break;
7514 case BYTE_MNEM_SUFFIX:
7515 i.types[op].bitfield.imm8 = 1;
7516 i.types[op].bitfield.imm8s = 1;
7517 i.types[op].bitfield.imm16 = 1;
7518 i.types[op].bitfield.imm32 = 1;
7519 i.types[op].bitfield.imm32s = 1;
7520 i.types[op].bitfield.imm64 = 1;
7521 break;
7524 /* If this operand is at most 16 bits, convert it
7525 to a signed 16 bit number before trying to see
7526 whether it will fit in an even smaller size.
7527 This allows a 16-bit operand such as $0xffe0 to
7528 be recognised as within Imm8S range. */
7529 if ((i.types[op].bitfield.imm16)
7530 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
7532 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
7533 ^ 0x8000) - 0x8000);
7535 #ifdef BFD64
7536 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
7537 if ((i.types[op].bitfield.imm32)
7538 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
7540 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
7541 ^ ((offsetT) 1 << 31))
7542 - ((offsetT) 1 << 31));
7544 #endif
7545 i.types[op]
7546 = operand_type_or (i.types[op],
7547 smallest_imm_type (i.op[op].imms->X_add_number));
7549 /* We must avoid matching Imm32 templates when only a 64-bit
7550 immediate is available. */
7551 if (guess_suffix == QWORD_MNEM_SUFFIX)
7552 i.types[op].bitfield.imm32 = 0;
7553 break;
7555 case O_absent:
7556 case O_register:
7557 abort ();
7559 /* Symbols and expressions. */
7560 default:
7561 /* Convert symbolic operand to proper sizes for matching, but don't
7562 prevent matching a set of insns that only supports sizes other
7563 than those matching the insn suffix. */
7565 i386_operand_type mask, allowed;
7566 const insn_template *t = current_templates.start;
7568 operand_type_set (&mask, 0);
7569 switch (guess_suffix)
7571 case QWORD_MNEM_SUFFIX:
7572 mask.bitfield.imm64 = 1;
7573 mask.bitfield.imm32s = 1;
7574 break;
7575 case LONG_MNEM_SUFFIX:
7576 mask.bitfield.imm32 = 1;
7577 break;
7578 case WORD_MNEM_SUFFIX:
7579 mask.bitfield.imm16 = 1;
7580 break;
7581 case BYTE_MNEM_SUFFIX:
7582 mask.bitfield.imm8 = 1;
7583 break;
7584 default:
7585 break;
7588 allowed = operand_type_and (t->operand_types[op], mask);
7589 while (++t < current_templates.end)
7591 allowed = operand_type_or (allowed, t->operand_types[op]);
7592 allowed = operand_type_and (allowed, mask);
7595 if (!operand_type_all_zero (&allowed))
7596 i.types[op] = operand_type_and (i.types[op], mask);
7598 break;
7603 /* Try to use the smallest displacement type too. */
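/* E.g. (illustrative): "movl %eax, 4(%ebp)" can use the 3-byte disp8
   form 89 45 04, while a displacement of 128 no longer fits in a signed
   byte and needs the disp32 (or disp16) form.  */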
7604 static bool
7605 optimize_disp (const insn_template *t)
7607 unsigned int op;
7609 if (!want_disp32 (t)
7610 && (!t->opcode_modifier.jump
7611 || i.jumpabsolute || i.types[0].bitfield.baseindex))
7613 for (op = 0; op < i.operands; ++op)
7615 const expressionS *exp = i.op[op].disps;
7617 if (!operand_type_check (i.types[op], disp))
7618 continue;
7620 if (exp->X_op != O_constant)
7621 continue;
7623 /* Since displacements are sign-extended to 64 bits, don't allow
7624 disp32 if it is out of range. */
7625 if (fits_in_signed_long (exp->X_add_number))
7626 continue;
7628 i.types[op].bitfield.disp32 = 0;
7629 if (i.types[op].bitfield.baseindex)
7631 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
7632 (uint64_t) exp->X_add_number);
7633 return false;
7638 /* Don't optimize the displacement for movabs, since it only takes a
7639 64-bit displacement. */
7640 if (pp.disp_encoding > disp_encoding_8bit
7641 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
7642 return true;
7644 for (op = i.operands; op-- > 0;)
7645 if (operand_type_check (i.types[op], disp))
7647 if (i.op[op].disps->X_op == O_constant)
7649 offsetT op_disp = i.op[op].disps->X_add_number;
7651 if (!op_disp && i.types[op].bitfield.baseindex)
7653 i.types[op] = operand_type_and_not (i.types[op], anydisp);
7654 i.op[op].disps = NULL;
7655 i.disp_operands--;
7656 continue;
7659 if (i.types[op].bitfield.disp16
7660 && fits_in_unsigned_word (op_disp))
7662 /* If this operand is at most 16 bits, convert
7663 to a signed 16 bit number and don't use 64bit
7664 displacement. */
7665 op_disp = ((op_disp ^ 0x8000) - 0x8000);
7666 i.types[op].bitfield.disp64 = 0;
7669 #ifdef BFD64
7670 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
7671 if ((flag_code != CODE_64BIT
7672 ? i.types[op].bitfield.disp32
7673 : want_disp32 (t)
7674 && (!t->opcode_modifier.jump
7675 || i.jumpabsolute || i.types[op].bitfield.baseindex))
7676 && fits_in_unsigned_long (op_disp))
7678 /* If this operand is at most 32 bits, convert
7679 to a signed 32 bit number and don't use 64bit
7680 displacement. */
7681 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
7682 i.types[op].bitfield.disp64 = 0;
7683 i.types[op].bitfield.disp32 = 1;
7686 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
7688 i.types[op].bitfield.disp64 = 0;
7689 i.types[op].bitfield.disp32 = 1;
7691 #endif
7692 if ((i.types[op].bitfield.disp32
7693 || i.types[op].bitfield.disp16)
7694 && fits_in_disp8 (op_disp))
7695 i.types[op].bitfield.disp8 = 1;
7697 i.op[op].disps->X_add_number = op_disp;
7699 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
7700 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
7702 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
7703 i.op[op].disps, 0, i.reloc[op]);
7704 i.types[op] = operand_type_and_not (i.types[op], anydisp);
7706 else
7707 /* We only support 64bit displacement on constants. */
7708 i.types[op].bitfield.disp64 = 0;
7711 return true;
7714 /* Return 1 if there is a match in broadcast bytes between operand
7715 GIVEN and instruction template T. */
7717 static INLINE int
7718 match_broadcast_size (const insn_template *t, unsigned int given)
7720 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
7721 && i.types[given].bitfield.byte)
7722 || (t->opcode_modifier.broadcast == WORD_BROADCAST
7723 && i.types[given].bitfield.word)
7724 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
7725 && i.types[given].bitfield.dword)
7726 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
7727 && i.types[given].bitfield.qword));
7730 /* Check if operands are valid for the instruction. */
7732 static int
7733 check_VecOperands (const insn_template *t)
7735 unsigned int op;
7736 i386_cpu_flags cpu;
7738 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
7739 any one operand implicitly require AVX512VL support if the actual
7740 operand size is YMMword or XMMword. Since this function runs after
7741 template matching, there's no need to check for YMMword/XMMword in
7742 the template. */
7743 cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
7744 if (!cpu_flags_all_zero (&cpu)
7745 && !is_cpu (t, CpuAVX512VL)
7746 && !cpu_arch_flags.bitfield.cpuavx512vl
7747 && (!t->opcode_modifier.vex || need_evex_encoding (t)))
7749 for (op = 0; op < t->operands; ++op)
7751 if (t->operand_types[op].bitfield.zmmword
7752 && (i.types[op].bitfield.ymmword
7753 || i.types[op].bitfield.xmmword))
7755 i.error = operand_size_mismatch;
7756 return 1;
7761 /* Somewhat similarly, templates specifying both AVX and AVX2 are
7762 requiring AVX2 support if the actual operand size is YMMword. */
7763 if (maybe_cpu (t, CpuAVX) && maybe_cpu (t, CpuAVX2)
7764 && !cpu_arch_flags.bitfield.cpuavx2)
7766 for (op = 0; op < t->operands; ++op)
7768 if (t->operand_types[op].bitfield.xmmword
7769 && i.types[op].bitfield.ymmword)
7771 i.error = operand_size_mismatch;
7772 return 1;
7777 /* Without VSIB byte, we can't have a vector register for index. */
7778 if (!t->opcode_modifier.sib
7779 && i.index_reg
7780 && (i.index_reg->reg_type.bitfield.xmmword
7781 || i.index_reg->reg_type.bitfield.ymmword
7782 || i.index_reg->reg_type.bitfield.zmmword))
7784 i.error = unsupported_vector_index_register;
7785 return 1;
7788 /* Check if default mask is allowed. */
7789 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
7790 && (!i.mask.reg || i.mask.reg->reg_num == 0))
7792 i.error = no_default_mask;
7793 return 1;
7796 /* For VSIB byte, we need a vector register for index, and all vector
7797 registers must be distinct. */
7798 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
7800 if (!i.index_reg
7801 || !((t->opcode_modifier.sib == VECSIB128
7802 && i.index_reg->reg_type.bitfield.xmmword)
7803 || (t->opcode_modifier.sib == VECSIB256
7804 && i.index_reg->reg_type.bitfield.ymmword)
7805 || (t->opcode_modifier.sib == VECSIB512
7806 && i.index_reg->reg_type.bitfield.zmmword)))
7808 i.error = invalid_vsib_address;
7809 return 1;
7812 gas_assert (i.reg_operands == 2 || i.mask.reg);
7813 if (i.reg_operands == 2 && !i.mask.reg)
7815 gas_assert (i.types[0].bitfield.class == RegSIMD);
7816 gas_assert (i.types[0].bitfield.xmmword
7817 || i.types[0].bitfield.ymmword);
7818 gas_assert (i.types[2].bitfield.class == RegSIMD);
7819 gas_assert (i.types[2].bitfield.xmmword
7820 || i.types[2].bitfield.ymmword);
7821 if (operand_check == check_none)
7822 return 0;
7823 if (register_number (i.op[0].regs)
7824 != register_number (i.index_reg)
7825 && register_number (i.op[2].regs)
7826 != register_number (i.index_reg)
7827 && register_number (i.op[0].regs)
7828 != register_number (i.op[2].regs))
7829 return 0;
7830 if (operand_check == check_error)
7832 i.error = invalid_vector_register_set;
7833 return 1;
7835 as_warn (_("mask, index, and destination registers should be distinct"));
7837 else if (i.reg_operands == 1 && i.mask.reg)
7839 if (i.types[1].bitfield.class == RegSIMD
7840 && (i.types[1].bitfield.xmmword
7841 || i.types[1].bitfield.ymmword
7842 || i.types[1].bitfield.zmmword)
7843 && (register_number (i.op[1].regs)
7844 == register_number (i.index_reg)))
7846 if (operand_check == check_error)
7848 i.error = invalid_vector_register_set;
7849 return 1;
7851 if (operand_check != check_none)
7852 as_warn (_("index and destination registers should be distinct"));
7857 /* For AMX instructions with 3 TMM register operands, all operands
7858 must be distinct. */
7859 if (i.reg_operands == 3
7860 && t->operand_types[0].bitfield.tmmword
7861 && (i.op[0].regs == i.op[1].regs
7862 || i.op[0].regs == i.op[2].regs
7863 || i.op[1].regs == i.op[2].regs))
7865 i.error = invalid_tmm_register_set;
7866 return 1;
7869 /* Some special instructions require that the destination be distinct
7870 from the source registers. */
7871 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
7873 unsigned int dest_reg = i.operands - 1;
7875 know (i.operands >= 3);
7877 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
7878 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
7879 || (i.reg_operands > 2
7880 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
7882 i.error = invalid_dest_and_src_register_set;
7883 return 1;
7887 /* Check if broadcast is supported by the instruction and is applied
7888 to the memory operand. */
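/* E.g. (illustrative): "vaddps (%rax){1to16}, %zmm1, %zmm2" replicates
   a single dword from memory to all sixteen elements; the broadcast
   element size therefore has to match the instruction's memory operand
   size.  */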
7889 if (i.broadcast.type || i.broadcast.bytes)
7891 i386_operand_type type, overlap;
7893 /* Check if specified broadcast is supported in this instruction,
7894 and its broadcast bytes match the memory operand. */
7895 op = i.broadcast.operand;
7896 if (!t->opcode_modifier.broadcast
7897 || !(i.flags[op] & Operand_Mem)
7898 || (!i.types[op].bitfield.unspecified
7899 && !match_broadcast_size (t, op)))
7901 bad_broadcast:
7902 i.error = unsupported_broadcast;
7903 return 1;
7906 operand_type_set (&type, 0);
7907 switch (get_broadcast_bytes (t, false))
7909 case 2:
7910 type.bitfield.word = 1;
7911 break;
7912 case 4:
7913 type.bitfield.dword = 1;
7914 break;
7915 case 8:
7916 type.bitfield.qword = 1;
7917 break;
7918 case 16:
7919 type.bitfield.xmmword = 1;
7920 break;
7921 case 32:
7922 if (vector_size < VSZ256)
7923 goto bad_broadcast;
7924 type.bitfield.ymmword = 1;
7925 break;
7926 case 64:
7927 if (vector_size < VSZ512)
7928 goto bad_broadcast;
7929 type.bitfield.zmmword = 1;
7930 break;
7931 default:
7932 goto bad_broadcast;
7935 overlap = operand_type_and (type, t->operand_types[op]);
7936 if (t->operand_types[op].bitfield.class == RegSIMD
7937 && t->operand_types[op].bitfield.byte
7938 + t->operand_types[op].bitfield.word
7939 + t->operand_types[op].bitfield.dword
7940 + t->operand_types[op].bitfield.qword > 1)
7942 overlap.bitfield.xmmword = 0;
7943 overlap.bitfield.ymmword = 0;
7944 overlap.bitfield.zmmword = 0;
7946 if (operand_type_all_zero (&overlap))
7947 goto bad_broadcast;
7949 if (t->opcode_modifier.checkoperandsize)
7951 unsigned int j;
7953 type.bitfield.baseindex = 1;
7954 for (j = 0; j < i.operands; ++j)
7956 if (j != op
7957 && !operand_type_register_match(i.types[j],
7958 t->operand_types[j],
7959 type,
7960 t->operand_types[op]))
7961 goto bad_broadcast;
7965 /* If the instruction supports broadcast, check that an operand of
7966 one-element size isn't specified without broadcast. */
7967 else if (t->opcode_modifier.broadcast && i.mem_operands)
7969 /* Find memory operand. */
7970 for (op = 0; op < i.operands; op++)
7971 if (i.flags[op] & Operand_Mem)
7972 break;
7973 gas_assert (op < i.operands);
7974 /* Check size of the memory operand. */
7975 if (match_broadcast_size (t, op))
7977 i.error = broadcast_needed;
7978 return 1;
7981 else
7982 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
7984 /* Check if requested masking is supported. */
7985 if (i.mask.reg)
7987 if (!t->opcode_modifier.masking)
7989 i.error = unsupported_masking;
7990 return 1;
7993 /* Common rules for masking:
7994 - mask register destinations permit only zeroing-masking, without
7995 that actually being expressed by a {z} operand suffix or EVEX.z,
7996 - memory destinations allow only merging-masking,
7997 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
7998 masking. */
7999 if (i.mask.zeroing
8000 && (t->operand_types[t->operands - 1].bitfield.class == RegMask
8001 || (i.flags[t->operands - 1] & Operand_Mem)
8002 || t->opcode_modifier.sib))
8004 i.error = unsupported_masking;
8005 return 1;
8009 /* Check if masking is applied to dest operand. */
8010 if (i.mask.reg && (i.mask.operand != i.operands - 1))
8012 i.error = mask_not_on_destination;
8013 return 1;
8016 /* Check RC/SAE. */
8017 if (i.rounding.type != rc_none)
8019 if (!t->opcode_modifier.sae
8020 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
8021 || i.mem_operands)
8023 i.error = unsupported_rc_sae;
8024 return 1;
8027 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
8028 operand. */
8029 if (t->opcode_modifier.evex != EVEXLIG)
8031 for (op = 0; op < t->operands; ++op)
8032 if (i.types[op].bitfield.zmmword)
8033 break;
8034 if (op >= t->operands)
8036 i.error = operand_size_mismatch;
8037 return 1;
8042 /* Check the special Imm4 cases; must be the first operand. */
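/* E.g. in XOP's "vpermil2ps $1, %xmm4, %xmm3, %xmm2, %xmm1" the
   immediate shares an imm8 with a register number held in the high
   bits, so only values representable in 4 bits fit.  */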
8043 if ((is_cpu (t, CpuXOP) && t->operands == 5)
8044 || (t->opcode_space == SPACE_0F3A
8045 && (t->base_opcode | 3) == 0x0b
8046 && (is_cpu (t, CpuAPX_F)
8047 || (t->opcode_modifier.sse2avx && t->opcode_modifier.evex
8048 && (!t->opcode_modifier.vex
8049 || (pp.encoding != encoding_default
8050 && pp.encoding != encoding_vex
8051 && pp.encoding != encoding_vex3))))))
8053 if (i.op[0].imms->X_op != O_constant
8054 || !fits_in_imm4 (i.op[0].imms->X_add_number))
8056 i.error = bad_imm4;
8057 return 1;
8060 /* Turn off Imm<N> so that update_imm won't complain. */
8061 if (t->operands == 5)
8062 operand_type_set (&i.types[0], 0);
8065 /* Check vector Disp8 operand. */
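/* EVEX encodings use compressed displacements: an 8-bit displacement
   is implicitly scaled by the memory access size (1 << i.memshift).
   E.g. the 0x40 in "vmovaps 0x40(%rax), %zmm0" is representable as a
   disp8 of 1 with a shift of 6.  */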
8066 if (t->opcode_modifier.disp8memshift
8067 && (!t->opcode_modifier.vex
8068 || need_evex_encoding (t))
8069 && pp.disp_encoding <= disp_encoding_8bit)
8071 if (i.broadcast.type || i.broadcast.bytes)
8072 i.memshift = t->opcode_modifier.broadcast - 1;
8073 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
8074 i.memshift = t->opcode_modifier.disp8memshift;
8075 else
8077 const i386_operand_type *type = NULL, *fallback = NULL;
8079 i.memshift = 0;
8080 for (op = 0; op < i.operands; op++)
8081 if (i.flags[op] & Operand_Mem)
8083 if (t->opcode_modifier.evex == EVEXLIG)
8084 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
8085 else if (t->operand_types[op].bitfield.xmmword
8086 + t->operand_types[op].bitfield.ymmword
8087 + t->operand_types[op].bitfield.zmmword <= 1)
8088 type = &t->operand_types[op];
8089 else if (!i.types[op].bitfield.unspecified)
8090 type = &i.types[op];
8091 else /* Ambiguities get resolved elsewhere. */
8092 fallback = &t->operand_types[op];
8094 else if (i.types[op].bitfield.class == RegSIMD
8095 && t->opcode_modifier.evex != EVEXLIG)
8097 if (i.types[op].bitfield.zmmword)
8098 i.memshift = 6;
8099 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
8100 i.memshift = 5;
8101 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
8102 i.memshift = 4;
8105 if (!type && !i.memshift)
8106 type = fallback;
8107 if (type)
8109 if (type->bitfield.zmmword)
8110 i.memshift = 6;
8111 else if (type->bitfield.ymmword)
8112 i.memshift = 5;
8113 else if (type->bitfield.xmmword)
8114 i.memshift = 4;
8117 /* For the check in fits_in_disp8(). */
8118 if (i.memshift == 0)
8119 i.memshift = -1;
8122 for (op = 0; op < i.operands; op++)
8123 if (operand_type_check (i.types[op], disp)
8124 && i.op[op].disps->X_op == O_constant)
8126 /* Make sure to leave i.types[op].bitfield.disp8 alone upon
8127 secondary invocations of match_template(). */
8128 if (fits_in_disp8 (i.op[op].disps->X_add_number))
8130 if (!i.tm.mnem_off)
8131 i.types[op].bitfield.disp8 = 1;
8132 return 0;
8134 if (!i.tm.mnem_off)
8135 i.types[op].bitfield.disp8 = 0;
8139 i.memshift = 0;
8141 return 0;
8144 /* Check if encoding requirements are met by the instruction. */
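/* This honors the {vex}, {vex3}, {evex}, and {nf} pseudo prefixes;
   e.g. "{evex} vaddps %xmm2, %xmm1, %xmm0" demands an EVEX encoding
   and has to fail on templates permitting only VEX.  */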
8146 static int
8147 VEX_check_encoding (const insn_template *t)
8149 if (pp.encoding == encoding_error)
8151 i.error = unsupported;
8152 return 1;
8155 /* Vector size restrictions. */
8156 if ((vector_size < VSZ512
8157 && t->opcode_modifier.evex == EVEX512)
8158 || (vector_size < VSZ256
8159 && (t->opcode_modifier.evex == EVEX256
8160 || t->opcode_modifier.vex == VEX256)))
8162 i.error = unsupported_vector_size;
8163 return 1;
8166 switch (pp.encoding)
8168 case encoding_vex:
8169 case encoding_vex3:
8170 /* This instruction must be encoded with VEX prefix. */
8171 if (!t->opcode_modifier.vex)
8173 i.error = no_vex_encoding;
8174 return 1;
8176 break;
8178 case encoding_default:
8179 if (!pp.has_nf)
8180 break;
8181 /* Fall through. */
8182 case encoding_evex:
8183 case encoding_evex512:
8184 /* This instruction must be encoded with EVEX prefix. */
8185 if (!t->opcode_modifier.evex)
8187 i.error = no_evex_encoding;
8188 return 1;
8190 break;
8192 case encoding_egpr:
8193 /* This instruction must be encoded with REX2 or EVEX prefix. */
8194 if (t->opcode_modifier.vex && !t->opcode_modifier.evex)
8196 i.error = no_evex_encoding;
8197 return 1;
8199 break;
8201 default:
8202 abort ();
8205 return 0;
8208 /* Check if EGPR operands are valid for the instruction. */
8210 static bool
8211 check_EgprOperands (const insn_template *t)
8213 if (!t->opcode_modifier.noegpr)
8214 return false;
8216 for (unsigned int op = 0; op < i.operands; op++)
8218 if (i.types[op].bitfield.class != Reg)
8219 continue;
8221 if (i.op[op].regs->reg_flags & RegRex2)
8223 i.error = register_type_mismatch;
8224 return true;
8228 if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
8229 || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
8231 i.error = unsupported_EGPR_for_addressing;
8232 return true;
8235 /* Check if pseudo prefix {rex2} is valid. */
8236 if (pp.rex2_encoding && !t->opcode_modifier.sse2avx)
8238 i.error = invalid_pseudo_prefix;
8239 return true;
8242 return false;
8245 /* Check if APX operands are valid for the instruction. */
8246 static bool
8247 check_APX_operands (const insn_template *t)
8249 /* Push2* and Pop2* cannot use RSP, and Pop2* cannot pop the same register twice. */
8251 switch (t->mnem_off)
8253 case MN_pop2:
8254 case MN_pop2p:
8255 if (register_number (i.op[0].regs) == register_number (i.op[1].regs))
8257 i.error = invalid_dest_register_set;
8258 return 1;
8260 /* fall through */
8261 case MN_push2:
8262 case MN_push2p:
8263 if (register_number (i.op[0].regs) == 4
8264 || register_number (i.op[1].regs) == 4)
8266 i.error = unsupported_rsp_register;
8267 return 1;
8269 break;
8271 return 0;
8274 /* Check if the instruction uses the REX registers or the REX prefix. */
8275 static bool
8276 check_Rex_required (void)
8278 for (unsigned int op = 0; op < i.operands; op++)
8280 if (i.types[op].bitfield.class != Reg)
8281 continue;
8283 if (i.op[op].regs->reg_flags & (RegRex | RegRex64))
8284 return true;
8287 if ((i.index_reg && (i.index_reg->reg_flags & RegRex))
8288 || (i.base_reg && (i.base_reg->reg_flags & RegRex)))
8289 return true;
8291 /* Check whether the pseudo prefix {rex} is valid. */
8292 return pp.rex_encoding;
8295 /* Optimize APX NDD insns to legacy insns. */
8296 static unsigned int
8297 can_convert_NDD_to_legacy (const insn_template *t)
8299 unsigned int match_dest_op = ~0;
8301 if (!pp.has_nf && i.reg_operands >= 2)
8303 unsigned int dest = i.operands - 1;
8304 unsigned int src1 = i.operands - 2;
8305 unsigned int src2 = (i.operands > 3) ? i.operands - 3 : 0;
8307 if (i.types[src1].bitfield.class == Reg
8308 && i.op[src1].regs == i.op[dest].regs)
8309 match_dest_op = src1;
8310 /* If the first operand is the same as the third operand, the
8311 instruction must allow its first two operands to be
8312 commuted without changing the semantics in order to be
8313 optimized. */
8314 else if (optimize > 1
8315 && t->opcode_modifier.commutative
8316 && i.types[src2].bitfield.class == Reg
8317 && i.op[src2].regs == i.op[dest].regs)
8318 match_dest_op = src2;
8320 return match_dest_op;
8323 /* Helper function for the progress() macro in match_template(). */
8324 static INLINE enum i386_error progress (enum i386_error new,
8325 enum i386_error last,
8326 unsigned int line, unsigned int *line_p)
8328 if (line <= *line_p)
8329 return last;
8330 *line_p = line;
8331 return new;
8334 static const insn_template *
8335 match_template (char mnem_suffix)
8337 /* Points to template once we've found it. */
8338 const insn_template *t;
8339 i386_operand_type overlap0, overlap1, overlap2, overlap3;
8340 i386_operand_type overlap4;
8341 unsigned int found_reverse_match;
8342 i386_operand_type operand_types [MAX_OPERANDS];
8343 int addr_prefix_disp;
8344 unsigned int j, size_match, check_register, errline = __LINE__;
8345 enum i386_error specific_error = number_of_operands_mismatch;
8346 #define progress(err) progress (err, specific_error, __LINE__, &errline)
8348 #if MAX_OPERANDS != 5
8349 # error "MAX_OPERANDS must be 5."
8350 #endif
8352 found_reverse_match = 0;
8353 addr_prefix_disp = -1;
8355 for (t = current_templates.start; t < current_templates.end; t++)
8357 addr_prefix_disp = -1;
8358 found_reverse_match = 0;
8360 /* Must have right number of operands. */
8361 if (i.operands != t->operands)
8362 continue;
8364 /* Skip SSE2AVX templates when inapplicable. */
8365 if (t->opcode_modifier.sse2avx
8366 && (!sse2avx || i.prefix[DATA_PREFIX]))
8368 /* Another non-SSE2AVX template has to follow. */
8369 gas_assert (t + 1 < current_templates.end);
8370 continue;
8373 /* Check processor support. */
8374 specific_error = progress (unsupported);
8375 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
8376 continue;
8378 /* Check AT&T mnemonic. */
8379 specific_error = progress (unsupported_with_intel_mnemonic);
8380 if (!intel_syntax && intel_mnemonic
8381 && t->opcode_modifier.dialect == ATT_MNEMONIC)
8382 continue;
8384 /* Check AT&T/Intel syntax. */
8385 specific_error = progress (unsupported_syntax);
8386 if (intel_syntax
8387 ? t->opcode_modifier.dialect >= ATT_SYNTAX
8388 : t->opcode_modifier.dialect == INTEL_SYNTAX)
8389 continue;
8391 /* Check NF support. */
8392 specific_error = progress (unsupported_nf);
8393 if (pp.has_nf && !t->opcode_modifier.nf)
8394 continue;
8396 /* Check Intel64/AMD64 ISA. */
8397 switch (isa64)
8399 default:
8400 /* Default: Don't accept Intel64. */
8401 if (t->opcode_modifier.isa64 == INTEL64)
8402 continue;
8403 break;
8404 case amd64:
8405 /* -mamd64: Don't accept Intel64 and Intel64-only templates. */
8406 if (t->opcode_modifier.isa64 >= INTEL64)
8407 continue;
8408 break;
8409 case intel64:
8410 /* -mintel64: Don't accept AMD64. */
8411 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
8412 continue;
8413 break;
8416 /* Check the suffix. */
8417 specific_error = progress (invalid_instruction_suffix);
8418 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
8419 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
8420 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
8421 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
8422 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
8423 continue;
8425 specific_error = progress (operand_size_mismatch);
8426 size_match = operand_size_match (t);
8427 if (!size_match)
8428 continue;
8430 /* This is intentionally not
8432 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
8434 as the case of a missing * on the operand is accepted (perhaps with
8435 a warning, issued further down). */
8436 specific_error = progress (operand_type_mismatch);
8437 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
8438 continue;
8440 /* In Intel syntax, normally we can check for memory operand size when
8441 there is no mnemonic suffix. But jmp and call have 2 different
8442 encodings with Dword memory operand size. Skip the "near" one
8443 (permitting a register operand) when "far" was requested. */
8444 if (i.far_branch
8445 && t->opcode_modifier.jump == JUMP_ABSOLUTE
8446 && t->operand_types[0].bitfield.class == Reg)
8447 continue;
8449 for (j = 0; j < MAX_OPERANDS; j++)
8450 operand_types[j] = t->operand_types[j];
8452 /* In general, don't allow 32-bit operands on pre-386. */
8453 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
8454 : operand_size_mismatch);
8455 j = i.imm_operands + (t->operands > i.imm_operands + 1);
8456 if (i.suffix == LONG_MNEM_SUFFIX
8457 && !cpu_arch_flags.bitfield.cpui386
8458 && (intel_syntax
8459 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
8460 && !intel_float_operand (insn_name (t)))
8461 : intel_float_operand (insn_name (t)) != 2)
8462 && (t->operands == i.imm_operands
8463 || (operand_types[i.imm_operands].bitfield.class != RegMMX
8464 && operand_types[i.imm_operands].bitfield.class != RegSIMD
8465 && operand_types[i.imm_operands].bitfield.class != RegMask)
8466 || (operand_types[j].bitfield.class != RegMMX
8467 && operand_types[j].bitfield.class != RegSIMD
8468 && operand_types[j].bitfield.class != RegMask))
8469 && !t->opcode_modifier.sib)
8470 continue;
8472 /* Do not verify operands when there are none. */
8473 if (!t->operands)
8475 if (VEX_check_encoding (t))
8477 specific_error = progress (i.error);
8478 continue;
8481 /* Check if pseudo prefix {rex2} is valid. */
8482 if (t->opcode_modifier.noegpr && pp.rex2_encoding)
8484 specific_error = progress (invalid_pseudo_prefix);
8485 continue;
8488 /* We've found a match; break out of loop. */
8489 break;
8492 if (!t->opcode_modifier.jump
8493 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
8495 /* There should be only one Disp operand. */
8496 for (j = 0; j < MAX_OPERANDS; j++)
8497 if (operand_type_check (operand_types[j], disp))
8498 break;
8499 if (j < MAX_OPERANDS)
8501 bool override = (i.prefix[ADDR_PREFIX] != 0);
8503 addr_prefix_disp = j;
8505 /* Address size prefix will turn Disp64 operand into Disp32 and
8506 Disp32/Disp16 one into Disp16/Disp32 respectively. */
8507 switch (flag_code)
8509 case CODE_16BIT:
8510 override = !override;
8511 /* Fall through. */
8512 case CODE_32BIT:
8513 if (operand_types[j].bitfield.disp32
8514 && operand_types[j].bitfield.disp16)
8516 operand_types[j].bitfield.disp16 = override;
8517 operand_types[j].bitfield.disp32 = !override;
8519 gas_assert (!operand_types[j].bitfield.disp64);
8520 break;
8522 case CODE_64BIT:
8523 if (operand_types[j].bitfield.disp64)
8525 gas_assert (!operand_types[j].bitfield.disp32);
8526 operand_types[j].bitfield.disp32 = override;
8527 operand_types[j].bitfield.disp64 = !override;
8529 operand_types[j].bitfield.disp16 = 0;
8530 break;
8535 /* We check register size if needed. */
8536 if (t->opcode_modifier.checkoperandsize)
8538 check_register = (1 << t->operands) - 1;
8539 if (i.broadcast.type || i.broadcast.bytes)
8540 check_register &= ~(1 << i.broadcast.operand);
8542 else
8543 check_register = 0;
8545 overlap0 = operand_type_and (i.types[0], operand_types[0]);
8546 switch (t->operands)
8548 case 1:
8549 if (!operand_type_match (overlap0, i.types[0]))
8550 continue;
8552 /* Allow the ModR/M encoding to be requested by using the {load} or
8553 {store} pseudo prefix on an applicable insn. */
8554 if (!t->opcode_modifier.modrm
8555 && i.reg_operands == 1
8556 && ((pp.dir_encoding == dir_encoding_load
8557 && t->mnem_off != MN_pop)
8558 || (pp.dir_encoding == dir_encoding_store
8559 && t->mnem_off != MN_push))
8560 /* Avoid BSWAP. */
8561 && t->mnem_off != MN_bswap)
8562 continue;
8563 break;
8565 case 2:
8566 /* xchg %eax, %eax is a special case. It is an alias for nop
8567 only in 32bit mode and we can use opcode 0x90. In 64bit
8568 mode, we can't use 0x90 for xchg %eax, %eax since it should
8569 zero-extend %eax to %rax. */
8570 if (t->base_opcode == 0x90
8571 && t->opcode_space == SPACE_BASE)
8573 if (flag_code == CODE_64BIT
8574 && i.types[0].bitfield.instance == Accum
8575 && i.types[0].bitfield.dword
8576 && i.types[1].bitfield.instance == Accum)
8577 continue;
8579 /* Allow the ModR/M encoding to be requested by using the
8580 {load} or {store} pseudo prefix. */
8581 if (pp.dir_encoding == dir_encoding_load
8582 || pp.dir_encoding == dir_encoding_store)
8583 continue;
8586 if (t->base_opcode == MOV_AX_DISP32
8587 && t->opcode_space == SPACE_BASE
8588 && t->mnem_off != MN_movabs)
8590 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
8591 if (i.reloc[0] == BFD_RELOC_386_GOT32)
8592 continue;
8594 /* xrelease mov %eax, <disp> is another special case. It must not
8595 match the accumulator-only encoding of mov. */
8596 if (i.hle_prefix)
8597 continue;
8599 /* Allow the ModR/M encoding to be requested by using a suitable
8600 {load} or {store} pseudo prefix. */
8601 if (pp.dir_encoding == (i.types[0].bitfield.instance == Accum
8602 ? dir_encoding_store
8603 : dir_encoding_load)
8604 && !i.types[0].bitfield.disp64
8605 && !i.types[1].bitfield.disp64)
8606 continue;
8609 /* Allow the ModR/M encoding to be requested by using the {load} or
8610 {store} pseudo prefix on an applicable insn. */
8611 if (!t->opcode_modifier.modrm
8612 && i.reg_operands == 1
8613 && i.imm_operands == 1
8614 && (pp.dir_encoding == dir_encoding_load
8615 || pp.dir_encoding == dir_encoding_store)
8616 && t->opcode_space == SPACE_BASE)
8618 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
8619 && pp.dir_encoding == dir_encoding_store)
8620 continue;
8622 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
8623 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
8624 || pp.dir_encoding == dir_encoding_load))
8625 continue;
8627 if (t->base_opcode == 0xa8 /* test $imm, %acc */
8628 && pp.dir_encoding == dir_encoding_load)
8629 continue;
8631 /* Fall through. */
8633 case 3:
8634 if (!(size_match & MATCH_STRAIGHT))
8635 goto check_reverse;
8636 /* Reverse direction of operands if swapping is possible in the first
8637 place (operands need to be symmetric) and
8638 - the load form is requested, and the template is a store form,
8639 - the store form is requested, and the template is a load form,
8640 - the non-default (swapped) form is requested. */
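/* E.g. register-only "mov %eax, %ebx" defaults to the store form
   (opcode 0x89), while "{load} mov %eax, %ebx" makes the reversed
   0x8b template match instead.  */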
8641 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
8643 j = i.operands - 1 - (t->opcode_space == SPACE_EVEXMAP4
8644 && t->opcode_modifier.vexvvvv);
8646 if (t->opcode_modifier.d && i.reg_operands == i.operands
8647 && !operand_type_all_zero (&overlap1))
8648 switch (pp.dir_encoding)
8650 case dir_encoding_load:
8651 if (operand_type_check (operand_types[j], anymem)
8652 || t->opcode_modifier.regmem)
8653 goto check_reverse;
8654 break;
8656 case dir_encoding_store:
8657 if (!operand_type_check (operand_types[j], anymem)
8658 && !t->opcode_modifier.regmem)
8659 goto check_reverse;
8660 break;
8662 case dir_encoding_swap:
8663 goto check_reverse;
8665 case dir_encoding_default:
8666 break;
8669 /* If the store form is wanted, we skip the current load form. */
8670 if ((pp.dir_encoding == dir_encoding_store
8671 || pp.dir_encoding == dir_encoding_swap)
8672 && i.mem_operands == 0
8673 && t->opcode_modifier.load)
8674 continue;
8675 /* Fall through. */
8676 case 4:
8677 case 5:
8678 overlap1 = operand_type_and (i.types[1], operand_types[1]);
8679 if (!operand_type_match (overlap0, i.types[0])
8680 || !operand_type_match (overlap1, i.types[1])
8681 || ((check_register & 3) == 3
8682 && !operand_type_register_match (i.types[0],
8683 operand_types[0],
8684 i.types[1],
8685 operand_types[1])))
8687 specific_error = progress (i.error);
8689 /* Check if other direction is valid ... */
8690 if (!t->opcode_modifier.d)
8691 continue;
8693 check_reverse:
8694 if (!(size_match & MATCH_REVERSE))
8695 continue;
8696 /* Try reversing direction of operands. */
8697 j = is_cpu (t, CpuFMA4)
8698 || is_cpu (t, CpuXOP)
8699 || is_cpu (t, CpuAPX_F) ? 1 : i.operands - 1;
8700 overlap0 = operand_type_and (i.types[0], operand_types[j]);
8701 overlap1 = operand_type_and (i.types[j], operand_types[0]);
8702 overlap2 = operand_type_and (i.types[1], operand_types[1]);
8703 gas_assert (t->operands != 3 || !check_register
8704 || is_cpu (t, CpuAPX_F));
8705 if (!operand_type_match (overlap0, i.types[0])
8706 || !operand_type_match (overlap1, i.types[j])
8707 || (t->operands == 3
8708 && !operand_type_match (overlap2, i.types[1]))
8709 || (check_register
8710 && !operand_type_register_match (i.types[0],
8711 operand_types[j],
8712 i.types[j],
8713 operand_types[0])))
8715 /* Does not match either direction. */
8716 specific_error = progress (i.error);
8717 continue;
8719 /* found_reverse_match holds which variant of D
8720 we've found. */
8721 if (!t->opcode_modifier.d)
8722 found_reverse_match = 0;
8723 else if (operand_types[0].bitfield.tbyte)
8725 if (t->opcode_modifier.operandconstraint != UGH)
8726 found_reverse_match = Opcode_FloatD;
8727 else
8728 found_reverse_match = ~0;
8729 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
8730 if ((t->extension_opcode & 4)
8731 && (intel_syntax || intel_mnemonic))
8732 found_reverse_match |= Opcode_FloatR;
8734 else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
8736 found_reverse_match = Opcode_VexW;
8737 goto check_operands_345;
8739 else if (t->opcode_space == SPACE_EVEXMAP4
8740 && t->operands >= 3)
8742 found_reverse_match = Opcode_D;
8743 goto check_operands_345;
8745 else if (t->opcode_modifier.commutative)
8746 found_reverse_match = ~0;
8747 else if (t->opcode_space != SPACE_BASE
8748 && (t->opcode_space != SPACE_EVEXMAP4
8749 /* MOVBE, originating from SPACE_0F38, also
8750 belongs here. */
8751 || t->mnem_off == MN_movbe)
8752 && (t->opcode_space != SPACE_0F
8753 /* MOV to/from CR/DR/TR, as an exception, follow
8754 the base opcode space encoding model. */
8755 || (t->base_opcode | 7) != 0x27))
8756 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
8757 ? Opcode_ExtD : Opcode_SIMD_IntD;
8758 else
8759 found_reverse_match = Opcode_D;
8761 else
8763 /* Found a forward 2 operand match here. */
8764 check_operands_345:
8765 switch (t->operands)
8767 case 5:
8768 overlap4 = operand_type_and (i.types[4], operand_types[4]);
8769 if (!operand_type_match (overlap4, i.types[4])
8770 || !operand_type_register_match (i.types[3],
8771 operand_types[3],
8772 i.types[4],
8773 operand_types[4]))
8775 specific_error = progress (i.error);
8776 continue;
8778 /* Fall through. */
8779 case 4:
8780 overlap3 = operand_type_and (i.types[3], operand_types[3]);
8781 if (!operand_type_match (overlap3, i.types[3])
8782 || ((check_register & 0xa) == 0xa
8783 && !operand_type_register_match (i.types[1],
8784 operand_types[1],
8785 i.types[3],
8786 operand_types[3]))
8787 || ((check_register & 0xc) == 0xc
8788 && !operand_type_register_match (i.types[2],
8789 operand_types[2],
8790 i.types[3],
8791 operand_types[3])))
8793 specific_error = progress (i.error);
8794 continue;
8796 /* Fall through. */
8797 case 3:
8798 overlap2 = operand_type_and (i.types[2], operand_types[2]);
8799 if (!operand_type_match (overlap2, i.types[2])
8800 || ((check_register & 5) == 5
8801 && !operand_type_register_match (i.types[0],
8802 operand_types[0],
8803 i.types[2],
8804 operand_types[2]))
8805 || ((check_register & 6) == 6
8806 && !operand_type_register_match (i.types[1],
8807 operand_types[1],
8808 i.types[2],
8809 operand_types[2])))
8811 specific_error = progress (i.error);
8812 continue;
8814 break;
8817 /* Found either forward/reverse 2, 3 or 4 operand match here:
8818 slip through to break. */
8821 /* Check if VEX/EVEX encoding requirements can be satisfied. */
8822 if (VEX_check_encoding (t))
8824 specific_error = progress (i.error);
8825 continue;
8829 /* Check if EGPR operands (r16-r31) are valid. */
8829 if (check_EgprOperands (t))
8831 specific_error = progress (i.error);
8832 continue;
8835 /* Check if vector operands are valid. */
8836 if (check_VecOperands (t))
8838 specific_error = progress (i.error);
8839 continue;
8842 /* Check if APX operands are valid. */
8843 if (check_APX_operands (t))
8845 specific_error = progress (i.error);
8846 continue;
8849 /* Check whether to use the shorter VEX encoding for certain insns where
8850 the EVEX encoding comes first in the table. This requires the respective
8851 AVX-* feature to be explicitly enabled.
8853 Most of the respective insns have just a single EVEX and a single VEX
8854 template. The one that's presently different is generated using the
8855 Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
8856 two of which may fall back to their two corresponding VEX forms. */
8857 j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
8858 if ((t == current_templates.start || j > 1)
8859 && t->opcode_modifier.disp8memshift
8860 && !t->opcode_modifier.vex
8861 && !need_evex_encoding (t)
8862 && t + j < current_templates.end
8863 && t[j].opcode_modifier.vex)
8865 i386_cpu_flags cpu;
8866 unsigned int memshift = i.memshift;
8868 i.memshift = 0;
8869 cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
8870 cpu_arch_isa_flags);
8871 if (!cpu_flags_all_zero (&cpu)
8872 && (!i.types[0].bitfield.disp8
8873 || !operand_type_check (i.types[0], disp)
8874 || i.op[0].disps->X_op != O_constant
8875 || fits_in_disp8 (i.op[0].disps->X_add_number)))
8877 specific_error = progress (internal_error);
8878 t += j - 1;
8879 continue;
8881 i.memshift = memshift;
8884 /* If we can optimize an NDD insn to a legacy insn, like
8885 add %r16, %r8, %r8 -> add %r16, %r8,
8886 add %r8, %r16, %r8 -> add %r16, %r8, then rematch the template.
8887 Note that the semantics are unchanged. */
8888 if (optimize
8889 && !pp.no_optimize
8890 && pp.encoding != encoding_evex
8891 && ((t + 1 < current_templates.end
8892 && !t[1].opcode_modifier.evex
8893 && t[1].opcode_space <= SPACE_0F38
8894 && t->opcode_modifier.vexvvvv == VexVVVV_DST)
8895 || t->mnem_off == MN_movbe)
8896 && (i.types[i.operands - 1].bitfield.dword
8897 || i.types[i.operands - 1].bitfield.qword))
8899 unsigned int match_dest_op = can_convert_NDD_to_legacy (t);
8901 if (match_dest_op != (unsigned int) ~0)
8903 size_match = true;
8904 /* Ensure that the next template has the same input operands
8905 as the originally matched template, judged by the first
8906 operand (AT&T order), to guard against new NDD templates
8907 being added in the wrong position. */
8908 overlap0 = operand_type_and (i.types[0],
8909 t[1].operand_types[0]);
8910 if (t->opcode_modifier.d)
8911 overlap1 = operand_type_and (i.types[0],
8912 t[1].operand_types[1]);
8913 if (!operand_type_match (overlap0, i.types[0])
8914 && (!t->opcode_modifier.d
8915 || !operand_type_match (overlap1, i.types[0])))
8916 size_match = false;
8918 if (size_match
8919 && (t[1].opcode_space <= SPACE_0F
8920 /* Some non-legacy-map0/1 insns can be shorter when
8921 legacy-encoded and when no REX prefix is required. */
8922 || (!check_EgprOperands (t + 1)
8923 && !check_Rex_required ()
8924 && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)))
8926 if (i.operands > 2 && match_dest_op == i.operands - 3)
8928 swap_2_operands (match_dest_op, i.operands - 2);
8930 /* CMOVcc is marked commutative, but then also needs its
8931 encoded condition inverted. */
8932 if ((t->base_opcode | 0xf) == 0x4f)
8933 i.invert_cond = true;
8936 --i.operands;
8937 --i.reg_operands;
8939 if (t->mnem_off == MN_movbe)
8941 gas_assert (t[1].mnem_off == MN_bswap);
8942 ++current_templates.end;
8945 specific_error = progress (internal_error);
8946 continue;
8952 /* We've found a match; break out of loop. */
8953 break;
8956 #undef progress
8958 if (t == current_templates.end)
8960 /* We found no match. */
8961 i.error = specific_error;
8962 return NULL;
8965 /* Don't emit diagnostics or install the template when one was already
8966 installed, i.e. when called from process_suffix(). */
8967 if (i.tm.mnem_off)
8968 return t;
8970 if (!quiet_warnings)
8972 if (!intel_syntax
8973 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
8974 as_warn (_("indirect %s without `*'"), insn_name (t));
8976 if (t->opcode_modifier.isprefix
8977 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
8979 /* Warn them that a data or address size prefix doesn't
8980 affect assembly of the next line of code. */
8981 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
8984 if (intel_syntax && mnem_suffix && !t->opcode_modifier.intelsuffix)
8986 static bool noticed;
8988 as_warn (_("mnemonic suffix used with `%s'"), insn_name (t));
8989 if (!noticed)
8991 noticed = true;
8992 as_warn (_(
8993 "NOTE: Such forms are deprecated and will be rejected by a future version of the assembler"));
8998 /* Copy the template we found. */
8999 install_template (t);
9001 if (addr_prefix_disp != -1)
9002 i.tm.operand_types[addr_prefix_disp]
9003 = operand_types[addr_prefix_disp];
9005 /* APX insns acting on byte operands are WIG, yet that can't be expressed
9006 in the templates (they're also covering word/dword/qword operands). */
9007 if (t->opcode_space == SPACE_EVEXMAP4 && !t->opcode_modifier.vexw &&
9008 i.types[i.operands - 1].bitfield.byte)
9010 gas_assert (t->opcode_modifier.w);
9011 i.tm.opcode_modifier.vexw = VEXWIG;
9014 switch (found_reverse_match)
9016 case 0:
9017 break;
9019 case Opcode_FloatR:
9020 case Opcode_FloatR | Opcode_FloatD:
9021 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
9022 found_reverse_match &= Opcode_FloatD;
9024 /* Fall through. */
9025 default:
9026 /* If we found a reverse match we must alter the opcode direction
9027 bit and clear/flip the regmem modifier one. found_reverse_match
9028 holds bits to change (different for int & float insns). */
9030 i.tm.base_opcode ^= found_reverse_match;
9032 if (i.tm.opcode_space == SPACE_EVEXMAP4)
9033 goto swap_first_2;
9035 /* Certain SIMD insns have their load forms specified in the opcode
9036 table, and hence we need to _set_ RegMem instead of clearing it.
9037 We need to avoid setting the bit though on insns like KMOVW. */
9038 i.tm.opcode_modifier.regmem
9039 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
9040 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
9041 && !i.tm.opcode_modifier.regmem;
9043 /* Fall through. */
9044 case ~0:
9045 i.tm.operand_types[0] = operand_types[i.operands - 1];
9046 i.tm.operand_types[i.operands - 1] = operand_types[0];
9047 break;
9049 case Opcode_VexW:
9050 /* Only the first two register operands need reversing, alongside
9051 flipping VEX.W. */
9052 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
9054 /* In 3-operand insns XOP.W changes which operand goes into XOP.vvvv. */
9055 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
9057 swap_first_2:
9058 j = i.tm.operand_types[0].bitfield.imm8;
9059 i.tm.operand_types[j] = operand_types[j + 1];
9060 i.tm.operand_types[j + 1] = operand_types[j];
9061 break;
9064 return t;
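/* Verify the %es segment requirement of string insns: e.g. the
   destination of "movsw %ds:(%esi), %es:(%edi)" must use %es and
   cannot be overridden.  */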
9067 static int
9068 check_string (void)
9070 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
9071 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
9073 if (i.seg[op] != NULL && i.seg[op] != reg_es)
9075 as_bad (_("`%s' operand %u must use `%ses' segment"),
9076 insn_name (&i.tm),
9077 intel_syntax ? i.tm.operands - es_op : es_op + 1,
9078 register_prefix);
9079 return 0;
9082 /* There's only ever one segment override allowed per instruction.
9083 This instruction possibly has a legal segment override on the
9084 second operand, so copy the segment to where non-string
9085 instructions store it, allowing common code. */
9086 i.seg[op] = i.seg[1];
9088 return 1;
9091 static int
9092 process_suffix (const insn_template *t)
9094 bool is_movx = false;
9096 /* If matched instruction specifies an explicit instruction mnemonic
9097 suffix, use it. */
9098 if (i.tm.opcode_modifier.size == SIZE16)
9099 i.suffix = WORD_MNEM_SUFFIX;
9100 else if (i.tm.opcode_modifier.size == SIZE32)
9101 i.suffix = LONG_MNEM_SUFFIX;
9102 else if (i.tm.opcode_modifier.size == SIZE64)
9103 i.suffix = QWORD_MNEM_SUFFIX;
9104 else if (i.reg_operands
9105 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
9106 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
9108 unsigned int numop = i.operands;
9110 /* MOVSX/MOVZX */
9111 is_movx = (i.tm.opcode_space == SPACE_0F
9112 && (i.tm.base_opcode | 8) == 0xbe)
9113 || (i.tm.opcode_space == SPACE_BASE
9114 && i.tm.base_opcode == 0x63
9115 && is_cpu (&i.tm, Cpu64));
9117 /* movsx/movzx want only their source operand considered here, for the
9118 ambiguity checking below. The suffix will be replaced afterwards
9119 to represent the destination (register). */
9120 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
9121 --i.operands;
9123 /* crc32 needs REX.W set regardless of suffix / source operand size. */
9124 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
9125 i.rex |= REX_W;
9127 /* If there's no instruction mnemonic suffix, we try to invent one
9128 based on GPR operands. */
9129 if (!i.suffix)
9131 /* We take i.suffix from the last register operand specified.
9132 The destination register type is more significant than the
9133 source register type. crc32 in SSE4.2 prefers the source
9134 register type. */
9135 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
9137 while (op--)
9138 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
9139 || i.tm.operand_types[op].bitfield.instance == Accum)
9141 if (i.types[op].bitfield.class != Reg)
9142 continue;
9143 if (i.types[op].bitfield.byte)
9144 i.suffix = BYTE_MNEM_SUFFIX;
9145 else if (i.types[op].bitfield.word)
9146 i.suffix = WORD_MNEM_SUFFIX;
9147 else if (i.types[op].bitfield.dword)
9148 i.suffix = LONG_MNEM_SUFFIX;
9149 else if (i.types[op].bitfield.qword)
9150 i.suffix = QWORD_MNEM_SUFFIX;
9151 else
9152 continue;
9153 break;
9156 /* As an exception, movsx/movzx silently default to a byte source
9157 in AT&T mode. */
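/* I.e. plain "movzx (%rax), %eax" is assembled as if
   "movzbl (%rax), %eax" had been written.  */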
9158 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
9159 i.suffix = BYTE_MNEM_SUFFIX;
9161 else if (i.suffix == BYTE_MNEM_SUFFIX)
9163 if (!check_byte_reg ())
9164 return 0;
9166 else if (i.suffix == LONG_MNEM_SUFFIX)
9168 if (!check_long_reg ())
9169 return 0;
9171 else if (i.suffix == QWORD_MNEM_SUFFIX)
9173 if (!check_qword_reg ())
9174 return 0;
9176 else if (i.suffix == WORD_MNEM_SUFFIX)
9178 if (!check_word_reg ())
9179 return 0;
9181 else if (intel_syntax
9182 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
9183 /* Do nothing if the instruction is going to ignore the prefix. */
9185 else
9186 abort ();
9188 /* Undo the movsx/movzx change done above. */
9189 i.operands = numop;
9191 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
9192 && !i.suffix)
9194 i.suffix = stackop_size;
9195 if (stackop_size == LONG_MNEM_SUFFIX)
9197 /* stackop_size is set to LONG_MNEM_SUFFIX for the
9198 .code16gcc directive to support 16-bit mode with
9199 32-bit address. For IRET without a suffix, generate
9200 16-bit IRET (opcode 0xcf) to return from an interrupt
9201 handler. */
9202 if (i.tm.base_opcode == 0xcf)
9204 i.suffix = WORD_MNEM_SUFFIX;
9205 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
9207 /* Warn about changed behavior for segment register push/pop. */
9208 else if ((i.tm.base_opcode | 1) == 0x07)
9209 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
9210 insn_name (&i.tm));
9213 else if (!i.suffix
9214 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
9215 || i.tm.opcode_modifier.jump == JUMP_BYTE
9216 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
9217 || (i.tm.opcode_space == SPACE_0F
9218 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
9219 && i.tm.extension_opcode <= 3)))
9221 switch (flag_code)
9223 case CODE_64BIT:
9224 if (!i.tm.opcode_modifier.no_qsuf)
9226 if (i.tm.opcode_modifier.jump == JUMP_BYTE
9227 || i.tm.opcode_modifier.no_lsuf)
9228 i.suffix = QWORD_MNEM_SUFFIX;
9229 break;
9231 /* Fall through. */
9232 case CODE_32BIT:
9233 if (!i.tm.opcode_modifier.no_lsuf)
9234 i.suffix = LONG_MNEM_SUFFIX;
9235 break;
9236 case CODE_16BIT:
9237 if (!i.tm.opcode_modifier.no_wsuf)
9238 i.suffix = WORD_MNEM_SUFFIX;
9239 break;
9243 if (!i.suffix
9244 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9245 /* Also cover lret/retf/iret in 64-bit mode. */
9246 || (flag_code == CODE_64BIT
9247 && !i.tm.opcode_modifier.no_lsuf
9248 && !i.tm.opcode_modifier.no_qsuf))
9249 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9250 /* Explicit sizing prefixes are assumed to disambiguate insns. */
9251 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
9252 /* Accept FLDENV et al without suffix. */
9253 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
9255 unsigned int suffixes, evex = 0;
9257 suffixes = !i.tm.opcode_modifier.no_bsuf;
9258 if (!i.tm.opcode_modifier.no_wsuf)
9259 suffixes |= 1 << 1;
9260 if (!i.tm.opcode_modifier.no_lsuf)
9261 suffixes |= 1 << 2;
9262 if (!i.tm.opcode_modifier.no_ssuf)
9263 suffixes |= 1 << 4;
9264 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
9265 suffixes |= 1 << 5;
9267 /* Operand size may be ambiguous only across multiple templates. Avoid
9268 the extra effort though if we already know that multiple suffixes /
9269 operand sizes are allowed. Also limit this to non-SIMD operand sizes
9270 (i.e. ones expressible via suffixes) for now.
9271 There's one special case though that needs excluding: Insns taking
9272 Disp<N> operands also match templates permitting BaseIndex. JMP in
9273 particular would thus wrongly trigger the check further down. Cover
9274 JUMP_DWORD insns here as well, just in case. */
9275 if (i.tm.opcode_modifier.jump != JUMP
9276 && i.tm.opcode_modifier.jump != JUMP_DWORD)
9277 while (!(suffixes & (suffixes - 1)))
9279 /* Sadly check_VecOperands(), running ahead of install_template(),
9280 may update i.memshift. Save and restore the value here. */
9281 unsigned int memshift = i.memshift;
9283 current_templates.start = t + 1;
9284 t = match_template (0);
9285 i.memshift = memshift;
9286 if (t == NULL)
9287 break;
9288 if (!t->opcode_modifier.no_bsuf)
9289 suffixes |= 1 << 0;
9290 if (!t->opcode_modifier.no_wsuf)
9291 suffixes |= 1 << 1;
9292 if (!t->opcode_modifier.no_lsuf)
9293 suffixes |= 1 << 2;
9294 if (!t->opcode_modifier.no_ssuf)
9295 suffixes |= 1 << 4;
9296 if (flag_code == CODE_64BIT && !t->opcode_modifier.no_qsuf)
9297 suffixes |= 1 << 5;
9300 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
9301 also suitable for AT&T syntax mode, it was requested that this be
9302 restricted to just Intel syntax. */
9303 if (intel_syntax && is_any_vex_encoding (&i.tm)
9304 && !i.broadcast.type && !i.broadcast.bytes)
9306 unsigned int op;
9308 for (op = 0; op < i.tm.operands; ++op)
9310 if (vector_size < VSZ512)
9312 i.tm.operand_types[op].bitfield.zmmword = 0;
9313 if (vector_size < VSZ256)
9315 i.tm.operand_types[op].bitfield.ymmword = 0;
9316 if (i.tm.operand_types[op].bitfield.xmmword
9317 && i.tm.opcode_modifier.evex == EVEXDYN)
9318 i.tm.opcode_modifier.evex = EVEX128;
9320 else if (i.tm.operand_types[op].bitfield.ymmword
9321 && !i.tm.operand_types[op].bitfield.xmmword
9322 && i.tm.opcode_modifier.evex == EVEXDYN)
9323 i.tm.opcode_modifier.evex = EVEX256;
9325 else if (i.tm.opcode_modifier.evex
9326 && !cpu_arch_flags.bitfield.cpuavx512vl)
9328 if (i.tm.operand_types[op].bitfield.ymmword)
9329 i.tm.operand_types[op].bitfield.xmmword = 0;
9330 if (i.tm.operand_types[op].bitfield.zmmword)
9331 i.tm.operand_types[op].bitfield.ymmword = 0;
9332 if (i.tm.opcode_modifier.evex == EVEXDYN)
9333 i.tm.opcode_modifier.evex = EVEX512;
9336 if (i.tm.operand_types[op].bitfield.xmmword
9337 + i.tm.operand_types[op].bitfield.ymmword
9338 + i.tm.operand_types[op].bitfield.zmmword < 2)
9339 continue;
9341 /* Any properly sized operand disambiguates the insn. */
9342 if (i.types[op].bitfield.xmmword
9343 || i.types[op].bitfield.ymmword
9344 || i.types[op].bitfield.zmmword)
9346 suffixes &= ~(7 << 6);
9347 evex = 0;
9348 break;
9351 if ((i.flags[op] & Operand_Mem)
9352 && i.tm.operand_types[op].bitfield.unspecified)
9354 if (i.tm.operand_types[op].bitfield.xmmword)
9355 suffixes |= 1 << 6;
9356 if (i.tm.operand_types[op].bitfield.ymmword)
9357 suffixes |= 1 << 7;
9358 if (i.tm.operand_types[op].bitfield.zmmword)
9359 suffixes |= 1 << 8;
9360 if (i.tm.opcode_modifier.evex)
9361 evex = EVEX512;
9366 /* Are multiple suffixes / operand sizes allowed? */
9367 if (suffixes & (suffixes - 1))
9369 if (intel_syntax
9370 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9371 || operand_check == check_error))
9373 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
9374 return 0;
9376 if (operand_check == check_error)
9378 as_bad (_("no instruction mnemonic suffix given and "
9379 "no register operands; can't size `%s'"), insn_name (&i.tm));
9380 return 0;
9382 if (operand_check == check_warning)
9383 as_warn (_("%s; using default for `%s'"),
9384 intel_syntax
9385 ? _("ambiguous operand size")
9386 : _("no instruction mnemonic suffix given and "
9387 "no register operands"),
9388 insn_name (&i.tm));
9390 if (i.tm.opcode_modifier.floatmf)
9391 i.suffix = SHORT_MNEM_SUFFIX;
9392 else if (is_movx)
9393 /* handled below */;
9394 else if (evex)
9395 i.tm.opcode_modifier.evex = evex;
9396 else if (flag_code == CODE_16BIT)
9397 i.suffix = WORD_MNEM_SUFFIX;
9398 else if (!i.tm.opcode_modifier.no_lsuf)
9399 i.suffix = LONG_MNEM_SUFFIX;
9400 else
9401 i.suffix = QWORD_MNEM_SUFFIX;
9405 if (is_movx)
9407 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
9408 In AT&T syntax, if there is no suffix (warned about above), the default
9409 will be byte extension. */
9410 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
9411 i.tm.base_opcode |= 1;
9413 /* For further processing, the suffix should represent the destination
9414 (register). This is already the case when one was used with
9415 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
9416 no suffix to begin with. */
9417 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
9419 if (i.types[1].bitfield.word)
9420 i.suffix = WORD_MNEM_SUFFIX;
9421 else if (i.types[1].bitfield.qword)
9422 i.suffix = QWORD_MNEM_SUFFIX;
9423 else
9424 i.suffix = LONG_MNEM_SUFFIX;
9426 i.tm.opcode_modifier.w = 0;
9430 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
9431 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
9432 != (i.tm.operand_types[1].bitfield.class == Reg);
9434 /* Change the opcode based on the operand size given by i.suffix. */
9435 switch (i.suffix)
9437 /* Size floating point instruction. */
9438 case LONG_MNEM_SUFFIX:
9439 if (i.tm.opcode_modifier.floatmf)
9441 i.tm.base_opcode ^= 4;
9442 break;
9444 /* fall through */
9445 case WORD_MNEM_SUFFIX:
9446 case QWORD_MNEM_SUFFIX:
9447 /* It's not a byte, select word/dword operation. */
9448 if (i.tm.opcode_modifier.w)
9450 if (i.short_form)
9451 i.tm.base_opcode |= 8;
9452 else
9453 i.tm.base_opcode |= 1;
9456 /* Set mode64 for an operand. */
9457 if (i.suffix == QWORD_MNEM_SUFFIX)
9459 if (flag_code == CODE_64BIT
9460 && !i.tm.opcode_modifier.norex64
9461 && !i.tm.opcode_modifier.vexw
9462 /* Special case for xchg %rax,%rax: it is a NOP and doesn't
9463 need REX.W. */
9464 && ! (i.operands == 2
9465 && i.tm.base_opcode == 0x90
9466 && i.tm.opcode_space == SPACE_BASE
9467 && i.types[0].bitfield.instance == Accum
9468 && i.types[1].bitfield.instance == Accum))
9469 i.rex |= REX_W;
9471 break;
9474 /* fall through */
9475 case SHORT_MNEM_SUFFIX:
9476 /* Now select between word & dword operations via the operand
9477 size prefix, except for instructions that will ignore this
9478 prefix anyway. */
9479 if (i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9480 && !i.tm.opcode_modifier.floatmf
9481 && (!is_any_vex_encoding (&i.tm)
9482 || i.tm.opcode_space == SPACE_EVEXMAP4)
9483 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
9484 || (flag_code == CODE_64BIT
9485 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
9487 unsigned int prefix = DATA_PREFIX_OPCODE;
9489 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
9490 prefix = ADDR_PREFIX_OPCODE;
9492 /* For legacy APX instructions promoted to EVEX, the 0x66 data
9493 prefix needs to be encoded within the EVEX prefix instead. */
9494 if (i.tm.opcode_space == SPACE_EVEXMAP4)
9496 gas_assert (!i.tm.opcode_modifier.opcodeprefix);
9497 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
9499 else if (!add_prefix (prefix))
9500 return 0;
9503 break;
9505 case 0:
9506 /* Select word/dword/qword operation with explicit data sizing prefix
9507 when there are no suitable register operands. */
9508 if (i.tm.opcode_modifier.w
9509 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
9510 && (!i.reg_operands
9511 || (i.reg_operands == 1
9512 /* ShiftCount */
9513 && (i.tm.operand_types[0].bitfield.instance == RegC
9514 /* InOutPortReg */
9515 || i.tm.operand_types[0].bitfield.instance == RegD
9516 || i.tm.operand_types[1].bitfield.instance == RegD
9517 || i.tm.mnem_off == MN_crc32))))
9518 i.tm.base_opcode |= 1;
9519 break;
9522 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
9524 gas_assert (!i.suffix);
9525 gas_assert (i.reg_operands);
9527 if (i.tm.operand_types[0].bitfield.instance == Accum
9528 || i.operands == 1)
9530 /* The address size override prefix changes the size of the
9531 first operand. */
9532 if (flag_code == CODE_64BIT
9533 && i.op[0].regs->reg_type.bitfield.word)
9535 as_bad (_("16-bit addressing unavailable for `%s'"),
9536 insn_name (&i.tm));
9537 return 0;
9540 if ((flag_code == CODE_32BIT
9541 ? i.op[0].regs->reg_type.bitfield.word
9542 : i.op[0].regs->reg_type.bitfield.dword)
9543 && !add_prefix (ADDR_PREFIX_OPCODE))
9544 return 0;
9546 else
9548 /* Check invalid register operand when the address size override
9549 prefix changes the size of register operands. */
9550 unsigned int op;
9551 enum { need_word, need_dword, need_qword } need;
9553 /* Check the register operand for the address size prefix if
9554 the memory operand has no real registers, like a bare symbol, a
9555 plain DISP, or a bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
9556 if (i.mem_operands == 1
9557 && i.reg_operands == 1
9558 && i.operands == 2
9559 && i.types[1].bitfield.class == Reg
9560 && (flag_code == CODE_32BIT
9561 ? i.op[1].regs->reg_type.bitfield.word
9562 : i.op[1].regs->reg_type.bitfield.dword)
9563 && ((i.base_reg == NULL && i.index_reg == NULL)
9564 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9565 || (x86_elf_abi == X86_64_X32_ABI
9566 && i.base_reg
9567 && i.base_reg->reg_num == RegIP
9568 && i.base_reg->reg_type.bitfield.qword))
9569 #else
9570 || 0)
9571 #endif
9572 && !add_prefix (ADDR_PREFIX_OPCODE))
9573 return 0;
9575 if (flag_code == CODE_32BIT)
9576 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
9577 else if (i.prefix[ADDR_PREFIX])
9578 need = need_dword;
9579 else
9580 need = flag_code == CODE_64BIT ? need_qword : need_word;
9582 for (op = 0; op < i.operands; op++)
9584 if (i.types[op].bitfield.class != Reg)
9585 continue;
9587 switch (need)
9589 case need_word:
9590 if (i.op[op].regs->reg_type.bitfield.word)
9591 continue;
9592 break;
9593 case need_dword:
9594 if (i.op[op].regs->reg_type.bitfield.dword)
9595 continue;
9596 break;
9597 case need_qword:
9598 if (i.op[op].regs->reg_type.bitfield.qword)
9599 continue;
9600 break;
9603 as_bad (_("invalid register operand size for `%s'"),
9604 insn_name (&i.tm));
9605 return 0;
9610 return 1;
9613 static int
9614 check_byte_reg (void)
9616 int op;
9618 for (op = i.operands; --op >= 0;)
9620 /* Skip non-register operands. */
9621 if (i.types[op].bitfield.class != Reg)
9622 continue;
9624 /* If this is an eight bit register, it's OK. */
9625 if (i.types[op].bitfield.byte)
9627 if (i.tm.opcode_modifier.checkoperandsize)
9628 break;
9629 continue;
9632 /* I/O port address operands are OK too. */
9633 if (i.tm.operand_types[op].bitfield.instance == RegD
9634 && i.tm.operand_types[op].bitfield.word)
9635 continue;
9637 /* crc32 only wants its source operand checked here. */
9638 if (i.tm.mnem_off == MN_crc32 && op != 0)
9639 continue;
9641 /* Any other register is bad. */
9642 as_bad (_("`%s%s' not allowed with `%s%c'"),
9643 register_prefix, i.op[op].regs->reg_name,
9644 insn_name (&i.tm), i.suffix);
9645 return 0;
9647 return 1;
9650 static int
9651 check_long_reg (void)
9653 int op;
9655 for (op = i.operands; --op >= 0;)
9656 /* Skip non-register operands. */
9657 if (i.types[op].bitfield.class != Reg)
9658 continue;
9659 /* Reject eight bit registers, except where the template requires
9660 them. (e.g. movzb) */
9661 else if (i.types[op].bitfield.byte
9662 && (i.tm.operand_types[op].bitfield.word
9663 || i.tm.operand_types[op].bitfield.dword
9664 || i.tm.operand_types[op].bitfield.qword))
9666 as_bad (_("`%s%s' not allowed with `%s%c'"),
9667 register_prefix,
9668 i.op[op].regs->reg_name,
9669 insn_name (&i.tm),
9670 i.suffix);
9671 return 0;
9673 /* Error if the e prefix on a general reg is missing, or if the r
9674 prefix on a general reg is present. */
9675 else if ((i.types[op].bitfield.word
9676 || i.types[op].bitfield.qword)
9677 && i.tm.operand_types[op].bitfield.dword)
9679 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9680 register_prefix, i.op[op].regs->reg_name,
9681 i.suffix);
9682 return 0;
9684 else if (i.tm.opcode_modifier.checkoperandsize)
9685 break;
9687 return 1;
9690 static int
9691 check_qword_reg (void)
9693 int op;
9695 for (op = i.operands; --op >= 0; )
9696 /* Skip non-register operands. */
9697 if (i.types[op].bitfield.class != Reg)
9698 continue;
9699 /* Reject eight bit registers, except where the template requires
9700 them. (e.g. movzb) */
9701 else if (i.types[op].bitfield.byte
9702 && (i.tm.operand_types[op].bitfield.word
9703 || i.tm.operand_types[op].bitfield.dword
9704 || i.tm.operand_types[op].bitfield.qword))
9706 as_bad (_("`%s%s' not allowed with `%s%c'"),
9707 register_prefix,
9708 i.op[op].regs->reg_name,
9709 insn_name (&i.tm),
9710 i.suffix);
9711 return 0;
9713 /* Error if the r prefix on a general reg is missing. */
9714 else if ((i.types[op].bitfield.word
9715 || i.types[op].bitfield.dword)
9716 && i.tm.operand_types[op].bitfield.qword)
9718 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9719 register_prefix, i.op[op].regs->reg_name, i.suffix);
9720 return 0;
9722 else if (i.tm.opcode_modifier.checkoperandsize)
9723 break;
9725 return 1;
9728 static int
9729 check_word_reg (void)
9731 int op;
9732 for (op = i.operands; --op >= 0;)
9733 /* Skip non-register operands. */
9734 if (i.types[op].bitfield.class != Reg)
9735 continue;
9736 /* Reject eight bit registers, except where the template requires
9737 them. (e.g. movzb) */
9738 else if (i.types[op].bitfield.byte
9739 && (i.tm.operand_types[op].bitfield.word
9740 || i.tm.operand_types[op].bitfield.dword
9741 || i.tm.operand_types[op].bitfield.qword))
9743 as_bad (_("`%s%s' not allowed with `%s%c'"),
9744 register_prefix,
9745 i.op[op].regs->reg_name,
9746 insn_name (&i.tm),
9747 i.suffix);
9748 return 0;
9750 /* Error if the e or r prefix on a general reg is present. */
9751 else if ((i.types[op].bitfield.dword
9752 || i.types[op].bitfield.qword)
9753 && i.tm.operand_types[op].bitfield.word)
9755 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9756 register_prefix, i.op[op].regs->reg_name,
9757 i.suffix);
9758 return 0;
9760 else if (i.tm.opcode_modifier.checkoperandsize)
9761 break;
9763 return 1;
9766 static int
9767 update_imm (unsigned int j)
9769 i386_operand_type overlap = i.types[j];
9771 if (i.tm.operand_types[j].bitfield.imm8
9772 && i.tm.operand_types[j].bitfield.imm8s
9773 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
9775 /* This combination is used on 8-bit immediates where e.g. $~0 is
9776 desirable to permit. We're past operand type matching, so simply
9777 put things back in the shape they were before introducing the
9778 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
9779 overlap.bitfield.imm8s = 0;
9782 if (overlap.bitfield.imm8
9783 + overlap.bitfield.imm8s
9784 + overlap.bitfield.imm16
9785 + overlap.bitfield.imm32
9786 + overlap.bitfield.imm32s
9787 + overlap.bitfield.imm64 > 1)
9789 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
9790 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
9791 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
9792 static const i386_operand_type imm16_32 = { .bitfield =
9793 { .imm16 = 1, .imm32 = 1 }
9795 static const i386_operand_type imm16_32s = { .bitfield =
9796 { .imm16 = 1, .imm32s = 1 }
9798 static const i386_operand_type imm16_32_32s = { .bitfield =
9799 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
9802 if (i.suffix)
9804 i386_operand_type temp;
9806 operand_type_set (&temp, 0);
9807 if (i.suffix == BYTE_MNEM_SUFFIX)
9809 temp.bitfield.imm8 = overlap.bitfield.imm8;
9810 temp.bitfield.imm8s = overlap.bitfield.imm8s;
9812 else if (i.suffix == WORD_MNEM_SUFFIX)
9813 temp.bitfield.imm16 = overlap.bitfield.imm16;
9814 else if (i.suffix == QWORD_MNEM_SUFFIX)
9816 temp.bitfield.imm64 = overlap.bitfield.imm64;
9817 temp.bitfield.imm32s = overlap.bitfield.imm32s;
9819 else
9820 temp.bitfield.imm32 = overlap.bitfield.imm32;
9821 overlap = temp;
9823 else if (operand_type_equal (&overlap, &imm16_32_32s)
9824 || operand_type_equal (&overlap, &imm16_32)
9825 || operand_type_equal (&overlap, &imm16_32s))
9827 if ((flag_code == CODE_16BIT)
9828 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
9829 overlap = imm16;
9830 else
9831 overlap = imm32s;
9833 else if (i.prefix[REX_PREFIX] & REX_W)
9834 overlap = operand_type_and (overlap, imm32s);
9835 else if (i.prefix[DATA_PREFIX])
9836 overlap = operand_type_and (overlap,
9837 flag_code != CODE_16BIT ? imm16 : imm32);
9838 if (overlap.bitfield.imm8
9839 + overlap.bitfield.imm8s
9840 + overlap.bitfield.imm16
9841 + overlap.bitfield.imm32
9842 + overlap.bitfield.imm32s
9843 + overlap.bitfield.imm64 != 1)
9845 as_bad (_("no instruction mnemonic suffix given; "
9846 "can't determine immediate size"));
9847 return 0;
9850 i.types[j] = overlap;
9852 return 1;
9855 static int
9856 finalize_imm (void)
9858 unsigned int j, n;
9860 /* Update the first 2 immediate operands. */
9861 n = i.operands > 2 ? 2 : i.operands;
9862 if (n)
9864 for (j = 0; j < n; j++)
9865 if (update_imm (j) == 0)
9866 return 0;
9868 /* The 3rd operand can't be an immediate operand. */
9869 gas_assert (operand_type_check (i.types[2], imm) == 0);
9872 return 1;
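/* Record the high-register flags (RegRex, RegVRex, RegRex2) of R as
   REX_BIT in i.rex / i.vrex / i.rex2 respectively.  With DO_SSE2AVX an
   explicit REX bit may instead be folded into the VEX register
   specifier.  */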
9875 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
9876 bool do_sse2avx)
9878 if (r->reg_flags & RegRex)
9880 if (i.rex & rex_bit)
9881 as_bad (_("same type of prefix used twice"));
9882 i.rex |= rex_bit;
9884 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
9886 gas_assert (i.vex.register_specifier == r);
9887 i.vex.register_specifier += 8;
9890 if (r->reg_flags & RegVRex)
9891 i.vrex |= rex_bit;
9893 if (r->reg_flags & RegRex2)
9894 i.rex2 |= rex_bit;
9897 static INLINE void
9898 set_rex_rex2 (const reg_entry *r, unsigned int rex_bit)
9900 if ((r->reg_flags & RegRex) != 0)
9901 i.rex |= rex_bit;
9902 if ((r->reg_flags & RegRex2) != 0)
9903 i.rex2 |= rex_bit;
9906 static int
9907 process_operands (void)
9909 /* Default segment register this instruction will use for memory
9910 accesses. NULL means unknown. This is only for optimizing out
9911 unnecessary segment overrides. */
9912 const reg_entry *default_seg = NULL;
9914 for (unsigned int j = 0; j < i.operands; j++)
9915 if (i.types[j].bitfield.instance != InstanceNone)
9916 i.reg_operands--;
9918 if (i.tm.opcode_modifier.sse2avx)
9920 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
9921 need converting. */
9922 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
9923 i.prefix[REX_PREFIX] = 0;
9924 pp.rex_encoding = 0;
9925 pp.rex2_encoding = 0;
9927 /* ImmExt should be processed after SSE2AVX. */
9928 else if (i.tm.opcode_modifier.immext)
9929 process_immext ();
9931 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
9932 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
9933 new destination operand here, while converting the source one to register
9934 number 0. */
9935 if (i.tm.mnem_off == MN_tilezero)
9937 i.op[1].regs = i.op[0].regs;
9938 i.op[0].regs -= i.op[0].regs->reg_num;
9939 i.types[1] = i.types[0];
9940 i.tm.operand_types[1] = i.tm.operand_types[0];
9941 i.flags[1] = i.flags[0];
9942 i.operands++;
9943 i.reg_operands++;
9944 i.tm.operands++;
9947 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
9949 static const i386_operand_type regxmm = {
9950 .bitfield = { .class = RegSIMD, .xmmword = 1 }
9952 unsigned int dupl = i.operands;
9953 unsigned int dest = dupl - 1;
9954 unsigned int j;
9956 /* The destination must be an xmm register. */
9957 gas_assert (i.reg_operands
9958 && MAX_OPERANDS > dupl
9959 && operand_type_equal (&i.types[dest], &regxmm));
9961 if (i.tm.operand_types[0].bitfield.instance == Accum
9962 && i.tm.operand_types[0].bitfield.xmmword)
9964 /* Keep xmm0 for instructions with VEX prefix and 3
9965 sources. */
9966 i.tm.operand_types[0].bitfield.instance = InstanceNone;
9967 i.tm.operand_types[0].bitfield.class = RegSIMD;
9968 i.reg_operands++;
9969 goto duplicate;
9972 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
9974 gas_assert ((MAX_OPERANDS - 1) > dupl);
9976 /* Add the implicit xmm0 for instructions with VEX prefix
9977 and 3 sources. */
9978 for (j = i.operands; j > 0; j--)
9980 i.op[j] = i.op[j - 1];
9981 i.types[j] = i.types[j - 1];
9982 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
9983 i.flags[j] = i.flags[j - 1];
9985 i.op[0].regs
9986 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
9987 i.types[0] = regxmm;
9988 i.tm.operand_types[0] = regxmm;
9990 i.operands += 2;
9991 i.reg_operands += 2;
9992 i.tm.operands += 2;
9994 dupl++;
9995 dest++;
9996 i.op[dupl] = i.op[dest];
9997 i.types[dupl] = i.types[dest];
9998 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
9999 i.flags[dupl] = i.flags[dest];
10001 else
10003 duplicate:
10004 i.operands++;
10005 i.reg_operands++;
10006 i.tm.operands++;
10008 i.op[dupl] = i.op[dest];
10009 i.types[dupl] = i.types[dest];
10010 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
10011 i.flags[dupl] = i.flags[dest];
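/* A minimal sketch of the effect: with -msse2avx, `addps %xmm1, %xmm2' is
emitted as if `vaddps %xmm1, %xmm2, %xmm2' had been written; the destination
is duplicated so it can also serve as the extra VEX-encoded source. */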
10014 if (i.tm.opcode_modifier.immext)
10015 process_immext ();
10017 else if (i.tm.operand_types[0].bitfield.instance == Accum
10018 && i.tm.opcode_modifier.modrm)
10020 unsigned int j;
10022 for (j = 1; j < i.operands; j++)
10024 i.op[j - 1] = i.op[j];
10025 i.types[j - 1] = i.types[j];
10027 /* We need to adjust fields in i.tm since they are used by
10028 build_modrm_byte. */
10029 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
10031 i.flags[j - 1] = i.flags[j];
10034 /* No adjustment to i.reg_operands: This was already done at the top
10035 of the function. */
10036 i.operands--;
10037 i.tm.operands--;
10039 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
10041 unsigned int regnum, first_reg_in_group, last_reg_in_group;
10043 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
10044 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
10045 regnum = register_number (i.op[1].regs);
10046 first_reg_in_group = regnum & ~3;
10047 last_reg_in_group = first_reg_in_group + 3;
10048 if (regnum != first_reg_in_group)
10049 as_warn (_("source register `%s%s' implicitly denotes"
10050 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
10051 register_prefix, i.op[1].regs->reg_name,
10052 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
10053 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
10054 insn_name (&i.tm));
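/* For instance (illustrative): `v4fmaddps (%rax), %zmm5, %zmm1' triggers the
warning above, since %zmm5 implicitly denotes the %zmm4..%zmm7 source group. */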
10056 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
10058 /* The imul $imm, %reg instruction is converted into
10059 imul $imm, %reg, %reg, and the clr %reg instruction
10060 is converted into xor %reg, %reg. */
10062 unsigned int first_reg_op;
10064 if (operand_type_check (i.types[0], reg))
10065 first_reg_op = 0;
10066 else
10067 first_reg_op = 1;
10068 /* Pretend we saw the extra register operand. */
10069 gas_assert (i.reg_operands == 1
10070 && i.op[first_reg_op + 1].regs == 0);
10071 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
10072 i.types[first_reg_op + 1] = i.types[first_reg_op];
10073 i.operands++;
10074 i.reg_operands++;
10076 /* For IMULZU switch around the constraint. */
10077 if (i.tm.mnem_off == MN_imulzu)
10078 i.tm.opcode_modifier.operandconstraint = ZERO_UPPER;
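/* Illustrative encoding: `imul $3, %ecx' becomes the three-operand form
`imul $3, %ecx, %ecx', i.e. opcode 0x6b with ModR/M reg = rm = %ecx
(ModR/M byte 0xc9) followed by an imm8 of 3. */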
10081 if (i.tm.opcode_modifier.modrm)
10083 /* The opcode is completed (modulo i.tm.extension_opcode which
10084 must be put into the modrm byte). Now, we make the modrm and
10085 index base bytes based on all the info we've collected. */
10087 default_seg = build_modrm_byte ();
10089 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
10091 /* Warn about some common errors, but press on regardless. */
10092 if (i.operands == 2)
10094 /* Reversed arguments on faddp or fmulp. */
10095 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
10096 register_prefix, i.op[!intel_syntax].regs->reg_name,
10097 register_prefix, i.op[intel_syntax].regs->reg_name);
10099 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
10101 /* Extraneous `l' suffix on fp insn. */
10102 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
10103 register_prefix, i.op[0].regs->reg_name);
10107 else if (i.types[0].bitfield.class == SReg && !dot_insn ())
10109 if (flag_code != CODE_64BIT
10110 ? i.tm.base_opcode == POP_SEG_SHORT
10111 && i.op[0].regs->reg_num == 1
10112 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
10113 && i.op[0].regs->reg_num < 4)
10115 as_bad (_("you can't `%s %s%s'"),
10116 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
10117 return 0;
10119 if (i.op[0].regs->reg_num > 3
10120 && i.tm.opcode_space == SPACE_BASE )
10122 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
10123 i.tm.opcode_space = SPACE_0F;
10125 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
10127 else if (i.tm.opcode_space == SPACE_BASE
10128 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
10130 default_seg = reg_ds;
10132 else if (i.tm.opcode_modifier.isstring)
10134 /* For the string instructions that allow a segment override
10135 on one of their operands, the default segment is ds. */
10136 default_seg = reg_ds;
10138 else if (i.short_form)
10140 /* The register operand is in the 1st or 2nd non-immediate operand. */
10141 const reg_entry *r = i.op[i.imm_operands].regs;
10143 if (!dot_insn ()
10144 && r->reg_type.bitfield.instance == Accum
10145 && i.op[i.imm_operands + 1].regs)
10146 r = i.op[i.imm_operands + 1].regs;
10147 /* Register goes in low 3 bits of opcode. */
10148 i.tm.base_opcode |= r->reg_num;
10149 set_rex_vrex (r, REX_B, false);
10151 if (dot_insn () && i.reg_operands == 2)
10153 gas_assert (is_any_vex_encoding (&i.tm)
10154 || pp.encoding != encoding_default);
10155 i.vex.register_specifier = i.op[i.operands - 1].regs;
10158 else if (i.reg_operands == 1
10159 && !i.flags[i.operands - 1]
10160 && i.tm.operand_types[i.operands - 1].bitfield.instance
10161 == InstanceNone)
10163 gas_assert (is_any_vex_encoding (&i.tm)
10164 || pp.encoding != encoding_default);
10165 i.vex.register_specifier = i.op[i.operands - 1].regs;
10168 if ((i.seg[0] || i.prefix[SEG_PREFIX])
10169 && i.tm.mnem_off == MN_lea)
10171 if (!quiet_warnings)
10172 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
10173 if (optimize && !pp.no_optimize)
10175 i.seg[0] = NULL;
10176 i.prefix[SEG_PREFIX] = 0;
10180 /* If a segment was explicitly specified, and the specified segment
10181 is neither the default nor the one already recorded from a prefix,
10182 use an opcode prefix to select it. If we never figured out what
10183 the default segment is, then default_seg will be zero at this
10184 point, and the specified segment prefix will always be used. */
10185 if (i.seg[0]
10186 && i.seg[0] != default_seg
10187 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
10189 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
10190 return 0;
10192 return 1;
10195 static const reg_entry *
10196 build_modrm_byte (void)
10198 const reg_entry *default_seg = NULL;
10199 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
10200 /* Compensate for kludge in md_assemble(). */
10201 + i.tm.operand_types[0].bitfield.imm1;
10202 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
10203 unsigned int v, op, reg_slot;
10205 /* Accumulator (in particular %st), shift count (%cl), and alike need
10206 to be skipped just like immediate operands do. */
10207 if (i.tm.operand_types[source].bitfield.instance)
10208 ++source;
10209 while (i.tm.operand_types[dest].bitfield.instance)
10210 --dest;
10212 for (op = source; op < i.operands; ++op)
10213 if (i.tm.operand_types[op].bitfield.baseindex)
10214 break;
10216 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
10218 expressionS *exp;
10220 /* There are a few kinds of instructions:
10221 1. 5 operands: 4 register operands or 3 register operands
10222 plus 1 memory operand plus one Imm4 operand, VexXDS, and
10223 VexW0 or VexW1. The destination must be either XMM, YMM or
10224 ZMM register.
10225 2. 4 operands: 4 register operands or 3 register operands
10226 plus 1 memory operand, with VexXDS.
10227 3. Other equivalent combinations when coming from s_insn(). */
10228 gas_assert (i.tm.opcode_modifier.vexvvvv
10229 && i.tm.opcode_modifier.vexw);
10230 gas_assert (dot_insn ()
10231 || i.tm.operand_types[dest].bitfield.class == RegSIMD);
10233 /* Of the first two non-immediate operands, the one whose template
10234 does not allow a memory operand is encoded in the immediate operand. */
10235 if (source == op)
10236 reg_slot = source + 1;
10237 else
10238 reg_slot = source++;
10240 if (!dot_insn ())
10242 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
10243 gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
10245 else
10246 gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
10248 if (i.imm_operands == 0)
10250 /* When there is no immediate operand, generate an 8bit
10251 immediate operand to encode the first operand. */
10252 exp = &im_expressions[i.imm_operands++];
10253 i.op[i.operands].imms = exp;
10254 i.types[i.operands].bitfield.imm8 = 1;
10255 i.operands++;
10257 exp->X_op = O_constant;
10259 else
10261 gas_assert (i.imm_operands == 1);
10262 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
10263 gas_assert (!i.tm.opcode_modifier.immext);
10265 /* Turn on Imm8 again so that output_imm will generate it. */
10266 i.types[0].bitfield.imm8 = 1;
10268 exp = i.op[0].imms;
10270 exp->X_add_number |= register_number (i.op[reg_slot].regs)
10271 << (3 + !(i.tm.opcode_modifier.evex
10272 || pp.encoding == encoding_evex));
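/* Worked example of the shift above: if the extra register operand is %xmm4
and the insn is VEX-encoded, register number 4 is shifted left by 4, i.e.
imm8 |= 0x40; EVEX-encoded forms shift by 3 instead. */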
10275 switch (i.tm.opcode_modifier.vexvvvv)
10277 /* VEX.vvvv encodes the last source register operand. */
10278 case VexVVVV_SRC2:
10279 v = source++;
10280 break;
10281 /* VEX.vvvv encodes the first source register operand. */
10282 case VexVVVV_SRC1:
10283 v = dest - 1;
10284 break;
10285 /* VEX.vvvv encodes the destination register operand. */
10286 case VexVVVV_DST:
10287 v = dest--;
10288 break;
10289 default:
10290 v = ~0;
10291 break;
10294 if (dest == source)
10295 dest = ~0;
10297 gas_assert (source < dest);
10299 if (v < MAX_OPERANDS)
10301 gas_assert (i.tm.opcode_modifier.vexvvvv);
10302 i.vex.register_specifier = i.op[v].regs;
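/* Example for the common case, assuming vaddps uses the VexVVVV_SRC1 form:
for AT&T `vaddps %xmm3, %xmm2, %xmm1' the operand just below the destination,
%xmm2, is recorded here and later lands in the VEX.vvvv field. */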
10305 if (op < i.operands)
10307 if (i.mem_operands)
10309 unsigned int fake_zero_displacement = 0;
10311 gas_assert (i.flags[op] & Operand_Mem);
10313 if (i.tm.opcode_modifier.sib)
10315 /* The index register of VSIB shouldn't be RegIZ. */
10316 if (i.tm.opcode_modifier.sib != SIBMEM
10317 && i.index_reg->reg_num == RegIZ)
10318 abort ();
10320 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10321 if (!i.base_reg)
10323 i.sib.base = NO_BASE_REGISTER;
10324 i.sib.scale = i.log2_scale_factor;
10325 i.types[op] = operand_type_and_not (i.types[op], anydisp);
10326 i.types[op].bitfield.disp32 = 1;
10329 /* Since the mandatory SIB always has an index register,
10330 the code logic remains unchanged. A non-mandatory SIB
10331 without an index register is allowed and will be handled
10332 later. */
10333 if (i.index_reg)
10335 if (i.index_reg->reg_num == RegIZ)
10336 i.sib.index = NO_INDEX_REGISTER;
10337 else
10338 i.sib.index = i.index_reg->reg_num;
10339 set_rex_vrex (i.index_reg, REX_X, false);
10343 default_seg = reg_ds;
10345 if (i.base_reg == 0)
10347 i.rm.mode = 0;
10348 if (!i.disp_operands)
10349 fake_zero_displacement = 1;
10350 if (i.index_reg == 0)
10352 /* Both check for VSIB and mandatory non-vector SIB. */
10353 gas_assert (!i.tm.opcode_modifier.sib
10354 || i.tm.opcode_modifier.sib == SIBMEM);
10355 /* Operand is just <disp> */
10356 i.types[op] = operand_type_and_not (i.types[op], anydisp);
10357 if (flag_code == CODE_64BIT)
10359 /* 64-bit mode replaces 32-bit absolute addressing
10360 with RIP-relative addressing, so absolute
10361 addressing has to be encoded via one of the
10362 redundant SIB forms. */
10363 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10364 i.sib.base = NO_BASE_REGISTER;
10365 i.sib.index = NO_INDEX_REGISTER;
10366 i.types[op].bitfield.disp32 = 1;
10368 else if ((flag_code == CODE_16BIT)
10369 ^ (i.prefix[ADDR_PREFIX] != 0))
10371 i.rm.regmem = NO_BASE_REGISTER_16;
10372 i.types[op].bitfield.disp16 = 1;
10374 else
10376 i.rm.regmem = NO_BASE_REGISTER;
10377 i.types[op].bitfield.disp32 = 1;
10380 else if (!i.tm.opcode_modifier.sib)
10382 /* !i.base_reg && i.index_reg */
10383 if (i.index_reg->reg_num == RegIZ)
10384 i.sib.index = NO_INDEX_REGISTER;
10385 else
10386 i.sib.index = i.index_reg->reg_num;
10387 i.sib.base = NO_BASE_REGISTER;
10388 i.sib.scale = i.log2_scale_factor;
10389 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10390 i.types[op] = operand_type_and_not (i.types[op], anydisp);
10391 i.types[op].bitfield.disp32 = 1;
10392 set_rex_rex2 (i.index_reg, REX_X);
10395 /* RIP addressing for 64bit mode. */
10396 else if (i.base_reg->reg_num == RegIP)
10398 gas_assert (!i.tm.opcode_modifier.sib);
10399 i.rm.regmem = NO_BASE_REGISTER;
10400 i.types[op].bitfield.disp8 = 0;
10401 i.types[op].bitfield.disp16 = 0;
10402 i.types[op].bitfield.disp32 = 1;
10403 i.types[op].bitfield.disp64 = 0;
10404 i.flags[op] |= Operand_PCrel;
10405 if (! i.disp_operands)
10406 fake_zero_displacement = 1;
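/* E.g. `mov foo(%rip), %rax' gets mod=00, rm=101 plus a 32-bit PC-relative
displacement, and plain `(%rip)' is assembled as if `0(%rip)' had been
written, via the fake zero displacement set just above. */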
10408 else if (i.base_reg->reg_type.bitfield.word)
10410 gas_assert (!i.tm.opcode_modifier.sib);
10411 switch (i.base_reg->reg_num)
10413 case 3: /* (%bx) */
10414 if (i.index_reg == 0)
10415 i.rm.regmem = 7;
10416 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
10417 i.rm.regmem = i.index_reg->reg_num - 6;
10418 break;
10419 case 5: /* (%bp) */
10420 default_seg = reg_ss;
10421 if (i.index_reg == 0)
10423 i.rm.regmem = 6;
10424 if (operand_type_check (i.types[op], disp) == 0)
10426 /* fake (%bp) into 0(%bp) */
10427 if (pp.disp_encoding == disp_encoding_16bit)
10428 i.types[op].bitfield.disp16 = 1;
10429 else
10430 i.types[op].bitfield.disp8 = 1;
10431 fake_zero_displacement = 1;
10434 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
10435 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
10436 break;
10437 default: /* (%si) -> 4 or (%di) -> 5 */
10438 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
10440 if (!fake_zero_displacement
10441 && !i.disp_operands
10442 && pp.disp_encoding)
10444 fake_zero_displacement = 1;
10445 if (pp.disp_encoding == disp_encoding_8bit)
10446 i.types[op].bitfield.disp8 = 1;
10447 else
10448 i.types[op].bitfield.disp16 = 1;
10450 i.rm.mode = mode_from_disp_size (i.types[op]);
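/* For reference, the full 16-bit ModR/M r/m encoding produced above:
0 = (%bx,%si)  1 = (%bx,%di)  2 = (%bp,%si)  3 = (%bp,%di)
4 = (%si)      5 = (%di)      6 = disp16 or (%bp)  7 = (%bx). */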
10452 else /* i.base_reg and 32/64 bit mode */
10454 if (operand_type_check (i.types[op], disp))
10456 i.types[op].bitfield.disp16 = 0;
10457 i.types[op].bitfield.disp64 = 0;
10458 i.types[op].bitfield.disp32 = 1;
10461 if (!i.tm.opcode_modifier.sib)
10462 i.rm.regmem = i.base_reg->reg_num;
10463 set_rex_rex2 (i.base_reg, REX_B);
10464 i.sib.base = i.base_reg->reg_num;
10465 /* x86-64 ignores REX prefix bit here to avoid decoder
10466 complications. */
10467 if (!(i.base_reg->reg_flags & RegRex)
10468 && (i.base_reg->reg_num == EBP_REG_NUM
10469 || i.base_reg->reg_num == ESP_REG_NUM))
10470 default_seg = reg_ss;
10471 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
10473 fake_zero_displacement = 1;
10474 if (pp.disp_encoding == disp_encoding_32bit)
10475 i.types[op].bitfield.disp32 = 1;
10476 else
10477 i.types[op].bitfield.disp8 = 1;
10479 i.sib.scale = i.log2_scale_factor;
10480 if (i.index_reg == 0)
10482 /* Only check for VSIB. */
10483 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
10484 && i.tm.opcode_modifier.sib != VECSIB256
10485 && i.tm.opcode_modifier.sib != VECSIB512);
10487 /* <disp>(%esp) becomes two byte modrm with no index
10488 register. We've already stored the code for esp
10489 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
10490 Any base register besides %esp will not use the
10491 extra modrm byte. */
10492 i.sib.index = NO_INDEX_REGISTER;
10494 else if (!i.tm.opcode_modifier.sib)
10496 if (i.index_reg->reg_num == RegIZ)
10497 i.sib.index = NO_INDEX_REGISTER;
10498 else
10499 i.sib.index = i.index_reg->reg_num;
10500 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10501 set_rex_rex2 (i.index_reg, REX_X);
10504 if (i.disp_operands
10505 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
10506 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
10507 i.rm.mode = 0;
10508 else
10510 if (!fake_zero_displacement
10511 && !i.disp_operands
10512 && pp.disp_encoding)
10514 fake_zero_displacement = 1;
10515 if (pp.disp_encoding == disp_encoding_8bit)
10516 i.types[op].bitfield.disp8 = 1;
10517 else
10518 i.types[op].bitfield.disp32 = 1;
10520 i.rm.mode = mode_from_disp_size (i.types[op]);
10524 if (fake_zero_displacement)
10526 /* Fakes a zero displacement assuming that i.types[op]
10527 holds the correct displacement size. */
10528 expressionS *exp;
10530 gas_assert (i.op[op].disps == 0);
10531 exp = &disp_expressions[i.disp_operands++];
10532 i.op[op].disps = exp;
10533 exp->X_op = O_constant;
10534 exp->X_add_number = 0;
10535 exp->X_add_symbol = (symbolS *) 0;
10536 exp->X_op_symbol = (symbolS *) 0;
10539 else
10541 i.rm.mode = 3;
10542 i.rm.regmem = i.op[op].regs->reg_num;
10543 set_rex_vrex (i.op[op].regs, REX_B, false);
10546 if (op == dest)
10547 dest = ~0;
10548 if (op == source)
10549 source = ~0;
10551 else
10553 i.rm.mode = 3;
10554 if (!i.tm.opcode_modifier.regmem)
10556 gas_assert (source < MAX_OPERANDS);
10557 i.rm.regmem = i.op[source].regs->reg_num;
10558 set_rex_vrex (i.op[source].regs, REX_B,
10559 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
10560 source = ~0;
10562 else
10564 gas_assert (dest < MAX_OPERANDS);
10565 i.rm.regmem = i.op[dest].regs->reg_num;
10566 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
10567 dest = ~0;
10571 /* Fill in i.rm.reg field with extension opcode (if any) or the
10572 appropriate register. */
10573 if (i.tm.extension_opcode != None)
10574 i.rm.reg = i.tm.extension_opcode;
10575 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
10577 i.rm.reg = i.op[dest].regs->reg_num;
10578 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
10580 else
10582 gas_assert (source < MAX_OPERANDS);
10583 i.rm.reg = i.op[source].regs->reg_num;
10584 set_rex_vrex (i.op[source].regs, REX_R, false);
10587 if (flag_code != CODE_64BIT && (i.rex & REX_R))
10589 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
10590 i.rex &= ~REX_R;
10591 add_prefix (LOCK_PREFIX_OPCODE);
10594 return default_seg;
10597 static INLINE void
10598 frag_opcode_byte (unsigned char byte)
10600 if (now_seg != absolute_section)
10601 FRAG_APPEND_1_CHAR (byte);
10602 else
10603 ++abs_section_offset;
10606 static unsigned int
10607 flip_code16 (unsigned int code16)
10609 gas_assert (i.tm.operands == 1);
10611 return !(i.prefix[REX_PREFIX] & REX_W)
10612 && (code16 ? i.tm.operand_types[0].bitfield.disp32
10613 : i.tm.operand_types[0].bitfield.disp16)
10614 ? CODE16 : 0;
10617 static void
10618 output_branch (void)
10620 char *p;
10621 int size;
10622 int code16;
10623 int prefix;
10624 relax_substateT subtype;
10625 symbolS *sym;
10626 offsetT off;
10628 if (now_seg == absolute_section)
10630 as_bad (_("relaxable branches not supported in absolute section"));
10631 return;
10634 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
10635 size = pp.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
10637 prefix = 0;
10638 if (i.prefix[DATA_PREFIX] != 0)
10640 prefix = 1;
10641 i.prefixes -= 1;
10642 code16 ^= flip_code16(code16);
10644 /* Pentium4 branch hints. */
10645 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
10646 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
10648 prefix++;
10649 i.prefixes--;
10651 if (i.prefix[REX_PREFIX] != 0)
10653 prefix++;
10654 i.prefixes--;
10657 /* BND prefixed jump. */
10658 if (i.prefix[BND_PREFIX] != 0)
10660 prefix++;
10661 i.prefixes--;
10664 if (i.prefixes != 0)
10665 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10667 /* It's always a symbol; end the frag & set up for relaxation.
10668 Make sure there is enough room in this frag for the largest
10669 instruction we may generate in md_convert_frag. This is 2
10670 bytes for the opcode and room for the prefix and largest
10671 displacement. */
10672 frag_grow (prefix + 2 + 4);
10673 /* Prefix and 1 opcode byte go in fr_fix. */
10674 p = frag_more (prefix + 1);
10675 if (i.prefix[DATA_PREFIX] != 0)
10676 *p++ = DATA_PREFIX_OPCODE;
10677 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
10678 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
10679 *p++ = i.prefix[SEG_PREFIX];
10680 if (i.prefix[BND_PREFIX] != 0)
10681 *p++ = BND_PREFIX_OPCODE;
10682 if (i.prefix[REX_PREFIX] != 0)
10683 *p++ = i.prefix[REX_PREFIX];
10684 *p = i.tm.base_opcode;
10686 if ((unsigned char) *p == JUMP_PC_RELATIVE)
10687 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
10688 else if (cpu_arch_flags.bitfield.cpui386)
10689 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
10690 else
10691 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
10692 subtype |= code16;
10694 sym = i.op[0].disps->X_add_symbol;
10695 off = i.op[0].disps->X_add_number;
10697 if (i.op[0].disps->X_op != O_constant
10698 && i.op[0].disps->X_op != O_symbol)
10700 /* Handle complex expressions. */
10701 sym = make_expr_symbol (i.op[0].disps);
10702 off = 0;
10705 /* 1 possible extra opcode + 4 byte displacement go in var part.
10706 Pass reloc in fr_var. */
10707 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
10710 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10711 /* Return TRUE iff PLT32 relocation should be used for branching to
10712 symbol S. */
10714 static bool
10715 need_plt32_p (symbolS *s)
10717 /* PLT32 relocation is ELF only. */
10718 if (!IS_ELF)
10719 return false;
10721 #ifdef TE_SOLARIS
10722 /* Don't emit PLT32 relocation on Solaris: neither the native linker
10723 nor krtld supports it. */
10724 return false;
10725 #endif
10727 /* Since there is no need to prepare for PLT branch on x86-64, we
10728 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
10729 be used as a marker for 32-bit PC-relative branches. */
10730 if (!object_64bit)
10731 return false;
10733 if (s == NULL)
10734 return false;
10736 /* Weak or undefined symbols need a PLT32 relocation. */
10737 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
10738 return true;
10740 /* A non-global symbol doesn't need a PLT32 relocation. */
10741 if (! S_IS_EXTERNAL (s))
10742 return false;
10744 /* Other global symbols need a PLT32 relocation. NB: Symbols with
10745 non-default visibility are treated as normal global symbols so
10746 that a PLT32 relocation can be used as a marker for 32-bit
10747 PC-relative branches. It is useful for linker relaxation. */
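/* E.g. a plain `call func' against a global `func' thus gets
R_X86_64_PLT32 rather than R_X86_64_PC32; if `func' ends up being
resolved locally, the linker simply treats the PLT32 like a PC32. */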
10748 return true;
10750 #endif
10752 static void
10753 output_jump (void)
10755 char *p;
10756 int size;
10757 fixS *fixP;
10758 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
10760 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
10762 /* This is a loop or jecxz type instruction. */
10763 size = 1;
10764 if (i.prefix[ADDR_PREFIX] != 0)
10766 frag_opcode_byte (ADDR_PREFIX_OPCODE);
10767 i.prefixes -= 1;
10769 /* Pentium4 branch hints. */
10770 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
10771 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
10773 frag_opcode_byte (i.prefix[SEG_PREFIX]);
10774 i.prefixes--;
10777 else
10779 int code16;
10781 code16 = 0;
10782 if (flag_code == CODE_16BIT)
10783 code16 = CODE16;
10785 if (i.prefix[DATA_PREFIX] != 0)
10787 frag_opcode_byte (DATA_PREFIX_OPCODE);
10788 i.prefixes -= 1;
10789 code16 ^= flip_code16(code16);
10792 size = 4;
10793 if (code16)
10794 size = 2;
10797 /* BND prefixed jump. */
10798 if (i.prefix[BND_PREFIX] != 0)
10800 frag_opcode_byte (i.prefix[BND_PREFIX]);
10801 i.prefixes -= 1;
10804 if (i.prefix[REX_PREFIX] != 0)
10806 frag_opcode_byte (i.prefix[REX_PREFIX]);
10807 i.prefixes -= 1;
10810 if (i.prefixes != 0)
10811 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10813 if (now_seg == absolute_section)
10815 abs_section_offset += i.opcode_length + size;
10816 return;
10819 p = frag_more (i.opcode_length + size);
10820 switch (i.opcode_length)
10822 case 2:
10823 *p++ = i.tm.base_opcode >> 8;
10824 /* Fall through. */
10825 case 1:
10826 *p++ = i.tm.base_opcode;
10827 break;
10828 default:
10829 abort ();
10832 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10833 if (flag_code == CODE_64BIT && size == 4
10834 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
10835 && need_plt32_p (i.op[0].disps->X_add_symbol))
10836 jump_reloc = BFD_RELOC_X86_64_PLT32;
10837 #endif
10839 jump_reloc = reloc (size, 1, 1, jump_reloc);
10841 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10842 i.op[0].disps, 1, jump_reloc);
10844 /* All jumps handled here are signed, but don't unconditionally use a
10845 signed limit check for 32 and 16 bit jumps as we want to allow wrap
10846 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
10847 respectively. */
10848 switch (size)
10850 case 1:
10851 fixP->fx_signed = 1;
10852 break;
10854 case 2:
10855 if (i.tm.mnem_off == MN_xbegin)
10856 fixP->fx_signed = 1;
10857 break;
10859 case 4:
10860 if (flag_code == CODE_64BIT)
10861 fixP->fx_signed = 1;
10862 break;
10866 static void
10867 output_interseg_jump (void)
10869 char *p;
10870 int size;
10871 int prefix;
10872 int code16;
10874 code16 = 0;
10875 if (flag_code == CODE_16BIT)
10876 code16 = CODE16;
10878 prefix = 0;
10879 if (i.prefix[DATA_PREFIX] != 0)
10881 prefix = 1;
10882 i.prefixes -= 1;
10883 code16 ^= CODE16;
10886 gas_assert (!i.prefix[REX_PREFIX]);
10888 size = 4;
10889 if (code16)
10890 size = 2;
10892 if (i.prefixes != 0)
10893 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10895 if (now_seg == absolute_section)
10897 abs_section_offset += prefix + 1 + 2 + size;
10898 return;
10901 /* 1 opcode byte; 2 segment bytes; offset. */
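/* Sketch of the layout: in 32-bit code, `ljmp $0x08, $entry' emits the
opcode 0xea, then the 4-byte offset of `entry', then the 2-byte
selector 0x0008. */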
10902 p = frag_more (prefix + 1 + 2 + size);
10904 if (i.prefix[DATA_PREFIX] != 0)
10905 *p++ = DATA_PREFIX_OPCODE;
10907 if (i.prefix[REX_PREFIX] != 0)
10908 *p++ = i.prefix[REX_PREFIX];
10910 *p++ = i.tm.base_opcode;
10911 if (i.op[1].imms->X_op == O_constant)
10913 offsetT n = i.op[1].imms->X_add_number;
10915 if (size == 2
10916 && !fits_in_unsigned_word (n)
10917 && !fits_in_signed_word (n))
10919 as_bad (_("16-bit jump out of range"));
10920 return;
10922 md_number_to_chars (p, n, size);
10924 else
10925 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10926 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
10928 p += size;
10929 if (i.op[0].imms->X_op == O_constant)
10930 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
10931 else
10932 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
10933 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
10936 /* Hook used to reject pseudo-prefixes misplaced at the start of a line. */
10938 void i386_start_line (void)
10940 struct pseudo_prefixes last_pp;
10942 memcpy (&last_pp, &pp, sizeof (pp));
10943 memset (&pp, 0, sizeof (pp));
10944 if (memcmp (&pp, &last_pp, sizeof (pp)))
10945 as_bad_where (frag_now->fr_file, frag_now->fr_line,
10946 _("pseudo prefix without instruction"));
10949 /* Hook used to warn about pseudo-prefixes ahead of a label. */
10951 bool i386_check_label (void)
10953 struct pseudo_prefixes last_pp;
10955 memcpy (&last_pp, &pp, sizeof (pp));
10956 memset (&pp, 0, sizeof (pp));
10957 if (memcmp (&pp, &last_pp, sizeof (pp)))
10958 as_warn (_("pseudo prefix ahead of label; ignoring"));
10959 return true;
10962 /* Hook used to parse pseudo-prefixes off of the start of a line. */
10965 int i386_unrecognized_line (int ch)
10967 char mnemonic[MAX_MNEM_SIZE];
10968 const char *end;
10970 if (ch != '{')
10971 return 0;
10973 --input_line_pointer;
10974 know (*input_line_pointer == ch);
10976 end = parse_insn (input_line_pointer, mnemonic, parse_pseudo_prefix);
10977 if (end == NULL)
10979 /* Diagnostic was already issued. */
10980 ignore_rest_of_line ();
10981 memset (&pp, 0, sizeof (pp));
10982 return 1;
10985 if (end == input_line_pointer)
10987 ++input_line_pointer;
10988 return 0;
10991 input_line_pointer += end - input_line_pointer;
10992 return 1;
10995 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10996 void
10997 x86_cleanup (void)
10999 char *p;
11000 asection *seg = now_seg;
11001 subsegT subseg = now_subseg;
11002 asection *sec;
11003 unsigned int alignment, align_size_1;
11004 unsigned int isa_1_descsz, feature_2_descsz, descsz;
11005 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
11006 unsigned int padding;
11008 if (!IS_ELF || !x86_used_note)
11009 return;
11011 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
11013 /* The .note.gnu.property section layout:
11015 Field Length Contents
11016 ---- ---- ----
11017 n_namsz 4 4
11018 n_descsz 4 The note descriptor size
11019 n_type 4 NT_GNU_PROPERTY_TYPE_0
11020 n_name 4 "GNU"
11021 n_desc n_descsz The program property array
11022 .... .... ....
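/* Worked size example for ELFCLASS64: each property below is 4 + 4 + 4 = 12
bytes of raw data, padded to 16 bytes (8-byte alignment), so the two
properties emitted here give n_descsz = 32. */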
11025 /* Create the .note.gnu.property section. */
11026 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
11027 bfd_set_section_flags (sec,
11028 (SEC_ALLOC
11029 | SEC_LOAD
11030 | SEC_DATA
11031 | SEC_HAS_CONTENTS
11032 | SEC_READONLY));
11034 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
11036 align_size_1 = 7;
11037 alignment = 3;
11039 else
11041 align_size_1 = 3;
11042 alignment = 2;
11045 bfd_set_section_alignment (sec, alignment);
11046 elf_section_type (sec) = SHT_NOTE;
11048 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
11049 + 4-byte data */
11050 isa_1_descsz_raw = 4 + 4 + 4;
11051 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
11052 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
11054 feature_2_descsz_raw = isa_1_descsz;
11055 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
11056 + 4-byte data */
11057 feature_2_descsz_raw += 4 + 4 + 4;
11058 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
11059 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
11060 & ~align_size_1);
11062 descsz = feature_2_descsz;
11063 /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz. */
11064 p = frag_more (4 + 4 + 4 + 4 + descsz);
11066 /* Write n_namsz. */
11067 md_number_to_chars (p, (valueT) 4, 4);
11069 /* Write n_descsz. */
11070 md_number_to_chars (p + 4, (valueT) descsz, 4);
11072 /* Write n_type. */
11073 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
11075 /* Write n_name. */
11076 memcpy (p + 4 * 3, "GNU", 4);
11078 /* Write 4-byte type. */
11079 md_number_to_chars (p + 4 * 4,
11080 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
11082 /* Write 4-byte data size. */
11083 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
11085 /* Write 4-byte data. */
11086 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
11088 /* Zero out paddings. */
11089 padding = isa_1_descsz - isa_1_descsz_raw;
11090 if (padding)
11091 memset (p + 4 * 7, 0, padding);
11093 /* Write 4-byte type. */
11094 md_number_to_chars (p + isa_1_descsz + 4 * 4,
11095 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
11097 /* Write 4-byte data size. */
11098 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
11100 /* Write 4-byte data. */
11101 md_number_to_chars (p + isa_1_descsz + 4 * 6,
11102 (valueT) x86_feature_2_used, 4);
11104 /* Zero out paddings. */
11105 padding = feature_2_descsz - feature_2_descsz_raw;
11106 if (padding)
11107 memset (p + isa_1_descsz + 4 * 7, 0, padding);
11109 /* We probably can't restore the current segment, for there likely
11110 isn't one yet... */
11111 if (seg && subseg)
11112 subseg_set (seg, subseg);
11115 #include "tc-i386-ginsn.c"
11117 /* Whether SFrame stack trace info is supported. */
11118 bool
11119 x86_support_sframe_p (void)
11121 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
11122 return (x86_elf_abi == X86_64_ABI);
11125 /* Whether SFrame return address tracking is needed. */
11126 bool
11127 x86_sframe_ra_tracking_p (void)
11129 /* On AMD64, the return address is always stored on the stack at a fixed
11130 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()).
11131 Do not track it explicitly via an SFrame Frame Row Entry. */
11132 return false;
11135 /* The fixed offset from CFA for SFrame to recover the return address.
11136 (useful only when SFrame RA tracking is not needed). */
11137 offsetT
11138 x86_sframe_cfa_ra_offset (void)
11140 gas_assert (x86_elf_abi == X86_64_ABI);
11141 return (offsetT) -8;
11144 /* The ABI/arch identifier for SFrame. */
11145 unsigned char
11146 x86_sframe_get_abi_arch (void)
11148 unsigned char sframe_abi_arch = 0;
11150 if (x86_support_sframe_p ())
11152 gas_assert (!target_big_endian);
11153 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
11156 return sframe_abi_arch;
11159 #endif
11161 static unsigned int
11162 encoding_length (const fragS *start_frag, offsetT start_off,
11163 const char *frag_now_ptr)
11165 unsigned int len = 0;
11167 if (start_frag != frag_now)
11169 const fragS *fr = start_frag;
11171 do {
11172 len += fr->fr_fix;
11173 fr = fr->fr_next;
11174 } while (fr && fr != frag_now);
11177 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
11180 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
11181 be macro-fused with conditional jumps.
11182 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
11183 or is one of the following format:
11185 cmp m, imm
11186 add m, imm
11187 sub m, imm
11188 test m, imm
11189 and m, imm
11190 inc m
11191 dec m
11193 it cannot be macro-fused. */
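/* Illustrative pair: `cmp %rsi, %rdi' followed by `jne target' can be
macro-fused, while `cmpl $1, (%rdi)' cannot, since it combines a memory
operand with an immediate. */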
11195 static int
11196 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
11198 /* No RIP address. */
11199 if (i.base_reg && i.base_reg->reg_num == RegIP)
11200 return 0;
11202 /* No opcodes outside of base encoding space. */
11203 if (i.tm.opcode_space != SPACE_BASE)
11204 return 0;
11206 /* add, sub without add/sub m, imm. */
11207 if (i.tm.base_opcode <= 5
11208 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
11209 || ((i.tm.base_opcode | 3) == 0x83
11210 && (i.tm.extension_opcode == 0x5
11211 || i.tm.extension_opcode == 0x0)))
11213 *mf_cmp_p = mf_cmp_alu_cmp;
11214 return !(i.mem_operands && i.imm_operands);
11217 /* and without and m, imm. */
11218 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
11219 || ((i.tm.base_opcode | 3) == 0x83
11220 && i.tm.extension_opcode == 0x4))
11222 *mf_cmp_p = mf_cmp_test_and;
11223 return !(i.mem_operands && i.imm_operands);
11226 /* test without test m imm. */
11227 if ((i.tm.base_opcode | 1) == 0x85
11228 || (i.tm.base_opcode | 1) == 0xa9
11229 || ((i.tm.base_opcode | 1) == 0xf7
11230 && i.tm.extension_opcode == 0))
11232 *mf_cmp_p = mf_cmp_test_and;
11233 return !(i.mem_operands && i.imm_operands);
11236 /* cmp without cmp m, imm. */
11237 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
11238 || ((i.tm.base_opcode | 3) == 0x83
11239 && (i.tm.extension_opcode == 0x7)))
11241 *mf_cmp_p = mf_cmp_alu_cmp;
11242 return !(i.mem_operands && i.imm_operands);
11245 /* inc, dec without inc/dec m. */
11246 if ((is_cpu (&i.tm, CpuNo64)
11247 && (i.tm.base_opcode | 0xf) == 0x4f)
11248 || ((i.tm.base_opcode | 1) == 0xff
11249 && i.tm.extension_opcode <= 0x1))
11251 *mf_cmp_p = mf_cmp_incdec;
11252 return !i.mem_operands;
11255 return 0;
11258 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
11260 static int
11261 add_fused_jcc_padding_frag_p (enum mf_cmp_kind *mf_cmp_p,
11262 const struct last_insn *last_insn)
11264 /* NB: Doesn't work with COND_JUMP86 without i386. */
11265 if (!align_branch_power
11266 || now_seg == absolute_section
11267 || !cpu_arch_flags.bitfield.cpui386
11268 || !(align_branch & align_branch_fused_bit))
11269 return 0;
11271 if (maybe_fused_with_jcc_p (mf_cmp_p))
11273 if (last_insn->kind == last_insn_other)
11274 return 1;
11275 if (flag_debug)
11276 as_warn_where (last_insn->file, last_insn->line,
11277 _("`%s` skips -malign-branch-boundary on `%s`"),
11278 last_insn->name, insn_name (&i.tm));
11281 return 0;
11284 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
11286 static int
11287 add_branch_prefix_frag_p (const struct last_insn *last_insn)
11289 /* NB: Doesn't work with COND_JUMP86 without i386. Don't add a prefix
11290 to PadLock instructions since they include prefixes in the opcode. */
11291 if (!align_branch_power
11292 || !align_branch_prefix_size
11293 || now_seg == absolute_section
11294 || is_cpu (&i.tm, CpuPadLock)
11295 || !cpu_arch_flags.bitfield.cpui386)
11296 return 0;
11298 /* Don't add a prefix if the insn is itself a prefix or has no
11299 operands, in case the segment prefix is special. */
11300 if (!i.operands || i.tm.opcode_modifier.isprefix)
11301 return 0;
11303 if (last_insn->kind == last_insn_other)
11304 return 1;
11306 if (flag_debug)
11307 as_warn_where (last_insn->file, last_insn->line,
11308 _("`%s` skips -malign-branch-boundary on `%s`"),
11309 last_insn->name, insn_name (&i.tm));
11311 return 0;
11314 /* Return 1 if a BRANCH_PADDING frag should be generated. */
11316 static int
11317 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
11318 enum mf_jcc_kind *mf_jcc_p,
11319 const struct last_insn *last_insn)
11321 int add_padding;
11323 /* NB: Doesn't work with COND_JUMP86 without i386. */
11324 if (!align_branch_power
11325 || now_seg == absolute_section
11326 || !cpu_arch_flags.bitfield.cpui386
11327 || i.tm.opcode_space != SPACE_BASE)
11328 return 0;
11330 add_padding = 0;
11332 /* Check for jcc and direct jmp. */
11333 if (i.tm.opcode_modifier.jump == JUMP)
11335 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
11337 *branch_p = align_branch_jmp;
11338 add_padding = align_branch & align_branch_jmp_bit;
11340 else
11342 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
11343 table, ignore the lowest bit. */
11344 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
11345 *branch_p = align_branch_jcc;
11346 if ((align_branch & align_branch_jcc_bit))
11347 add_padding = 1;
11350 else if ((i.tm.base_opcode | 1) == 0xc3)
11352 /* Near ret. */
11353 *branch_p = align_branch_ret;
11354 if ((align_branch & align_branch_ret_bit))
11355 add_padding = 1;
11357 else
11359 /* Check for indirect jmp, direct and indirect calls. */
11360 if (i.tm.base_opcode == 0xe8)
11362 /* Direct call. */
11363 *branch_p = align_branch_call;
11364 if ((align_branch & align_branch_call_bit))
11365 add_padding = 1;
11367 else if (i.tm.base_opcode == 0xff
11368 && (i.tm.extension_opcode == 2
11369 || i.tm.extension_opcode == 4))
11371 /* Indirect call and jmp. */
11372 *branch_p = align_branch_indirect;
11373 if ((align_branch & align_branch_indirect_bit))
11374 add_padding = 1;
11377 if (add_padding
11378 && i.disp_operands
11379 && tls_get_addr
11380 && (i.op[0].disps->X_op == O_symbol
11381 || (i.op[0].disps->X_op == O_subtract
11382 && i.op[0].disps->X_op_symbol == GOT_symbol)))
11384 symbolS *s = i.op[0].disps->X_add_symbol;
11385 /* No padding for a call to a global or undefined tls_get_addr. */
11386 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
11387 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
11388 return 0;
11392 if (add_padding
11393 && last_insn->kind != last_insn_other)
11395 if (flag_debug)
11396 as_warn_where (last_insn->file, last_insn->line,
11397 _("`%s` skips -malign-branch-boundary on `%s`"),
11398 last_insn->name, insn_name (&i.tm));
11399 return 0;
11402 return add_padding;
11405 static void
11406 output_insn (const struct last_insn *last_insn)
11408 fragS *insn_start_frag;
11409 offsetT insn_start_off;
11410 fragS *fragP = NULL;
11411 enum align_branch_kind branch = align_branch_none;
11412 /* The initializer is arbitrary, just to avoid an uninitialized
11413 warning. It's actually either assigned in add_branch_padding_frag_p
11414 or never used. */
11415 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
11417 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11418 if (IS_ELF && x86_used_note && now_seg != absolute_section)
11420 if ((i.xstate & xstate_tmm) == xstate_tmm
11421 || is_cpu (&i.tm, CpuAMX_TILE))
11422 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
11424 if (is_cpu (&i.tm, Cpu8087)
11425 || is_cpu (&i.tm, Cpu287)
11426 || is_cpu (&i.tm, Cpu387)
11427 || is_cpu (&i.tm, Cpu687)
11428 || is_cpu (&i.tm, CpuFISTTP))
11429 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
11431 if ((i.xstate & xstate_mmx)
11432 || i.tm.mnem_off == MN_emms
11433 || i.tm.mnem_off == MN_femms)
11434 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
11436 if (i.index_reg)
11438 if (i.index_reg->reg_type.bitfield.zmmword)
11439 i.xstate |= xstate_zmm;
11440 else if (i.index_reg->reg_type.bitfield.ymmword)
11441 i.xstate |= xstate_ymm;
11442 else if (i.index_reg->reg_type.bitfield.xmmword)
11443 i.xstate |= xstate_xmm;
11446 /* vzeroall / vzeroupper */
11447 if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
11448 i.xstate |= xstate_ymm;
11450 if ((i.xstate & xstate_xmm)
11451 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
11452 || (i.tm.base_opcode == 0xae
11453 && (is_cpu (&i.tm, CpuSSE)
11454 || is_cpu (&i.tm, CpuAVX)))
11455 || is_cpu (&i.tm, CpuWideKL)
11456 || is_cpu (&i.tm, CpuKL))
11457 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
11459 if ((i.xstate & xstate_ymm) == xstate_ymm)
11460 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
11461 if ((i.xstate & xstate_zmm) == xstate_zmm)
11462 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
11463 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
11464 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
11465 if (is_cpu (&i.tm, CpuFXSR))
11466 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
11467 if (is_cpu (&i.tm, CpuXsave))
11468 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
11469 if (is_cpu (&i.tm, CpuXsaveopt))
11470 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
11471 if (is_cpu (&i.tm, CpuXSAVEC))
11472 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
11474 if (x86_feature_2_used
11475 || is_cpu (&i.tm, CpuCMOV)
11476 || is_cpu (&i.tm, CpuSYSCALL)
11477 || i.tm.mnem_off == MN_cmpxchg8b)
11478 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
11479 if (is_cpu (&i.tm, CpuSSE3)
11480 || is_cpu (&i.tm, CpuSSSE3)
11481 || is_cpu (&i.tm, CpuSSE4_1)
11482 || is_cpu (&i.tm, CpuSSE4_2)
11483 || is_cpu (&i.tm, CpuCX16)
11484 || is_cpu (&i.tm, CpuPOPCNT)
11485 /* LAHF-SAHF insns in 64-bit mode. */
11486 || (flag_code == CODE_64BIT
11487 && (i.tm.base_opcode | 1) == 0x9f
11488 && i.tm.opcode_space == SPACE_BASE))
11489 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
11490 if (is_cpu (&i.tm, CpuAVX)
11491 || is_cpu (&i.tm, CpuAVX2)
11492 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
11493 XOP, FMA4, LWP, TBM, and AMX. */
11494 || (i.tm.opcode_modifier.vex
11495 && !is_cpu (&i.tm, CpuAVX512F)
11496 && !is_cpu (&i.tm, CpuAVX512BW)
11497 && !is_cpu (&i.tm, CpuAVX512DQ)
11498 && !is_cpu (&i.tm, CpuXOP)
11499 && !is_cpu (&i.tm, CpuFMA4)
11500 && !is_cpu (&i.tm, CpuLWP)
11501 && !is_cpu (&i.tm, CpuTBM)
11502 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
11503 || is_cpu (&i.tm, CpuF16C)
11504 || is_cpu (&i.tm, CpuFMA)
11505 || is_cpu (&i.tm, CpuLZCNT)
11506 || is_cpu (&i.tm, CpuMovbe)
11507 || is_cpu (&i.tm, CpuXSAVES)
11508 || (x86_feature_2_used
11509 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
11510 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
11511 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
11512 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
11513 if (is_cpu (&i.tm, CpuAVX512F)
11514 || is_cpu (&i.tm, CpuAVX512BW)
11515 || is_cpu (&i.tm, CpuAVX512DQ)
11516 || is_cpu (&i.tm, CpuAVX512VL)
11517 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
11518 AVX512-4FMAPS, and AVX512-4VNNIW. */
11519 || (i.tm.opcode_modifier.evex
11520 && !is_cpu (&i.tm, CpuAVX512ER)
11521 && !is_cpu (&i.tm, CpuAVX512PF)
11522 && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
11523 && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
11524 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
11526 #endif
11528 /* Tie dwarf2 debug info to the address at the start of the insn.
11529 We can't do this after the insn has been output as the current
11530 frag may have been closed off. eg. by frag_var. */
11531 dwarf2_emit_insn (0);
11533 insn_start_frag = frag_now;
11534 insn_start_off = frag_now_fix ();
11536 if (add_branch_padding_frag_p (&branch, &mf_jcc, last_insn))
11538 char *p;
11539 /* Branch can be 8 bytes. Leave some room for prefixes. */
11540 unsigned int max_branch_padding_size = 14;
11542 /* Align section to boundary. */
11543 record_alignment (now_seg, align_branch_power);
11545 /* Make room for padding. */
11546 frag_grow (max_branch_padding_size);
11548 /* Start of the padding. */
11549 p = frag_more (0);
11551 fragP = frag_now;
11553 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
11554 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
11555 NULL, 0, p);
11557 fragP->tc_frag_data.mf_type = mf_jcc;
11558 fragP->tc_frag_data.branch_type = branch;
11559 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
11562 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
11563 && !pre_386_16bit_warned)
11565 as_warn (_("use .code16 to ensure correct addressing mode"));
11566 pre_386_16bit_warned = true;
11569 /* Output jumps. */
11570 if (i.tm.opcode_modifier.jump == JUMP)
11571 output_branch ();
11572 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
11573 || i.tm.opcode_modifier.jump == JUMP_DWORD)
11574 output_jump ();
11575 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
11576 output_interseg_jump ();
11577 else
11579 /* Output normal instructions here. */
11580 char *p;
11581 unsigned char *q;
11582 unsigned int j;
11583 enum mf_cmp_kind mf_cmp;
11585 if (avoid_fence
11586 && (i.tm.base_opcode == 0xaee8
11587 || i.tm.base_opcode == 0xaef0
11588 || i.tm.base_opcode == 0xaef8))
11590 /* Encode lfence, mfence, and sfence as
11591 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
11592 if (flag_code == CODE_16BIT)
11593 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
11594 else if (omit_lock_prefix)
11595 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
11596 insn_name (&i.tm));
11597 else if (now_seg != absolute_section)
11599 offsetT val = 0x240483f0ULL;
11601 p = frag_more (5);
11602 md_number_to_chars (p, val, 5);
11604 else
11605 abs_section_offset += 5;
11606 return;
11609 /* Some processors fail on the LOCK prefix. This option makes the
11610 assembler ignore the LOCK prefix and serves as a workaround. */
11611 if (omit_lock_prefix)
11613 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
11614 && i.tm.opcode_modifier.isprefix)
11615 return;
11616 i.prefix[LOCK_PREFIX] = 0;
11619 if (branch)
11620 /* Skip if this is a branch. */
11622 else if (add_fused_jcc_padding_frag_p (&mf_cmp, last_insn))
11624 /* Make room for padding. */
11625 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
11626 p = frag_more (0);
11628 fragP = frag_now;
11630 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
11631 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
11632 NULL, 0, p);
11634 fragP->tc_frag_data.mf_type = mf_cmp;
11635 fragP->tc_frag_data.branch_type = align_branch_fused;
11636 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
11638 else if (add_branch_prefix_frag_p (last_insn))
11640 unsigned int max_prefix_size = align_branch_prefix_size;
11642 /* Make room for padding. */
11643 frag_grow (max_prefix_size);
11644 p = frag_more (0);
11646 fragP = frag_now;
11648 frag_var (rs_machine_dependent, max_prefix_size, 0,
11649 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
11650 NULL, 0, p);
11652 fragP->tc_frag_data.max_bytes = max_prefix_size;
11655 /* Since the VEX/EVEX prefix contains the implicit prefix, we
11656 don't need the explicit prefix. */
11657 if (!is_any_vex_encoding (&i.tm))
11659 switch (i.tm.opcode_modifier.opcodeprefix)
11661 case PREFIX_0X66:
11662 add_prefix (0x66);
11663 break;
11664 case PREFIX_0XF2:
11665 add_prefix (0xf2);
11666 break;
11667 case PREFIX_0XF3:
11668 if (!is_cpu (&i.tm, CpuPadLock)
11669 || (i.prefix[REP_PREFIX] != 0xf3))
11670 add_prefix (0xf3);
11671 break;
11672 case PREFIX_NONE:
11673 switch (i.opcode_length)
11675 case 2:
11676 break;
11677 case 1:
11678 /* Check for pseudo prefixes. */
11679 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
11680 break;
11681 as_bad_where (insn_start_frag->fr_file,
11682 insn_start_frag->fr_line,
11683 _("pseudo prefix without instruction"));
11684 return;
11685 default:
11686 abort ();
11688 break;
11689 default:
11690 abort ();
11693 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
11694 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
11695 R_X86_64_GOTTPOFF relocation so that linker can safely
11696 perform IE->LE optimization. A dummy REX_OPCODE prefix
11697 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
11698 relocation for GDesc -> IE/LE optimization. */
11699 if (x86_elf_abi == X86_64_X32_ABI
11700 && !is_apx_rex2_encoding ()
11701 && i.operands == 2
11702 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
11703 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
11704 && i.prefix[REX_PREFIX] == 0)
11705 add_prefix (REX_OPCODE);
11706 #endif
11708 /* The prefix bytes. */
11709 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
11710 if (*q)
11711 frag_opcode_byte (*q);
11713 if (is_apx_rex2_encoding ())
11715 frag_opcode_byte (i.vex.bytes[0]);
11716 frag_opcode_byte (i.vex.bytes[1]);
11719 else
11721 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
11722 if (*q)
11723 switch (j)
11725 case SEG_PREFIX:
11726 case ADDR_PREFIX:
11727 frag_opcode_byte (*q);
11728 break;
11729 default:
11730 /* There should be no other prefixes for instructions
11731 with a VEX prefix. */
11732 abort ();
11735 /* For EVEX instructions i.vrex should become 0 after
11736 build_evex_prefix. For VEX instructions upper 16 registers
11737 aren't available, so VREX should be 0. */
11738 if (i.vrex)
11739 abort ();
11740 /* Now the VEX prefix. */
11741 if (now_seg != absolute_section)
11743 p = frag_more (i.vex.length);
11744 for (j = 0; j < i.vex.length; j++)
11745 p[j] = i.vex.bytes[j];
11747 else
11748 abs_section_offset += i.vex.length;
11751 /* Now the opcode; be careful about word order here! */
11752 j = i.opcode_length;
11753 if (!i.vex.length)
11754 switch (i.tm.opcode_space)
11756 case SPACE_BASE:
11757 break;
11758 case SPACE_0F:
11759 ++j;
11760 break;
11761 case SPACE_0F38:
11762 case SPACE_0F3A:
11763 j += 2;
11764 break;
11765 default:
11766 abort ();
11769 if (now_seg == absolute_section)
11770 abs_section_offset += j;
11771 else if (j == 1)
11773 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
11775 else
11777 p = frag_more (j);
11778 if (!i.vex.length
11779 && i.tm.opcode_space != SPACE_BASE)
11781 *p++ = 0x0f;
11782 if (i.tm.opcode_space != SPACE_0F)
11783 *p++ = i.tm.opcode_space == SPACE_0F38
11784 ? 0x38 : 0x3a;
11787 switch (i.opcode_length)
11789 case 2:
11790 /* Put out high byte first: can't use md_number_to_chars! */
11791 *p++ = (i.tm.base_opcode >> 8) & 0xff;
11792 /* Fall through. */
11793 case 1:
11794 *p = i.tm.base_opcode & 0xff;
11795 break;
11796 default:
11797 abort ();
11798 break;
11803 /* Now the modrm byte and sib byte (if present). */
11804 if (i.tm.opcode_modifier.modrm)
11806 frag_opcode_byte ((i.rm.regmem << 0)
11807 | (i.rm.reg << 3)
11808 | (i.rm.mode << 6));
11809 /* If i.rm.regmem == ESP (4)
11810 && i.rm.mode != (Register mode)
11811 && not 16 bit
11812 ==> need a SIB byte. */
11813 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
11814 && i.rm.mode != 3
11815 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
11816 frag_opcode_byte ((i.sib.base << 0)
11817 | (i.sib.index << 3)
11818 | (i.sib.scale << 6));
11821 if (i.disp_operands)
11822 output_disp (insn_start_frag, insn_start_off);
11824 if (i.imm_operands)
11825 output_imm (insn_start_frag, insn_start_off);
11828 /* frag_now_fix () returning plain abs_section_offset when we're in the
11829 absolute section, and abs_section_offset not getting updated as data
11830 gets added to the frag breaks the logic below. */
11832 if (now_seg != absolute_section)
11834 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
11835 if (j > 15)
11837 if (dot_insn ())
11838 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
11840 else
11841 as_bad (_("instruction length of %u bytes exceeds the limit of 15"),
11844 else if (fragP)
11846 /* NB: Don't add prefix with GOTPC relocation since
11847 output_disp() above depends on the fixed encoding
11848 length. Can't add prefix with TLS relocation since
11849 it breaks TLS linker optimization. */
11850 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
11851 /* Prefix count on the current instruction. */
11852 unsigned int count = i.vex.length;
11853 unsigned int k;
11854 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
11855 /* REX byte is encoded in VEX/EVEX prefix. */
11856 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
11857 count++;
11859 /* Count prefixes for extended opcode maps. */
11860 if (!i.vex.length)
11861 switch (i.tm.opcode_space)
11863 case SPACE_BASE:
11864 break;
11865 case SPACE_0F:
11866 count++;
11867 break;
11868 case SPACE_0F38:
11869 case SPACE_0F3A:
11870 count += 2;
11871 break;
11872 default:
11873 abort ();
11876 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11877 == BRANCH_PREFIX)
11879 /* Set the maximum prefix size in BRANCH_PREFIX
11880 frag. */
11881 if (fragP->tc_frag_data.max_bytes > max)
11882 fragP->tc_frag_data.max_bytes = max;
11883 if (fragP->tc_frag_data.max_bytes > count)
11884 fragP->tc_frag_data.max_bytes -= count;
11885 else
11886 fragP->tc_frag_data.max_bytes = 0;
11888 else
11890 /* Remember the maximum prefix size in FUSED_JCC_PADDING
11891 frag. */
11892 unsigned int max_prefix_size;
11893 if (align_branch_prefix_size > max)
11894 max_prefix_size = max;
11895 else
11896 max_prefix_size = align_branch_prefix_size;
11897 if (max_prefix_size > count)
11898 fragP->tc_frag_data.max_prefix_length
11899 = max_prefix_size - count;
11902 /* Use existing segment prefix if possible. Use CS
11903 segment prefix in 64-bit mode. In 32-bit mode, use SS
11904 segment prefix with ESP/EBP base register and use DS
11905 segment prefix without ESP/EBP base register. */
11906 if (i.prefix[SEG_PREFIX])
11907 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
11908 else if (flag_code == CODE_64BIT)
11909 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
11910 else if (i.base_reg
11911 && (i.base_reg->reg_num == 4
11912 || i.base_reg->reg_num == 5))
11913 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
11914 else
11915 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
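/* A sketch of the effect: the padding bytes later emitted from this frag
are redundant segment overrides, e.g. 0x2e (%cs) bytes in 64-bit mode,
which do not change the meaning of the instruction they precede. */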
11920 /* NB: Doesn't work with COND_JUMP86 without i386. */
11921 if (align_branch_power
11922 && now_seg != absolute_section
11923 && cpu_arch_flags.bitfield.cpui386)
11925 /* Terminate each frag so that we can add prefix and check for
11926 fused jcc. */
11927 frag_wane (frag_now);
11928 frag_new (0);
11931 #ifdef DEBUG386
11932 if (flag_debug)
11934 pi ("" /*line*/, &i);
11936 #endif /* DEBUG386 */
11939 /* Return the size of the displacement operand N. */
11941 static int
11942 disp_size (unsigned int n)
11944 int size = 4;
11946 if (i.types[n].bitfield.disp64)
11947 size = 8;
11948 else if (i.types[n].bitfield.disp8)
11949 size = 1;
11950 else if (i.types[n].bitfield.disp16)
11951 size = 2;
11952 return size;
11955 /* Return the size of the immediate operand N. */
11957 static int
11958 imm_size (unsigned int n)
11960 int size = 4;
11961 if (i.types[n].bitfield.imm64)
11962 size = 8;
11963 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
11964 size = 1;
11965 else if (i.types[n].bitfield.imm16)
11966 size = 2;
11967 return size;
11970 static void
11971 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
11973 char *p;
11974 unsigned int n;
11976 for (n = 0; n < i.operands; n++)
11978 if (operand_type_check (i.types[n], disp))
11980 int size = disp_size (n);
11982 if (now_seg == absolute_section)
11983 abs_section_offset += size;
11984 else if (i.op[n].disps->X_op == O_constant)
11986 offsetT val = i.op[n].disps->X_add_number;
11988 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
11989 size);
11990 p = frag_more (size);
11991 md_number_to_chars (p, val, size);
11993 else
11995 enum bfd_reloc_code_real reloc_type;
11996 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
11997 bool sign = (flag_code == CODE_64BIT && size == 4
11998 && (!want_disp32 (&i.tm)
11999 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
12000 && !i.types[n].bitfield.baseindex)))
12001 || pcrel;
12002 fixS *fixP;
12004 /* We can't have 8 bit displacement here. */
12005 gas_assert (!i.types[n].bitfield.disp8);
12007 /* The PC relative address is computed relative
12008 to the instruction boundary, so in case immediate
12009 fields follow, we need to adjust the value. */
12010 if (pcrel && i.imm_operands)
12012 unsigned int n1;
12013 int sz = 0;
12015 for (n1 = 0; n1 < i.operands; n1++)
12016 if (operand_type_check (i.types[n1], imm))
12018 /* Only one immediate is allowed for PC
12019 relative address, except with .insn. */
12020 gas_assert (sz == 0 || dot_insn ());
12021 sz += imm_size (n1);
12023 /* We should find at least one immediate. */
12024 gas_assert (sz != 0);
12025 i.op[n].disps->X_add_number -= sz;
12028 p = frag_more (size);
12029 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
12030 if (GOT_symbol
12031 && GOT_symbol == i.op[n].disps->X_add_symbol
12032 && (((reloc_type == BFD_RELOC_32
12033 || reloc_type == BFD_RELOC_X86_64_32S
12034 || (reloc_type == BFD_RELOC_64
12035 && object_64bit))
12036 && (i.op[n].disps->X_op == O_symbol
12037 || (i.op[n].disps->X_op == O_add
12038 && ((symbol_get_value_expression
12039 (i.op[n].disps->X_op_symbol)->X_op)
12040 == O_subtract))))
12041 || reloc_type == BFD_RELOC_32_PCREL))
12043 if (!object_64bit)
12045 reloc_type = BFD_RELOC_386_GOTPC;
12046 i.has_gotpc_tls_reloc = true;
12047 i.op[n].disps->X_add_number +=
12048 encoding_length (insn_start_frag, insn_start_off, p);
12050 else if (reloc_type == BFD_RELOC_64)
12051 reloc_type = BFD_RELOC_X86_64_GOTPC64;
12052 else
12053 /* Don't do the adjustment for x86-64, as there
12054 the pcrel addressing is relative to the _next_
12055 insn, and that is taken care of in other code. */
12056 reloc_type = BFD_RELOC_X86_64_GOTPC32;
12058 else if (align_branch_power)
12060 switch (reloc_type)
12062 case BFD_RELOC_386_TLS_GD:
12063 case BFD_RELOC_386_TLS_LDM:
12064 case BFD_RELOC_386_TLS_IE:
12065 case BFD_RELOC_386_TLS_IE_32:
12066 case BFD_RELOC_386_TLS_GOTIE:
12067 case BFD_RELOC_386_TLS_GOTDESC:
12068 case BFD_RELOC_386_TLS_DESC_CALL:
12069 case BFD_RELOC_X86_64_TLSGD:
12070 case BFD_RELOC_X86_64_TLSLD:
12071 case BFD_RELOC_X86_64_GOTTPOFF:
12072 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
12073 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
12074 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12075 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
12076 case BFD_RELOC_X86_64_TLSDESC_CALL:
12077 i.has_gotpc_tls_reloc = true;
12078 default:
12079 break;
12082 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
12083 size, i.op[n].disps, pcrel,
12084 reloc_type);
12086 if (flag_code == CODE_64BIT && size == 4 && pcrel
12087 && !i.prefix[ADDR_PREFIX])
12088 fixP->fx_signed = 1;
12090 if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF
12091 && i.tm.opcode_space == SPACE_EVEXMAP4)
12093 /* Only "add %reg1, foo@gottpoff(%rip), %reg2" is
12094 allowed in md_assemble. Set fx_tcbit2 for EVEX
12095 prefix. */
12096 fixP->fx_tcbit2 = 1;
12097 continue;
12100 if (i.base_reg && i.base_reg->reg_num == RegIP)
12102 if (reloc_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
12104 /* Set fx_tcbit for REX2 prefix. */
12105 if (is_apx_rex2_encoding ())
12106 fixP->fx_tcbit = 1;
12107 continue;
12110 /* In 64-bit, i386_validate_fix updates only (%rip)
12111 relocations. */
12112 else if (object_64bit)
12113 continue;
12115 /* Check for "call/jmp *mem", "mov mem, %reg",
12116 "test %reg, mem" and "binop mem, %reg" where binop
12117 is one of adc, add, and, cmp, or, sbb, sub, xor,
12118 all without a data prefix. Always generate
12119 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
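/* E.g. `movl foo@GOT(%ebx), %eax' gets R_386_GOT32X rather than
   R_386_GOT32, which permits the linker to relax the load to
   `leal foo@GOTOFF(%ebx), %eax' when foo turns out to be local. */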
12120 if (i.prefix[DATA_PREFIX] == 0
12121 && (i.rm.mode == 2
12122 || (i.rm.mode == 0 && i.rm.regmem == 5))
12123 && i.tm.opcode_space == SPACE_BASE
12124 && ((i.operands == 1
12125 && i.tm.base_opcode == 0xff
12126 && (i.rm.reg == 2 || i.rm.reg == 4))
12127 || (i.operands == 2
12128 && (i.tm.base_opcode == 0x8b
12129 || i.tm.base_opcode == 0x85
12130 || (i.tm.base_opcode & ~0x38) == 0x03))))
12132 if (object_64bit)
12134 if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF)
12136 /* Set fx_tcbit for REX2 prefix. */
12137 if (is_apx_rex2_encoding ())
12138 fixP->fx_tcbit = 1;
12140 else if (generate_relax_relocations)
12142 /* Set fx_tcbit3 for REX2 prefix. */
12143 if (is_apx_rex2_encoding ())
12144 fixP->fx_tcbit3 = 1;
12145 else if (i.rex)
12146 fixP->fx_tcbit2 = 1;
12147 else
12148 fixP->fx_tcbit = 1;
12151 else if (generate_relax_relocations
12152 || (i.rm.mode == 0 && i.rm.regmem == 5))
12153 fixP->fx_tcbit2 = 1;
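/* Emit the immediate bytes of all operands of the current insn,
   likewise generating fixups for values that aren't compile-time
   constants. */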
12160 static void
12161 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
12163 char *p;
12164 unsigned int n;
12166 for (n = 0; n < i.operands; n++)
12168 if (operand_type_check (i.types[n], imm))
12170 int size = imm_size (n);
12172 if (now_seg == absolute_section)
12173 abs_section_offset += size;
12174 else if (i.op[n].imms->X_op == O_constant)
12176 offsetT val;
12178 val = offset_in_range (i.op[n].imms->X_add_number,
12179 size);
12180 p = frag_more (size);
12181 md_number_to_chars (p, val, size);
12183 else
12185 /* Not absolute_section.
12186 Need a 32-bit fixup (we don't support 8-bit
12187 non-absolute imms). Try to support other
12188 sizes ... */
12189 enum bfd_reloc_code_real reloc_type;
12190 int sign;
12192 if (i.types[n].bitfield.imm32s
12193 && (i.suffix == QWORD_MNEM_SUFFIX
12194 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
12195 || (i.prefix[REX_PREFIX] & REX_W)
12196 || dot_insn ()))
12197 sign = 1;
12198 else
12199 sign = 0;
12201 p = frag_more (size);
12202 reloc_type = reloc (size, 0, sign, i.reloc[n]);
12204 /* This is tough to explain. We end up with this one if we
12205 * have operands that look like
12206 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
12207 * obtain the absolute address of the GOT, and it is strongly
12208 * preferable from a performance point of view to avoid using
12209 * a runtime relocation for this. The actual sequence of
12210 * instructions often looks something like:
12212 * call .L66
12213 * .L66:
12214 * popl %ebx
12215 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
12217 * The call and pop essentially return the absolute address
12218 * of the label .L66 and store it in %ebx. The linker itself
12219 * will ultimately change the first operand of the addl so
12220 * that %ebx points to the GOT, but to keep things simple, the
12221 * .o file must have this operand set so that it generates not
12222 * the absolute address of .L66, but the absolute address of
12223 * itself. This allows the linker to simply treat a GOTPC
12224 * relocation as asking for a pcrel offset to the GOT to be
12225 * added in, and the addend of the relocation is stored in the
12226 * operand field for the instruction itself.
12228 * Our job here is to fix the operand so that it would add
12229 * the correct offset so that %ebx would point to itself. The
12230 * thing that is tricky is that .-.L66 will point to the
12231 * beginning of the instruction, so we need to further modify
12232 * the operand so that it will point to itself. There are
12233 * other cases where you have something like:
12235 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
12237 * and here no correction would be required. Internally in
12238 * the assembler we treat operands of this form as not being
12239 * pcrel since the '.' is explicitly mentioned, and I wonder
12240 * whether it would simplify matters to do it this way. Who
12241 * knows. In earlier versions of the PIC patches, the
12242 * pcrel_adjust field was used to store the correction, but
12243 * since the expression is not pcrel, I felt it would be
12244 * confusing to do it this way. */
12246 if ((reloc_type == BFD_RELOC_32
12247 || reloc_type == BFD_RELOC_X86_64_32S
12248 || reloc_type == BFD_RELOC_64)
12249 && GOT_symbol
12250 && GOT_symbol == i.op[n].imms->X_add_symbol
12251 && (i.op[n].imms->X_op == O_symbol
12252 || (i.op[n].imms->X_op == O_add
12253 && ((symbol_get_value_expression
12254 (i.op[n].imms->X_op_symbol)->X_op)
12255 == O_subtract))))
12257 if (!object_64bit)
12258 reloc_type = BFD_RELOC_386_GOTPC;
12259 else if (size == 4)
12260 reloc_type = BFD_RELOC_X86_64_GOTPC32;
12261 else if (size == 8)
12262 reloc_type = BFD_RELOC_X86_64_GOTPC64;
12263 i.has_gotpc_tls_reloc = true;
12264 i.op[n].imms->X_add_number +=
12265 encoding_length (insn_start_frag, insn_start_off, p);
12267 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
12268 i.op[n].imms, 0, reloc_type);
12274 /* x86_cons_fix_new is called via the expression parsing code when a
12275 reloc is needed. We use this hook to get the correct .got reloc. */
12276 static int cons_sign = -1;
12278 void
12279 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
12280 expressionS *exp, bfd_reloc_code_real_type r)
12282 r = reloc (len, 0, cons_sign, r);
12284 #ifdef TE_PE
12285 if (exp->X_op == O_secrel)
12287 exp->X_op = O_symbol;
12288 r = BFD_RELOC_32_SECREL;
12290 else if (exp->X_op == O_secidx)
12291 r = BFD_RELOC_16_SECIDX;
12292 #endif
12294 fix_new_exp (frag, off, len, exp, 0, r);
12297 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
12298 purpose of the `.dc.a' internal pseudo-op. */
12300 int
12301 x86_address_bytes (void)
12303 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
12304 return 4;
12305 return stdoutput->arch_info->bits_per_address / 8;
12308 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
12309 || defined (LEX_AT)) && !defined (TE_PE)
12310 # define lex_got(reloc, adjust, types) NULL
12311 #else
12312 /* Parse operands of the form
12313 <symbol>@GOTOFF+<nnn>
12314 and similar .plt or .got references.
12316 If we find one, set up the correct relocation in RELOC and copy the
12317 input string, minus the `@GOTOFF', into a malloc'd buffer for
12318 parsing by the calling routine. Return this buffer, and if ADJUST
12319 is non-null set it to the length of the string we removed from the
12320 input line. Otherwise return NULL. */
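/* For example (a rough sketch of the data flow): given `foo@GOTOFF+4'
   on the input line, the returned buffer holds "foo +4", RELOC becomes
   BFD_RELOC_386_GOTOFF (BFD_RELOC_X86_64_GOTOFF64 for 64-bit output),
   and ADJUST is set to 6, the length of the removed "GOTOFF" token. */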
12321 static char *
12322 lex_got (enum bfd_reloc_code_real *rel,
12323 int *adjust,
12324 i386_operand_type *types)
12326 /* Some of the relocations depend on the size of the field to
12327 be relocated. But in our callers i386_immediate and i386_displacement
12328 we don't yet know the operand size (this will be set by insn
12329 matching). Hence we record the word32 relocation here,
12330 and adjust the reloc according to the real size in reloc(). */
12331 static const struct
12333 const char *str;
12334 int len;
12335 const enum bfd_reloc_code_real rel[2];
12336 const i386_operand_type types64;
12337 bool need_GOT_symbol;
12339 gotrel[] =
12342 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
12343 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
12344 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
12345 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
12346 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
12347 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
12348 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
12349 { .imm64 = 1, .disp64 = 1 } }
12351 #ifndef TE_PE
12352 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12353 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
12354 BFD_RELOC_SIZE32 },
12355 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
12356 #endif
12357 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
12358 BFD_RELOC_X86_64_PLTOFF64 },
12359 { .bitfield = { .imm64 = 1 } }, true },
12360 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
12361 BFD_RELOC_X86_64_PLT32 },
12362 OPERAND_TYPE_IMM32_32S_DISP32, false },
12363 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
12364 BFD_RELOC_X86_64_GOTPLT64 },
12365 OPERAND_TYPE_IMM64_DISP64, true },
12366 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
12367 BFD_RELOC_X86_64_GOTOFF64 },
12368 OPERAND_TYPE_IMM64_DISP64, true },
12369 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
12370 BFD_RELOC_X86_64_GOTPCREL },
12371 OPERAND_TYPE_IMM32_32S_DISP32, true },
12372 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
12373 BFD_RELOC_X86_64_TLSGD },
12374 OPERAND_TYPE_IMM32_32S_DISP32, true },
12375 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
12376 _dummy_first_bfd_reloc_code_real },
12377 OPERAND_TYPE_NONE, true },
12378 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
12379 BFD_RELOC_X86_64_TLSLD },
12380 OPERAND_TYPE_IMM32_32S_DISP32, true },
12381 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
12382 BFD_RELOC_X86_64_GOTTPOFF },
12383 OPERAND_TYPE_IMM32_32S_DISP32, true },
12384 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
12385 BFD_RELOC_X86_64_TPOFF32 },
12386 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
12387 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
12388 _dummy_first_bfd_reloc_code_real },
12389 OPERAND_TYPE_NONE, true },
12390 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
12391 BFD_RELOC_X86_64_DTPOFF32 },
12392 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
12393 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
12394 _dummy_first_bfd_reloc_code_real },
12395 OPERAND_TYPE_NONE, true },
12396 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
12397 _dummy_first_bfd_reloc_code_real },
12398 OPERAND_TYPE_NONE, true },
12399 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
12400 BFD_RELOC_X86_64_GOT32 },
12401 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
12402 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
12403 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
12404 OPERAND_TYPE_IMM32_32S_DISP32, true },
12405 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
12406 BFD_RELOC_X86_64_TLSDESC_CALL },
12407 OPERAND_TYPE_IMM32_32S_DISP32, true },
12408 #else /* TE_PE */
12409 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
12410 BFD_RELOC_32_SECREL },
12411 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
12412 #endif
12414 #undef OPERAND_TYPE_IMM32_32S_DISP32
12415 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
12416 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
12417 #undef OPERAND_TYPE_IMM64_DISP64
12420 char *cp;
12421 unsigned int j;
12423 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
12424 if (!IS_ELF)
12425 return NULL;
12426 #endif
12428 for (cp = input_line_pointer; *cp != '@'; cp++)
12429 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
12430 return NULL;
12432 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
12434 int len = gotrel[j].len;
12435 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
12437 if (gotrel[j].rel[object_64bit] != 0)
12439 int first, second;
12440 char *tmpbuf, *past_reloc;
12442 *rel = gotrel[j].rel[object_64bit];
12444 if (types)
12446 if (flag_code != CODE_64BIT)
12448 types->bitfield.imm32 = 1;
12449 types->bitfield.disp32 = 1;
12451 else
12452 *types = gotrel[j].types64;
12455 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
12456 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
12458 /* The length of the first part of our input line. */
12459 first = cp - input_line_pointer;
12461 /* The second part goes from after the reloc token until
12462 (and including) an end_of_line char or comma. */
12463 past_reloc = cp + 1 + len;
12464 cp = past_reloc;
12465 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
12466 ++cp;
12467 second = cp + 1 - past_reloc;
12469 /* Allocate and copy string. The trailing NUL shouldn't
12470 be necessary, but be safe. */
12471 tmpbuf = XNEWVEC (char, first + second + 2);
12472 memcpy (tmpbuf, input_line_pointer, first);
12473 if (second != 0 && *past_reloc != ' ')
12474 /* Replace the relocation token with ' ', so that
12475 errors like foo@GOTOFF1 will be detected. */
12476 tmpbuf[first++] = ' ';
12477 else
12478 /* Increment length by 1 if the relocation token is
12479 removed. */
12480 len++;
12481 if (adjust)
12482 *adjust = len;
12483 memcpy (tmpbuf + first, past_reloc, second);
12484 tmpbuf[first + second] = '\0';
12485 return tmpbuf;
12488 as_bad (_("@%s reloc is not supported with %d-bit output format"),
12489 gotrel[j].str, 1 << (5 + object_64bit));
12490 return NULL;
12494 /* Might be a symbol version string. Don't as_bad here. */
12495 return NULL;
12497 #endif
12499 bfd_reloc_code_real_type
12500 x86_cons (expressionS *exp, int size)
12502 bfd_reloc_code_real_type got_reloc = NO_RELOC;
12504 intel_syntax = -intel_syntax;
12505 exp->X_md = 0;
12506 expr_mode = expr_operator_none;
12508 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
12509 && !defined (LEX_AT)) \
12510 || defined (TE_PE)
12511 if (size == 4 || (object_64bit && size == 8))
12513 /* Handle @GOTOFF and the like in an expression. */
12514 char *save;
12515 char *gotfree_input_line;
12516 int adjust = 0;
12518 save = input_line_pointer;
12519 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
12520 if (gotfree_input_line)
12521 input_line_pointer = gotfree_input_line;
12523 expression (exp);
12525 if (gotfree_input_line)
12527 /* expression () has merrily parsed up to the end of line,
12528 or a comma - in the wrong buffer. Transfer how far
12529 input_line_pointer has moved to the right buffer. */
12530 input_line_pointer = (save
12531 + (input_line_pointer - gotfree_input_line)
12532 + adjust);
12533 free (gotfree_input_line);
12534 if (exp->X_op == O_constant
12535 || exp->X_op == O_absent
12536 || exp->X_op == O_illegal
12537 || exp->X_op == O_register
12538 || exp->X_op == O_big)
12540 char c = *input_line_pointer;
12541 *input_line_pointer = 0;
12542 as_bad (_("missing or invalid expression `%s'"), save);
12543 *input_line_pointer = c;
12545 else if ((got_reloc == BFD_RELOC_386_PLT32
12546 || got_reloc == BFD_RELOC_X86_64_PLT32)
12547 && exp->X_op != O_symbol)
12549 char c = *input_line_pointer;
12550 *input_line_pointer = 0;
12551 as_bad (_("invalid PLT expression `%s'"), save);
12552 *input_line_pointer = c;
12556 else
12557 #endif
12558 expression (exp);
12560 intel_syntax = -intel_syntax;
12562 if (intel_syntax)
12563 i386_intel_simplify (exp);
12565 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
12566 if (size <= 4 && expr_mode == expr_operator_present
12567 && exp->X_op == O_constant && !object_64bit)
12568 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
12570 return got_reloc;
12573 static void
12574 signed_cons (int size)
12576 if (object_64bit)
12577 cons_sign = 1;
12578 cons (size);
12579 cons_sign = -1;
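/* Handler for the .insn directive, which emits a hand-crafted insn
   encoding. Very roughly (see the gas manual for the full syntax),
   accepted input looks like

	.insn 0x0fa2			# cpuid
	.insn VEX.L0.0F.W0 0x77		# vzeroupper

   i.e. an optional VEX/XOP/EVEX specifier, a possibly multi-byte
   opcode optionally suffixed by +r or /<digit>, and then operands. */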
12582 static void
12583 s_insn (int dummy ATTRIBUTE_UNUSED)
12585 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
12586 char *saved_ilp = find_end_of_line (line, false), saved_char;
12587 const char *end;
12588 unsigned int j;
12589 valueT val;
12590 bool vex = false, xop = false, evex = false;
12591 struct last_insn *last_insn;
12593 init_globals ();
12595 saved_char = *saved_ilp;
12596 *saved_ilp = 0;
12598 end = parse_insn (line, mnemonic, parse_prefix);
12599 if (end == NULL)
12601 bad:
12602 *saved_ilp = saved_char;
12603 ignore_rest_of_line ();
12604 i.tm.mnem_off = 0;
12605 memset (&pp, 0, sizeof (pp));
12606 return;
12608 line += end - line;
12610 current_templates.start = &i.tm;
12611 current_templates.end = &i.tm + 1;
12612 i.tm.mnem_off = MN__insn;
12613 i.tm.extension_opcode = None;
12615 if (startswith (line, "VEX")
12616 && (line[3] == '.' || is_space_char (line[3])))
12618 vex = true;
12619 line += 3;
12621 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
12623 char *e;
12624 unsigned long n = strtoul (line + 3, &e, 16);
12626 if (e == line + 5 && n >= 0x08 && n <= 0x1f
12627 && (*e == '.' || is_space_char (*e)))
12629 xop = true;
12630 /* Arrange for build_vex_prefix() to emit 0x8f. */
12631 i.tm.opcode_space = SPACE_XOP08;
12632 i.insn_opcode_space = n;
12633 line = e;
12636 else if (startswith (line, "EVEX")
12637 && (line[4] == '.' || is_space_char (line[4])))
12639 evex = true;
12640 line += 4;
12643 if (vex || xop
12644 ? pp.encoding == encoding_evex
12645 : evex
12646 ? pp.encoding == encoding_vex
12647 || pp.encoding == encoding_vex3
12648 : pp.encoding != encoding_default)
12650 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
12651 goto bad;
12654 if (line > end && pp.encoding == encoding_default)
12655 pp.encoding = evex ? encoding_evex : encoding_vex;
12657 if (pp.encoding != encoding_default)
12659 /* Only address size and segment override prefixes are permitted with
12660 VEX/XOP/EVEX encodings. */
12661 const unsigned char *p = i.prefix;
12663 for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
12665 if (!*p)
12666 continue;
12668 switch (j)
12670 case SEG_PREFIX:
12671 case ADDR_PREFIX:
12672 break;
12673 default:
12674 as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
12675 goto bad;
12680 if (line > end && *line == '.')
12682 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
12683 switch (line[1])
12685 case 'L':
12686 switch (line[2])
12688 case '0':
12689 if (evex)
12690 i.tm.opcode_modifier.evex = EVEX128;
12691 else
12692 i.tm.opcode_modifier.vex = VEX128;
12693 break;
12695 case '1':
12696 if (evex)
12697 i.tm.opcode_modifier.evex = EVEX256;
12698 else
12699 i.tm.opcode_modifier.vex = VEX256;
12700 break;
12702 case '2':
12703 if (evex)
12704 i.tm.opcode_modifier.evex = EVEX512;
12705 break;
12707 case '3':
12708 if (evex)
12709 i.tm.opcode_modifier.evex = EVEX_L3;
12710 break;
12712 case 'I':
12713 if (line[3] == 'G')
12715 if (evex)
12716 i.tm.opcode_modifier.evex = EVEXLIG;
12717 else
12718 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
12719 ++line;
12721 break;
12724 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
12725 line += 3;
12726 break;
12728 case '1':
12729 if (line[2] == '2' && line[3] == '8')
12731 if (evex)
12732 i.tm.opcode_modifier.evex = EVEX128;
12733 else
12734 i.tm.opcode_modifier.vex = VEX128;
12735 line += 4;
12737 break;
12739 case '2':
12740 if (line[2] == '5' && line[3] == '6')
12742 if (evex)
12743 i.tm.opcode_modifier.evex = EVEX256;
12744 else
12745 i.tm.opcode_modifier.vex = VEX256;
12746 line += 4;
12748 break;
12750 case '5':
12751 if (evex && line[2] == '1' && line[3] == '2')
12753 i.tm.opcode_modifier.evex = EVEX512;
12754 line += 4;
12756 break;
12760 if (line > end && *line == '.')
12762 /* Embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
12763 switch (line[1])
12765 case 'N':
12766 if (line[2] == 'P')
12767 line += 3;
12768 break;
12770 case '6':
12771 if (line[2] == '6')
12773 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
12774 line += 3;
12776 break;
12778 case 'F': case 'f':
12779 if (line[2] == '3')
12781 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
12782 line += 3;
12784 else if (line[2] == '2')
12786 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
12787 line += 3;
12789 break;
12793 if (line > end && !xop && *line == '.')
12795 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
12796 switch (line[1])
12798 case '0':
12799 if (TOUPPER (line[2]) != 'F')
12800 break;
12801 if (line[3] == '.' || is_space_char (line[3]))
12803 i.insn_opcode_space = SPACE_0F;
12804 line += 3;
12806 else if (line[3] == '3'
12807 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
12808 && (line[5] == '.' || is_space_char (line[5])))
12810 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
12811 line += 5;
12813 break;
12815 case 'M':
12816 if (ISDIGIT (line[2]) && line[2] != '0')
12818 char *e;
12819 unsigned long n = strtoul (line + 2, &e, 10);
12821 if (n <= (evex ? 15 : 31)
12822 && (*e == '.' || is_space_char (*e)))
12824 i.insn_opcode_space = n;
12825 line = e;
12828 break;
12832 if (line > end && *line == '.' && line[1] == 'W')
12834 /* VEX.W, XOP.W, EVEX.W */
12835 switch (line[2])
12837 case '0':
12838 i.tm.opcode_modifier.vexw = VEXW0;
12839 break;
12841 case '1':
12842 i.tm.opcode_modifier.vexw = VEXW1;
12843 break;
12845 case 'I':
12846 if (line[3] == 'G')
12848 i.tm.opcode_modifier.vexw = VEXWIG;
12849 ++line;
12851 break;
12854 if (i.tm.opcode_modifier.vexw)
12855 line += 3;
12858 if (line > end && *line && !is_space_char (*line))
12860 /* Improve diagnostic a little. */
12861 if (*line == '.' && line[1] && !is_space_char (line[1]))
12862 ++line;
12863 goto done;
12866 /* Before processing the opcode expression, find trailing "+r" or
12867 "/<digit>" specifiers. */
12868 for (ptr = line; ; ++ptr)
12870 unsigned long n;
12871 char *e;
12873 ptr = strpbrk (ptr, "+/,");
12874 if (ptr == NULL || *ptr == ',')
12875 break;
12877 if (*ptr == '+' && ptr[1] == 'r'
12878 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
12880 *ptr = ' ';
12881 ptr[1] = ' ';
12882 i.short_form = true;
12883 break;
12886 if (*ptr == '/' && ISDIGIT (ptr[1])
12887 && (n = strtoul (ptr + 1, &e, 8)) < 8
12888 && e == ptr + 2
12889 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
12891 *ptr = ' ';
12892 ptr[1] = ' ';
12893 i.tm.extension_opcode = n;
12894 i.tm.opcode_modifier.modrm = 1;
12895 break;
12899 input_line_pointer = line;
12900 val = get_absolute_expression ();
12901 line = input_line_pointer;
12903 if (i.short_form && (val & 7))
12904 as_warn (_("`+r' assumes low three opcode bits to be clear"));
12906 for (j = 1; j < sizeof (val); ++j)
12907 if (!(val >> (j * 8)))
12908 break;
12910 /* Trim off a prefix if present. */
12911 if (j > 1 && !vex && !xop && !evex)
12913 uint8_t byte = val >> ((j - 1) * 8);
12915 switch (byte)
12917 case DATA_PREFIX_OPCODE:
12918 case REPE_PREFIX_OPCODE:
12919 case REPNE_PREFIX_OPCODE:
12920 if (!add_prefix (byte))
12921 goto bad;
12922 val &= ((uint64_t)1 << (--j * 8)) - 1;
12923 break;
12927 /* Parse operands, if any, before evaluating encoding space. */
12928 if (*line == ',')
12930 i.memshift = -1;
12932 ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
12933 this_operand = -1;
12934 if (!ptr)
12935 goto bad;
12936 line = ptr;
12938 if (!i.operands)
12940 as_bad (_("expecting operand after ','; got nothing"));
12941 goto done;
12944 if (i.mem_operands > 1)
12946 as_bad (_("too many memory references for `%s'"),
12947 &i386_mnemonics[MN__insn]);
12948 goto done;
12951 /* No need to distinguish encoding_evex and encoding_evex512. */
12952 if (pp.encoding == encoding_evex512)
12953 pp.encoding = encoding_evex;
12956 /* Trim off encoding space. */
12957 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
12959 uint8_t byte = val >> ((--j - 1) * 8);
12961 i.insn_opcode_space = SPACE_0F;
12962 switch (byte & -(j > 1 && !pp.rex2_encoding
12963 && (pp.encoding != encoding_egpr || evex)))
12965 case 0x38:
12966 i.insn_opcode_space = SPACE_0F38;
12967 --j;
12968 break;
12969 case 0x3a:
12970 i.insn_opcode_space = SPACE_0F3A;
12971 --j;
12972 break;
12974 i.tm.opcode_space = i.insn_opcode_space;
12975 val &= ((uint64_t)1 << (j * 8)) - 1;
12977 if (!i.tm.opcode_space && (vex || evex))
12978 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
12979 Also avoid hitting abort() there or in build_evex_prefix(). */
12980 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
12981 : SPACE_0F38;
12983 if (j > 2)
12985 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
12986 goto done;
12988 i.opcode_length = j;
12990 /* Handle operands, if any. */
12991 if (i.operands)
12993 i386_operand_type combined;
12994 expressionS *disp_exp = NULL;
12995 bool changed;
12997 if (pp.encoding == encoding_egpr)
12999 if (vex || xop)
13001 as_bad (_("eGPR use conflicts with encoding specifier"));
13002 goto done;
13004 if (evex)
13005 pp.encoding = encoding_evex;
13006 else
13007 pp.encoding = encoding_default;
13010 /* Are we to emit ModR/M encoding? */
13011 if (!i.short_form
13012 && (i.mem_operands
13013 || i.reg_operands > (pp.encoding != encoding_default)
13014 || i.tm.extension_opcode != None))
13015 i.tm.opcode_modifier.modrm = 1;
13017 if (!i.tm.opcode_modifier.modrm
13018 && (i.reg_operands
13019 > i.short_form + 0U + (pp.encoding != encoding_default)
13020 || i.mem_operands))
13022 as_bad (_("too many register/memory operands"));
13023 goto done;
13026 /* Enforce certain constraints on operands. */
13027 switch (i.reg_operands + i.mem_operands
13028 + (i.tm.extension_opcode != None))
13030 case 0:
13031 if (i.short_form)
13033 as_bad (_("too few register/memory operands"));
13034 goto done;
13036 /* Fall through. */
13037 case 1:
13038 if (i.tm.opcode_modifier.modrm)
13040 as_bad (_("too few register/memory operands"));
13041 goto done;
13043 break;
13045 case 2:
13046 break;
13048 case 4:
13049 if (i.imm_operands
13050 && (i.op[0].imms->X_op != O_constant
13051 || !fits_in_imm4 (i.op[0].imms->X_add_number)))
13053 as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
13054 goto done;
13056 /* Fall through. */
13057 case 3:
13058 if (pp.encoding != encoding_default)
13060 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
13061 break;
13063 /* Fall through. */
13064 default:
13065 as_bad (_("too many register/memory operands"));
13066 goto done;
13069 /* Bring operands into canonical order (imm, mem, reg). */
13070 do
13072 changed = false;
13074 for (j = 1; j < i.operands; ++j)
13076 if ((!operand_type_check (i.types[j - 1], imm)
13077 && operand_type_check (i.types[j], imm))
13078 || (i.types[j - 1].bitfield.class != ClassNone
13079 && i.types[j].bitfield.class == ClassNone))
13081 swap_2_operands (j - 1, j);
13082 changed = true;
13086 while (changed);
13088 /* For Intel syntax swap the order of register operands. */
13089 if (intel_syntax)
13090 switch (i.reg_operands)
13092 case 0:
13093 case 1:
13094 break;
13096 case 4:
13097 swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
13098 /* Fall through. */
13099 case 3:
13100 case 2:
13101 swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
13102 break;
13104 default:
13105 abort ();
13108 /* Enforce constraints when using VSIB. */
13109 if (i.index_reg
13110 && (i.index_reg->reg_type.bitfield.xmmword
13111 || i.index_reg->reg_type.bitfield.ymmword
13112 || i.index_reg->reg_type.bitfield.zmmword))
13114 if (pp.encoding == encoding_default)
13116 as_bad (_("VSIB unavailable with legacy encoding"));
13117 goto done;
13120 if (pp.encoding == encoding_evex
13121 && i.reg_operands > 1)
13123 /* We could allow two register operands, encoding the 2nd one in
13124 an 8-bit immediate like for 4-register-operand insns, but that
13125 would require ugly fiddling with process_operands() and/or
13126 build_modrm_byte(). */
13127 as_bad (_("too many register operands with VSIB"));
13128 goto done;
13131 i.tm.opcode_modifier.sib = 1;
13134 /* Establish operand size encoding. */
13135 operand_type_set (&combined, 0);
13137 for (j = i.imm_operands; j < i.operands; ++j)
13139 /* Look for 8-bit operands that use old registers. */
13140 if (pp.encoding != encoding_default
13141 && flag_code == CODE_64BIT
13142 && i.types[j].bitfield.class == Reg
13143 && i.types[j].bitfield.byte
13144 && !(i.op[j].regs->reg_flags & RegRex64)
13145 && i.op[j].regs->reg_num > 3)
13146 as_bad (_("can't encode register '%s%s' with VEX/XOP/EVEX"),
13147 register_prefix, i.op[j].regs->reg_name);
13149 i.types[j].bitfield.instance = InstanceNone;
13151 if (operand_type_check (i.types[j], disp))
13153 i.types[j].bitfield.baseindex = 1;
13154 disp_exp = i.op[j].disps;
13157 if (evex && i.types[j].bitfield.baseindex)
13159 unsigned int n = i.memshift;
13161 if (i.types[j].bitfield.byte)
13162 n = 0;
13163 else if (i.types[j].bitfield.word)
13164 n = 1;
13165 else if (i.types[j].bitfield.dword)
13166 n = 2;
13167 else if (i.types[j].bitfield.qword)
13168 n = 3;
13169 else if (i.types[j].bitfield.xmmword)
13170 n = 4;
13171 else if (i.types[j].bitfield.ymmword)
13172 n = 5;
13173 else if (i.types[j].bitfield.zmmword)
13174 n = 6;
13176 if (i.memshift < 32 && n != i.memshift)
13177 as_warn (_("conflicting memory operand size specifiers"));
13178 i.memshift = n;
13181 if ((i.broadcast.type || i.broadcast.bytes)
13182 && j == i.broadcast.operand)
13183 continue;
13185 combined = operand_type_or (combined, i.types[j]);
13186 combined.bitfield.class = ClassNone;
13189 switch ((i.broadcast.type ? i.broadcast.type : 1)
13190 << (i.memshift < 32 ? i.memshift : 0))
13192 case 64: combined.bitfield.zmmword = 1; break;
13193 case 32: combined.bitfield.ymmword = 1; break;
13194 case 16: combined.bitfield.xmmword = 1; break;
13195 case 8: combined.bitfield.qword = 1; break;
13196 case 4: combined.bitfield.dword = 1; break;
13199 if (pp.encoding == encoding_default)
13201 if (flag_code == CODE_64BIT && combined.bitfield.qword)
13202 i.rex |= REX_W;
13203 else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
13204 : combined.bitfield.word)
13205 && !add_prefix (DATA_PREFIX_OPCODE))
13206 goto done;
13208 else if (!i.tm.opcode_modifier.vexw)
13210 if (flag_code == CODE_64BIT)
13212 if (combined.bitfield.qword)
13213 i.tm.opcode_modifier.vexw = VEXW1;
13214 else if (combined.bitfield.dword)
13215 i.tm.opcode_modifier.vexw = VEXW0;
13218 if (!i.tm.opcode_modifier.vexw)
13219 i.tm.opcode_modifier.vexw = VEXWIG;
13222 if (vex || xop)
13224 if (!i.tm.opcode_modifier.vex)
13226 if (combined.bitfield.ymmword)
13227 i.tm.opcode_modifier.vex = VEX256;
13228 else if (combined.bitfield.xmmword)
13229 i.tm.opcode_modifier.vex = VEX128;
13232 else if (evex)
13234 if (!i.tm.opcode_modifier.evex)
13236 /* Do _not_ consider AVX512VL here. */
13237 if (i.rounding.type != rc_none || combined.bitfield.zmmword)
13238 i.tm.opcode_modifier.evex = EVEX512;
13239 else if (combined.bitfield.ymmword)
13240 i.tm.opcode_modifier.evex = EVEX256;
13241 else if (combined.bitfield.xmmword)
13242 i.tm.opcode_modifier.evex = EVEX128;
13245 if (i.memshift >= 32)
13247 unsigned int n = 0;
13249 switch (i.tm.opcode_modifier.evex)
13251 case EVEX512: n = 64; break;
13252 case EVEX256: n = 32; break;
13253 case EVEX128: n = 16; break;
13256 if (i.broadcast.type)
13257 n /= i.broadcast.type;
13259 if (n > 0)
13260 for (i.memshift = 0; !(n & 1); n >>= 1)
13261 ++i.memshift;
13262 else if (disp_exp != NULL && disp_exp->X_op == O_constant
13263 && disp_exp->X_add_number != 0
13264 && pp.disp_encoding != disp_encoding_32bit)
13266 if (!quiet_warnings)
13267 as_warn (_("cannot determine memory operand size"));
13268 pp.disp_encoding = disp_encoding_32bit;
13273 if (i.memshift >= 32)
13274 i.memshift = 0;
13275 else if (!evex)
13276 pp.encoding = encoding_error;
13278 if (i.disp_operands && !optimize_disp (&i.tm))
13279 goto done;
13281 /* Establish size for immediate operands. */
13282 for (j = 0; j < i.imm_operands; ++j)
13284 expressionS *expP = i.op[j].imms;
13286 gas_assert (operand_type_check (i.types[j], imm));
13287 operand_type_set (&i.types[j], 0);
13289 if (i.imm_bits[j] > 32)
13290 i.types[j].bitfield.imm64 = 1;
13291 else if (i.imm_bits[j] > 16)
13293 if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
13294 i.types[j].bitfield.imm32s = 1;
13295 else
13296 i.types[j].bitfield.imm32 = 1;
13298 else if (i.imm_bits[j] > 8)
13299 i.types[j].bitfield.imm16 = 1;
13300 else if (i.imm_bits[j] > 0)
13302 if (i.flags[j] & Operand_Signed)
13303 i.types[j].bitfield.imm8s = 1;
13304 else
13305 i.types[j].bitfield.imm8 = 1;
13307 else if (expP->X_op == O_constant)
13309 i.types[j] = smallest_imm_type (expP->X_add_number);
13310 i.types[j].bitfield.imm1 = 0;
13311 /* Oddly enough imm_size() checks imm64 first, so the bit needs
13312 zapping since smallest_imm_type() sets it unconditionally. */
13313 if (flag_code != CODE_64BIT)
13315 i.types[j].bitfield.imm64 = 0;
13316 i.types[j].bitfield.imm32s = 0;
13317 i.types[j].bitfield.imm32 = 1;
13319 else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
13320 i.types[j].bitfield.imm64 = 0;
13322 else
13323 /* Non-constant expressions are sized heuristically. */
13324 switch (flag_code)
13326 case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
13327 case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
13328 case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
13332 for (j = 0; j < i.operands; ++j)
13333 i.tm.operand_types[j] = i.types[j];
13335 process_operands ();
13338 /* Don't set opcode until after processing operands, to avoid any
13339 potential special casing there. */
13340 i.tm.base_opcode |= val;
13342 if (pp.encoding == encoding_error
13343 || (pp.encoding != encoding_evex
13344 ? i.broadcast.type || i.broadcast.bytes
13345 || i.rounding.type != rc_none
13346 || i.mask.reg
13347 : (i.mem_operands && i.rounding.type != rc_none)
13348 || ((i.broadcast.type || i.broadcast.bytes)
13349 && !(i.flags[i.broadcast.operand] & Operand_Mem))))
13351 as_bad (_("conflicting .insn operands"));
13352 goto done;
13355 if (vex || xop)
13357 if (!i.tm.opcode_modifier.vex)
13358 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
13360 build_vex_prefix (NULL);
13361 i.rex &= REX_OPCODE;
13363 else if (evex)
13365 if (!i.tm.opcode_modifier.evex)
13366 i.tm.opcode_modifier.evex = EVEXLIG;
13368 build_evex_prefix ();
13369 i.rex &= REX_OPCODE;
13371 else
13372 establish_rex ();
13374 last_insn = &seg_info(now_seg)->tc_segment_info_data.last_insn;
13375 output_insn (last_insn);
13376 last_insn->kind = last_insn_directive;
13377 last_insn->name = ".insn directive";
13378 last_insn->file = as_where (&last_insn->line);
13380 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13381 /* PS: SCFI is enabled only for System V AMD64 ABI. The ABI check has been
13382 performed in i386_target_format. */
13383 if (IS_ELF && flag_synth_cfi)
13384 as_bad (_("SCFI: hand-crafting instructions not supported"));
13385 #endif
13387 done:
13388 *saved_ilp = saved_char;
13389 input_line_pointer = line;
13391 demand_empty_rest_of_line ();
13393 /* Make sure dot_insn() won't yield "true" anymore. */
13394 i.tm.mnem_off = 0;
13396 current_templates.start = NULL;
13397 memset (&pp, 0, sizeof (pp));
13400 #ifdef TE_PE
13401 static void
13402 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
13404 expressionS exp;
13406 do
13408 expression (&exp);
13409 if (exp.X_op == O_symbol)
13410 exp.X_op = O_secrel;
13412 emit_expr (&exp, 4);
13414 while (*input_line_pointer++ == ',');
13416 input_line_pointer--;
13417 demand_empty_rest_of_line ();
13420 static void
13421 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
13423 expressionS exp;
13425 do
13427 expression (&exp);
13428 if (exp.X_op == O_symbol)
13429 exp.X_op = O_secidx;
13431 emit_expr (&exp, 2);
13433 while (*input_line_pointer++ == ',');
13435 input_line_pointer--;
13436 demand_empty_rest_of_line ();
13438 #endif
13440 /* Handle Rounding Control / SAE specifiers. */
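/* These are the `{rn-sae}', `{rd-sae}', `{ru-sae}', `{rz-sae}' and
   plain `{sae}' operand decorations, as in e.g. (AT&T syntax)
   `vaddps {rz-sae}, %zmm2, %zmm1, %zmm0'. */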
13442 static char *
13443 RC_SAE_specifier (const char *pstr)
13445 unsigned int j;
13447 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
13449 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
13451 if (i.rounding.type != rc_none)
13453 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
13454 return NULL;
13457 switch (pp.encoding)
13459 case encoding_default:
13460 case encoding_egpr:
13461 pp.encoding = encoding_evex512;
13462 break;
13463 case encoding_evex:
13464 case encoding_evex512:
13465 break;
13466 default:
13467 return NULL;
13470 i.rounding.type = RC_NamesTable[j].type;
13472 return (char *)(pstr + RC_NamesTable[j].len);
13476 return NULL;
13479 /* Handle Vector operations. */
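/* I.e. the trailing {...} decorations of an operand; one insn may
   combine several of them, roughly like (AT&T syntax)
   `vaddps (%rax){1to16}, %zmm1, %zmm2{%k3}{z}'. */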
13481 static char *
13482 check_VecOperations (char *op_string)
13484 const reg_entry *mask;
13485 const char *saved;
13486 char *end_op;
13488 while (*op_string)
13490 saved = op_string;
13491 if (*op_string == '{')
13493 op_string++;
13494 if (is_space_char (*op_string))
13495 op_string++;
13497 /* Check broadcasts. */
13498 if (startswith (op_string, "1to"))
13500 unsigned int bcst_type;
13502 if (i.broadcast.type)
13503 goto duplicated_vec_op;
13505 op_string += 3;
13506 if (*op_string == '8')
13507 bcst_type = 8;
13508 else if (*op_string == '4')
13509 bcst_type = 4;
13510 else if (*op_string == '2')
13511 bcst_type = 2;
13512 else if (*op_string == '1'
13513 && *(op_string+1) == '6')
13515 bcst_type = 16;
13516 op_string++;
13518 else if (*op_string == '3'
13519 && *(op_string+1) == '2')
13521 bcst_type = 32;
13522 op_string++;
13524 else
13526 as_bad (_("Unsupported broadcast: `%s'"), saved);
13527 return NULL;
13529 op_string++;
13531 switch (pp.encoding)
13533 case encoding_default:
13534 case encoding_egpr:
13535 pp.encoding = encoding_evex;
13536 break;
13537 case encoding_evex:
13538 case encoding_evex512:
13539 break;
13540 default:
13541 goto unknown_vec_op;
13544 i.broadcast.type = bcst_type;
13545 i.broadcast.operand = this_operand;
13547 /* For .insn a data size specifier may be appended. */
13548 if (dot_insn () && *op_string == ':')
13549 goto dot_insn_modifier;
13551 /* Check .insn special cases. */
13552 else if (dot_insn () && *op_string == ':')
13554 dot_insn_modifier:
13555 switch (op_string[1])
13557 unsigned long n;
13559 case 'd':
13560 if (i.memshift < 32)
13561 goto duplicated_vec_op;
13563 n = strtoul (op_string + 2, &end_op, 0);
13564 if (n)
13565 for (i.memshift = 0; !(n & 1); n >>= 1)
13566 ++i.memshift;
13567 if (i.memshift < 32 && n == 1)
13568 op_string = end_op;
13569 break;
13571 case 's': case 'u':
13572 /* This isn't really a "vector" operation, but a sign/size
13573 specifier for immediate operands of .insn. Note that AT&T
13574 syntax handles the same in i386_immediate(). */
13575 if (!intel_syntax)
13576 break;
13578 if (i.imm_bits[this_operand])
13579 goto duplicated_vec_op;
13581 n = strtoul (op_string + 2, &end_op, 0);
13582 if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
13584 i.imm_bits[this_operand] = n;
13585 if (op_string[1] == 's')
13586 i.flags[this_operand] |= Operand_Signed;
13587 op_string = end_op;
13589 break;
13592 /* Check masking operation. */
13593 else if ((mask = parse_register (op_string, &end_op)) != NULL)
13595 if (mask == &bad_reg)
13596 return NULL;
13598 /* k0 can't be used for write mask. */
13599 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
13601 as_bad (_("`%s%s' can't be used for write mask"),
13602 register_prefix, mask->reg_name);
13603 return NULL;
13606 if (!i.mask.reg)
13608 i.mask.reg = mask;
13609 i.mask.operand = this_operand;
13611 else if (i.mask.reg->reg_num)
13612 goto duplicated_vec_op;
13613 else
13615 i.mask.reg = mask;
13617 /* Only "{z}" is allowed here. No need to check
13618 zeroing mask explicitly. */
13619 if (i.mask.operand != (unsigned int) this_operand)
13621 as_bad (_("invalid write mask `%s'"), saved);
13622 return NULL;
13626 op_string = end_op;
13628 /* Check zeroing-flag for masking operation. */
13629 else if (*op_string == 'z')
13631 if (!i.mask.reg)
13633 i.mask.reg = reg_k0;
13634 i.mask.zeroing = 1;
13635 i.mask.operand = this_operand;
13637 else
13639 if (i.mask.zeroing)
13641 duplicated_vec_op:
13642 as_bad (_("duplicated `%s'"), saved);
13643 return NULL;
13646 i.mask.zeroing = 1;
13648 /* Only "{%k}" is allowed here. No need to check mask
13649 register explicitly. */
13650 if (i.mask.operand != (unsigned int) this_operand)
13652 as_bad (_("invalid zeroing-masking `%s'"),
13653 saved);
13654 return NULL;
13658 op_string++;
13660 else if (intel_syntax
13661 && (op_string = RC_SAE_specifier (op_string)) != NULL)
13662 i.rounding.modifier = true;
13663 else
13664 goto unknown_vec_op;
13666 if (is_space_char (*op_string))
13667 op_string++;
13668 if (*op_string != '}')
13670 as_bad (_("missing `}' in `%s'"), saved);
13671 return NULL;
13673 op_string++;
13675 if (is_space_char (*op_string))
13676 ++op_string;
13678 continue;
13680 unknown_vec_op:
13681 /* We don't know this one. */
13682 as_bad (_("unknown vector operation: `%s'"), saved);
13683 return NULL;
13686 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
13688 as_bad (_("zeroing-masking only allowed with write mask"));
13689 return NULL;
13692 return op_string;
13695 static int
13696 i386_immediate (char *imm_start)
13698 char *save_input_line_pointer;
13699 char *gotfree_input_line;
13700 segT exp_seg = 0;
13701 expressionS *exp;
13702 i386_operand_type types;
13704 operand_type_set (&types, ~0);
13706 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
13708 as_bad (_("at most %d immediate operands are allowed"),
13709 MAX_IMMEDIATE_OPERANDS);
13710 return 0;
13713 exp = &im_expressions[i.imm_operands++];
13714 i.op[this_operand].imms = exp;
13716 if (is_space_char (*imm_start))
13717 ++imm_start;
13719 save_input_line_pointer = input_line_pointer;
13720 input_line_pointer = imm_start;
13722 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
13723 if (gotfree_input_line)
13724 input_line_pointer = gotfree_input_line;
13726 expr_mode = expr_operator_none;
13727 exp_seg = expression (exp);
13729 /* For .insn immediates there may be a size specifier. */
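/* E.g. `$123{:u8}' requests an 8-bit unsigned immediate and
   `$-1{:s32}' a 32-bit signed one. */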
13730 if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
13731 && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
13733 char *e;
13734 unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
13736 if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
13738 i.imm_bits[this_operand] = n;
13739 if (input_line_pointer[2] == 's')
13740 i.flags[this_operand] |= Operand_Signed;
13741 input_line_pointer = e + 1;
13745 SKIP_WHITESPACE ();
13746 if (*input_line_pointer)
13747 as_bad (_("junk `%s' after expression"), input_line_pointer);
13749 input_line_pointer = save_input_line_pointer;
13750 if (gotfree_input_line)
13752 free (gotfree_input_line);
13754 if (exp->X_op == O_constant)
13755 exp->X_op = O_illegal;
13758 if (exp_seg == reg_section)
13760 as_bad (_("illegal immediate register operand %s"), imm_start);
13761 return 0;
13764 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
13767 static int
13768 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
13769 i386_operand_type types, const char *imm_start)
13771 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
13773 if (imm_start)
13774 as_bad (_("missing or invalid immediate expression `%s'"),
13775 imm_start);
13776 return 0;
13778 else if (exp->X_op == O_constant)
13780 /* Size it properly later. */
13781 i.types[this_operand].bitfield.imm64 = 1;
13783 /* If not 64bit, sign/zero extend val, to account for wraparound
13784 when !BFD64. */
13785 if (expr_mode == expr_operator_present
13786 && flag_code != CODE_64BIT && !object_64bit)
13787 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
13789 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13790 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
13791 && exp_seg != absolute_section
13792 && exp_seg != text_section
13793 && exp_seg != data_section
13794 && exp_seg != bss_section
13795 && exp_seg != undefined_section
13796 && !bfd_is_com_section (exp_seg))
13798 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
13799 return 0;
13801 #endif
13802 else
13804 /* This is an address. The size of the address will be
13805 determined later, depending on destination register,
13806 suffix, or the default for the section. */
13807 i.types[this_operand].bitfield.imm8 = 1;
13808 i.types[this_operand].bitfield.imm16 = 1;
13809 i.types[this_operand].bitfield.imm32 = 1;
13810 i.types[this_operand].bitfield.imm32s = 1;
13811 i.types[this_operand].bitfield.imm64 = 1;
13812 i.types[this_operand] = operand_type_and (i.types[this_operand],
13813 types);
13816 return 1;
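/* Parse the scale factor of a memory operand of the AT&T form
   disp(%base, %index, scale); e.g. for `(%eax,%ebx,4)' this records a
   log2_scale_factor of 2. Returns a pointer past the scale expression,
   or NULL on error. */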
13819 static char *
13820 i386_scale (char *scale)
13822 offsetT val;
13823 char *save = input_line_pointer;
13825 input_line_pointer = scale;
13826 val = get_absolute_expression ();
13828 switch (val)
13830 case 1:
13831 i.log2_scale_factor = 0;
13832 break;
13833 case 2:
13834 i.log2_scale_factor = 1;
13835 break;
13836 case 4:
13837 i.log2_scale_factor = 2;
13838 break;
13839 case 8:
13840 i.log2_scale_factor = 3;
13841 break;
13842 default:
13844 char sep = *input_line_pointer;
13846 *input_line_pointer = '\0';
13847 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
13848 scale);
13849 *input_line_pointer = sep;
13850 input_line_pointer = save;
13851 return NULL;
13854 if (i.log2_scale_factor != 0 && i.index_reg == 0)
13856 as_warn (_("scale factor of %d without an index register"),
13857 1 << i.log2_scale_factor);
13858 i.log2_scale_factor = 0;
13860 scale = input_line_pointer;
13861 input_line_pointer = save;
13862 return scale;
13865 static int
13866 i386_displacement (char *disp_start, char *disp_end)
13868 expressionS *exp;
13869 segT exp_seg = 0;
13870 char *save_input_line_pointer;
13871 char *gotfree_input_line;
13872 int override;
13873 i386_operand_type bigdisp, types = anydisp;
13874 int ret;
13876 if (i.disp_operands == MAX_MEMORY_OPERANDS)
13878 as_bad (_("at most %d displacement operands are allowed"),
13879 MAX_MEMORY_OPERANDS);
13880 return 0;
13883 operand_type_set (&bigdisp, 0);
13884 if (i.jumpabsolute
13885 || i.types[this_operand].bitfield.baseindex
13886 || (current_templates.start->opcode_modifier.jump != JUMP
13887 && current_templates.start->opcode_modifier.jump != JUMP_DWORD))
13889 i386_addressing_mode ();
13890 override = (i.prefix[ADDR_PREFIX] != 0);
13891 if (flag_code == CODE_64BIT)
13893 bigdisp.bitfield.disp32 = 1;
13894 if (!override)
13895 bigdisp.bitfield.disp64 = 1;
13897 else if ((flag_code == CODE_16BIT) ^ override)
13898 bigdisp.bitfield.disp16 = 1;
13899 else
13900 bigdisp.bitfield.disp32 = 1;
13902 else
13904 /* For PC-relative branches, the width of the displacement may be
13905 dependent upon data size, but is never dependent upon address size.
13906 Also make sure to not unintentionally match against a non-PC-relative
13907 branch template. */
13908 const insn_template *t = current_templates.start;
13909 bool has_intel64 = false;
13911 while (++t < current_templates.end)
13913 if (t->opcode_modifier.jump
13914 != current_templates.start->opcode_modifier.jump)
13915 break;
13916 if ((t->opcode_modifier.isa64 >= INTEL64))
13917 has_intel64 = true;
13919 current_templates.end = t;
13921 override = (i.prefix[DATA_PREFIX] != 0);
13922 if (flag_code == CODE_64BIT)
13924 if ((override || i.suffix == WORD_MNEM_SUFFIX)
13925 && (!intel64 || !has_intel64))
13926 bigdisp.bitfield.disp16 = 1;
13927 else
13928 bigdisp.bitfield.disp32 = 1;
13930 else
13932 if (!override)
13933 override = (i.suffix == (flag_code != CODE_16BIT
13934 ? WORD_MNEM_SUFFIX
13935 : LONG_MNEM_SUFFIX));
13936 bigdisp.bitfield.disp32 = 1;
13937 if ((flag_code == CODE_16BIT) ^ override)
13939 bigdisp.bitfield.disp32 = 0;
13940 bigdisp.bitfield.disp16 = 1;
13944 i.types[this_operand] = operand_type_or (i.types[this_operand],
13945 bigdisp);
13947 exp = &disp_expressions[i.disp_operands];
13948 i.op[this_operand].disps = exp;
13949 i.disp_operands++;
13950 save_input_line_pointer = input_line_pointer;
13951 input_line_pointer = disp_start;
13952 END_STRING_AND_SAVE (disp_end);
13954 #ifndef GCC_ASM_O_HACK
13955 #define GCC_ASM_O_HACK 0
13956 #endif
13957 #if GCC_ASM_O_HACK
13958 END_STRING_AND_SAVE (disp_end + 1);
13959 if (i.types[this_operand].bitfield.baseindex
13960 && disp_end[-1] == '+')
13962 /* This hack is to avoid a warning when using the "o"
13963 constraint within gcc asm statements.
13964 For instance:
13966 #define _set_tssldt_desc(n,addr,limit,type) \
13967 __asm__ __volatile__ ( \
13968 "movw %w2,%0\n\t" \
13969 "movw %w1,2+%0\n\t" \
13970 "rorl $16,%1\n\t" \
13971 "movb %b1,4+%0\n\t" \
13972 "movb %4,5+%0\n\t" \
13973 "movb $0,6+%0\n\t" \
13974 "movb %h1,7+%0\n\t" \
13975 "rorl $16,%1" \
13976 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
13978 This works great except that the output assembler ends
13979 up looking a bit weird if it turns out that there is
13980 no offset. You end up producing code that looks like:
13982 #APP
13983 movw $235,(%eax)
13984 movw %dx,2+(%eax)
13985 rorl $16,%edx
13986 movb %dl,4+(%eax)
13987 movb $137,5+(%eax)
13988 movb $0,6+(%eax)
13989 movb %dh,7+(%eax)
13990 rorl $16,%edx
13991 #NO_APP
13993 So here we provide the missing zero. */
13995 *disp_end = '0';
13997 #endif
13998 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
13999 if (gotfree_input_line)
14000 input_line_pointer = gotfree_input_line;
14002 expr_mode = expr_operator_none;
14003 exp_seg = expression (exp);
14005 SKIP_WHITESPACE ();
14006 if (*input_line_pointer)
14007 as_bad (_("junk `%s' after expression"), input_line_pointer);
14008 #if GCC_ASM_O_HACK
14009 RESTORE_END_STRING (disp_end + 1);
14010 #endif
14011 input_line_pointer = save_input_line_pointer;
14012 if (gotfree_input_line)
14014 free (gotfree_input_line);
14016 if (exp->X_op == O_constant || exp->X_op == O_register)
14017 exp->X_op = O_illegal;
14020 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
14022 RESTORE_END_STRING (disp_end);
14024 return ret;
14027 static int
14028 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
14029 i386_operand_type types, const char *disp_start)
14031 int ret = 1;
14033 /* We do this to make sure that the section symbol is in
14034 the symbol table. We will ultimately change the relocation
14035 to be relative to the beginning of the section. */
14036 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
14037 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
14038 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14040 if (exp->X_op != O_symbol)
14041 goto inv_disp;
14043 if (S_IS_LOCAL (exp->X_add_symbol)
14044 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
14045 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
14046 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
14047 exp->X_op = O_subtract;
14048 exp->X_op_symbol = GOT_symbol;
14049 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
14050 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
14051 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14052 i.reloc[this_operand] = BFD_RELOC_64;
14053 else
14054 i.reloc[this_operand] = BFD_RELOC_32;
14057 else if (exp->X_op == O_absent
14058 || exp->X_op == O_illegal
14059 || exp->X_op == O_big)
14061 inv_disp:
14062 as_bad (_("missing or invalid displacement expression `%s'"),
14063 disp_start);
14064 ret = 0;
14067 else if (exp->X_op == O_constant)
14069 /* Sizing gets taken care of by optimize_disp().
14071 If not 64bit, sign/zero extend val, to account for wraparound
14072 when !BFD64. */
14073 if (expr_mode == expr_operator_present
14074 && flag_code != CODE_64BIT && !object_64bit)
14075 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
14078 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14079 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
14080 && exp_seg != absolute_section
14081 && exp_seg != text_section
14082 && exp_seg != data_section
14083 && exp_seg != bss_section
14084 && exp_seg != undefined_section
14085 && !bfd_is_com_section (exp_seg))
14087 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
14088 ret = 0;
14090 #endif
14092 else if (current_templates.start->opcode_modifier.jump == JUMP_BYTE)
14093 i.types[this_operand].bitfield.disp8 = 1;
14095 /* Check if this is a displacement only operand. */
14096 if (!i.types[this_operand].bitfield.baseindex)
14097 i.types[this_operand] =
14098 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
14099 operand_type_and (i.types[this_operand], types));
14101 return ret;
14104 /* Return the active addressing mode, taking address override and
14105 registers forming the address into consideration. Update the
14106 address override prefix if necessary. */
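/* E.g. `mov (%bx),%ax' assembled in 32-bit mode yields CODE_16BIT here
   and causes an 0x67 address size prefix to be added. */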
14108 static enum flag_code
14109 i386_addressing_mode (void)
14111 enum flag_code addr_mode;
14113 if (i.prefix[ADDR_PREFIX])
14114 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
14115 else if (flag_code == CODE_16BIT
14116 && is_cpu (current_templates.start, CpuMPX)
14117 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
14118 from md_assemble() by "is not a valid base/index expression"
14119 when there is a base and/or index. */
14120 && !i.types[this_operand].bitfield.baseindex)
14122 /* MPX insn memory operands with neither base nor index must be forced
14123 to use 32-bit addressing in 16-bit mode. */
14124 addr_mode = CODE_32BIT;
14125 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14126 ++i.prefixes;
14127 gas_assert (!i.types[this_operand].bitfield.disp16);
14128 gas_assert (!i.types[this_operand].bitfield.disp32);
14130 else
14132 addr_mode = flag_code;
14134 #if INFER_ADDR_PREFIX
14135 if (i.mem_operands == 0)
14137 /* Infer address prefix from the first memory operand. */
14138 const reg_entry *addr_reg = i.base_reg;
14140 if (addr_reg == NULL)
14141 addr_reg = i.index_reg;
14143 if (addr_reg)
14145 if (addr_reg->reg_type.bitfield.dword)
14146 addr_mode = CODE_32BIT;
14147 else if (flag_code != CODE_64BIT
14148 && addr_reg->reg_type.bitfield.word)
14149 addr_mode = CODE_16BIT;
14151 if (addr_mode != flag_code)
14153 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14154 i.prefixes += 1;
14155 /* Change the size of any displacement too. At most one
14156 of Disp16 or Disp32 is set.
14157 FIXME. There doesn't seem to be any real need for
14158 separate Disp16 and Disp32 flags. The same goes for
14159 Imm16 and Imm32. Removing them would probably clean
14160 up the code quite a lot. */
14161 if (flag_code != CODE_64BIT
14162 && (i.types[this_operand].bitfield.disp16
14163 || i.types[this_operand].bitfield.disp32))
14165 static const i386_operand_type disp16_32 = {
14166 .bitfield = { .disp16 = 1, .disp32 = 1 }
14169 i.types[this_operand]
14170 = operand_type_xor (i.types[this_operand], disp16_32);
14175 #endif
14178 return addr_mode;
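/* For example, in 64-bit mode `movl (%eax), %ebx' uses a 32-bit base
   register, so the logic above selects CODE_32BIT and emits the 0x67
   address-size prefix, while `movl (%rax), %ebx' keeps CODE_64BIT and
   needs no prefix. */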
14181 /* Make sure the memory operand we've been dealt is valid.
14182 Return 1 on success, 0 on a failure. */
14184 static int
14185 i386_index_check (const char *operand_string)
14187 const char *kind = "base/index";
14188 enum flag_code addr_mode = i386_addressing_mode ();
14189 const insn_template *t = current_templates.end - 1;
14191 if (t->opcode_modifier.isstring)
14193 /* Memory operands of string insns are special in that they only allow
14194 a single register (rDI, rSI, or rBX) as their memory address. */
14195 const reg_entry *expected_reg;
14196 static const char di_si[][2][4] =
14198 { "esi", "edi" },
14199 { "si", "di" },
14200 { "rsi", "rdi" }
14202 static const char bx[][4] = { "ebx", "bx", "rbx" };
14204 kind = "string address";
14206 if (t->opcode_modifier.prefixok == PrefixRep)
14208 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
14209 int op = 0;
14211 if (!t->operand_types[0].bitfield.baseindex
14212 || ((!i.mem_operands != !intel_syntax)
14213 && t->operand_types[1].bitfield.baseindex))
14214 op = 1;
14215 expected_reg
14216 = (const reg_entry *) str_hash_find (reg_hash,
14217 di_si[addr_mode][op == es_op]);
14219 else
14220 expected_reg
14221 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
14223 if (i.base_reg != expected_reg
14224 || i.index_reg
14225 || operand_type_check (i.types[this_operand], disp))
14227 /* The second memory operand must have the same size as
14228 the first one. */
14229 if (i.mem_operands
14230 && i.base_reg
14231 && !((addr_mode == CODE_64BIT
14232 && i.base_reg->reg_type.bitfield.qword)
14233 || (addr_mode == CODE_32BIT
14234 ? i.base_reg->reg_type.bitfield.dword
14235 : i.base_reg->reg_type.bitfield.word)))
14236 goto bad_address;
14238 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
14239 operand_string,
14240 intel_syntax ? '[' : '(',
14241 register_prefix,
14242 expected_reg->reg_name,
14243 intel_syntax ? ']' : ')');
14244 return 1;
14246 else
14247 return 1;
14249 bad_address:
14250 as_bad (_("`%s' is not a valid %s expression"),
14251 operand_string, kind);
14252 return 0;
14254 else
14256 t = current_templates.start;
14258 if (addr_mode != CODE_16BIT)
14260 /* 32-bit/64-bit checks. */
14261 if (pp.disp_encoding == disp_encoding_16bit)
14263 bad_disp:
14264 as_bad (_("invalid `%s' prefix"),
14265 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
14266 return 0;
14269 if ((i.base_reg
14270 && ((addr_mode == CODE_64BIT
14271 ? !i.base_reg->reg_type.bitfield.qword
14272 : !i.base_reg->reg_type.bitfield.dword)
14273 || (i.index_reg && i.base_reg->reg_num == RegIP)
14274 || i.base_reg->reg_num == RegIZ))
14275 || (i.index_reg
14276 && !i.index_reg->reg_type.bitfield.xmmword
14277 && !i.index_reg->reg_type.bitfield.ymmword
14278 && !i.index_reg->reg_type.bitfield.zmmword
14279 && ((addr_mode == CODE_64BIT
14280 ? !i.index_reg->reg_type.bitfield.qword
14281 : !i.index_reg->reg_type.bitfield.dword)
14282 || !i.index_reg->reg_type.bitfield.baseindex)))
14283 goto bad_address;
14285 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
14286 if (t->mnem_off == MN_bndmk
14287 || t->mnem_off == MN_bndldx
14288 || t->mnem_off == MN_bndstx
14289 || t->opcode_modifier.sib == SIBMEM)
14291 /* They cannot use RIP-relative addressing. */
14292 if (i.base_reg && i.base_reg->reg_num == RegIP)
14294 as_bad (_("`%s' cannot be used here"), operand_string);
14295 return 0;
14298 /* bndldx and bndstx ignore their scale factor. */
14299 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
14300 && i.log2_scale_factor)
14301 as_warn (_("register scaling is being ignored here"));
14304 else
14306 /* 16-bit checks. */
14307 if (pp.disp_encoding == disp_encoding_32bit)
14308 goto bad_disp;
14310 if ((i.base_reg
14311 && (!i.base_reg->reg_type.bitfield.word
14312 || !i.base_reg->reg_type.bitfield.baseindex))
14313 || (i.index_reg
14314 && (!i.index_reg->reg_type.bitfield.word
14315 || !i.index_reg->reg_type.bitfield.baseindex
14316 || !(i.base_reg
14317 && i.base_reg->reg_num < 6
14318 && i.index_reg->reg_num >= 6
14319 && i.log2_scale_factor == 0))))
14320 goto bad_address;
14323 return 1;
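/* For example, in 16-bit mode the checks above accept `(%bx,%si)'
   (base BX/BP, index SI/DI, no scale factor) but reject `(%si,%bx)'
   and `(%bx,%si,2)'; in 32-/64-bit mode a mixed-size form such as
   `(%eax,%rbx)' fails the register size checks. */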
14326 /* Handle vector immediates. */
14328 static int
14329 RC_SAE_immediate (const char *imm_start)
14331 const char *pstr = imm_start;
14333 if (*pstr != '{')
14334 return 0;
14336 pstr++;
14337 if (is_space_char (*pstr))
14338 pstr++;
14340 pstr = RC_SAE_specifier (pstr);
14341 if (pstr == NULL)
14342 return 0;
14344 if (is_space_char (*pstr))
14345 pstr++;
14347 if (*pstr++ != '}')
14349 as_bad (_("Missing '}': '%s'"), imm_start);
14350 return 0;
14352 /* RC/SAE immediate string should contain nothing more. */
14353 if (*pstr != 0)
14355 as_bad (_("Junk after '}': '%s'"), imm_start);
14356 return 0;
14359 /* Internally this doesn't count as an operand. */
14360 --i.operands;
14362 return 1;
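/* For example, the AT&T operand `{rn-sae}' in
   `vaddps {rn-sae}, %zmm4, %zmm5, %zmm6' is consumed here; blanks are
   allowed inside the braces (`{ rn-sae }'), and anything following the
   closing brace is diagnosed as junk. */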
14365 static INLINE bool starts_memory_operand (char c)
14367 return ISDIGIT (c)
14368 || is_name_beginner (c)
14369 || strchr ("([\"+-!~", c);
14372 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
14373 on error. */
14375 static int
14376 i386_att_operand (char *operand_string)
14378 const reg_entry *r;
14379 char *end_op;
14380 char *op_string = operand_string;
14382 if (is_space_char (*op_string))
14383 ++op_string;
14385 /* We check for an absolute prefix (differentiating,
14386 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
14387 if (*op_string == ABSOLUTE_PREFIX
14388 && current_templates.start->opcode_modifier.jump)
14390 ++op_string;
14391 if (is_space_char (*op_string))
14392 ++op_string;
14393 i.jumpabsolute = true;
14396 /* Check if operand is a register. */
14397 if ((r = parse_register (op_string, &end_op)) != NULL)
14399 i386_operand_type temp;
14401 if (r == &bad_reg)
14402 return 0;
14404 /* Check for a segment override by searching for ':' after a
14405 segment register. */
14406 op_string = end_op;
14407 if (is_space_char (*op_string))
14408 ++op_string;
14409 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
14411 i.seg[i.mem_operands] = r;
14413 /* Skip the ':' and whitespace. */
14414 ++op_string;
14415 if (is_space_char (*op_string))
14416 ++op_string;
14418 /* Handle case of %es:*foo. */
14419 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
14420 && current_templates.start->opcode_modifier.jump)
14422 ++op_string;
14423 if (is_space_char (*op_string))
14424 ++op_string;
14425 i.jumpabsolute = true;
14428 if (!starts_memory_operand (*op_string))
14430 as_bad (_("bad memory operand `%s'"), op_string);
14431 return 0;
14433 goto do_memory_reference;
14436 /* Handle vector operations. */
14437 if (*op_string == '{')
14439 op_string = check_VecOperations (op_string);
14440 if (op_string == NULL)
14441 return 0;
14444 if (*op_string)
14446 as_bad (_("junk `%s' after register"), op_string);
14447 return 0;
14450 /* Reject pseudo registers for .insn. */
14451 if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
14453 as_bad (_("`%s%s' cannot be used here"),
14454 register_prefix, r->reg_name);
14455 return 0;
14458 temp = r->reg_type;
14459 temp.bitfield.baseindex = 0;
14460 i.types[this_operand] = operand_type_or (i.types[this_operand],
14461 temp);
14462 i.types[this_operand].bitfield.unspecified = 0;
14463 i.op[this_operand].regs = r;
14464 i.reg_operands++;
14466 /* A GPR may follow an RC or SAE immediate only if a (vector) register
14467 operand was also present earlier on. */
14468 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
14469 && i.reg_operands == 1)
14471 unsigned int j;
14473 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
14474 if (i.rounding.type == RC_NamesTable[j].type)
14475 break;
14476 as_bad (_("`%s': misplaced `{%s}'"),
14477 insn_name (current_templates.start), RC_NamesTable[j].name);
14478 return 0;
14481 else if (*op_string == REGISTER_PREFIX)
14483 as_bad (_("bad register name `%s'"), op_string);
14484 return 0;
14486 else if (*op_string == IMMEDIATE_PREFIX)
14488 ++op_string;
14489 if (i.jumpabsolute)
14491 as_bad (_("immediate operand illegal with absolute jump"));
14492 return 0;
14494 if (!i386_immediate (op_string))
14495 return 0;
14496 if (i.rounding.type != rc_none)
14498 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
14499 insn_name (current_templates.start));
14500 return 0;
14503 else if (RC_SAE_immediate (operand_string))
14505 /* If it is a RC or SAE immediate, do the necessary placement check:
14506 Only another immediate or a GPR may precede it. */
14507 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
14508 || (i.reg_operands == 1
14509 && i.op[0].regs->reg_type.bitfield.class != Reg))
14511 as_bad (_("`%s': misplaced `%s'"),
14512 insn_name (current_templates.start), operand_string);
14513 return 0;
14516 else if (starts_memory_operand (*op_string))
14518 /* This is a memory reference of some sort. */
14519 char *base_string;
14521 /* Start and end of displacement string expression (if found). */
14522 char *displacement_string_start;
14523 char *displacement_string_end;
14525 do_memory_reference:
14526 /* Check for base index form. We detect the base index form by
14527 looking for an ')' at the end of the operand, searching
14528 for the '(' matching it, and finding a REGISTER_PREFIX or ','
14529 after the '('. */
14530 base_string = op_string + strlen (op_string);
14532 /* Handle vector operations. */
14533 --base_string;
14534 if (is_space_char (*base_string))
14535 --base_string;
14537 if (*base_string == '}')
14539 char *vop_start = NULL;
14541 while (base_string-- > op_string)
14543 if (*base_string == '"')
14544 break;
14545 if (*base_string != '{')
14546 continue;
14548 vop_start = base_string;
14550 --base_string;
14551 if (is_space_char (*base_string))
14552 --base_string;
14554 if (*base_string != '}')
14555 break;
14557 vop_start = NULL;
14560 if (!vop_start)
14562 as_bad (_("unbalanced curly braces"));
14563 return 0;
14566 if (check_VecOperations (vop_start) == NULL)
14567 return 0;
14570 /* If we only have a displacement, set-up for it to be parsed later. */
14571 displacement_string_start = op_string;
14572 displacement_string_end = base_string + 1;
14574 if (*base_string == ')')
14576 char *temp_string;
14577 unsigned int parens_not_balanced = 0;
14578 bool in_quotes = false;
14580 /* We've already checked that the numbers of left & right ()'s are
14581 equal, and that there's a matching set of double quotes. */
14582 end_op = base_string;
14583 for (temp_string = op_string; temp_string < end_op; temp_string++)
14585 if (*temp_string == '\\' && temp_string[1] == '"')
14586 ++temp_string;
14587 else if (*temp_string == '"')
14588 in_quotes = !in_quotes;
14589 else if (!in_quotes)
14591 if (*temp_string == '(' && !parens_not_balanced++)
14592 base_string = temp_string;
14593 if (*temp_string == ')')
14594 --parens_not_balanced;
14598 temp_string = base_string;
14600 /* Skip past '(' and whitespace. */
14601 gas_assert (*base_string == '(');
14602 ++base_string;
14603 if (is_space_char (*base_string))
14604 ++base_string;
14606 if (*base_string == ','
14607 || ((i.base_reg = parse_register (base_string, &end_op))
14608 != NULL))
14610 displacement_string_end = temp_string;
14612 i.types[this_operand].bitfield.baseindex = 1;
14614 if (i.base_reg)
14616 if (i.base_reg == &bad_reg)
14617 return 0;
14618 base_string = end_op;
14619 if (is_space_char (*base_string))
14620 ++base_string;
14623 /* There may be an index reg or scale factor here. */
14624 if (*base_string == ',')
14626 ++base_string;
14627 if (is_space_char (*base_string))
14628 ++base_string;
14630 if ((i.index_reg = parse_register (base_string, &end_op))
14631 != NULL)
14633 if (i.index_reg == &bad_reg)
14634 return 0;
14635 base_string = end_op;
14636 if (is_space_char (*base_string))
14637 ++base_string;
14638 if (*base_string == ',')
14640 ++base_string;
14641 if (is_space_char (*base_string))
14642 ++base_string;
14644 else if (*base_string != ')')
14646 as_bad (_("expecting `,' or `)' "
14647 "after index register in `%s'"),
14648 operand_string);
14649 return 0;
14652 else if (*base_string == REGISTER_PREFIX)
14654 end_op = strchr (base_string, ',');
14655 if (end_op)
14656 *end_op = '\0';
14657 as_bad (_("bad register name `%s'"), base_string);
14658 return 0;
14661 /* Check for scale factor. */
14662 if (*base_string != ')')
14664 char *end_scale = i386_scale (base_string);
14666 if (!end_scale)
14667 return 0;
14669 base_string = end_scale;
14670 if (is_space_char (*base_string))
14671 ++base_string;
14672 if (*base_string != ')')
14674 as_bad (_("expecting `)' "
14675 "after scale factor in `%s'"),
14676 operand_string);
14677 return 0;
14680 else if (!i.index_reg)
14682 as_bad (_("expecting index register or scale factor "
14683 "after `,'; got '%c'"),
14684 *base_string);
14685 return 0;
14688 else if (*base_string != ')')
14690 as_bad (_("expecting `,' or `)' "
14691 "after base register in `%s'"),
14692 operand_string);
14693 return 0;
14696 else if (*base_string == REGISTER_PREFIX)
14698 end_op = strchr (base_string, ',');
14699 if (end_op)
14700 *end_op = '\0';
14701 as_bad (_("bad register name `%s'"), base_string);
14702 return 0;
14706 /* If there's an expression beginning the operand, parse it,
14707 assuming displacement_string_start and
14708 displacement_string_end are meaningful. */
14709 if (displacement_string_start != displacement_string_end)
14711 if (!i386_displacement (displacement_string_start,
14712 displacement_string_end))
14713 return 0;
14716 /* Special case for (%dx) while doing input/output op. */
14717 if (i.base_reg
14718 && i.base_reg->reg_type.bitfield.instance == RegD
14719 && i.base_reg->reg_type.bitfield.word
14720 && i.index_reg == 0
14721 && i.log2_scale_factor == 0
14722 && i.seg[i.mem_operands] == 0
14723 && !operand_type_check (i.types[this_operand], disp))
14725 i.types[this_operand] = i.base_reg->reg_type;
14726 i.op[this_operand].regs = i.base_reg;
14727 i.base_reg = NULL;
14728 i.input_output_operand = true;
14729 return 1;
14732 if (i386_index_check (operand_string) == 0)
14733 return 0;
14734 i.flags[this_operand] |= Operand_Mem;
14735 i.mem_operands++;
14737 else
14739 /* It's not a memory operand; argh! */
14740 as_bad (_("invalid char %s beginning operand %d `%s'"),
14741 output_invalid (*op_string),
14742 this_operand + 1,
14743 op_string);
14744 return 0;
14746 return 1; /* Normal return. */
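/* For example, parsing `-4(%ebp,%esi,4)' leaves a displacement
   expression of -4, i.base_reg = %ebp, i.index_reg = %esi and
   i.log2_scale_factor = 2, while a bare `(%dx)' operand of an in/out
   insn is turned back into a register operand by the special case
   above. */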
14749 /* Calculate the maximum variable size (i.e., excluding fr_fix)
14750 that an rs_machine_dependent frag may reach. */
14752 unsigned int
14753 i386_frag_max_var (fragS *frag)
14755 /* The only relaxable frags are for jumps.
14756 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
14757 gas_assert (frag->fr_type == rs_machine_dependent);
14758 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
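/* E.g. an unconditional `jmp' can need at most a 4-byte rel32 in its
   variable part, while a conditional jump may additionally grow a 0x0f
   opcode escape byte, giving 5. */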
14761 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14762 static int
14763 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
14765 /* STT_GNU_IFUNC symbol must go through PLT. */
14766 if ((symbol_get_bfdsym (fr_symbol)->flags
14767 & BSF_GNU_INDIRECT_FUNCTION) != 0)
14768 return 0;
14770 if (!S_IS_EXTERNAL (fr_symbol))
14771 /* Symbol may be weak or local. */
14772 return !S_IS_WEAK (fr_symbol);
14774 /* Global symbols with non-default visibility can't be preempted. */
14775 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
14776 return 1;
14778 if (fr_var != NO_RELOC)
14779 switch ((enum bfd_reloc_code_real) fr_var)
14781 case BFD_RELOC_386_PLT32:
14782 case BFD_RELOC_X86_64_PLT32:
14783 /* Symbol with PLT relocation may be preempted. */
14784 return 0;
14785 default:
14786 abort ();
14789 /* Global symbols with default visibility in a shared library may be
14790 preempted by another definition. */
14791 return !shared;
14793 #endif
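/* E.g. a branch to a protected- or hidden-visibility global defined in
   the same segment can be resolved (and thus relaxed) directly, while a
   default-visibility global when assembling with -mshared, a weak
   symbol, or an STT_GNU_IFUNC symbol cannot. */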
14795 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
14796 Note: this also works for Skylake and Cascade Lake.
14797 ---------------------------------------------------------------------
14798 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
14799 | ------ | ----------- | ------- | -------- |
14800 | Jo | N | N | Y |
14801 | Jno | N | N | Y |
14802 | Jc/Jb | Y | N | Y |
14803 | Jae/Jnb | Y | N | Y |
14804 | Je/Jz | Y | Y | Y |
14805 | Jne/Jnz | Y | Y | Y |
14806 | Jna/Jbe | Y | N | Y |
14807 | Ja/Jnbe | Y | N | Y |
14808 | Js | N | N | Y |
14809 | Jns | N | N | Y |
14810 | Jp/Jpe | N | N | Y |
14811 | Jnp/Jpo | N | N | Y |
14812 | Jl/Jnge | Y | Y | Y |
14813 | Jge/Jnl | Y | Y | Y |
14814 | Jle/Jng | Y | Y | Y |
14815 | Jg/Jnle | Y | Y | Y |
14816 --------------------------------------------------------------------- */
14817 static int
14818 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
14820 if (mf_cmp == mf_cmp_alu_cmp)
14821 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
14822 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
14823 if (mf_cmp == mf_cmp_incdec)
14824 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
14825 || mf_jcc == mf_jcc_jle);
14826 if (mf_cmp == mf_cmp_test_and)
14827 return 1;
14828 return 0;
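/* Reading the table above: `test'/`and' fuse with every Jcc, `cmp'
   fuses with the carry/zero/signed-compare branches (e.g. jb, je, jl)
   but not with jo, js or jp, and `inc'/`dec' fuse only with je/jne and
   jl/jge/jle/jg. */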
14831 /* Return the next non-empty frag. */
14833 static fragS *
14834 i386_next_non_empty_frag (fragS *fragP)
14836 /* There may be a frag with a ".fill 0" when there is no room in
14837 the current frag for frag_grow in output_insn. */
14838 for (fragP = fragP->fr_next;
14839 (fragP != NULL
14840 && fragP->fr_type == rs_fill
14841 && fragP->fr_fix == 0);
14842 fragP = fragP->fr_next)
14844 return fragP;
14847 /* Return the next jcc frag after BRANCH_PADDING. */
14849 static fragS *
14850 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
14852 fragS *branch_fragP;
14853 if (!pad_fragP)
14854 return NULL;
14856 if (pad_fragP->fr_type == rs_machine_dependent
14857 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
14858 == BRANCH_PADDING))
14860 branch_fragP = i386_next_non_empty_frag (pad_fragP);
14861 if (branch_fragP->fr_type != rs_machine_dependent)
14862 return NULL;
14863 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
14864 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
14865 pad_fragP->tc_frag_data.mf_type))
14866 return branch_fragP;
14869 return NULL;
14872 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
14874 static void
14875 i386_classify_machine_dependent_frag (fragS *fragP)
14877 fragS *cmp_fragP;
14878 fragS *pad_fragP;
14879 fragS *branch_fragP;
14880 fragS *next_fragP;
14881 unsigned int max_prefix_length;
14883 if (fragP->tc_frag_data.classified)
14884 return;
14886 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
14887 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
14888 for (next_fragP = fragP;
14889 next_fragP != NULL;
14890 next_fragP = next_fragP->fr_next)
14892 next_fragP->tc_frag_data.classified = 1;
14893 if (next_fragP->fr_type == rs_machine_dependent)
14894 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
14896 case BRANCH_PADDING:
14897 /* The BRANCH_PADDING frag must be followed by a branch
14898 frag. */
14899 branch_fragP = i386_next_non_empty_frag (next_fragP);
14900 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
14901 break;
14902 case FUSED_JCC_PADDING:
14903 /* Check if this is a fused jcc:
14904 FUSED_JCC_PADDING
14905 CMP like instruction
14906 BRANCH_PADDING
14907 COND_JUMP
14908 */
14909 cmp_fragP = i386_next_non_empty_frag (next_fragP);
14910 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
14911 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
14912 if (branch_fragP)
14914 /* The BRANCH_PADDING frag is merged with the
14915 FUSED_JCC_PADDING frag. */
14916 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
14917 /* CMP like instruction size. */
14918 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
14919 frag_wane (pad_fragP);
14920 /* Skip to branch_fragP. */
14921 next_fragP = branch_fragP;
14923 else if (next_fragP->tc_frag_data.max_prefix_length)
14925 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
14926 a fused jcc. */
14927 next_fragP->fr_subtype
14928 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
14929 next_fragP->tc_frag_data.max_bytes
14930 = next_fragP->tc_frag_data.max_prefix_length;
14931 /* This will be updated in the BRANCH_PREFIX scan. */
14932 next_fragP->tc_frag_data.max_prefix_length = 0;
14934 else
14935 frag_wane (next_fragP);
14936 break;
14940 /* Stop if there is no BRANCH_PREFIX. */
14941 if (!align_branch_prefix_size)
14942 return;
14944 /* Scan for BRANCH_PREFIX. */
14945 for (; fragP != NULL; fragP = fragP->fr_next)
14947 if (fragP->fr_type != rs_machine_dependent
14948 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
14949 != BRANCH_PREFIX))
14950 continue;
14952 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
14953 COND_JUMP_PREFIX. */
14954 max_prefix_length = 0;
14955 for (next_fragP = fragP;
14956 next_fragP != NULL;
14957 next_fragP = next_fragP->fr_next)
14959 if (next_fragP->fr_type == rs_fill)
14960 /* Skip rs_fill frags. */
14961 continue;
14962 else if (next_fragP->fr_type != rs_machine_dependent)
14963 /* Stop for all other frags. */
14964 break;
14966 /* rs_machine_dependent frags. */
14967 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
14968 == BRANCH_PREFIX)
14970 /* Count BRANCH_PREFIX frags. */
14971 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
14973 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
14974 frag_wane (next_fragP);
14976 else
14977 max_prefix_length
14978 += next_fragP->tc_frag_data.max_bytes;
14980 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
14981 == BRANCH_PADDING)
14982 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
14983 == FUSED_JCC_PADDING))
14985 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
14986 fragP->tc_frag_data.u.padding_fragP = next_fragP;
14987 break;
14989 else
14990 /* Stop for other rs_machine_dependent frags. */
14991 break;
14994 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
14996 /* Skip to the next frag. */
14997 fragP = next_fragP;
15001 /* Compute padding size for
15003 FUSED_JCC_PADDING
15004 CMP like instruction
15005 BRANCH_PADDING
15006 COND_JUMP/UNCOND_JUMP
15008 or
15010 BRANCH_PADDING
15011 COND_JUMP/UNCOND_JUMP
15012 */
15014 static int
15015 i386_branch_padding_size (fragS *fragP, offsetT address)
15017 unsigned int offset, size, padding_size;
15018 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
15020 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
15021 if (!address)
15022 address = fragP->fr_address;
15023 address += fragP->fr_fix;
15025 /* CMP like instruction size. */
15026 size = fragP->tc_frag_data.cmp_size;
15028 /* The base size of the branch frag. */
15029 size += branch_fragP->fr_fix;
15031 /* Add opcode and displacement bytes for the rs_machine_dependent
15032 branch frag. */
15033 if (branch_fragP->fr_type == rs_machine_dependent)
15034 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
15036 /* Check if branch is within boundary and doesn't end at the last
15037 byte. */
15038 offset = address & ((1U << align_branch_power) - 1);
15039 if ((offset + size) >= (1U << align_branch_power))
15040 /* Padding needed to avoid crossing boundary. */
15041 padding_size = (1U << align_branch_power) - offset;
15042 else
15043 /* No padding needed. */
15044 padding_size = 0;
15046 /* The return value may be saved in tc_frag_data.length, which is
15047 an unsigned byte. */
15048 if (!fits_in_unsigned_byte (padding_size))
15049 abort ();
15051 return padding_size;
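/* Worked example: with align_branch_power == 5 (a 32-byte window), a
   9-byte fused cmp+jcc pair whose padding frag starts at offset 26
   gives offset + size == 35 >= 32, so padding_size == 32 - 26 == 6,
   pushing the whole pair into the next 32-byte window. */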
15054 /* i386_generic_table_relax_frag()
15056 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
15057 grow/shrink padding to align branch frags. Hand others to
15058 relax_frag(). */
15060 long
15061 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
15063 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15064 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15066 long padding_size = i386_branch_padding_size (fragP, 0);
15067 long grow = padding_size - fragP->tc_frag_data.length;
15069 /* When the BRANCH_PREFIX frag is used, the computed address
15070 must match the actual address and there should be no padding. */
15071 if (fragP->tc_frag_data.padding_address
15072 && (fragP->tc_frag_data.padding_address != fragP->fr_address
15073 || padding_size))
15074 abort ();
15076 /* Update the padding size. */
15077 if (grow)
15078 fragP->tc_frag_data.length = padding_size;
15080 return grow;
15082 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15084 fragS *padding_fragP, *next_fragP;
15085 long padding_size, left_size, last_size;
15087 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15088 if (!padding_fragP)
15089 /* Use the padding set by the leading BRANCH_PREFIX frag. */
15090 return (fragP->tc_frag_data.length
15091 - fragP->tc_frag_data.last_length);
15093 /* Compute the relative address of the padding frag the very
15094 first time, when the BRANCH_PREFIX frag sizes are zero. */
15095 if (!fragP->tc_frag_data.padding_address)
15096 fragP->tc_frag_data.padding_address
15097 = padding_fragP->fr_address - (fragP->fr_address - stretch);
15099 /* First update the last length from the previous iteration. */
15100 left_size = fragP->tc_frag_data.prefix_length;
15101 for (next_fragP = fragP;
15102 next_fragP != padding_fragP;
15103 next_fragP = next_fragP->fr_next)
15104 if (next_fragP->fr_type == rs_machine_dependent
15105 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15106 == BRANCH_PREFIX))
15108 if (left_size)
15110 int max = next_fragP->tc_frag_data.max_bytes;
15111 if (max)
15113 int size;
15114 if (max > left_size)
15115 size = left_size;
15116 else
15117 size = max;
15118 left_size -= size;
15119 next_fragP->tc_frag_data.last_length = size;
15122 else
15123 next_fragP->tc_frag_data.last_length = 0;
15126 /* Check the padding size for the padding frag. */
15127 padding_size = i386_branch_padding_size
15128 (padding_fragP, (fragP->fr_address
15129 + fragP->tc_frag_data.padding_address));
15131 last_size = fragP->tc_frag_data.prefix_length;
15132 /* Check if there is any change from the last iteration. */
15133 if (padding_size == last_size)
15135 /* Update the expected address of the padding frag. */
15136 padding_fragP->tc_frag_data.padding_address
15137 = (fragP->fr_address + padding_size
15138 + fragP->tc_frag_data.padding_address);
15139 return 0;
15142 if (padding_size > fragP->tc_frag_data.max_prefix_length)
15144 /* No padding if there is insufficient room. Clear the
15145 expected address of the padding frag. */
15146 padding_fragP->tc_frag_data.padding_address = 0;
15147 padding_size = 0;
15149 else
15150 /* Store the expected address of the padding frag. */
15151 padding_fragP->tc_frag_data.padding_address
15152 = (fragP->fr_address + padding_size
15153 + fragP->tc_frag_data.padding_address);
15155 fragP->tc_frag_data.prefix_length = padding_size;
15157 /* Update the length for the current iteration. */
15158 left_size = padding_size;
15159 for (next_fragP = fragP;
15160 next_fragP != padding_fragP;
15161 next_fragP = next_fragP->fr_next)
15162 if (next_fragP->fr_type == rs_machine_dependent
15163 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15164 == BRANCH_PREFIX))
15166 if (left_size)
15168 int max = next_fragP->tc_frag_data.max_bytes;
15169 if (max)
15171 int size;
15172 if (max > left_size)
15173 size = left_size;
15174 else
15175 size = max;
15176 left_size -= size;
15177 next_fragP->tc_frag_data.length = size;
15180 else
15181 next_fragP->tc_frag_data.length = 0;
15184 return (fragP->tc_frag_data.length
15185 - fragP->tc_frag_data.last_length);
15187 return relax_frag (segment, fragP, stretch);
15190 /* md_estimate_size_before_relax()
15192 Called just before relax() for rs_machine_dependent frags. The x86
15193 assembler uses these frags to handle variable size jump
15194 instructions.
15196 Any symbol that is now undefined will not become defined.
15197 Return the correct fr_subtype in the frag.
15198 Return the initial "guess for variable size of frag" to caller.
15199 The guess is actually the growth beyond the fixed part. Whatever
15200 we do to grow the fixed or variable part contributes to our
15201 returned value. */
15203 int
15204 md_estimate_size_before_relax (fragS *fragP, segT segment)
15206 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15207 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
15208 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15210 i386_classify_machine_dependent_frag (fragP);
15211 return fragP->tc_frag_data.length;
15214 /* We've already got fragP->fr_subtype right; all we have to do is
15215 check for un-relaxable symbols. On an ELF system, we can't relax
15216 an externally visible symbol, because it may be overridden by a
15217 shared library. */
15218 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
15219 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15220 || (IS_ELF
15221 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
15222 fragP->fr_var))
15223 #endif
15224 #if defined (OBJ_COFF) && defined (TE_PE)
15225 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
15226 && S_IS_WEAK (fragP->fr_symbol))
15227 #endif
15230 /* Symbol is undefined in this segment, or we need to keep a
15231 reloc so that weak symbols can be overridden. */
15232 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
15233 enum bfd_reloc_code_real reloc_type;
15234 unsigned char *opcode;
15235 int old_fr_fix;
15236 fixS *fixP = NULL;
15238 if (fragP->fr_var != NO_RELOC)
15239 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
15240 else if (size == 2)
15241 reloc_type = BFD_RELOC_16_PCREL;
15242 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15243 else if (fragP->tc_frag_data.code == CODE_64BIT
15244 && fragP->fr_offset == 0
15245 && need_plt32_p (fragP->fr_symbol))
15246 reloc_type = BFD_RELOC_X86_64_PLT32;
15247 #endif
15248 else
15249 reloc_type = BFD_RELOC_32_PCREL;
15251 old_fr_fix = fragP->fr_fix;
15252 opcode = (unsigned char *) fragP->fr_opcode;
15254 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
15256 case UNCOND_JUMP:
15257 /* Make jmp (0xeb) a (d)word displacement jump. */
15258 opcode[0] = 0xe9;
15259 fragP->fr_fix += size;
15260 fixP = fix_new (fragP, old_fr_fix, size,
15261 fragP->fr_symbol,
15262 fragP->fr_offset, 1,
15263 reloc_type);
15264 break;
15266 case COND_JUMP86:
15267 if (size == 2
15268 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
15270 /* Negate the condition, and branch past an
15271 unconditional jump. */
15272 opcode[0] ^= 1;
15273 opcode[1] = 3;
15274 /* Insert an unconditional jump. */
15275 opcode[2] = 0xe9;
15276 /* We added two extra opcode bytes, and have a two byte
15277 offset. */
15278 fragP->fr_fix += 2 + 2;
15279 fix_new (fragP, old_fr_fix + 2, 2,
15280 fragP->fr_symbol,
15281 fragP->fr_offset, 1,
15282 reloc_type);
15283 break;
15285 /* Fall through. */
15287 case COND_JUMP:
15288 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
15290 fragP->fr_fix += 1;
15291 fixP = fix_new (fragP, old_fr_fix, 1,
15292 fragP->fr_symbol,
15293 fragP->fr_offset, 1,
15294 BFD_RELOC_8_PCREL);
15295 fixP->fx_signed = 1;
15296 break;
15299 /* This changes the byte-displacement jump 0x7N
15300 to the (d)word-displacement jump 0x0f,0x8N. */
15301 opcode[1] = opcode[0] + 0x10;
15302 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15303 /* We've added an opcode byte. */
15304 fragP->fr_fix += 1 + size;
15305 fixP = fix_new (fragP, old_fr_fix + 1, size,
15306 fragP->fr_symbol,
15307 fragP->fr_offset, 1,
15308 reloc_type);
15309 break;
15311 default:
15312 BAD_CASE (fragP->fr_subtype);
15313 break;
15316 /* All jumps handled here are signed, but don't unconditionally use a
15317 signed limit check for 32 and 16 bit jumps as we want to allow wrap
15318 around at 4G (outside of 64-bit mode) and 64k. */
15319 if (size == 4 && flag_code == CODE_64BIT)
15320 fixP->fx_signed = 1;
15322 frag_wane (fragP);
15323 return fragP->fr_fix - old_fr_fix;
15326 /* Guess size depending on current relax state. Initially the relax
15327 state will correspond to a short jump and we return 1, because
15328 the variable part of the frag (the branch offset) is one byte
15329 long. However, we can relax a section more than once and in that
15330 case we must either set fr_subtype back to the unrelaxed state,
15331 or return the value for the appropriate branch. */
15332 return md_relax_table[fragP->fr_subtype].rlx_length;
15335 /* Called after relax() is finished.
15337 In: Address of frag.
15338 fr_type == rs_machine_dependent.
15339 fr_subtype is what the address relaxed to.
15341 Out: Any fixSs and constants are set up.
15342 Caller will turn frag into a ".space 0". */
15344 void
15345 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
15346 fragS *fragP)
15348 unsigned char *opcode;
15349 unsigned char *where_to_put_displacement = NULL;
15350 offsetT target_address;
15351 offsetT opcode_address;
15352 unsigned int extension = 0;
15353 offsetT displacement_from_opcode_start;
15355 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15356 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
15357 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15359 /* Generate nop padding. */
15360 unsigned int size = fragP->tc_frag_data.length;
15361 if (size)
15363 if (size > fragP->tc_frag_data.max_bytes)
15364 abort ();
15366 if (flag_debug)
15368 const char *msg;
15369 const char *branch = "branch";
15370 const char *prefix = "";
15371 fragS *padding_fragP;
15372 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
15373 == BRANCH_PREFIX)
15375 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15376 switch (fragP->tc_frag_data.default_prefix)
15378 default:
15379 abort ();
15380 break;
15381 case CS_PREFIX_OPCODE:
15382 prefix = " cs";
15383 break;
15384 case DS_PREFIX_OPCODE:
15385 prefix = " ds";
15386 break;
15387 case ES_PREFIX_OPCODE:
15388 prefix = " es";
15389 break;
15390 case FS_PREFIX_OPCODE:
15391 prefix = " fs";
15392 break;
15393 case GS_PREFIX_OPCODE:
15394 prefix = " gs";
15395 break;
15396 case SS_PREFIX_OPCODE:
15397 prefix = " ss";
15398 break;
15400 if (padding_fragP)
15401 msg = _("%s:%u: add %d%s at 0x%llx to align "
15402 "%s within %d-byte boundary\n");
15403 else
15404 msg = _("%s:%u: add additional %d%s at 0x%llx to "
15405 "align %s within %d-byte boundary\n");
15407 else
15409 padding_fragP = fragP;
15410 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
15411 "%s within %d-byte boundary\n");
15414 if (padding_fragP)
15415 switch (padding_fragP->tc_frag_data.branch_type)
15417 case align_branch_jcc:
15418 branch = "jcc";
15419 break;
15420 case align_branch_fused:
15421 branch = "fused jcc";
15422 break;
15423 case align_branch_jmp:
15424 branch = "jmp";
15425 break;
15426 case align_branch_call:
15427 branch = "call";
15428 break;
15429 case align_branch_indirect:
15430 branch = "indiret branch";
15431 break;
15432 case align_branch_ret:
15433 branch = "ret";
15434 break;
15435 default:
15436 break;
15439 fprintf (stdout, msg,
15440 fragP->fr_file, fragP->fr_line, size, prefix,
15441 (long long) fragP->fr_address, branch,
15442 1 << align_branch_power);
15444 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15445 memset (fragP->fr_opcode,
15446 fragP->tc_frag_data.default_prefix, size);
15447 else
15448 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
15449 size, 0);
15450 fragP->fr_fix += size;
15452 return;
15455 opcode = (unsigned char *) fragP->fr_opcode;
15457 /* Address we want to reach in file space. */
15458 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
15460 /* Address opcode resides at in file space. */
15461 opcode_address = fragP->fr_address + fragP->fr_fix;
15463 /* Displacement from opcode start to fill into instruction. */
15464 displacement_from_opcode_start = target_address - opcode_address;
15466 if ((fragP->fr_subtype & BIG) == 0)
15468 /* Don't have to change opcode. */
15469 extension = 1; /* 1 opcode + 1 displacement */
15470 where_to_put_displacement = &opcode[1];
15472 else
15474 if (no_cond_jump_promotion
15475 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
15476 as_warn_where (fragP->fr_file, fragP->fr_line,
15477 _("long jump required"));
15479 switch (fragP->fr_subtype)
15481 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
15482 extension = 4; /* 1 opcode + 4 displacement */
15483 opcode[0] = 0xe9;
15484 where_to_put_displacement = &opcode[1];
15485 break;
15487 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
15488 extension = 2; /* 1 opcode + 2 displacement */
15489 opcode[0] = 0xe9;
15490 where_to_put_displacement = &opcode[1];
15491 break;
15493 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
15494 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
15495 extension = 5; /* 2 opcode + 4 displacement */
15496 opcode[1] = opcode[0] + 0x10;
15497 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15498 where_to_put_displacement = &opcode[2];
15499 break;
15501 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
15502 extension = 3; /* 2 opcode + 2 displacement */
15503 opcode[1] = opcode[0] + 0x10;
15504 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15505 where_to_put_displacement = &opcode[2];
15506 break;
15508 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
15509 extension = 4;
15510 opcode[0] ^= 1;
15511 opcode[1] = 3;
15512 opcode[2] = 0xe9;
15513 where_to_put_displacement = &opcode[3];
15514 break;
15516 default:
15517 BAD_CASE (fragP->fr_subtype);
15518 break;
15522 /* If size is less than four we are sure that the operand fits,
15523 but if it's 4, then it could be that the displacement is larger
15524 than +/- 2GB. */
15525 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
15526 && object_64bit
15527 && ((addressT) (displacement_from_opcode_start - extension
15528 + ((addressT) 1 << 31))
15529 > (((addressT) 2 << 31) - 1)))
15531 as_bad_where (fragP->fr_file, fragP->fr_line,
15532 _("jump target out of range"));
15533 /* Make us emit 0. */
15534 displacement_from_opcode_start = extension;
15536 /* Now put displacement after opcode. */
15537 md_number_to_chars ((char *) where_to_put_displacement,
15538 (valueT) (displacement_from_opcode_start - extension),
15539 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
15540 fragP->fr_fix += extension;
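/* E.g. for ENCODE_RELAX_STATE (COND_JUMP86, BIG16), a 16-bit `jz'
   that can't reach as rel8 is rewritten above as `jnz .+3' (negated
   condition, 0x75 0x03) followed by `jmp rel16' (0xe9), growing the
   frag by 4 bytes. */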
15543 /* Apply a fixup (fixP) to segment data, once it has been determined
15544 by our caller that we have all the info we need to fix it up.
15546 Parameter valP is the pointer to the value of the bits.
15548 On the 386, immediates, displacements, and data pointers are all in
15549 the same (little-endian) format, so we don't need to care about which
15550 we are handling. */
15552 void
15553 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
15555 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
15556 valueT value = *valP;
15558 #if !defined (TE_Mach)
15559 if (fixP->fx_pcrel)
15561 switch (fixP->fx_r_type)
15563 default:
15564 break;
15566 case BFD_RELOC_64:
15567 fixP->fx_r_type = BFD_RELOC_64_PCREL;
15568 break;
15569 case BFD_RELOC_32:
15570 case BFD_RELOC_X86_64_32S:
15571 fixP->fx_r_type = BFD_RELOC_32_PCREL;
15572 break;
15573 case BFD_RELOC_16:
15574 fixP->fx_r_type = BFD_RELOC_16_PCREL;
15575 break;
15576 case BFD_RELOC_8:
15577 fixP->fx_r_type = BFD_RELOC_8_PCREL;
15578 break;
15582 if (fixP->fx_addsy != NULL
15583 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
15584 || fixP->fx_r_type == BFD_RELOC_64_PCREL
15585 || fixP->fx_r_type == BFD_RELOC_16_PCREL
15586 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
15587 && !use_rela_relocations)
15589 /* This is a hack. There should be a better way to handle this.
15590 This covers for the fact that bfd_install_relocation will
15591 subtract the current location (for partial_inplace, PC relative
15592 relocations); see more below. */
15593 #ifndef OBJ_AOUT
15594 if (IS_ELF
15595 #ifdef TE_PE
15596 || OUTPUT_FLAVOR == bfd_target_coff_flavour
15597 #endif
15599 value += fixP->fx_where + fixP->fx_frag->fr_address;
15600 #endif
15601 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15602 if (IS_ELF)
15604 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
15606 if ((sym_seg == seg
15607 || (symbol_section_p (fixP->fx_addsy)
15608 && sym_seg != absolute_section))
15609 && !generic_force_reloc (fixP))
15611 /* Yes, we add the values in twice. This is because
15612 bfd_install_relocation subtracts them out again. I think
15613 bfd_install_relocation is broken, but I don't dare change
15614 it. FIXME. */
15615 value += fixP->fx_where + fixP->fx_frag->fr_address;
15618 #endif
15619 #if defined (OBJ_COFF) && defined (TE_PE)
15620 /* For some reason, the PE format does not store a
15621 section address offset for a PC relative symbol. */
15622 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
15623 || S_IS_WEAK (fixP->fx_addsy))
15624 value += md_pcrel_from (fixP);
15625 #endif
15627 #if defined (OBJ_COFF) && defined (TE_PE)
15628 if (fixP->fx_addsy != NULL
15629 && S_IS_WEAK (fixP->fx_addsy)
15630 /* PR 16858: Do not modify weak function references. */
15631 && ! fixP->fx_pcrel)
15633 #if !defined (TE_PEP)
15634 /* For x86 PE weak function symbols are neither PC-relative
15635 nor do they set S_IS_FUNCTION. So the only reliable way
15636 to detect them is to check the flags of their containing
15637 section. */
15638 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
15639 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
15641 else
15642 #endif
15643 value -= S_GET_VALUE (fixP->fx_addsy);
15645 #endif
15647 /* Fix a few things - the dynamic linker expects certain values here,
15648 and we must not disappoint it. */
15649 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15650 if (IS_ELF && fixP->fx_addsy)
15651 switch (fixP->fx_r_type)
15653 case BFD_RELOC_386_PLT32:
15654 case BFD_RELOC_X86_64_PLT32:
15655 /* Make the jump instruction point to the address of the operand.
15656 At runtime we merely add the offset to the actual PLT entry.
15657 NB: Subtract the offset size only for jump instructions. */
15658 if (fixP->fx_pcrel)
15659 value = -4;
15660 break;
15662 case BFD_RELOC_386_TLS_GD:
15663 case BFD_RELOC_386_TLS_LDM:
15664 case BFD_RELOC_386_TLS_IE_32:
15665 case BFD_RELOC_386_TLS_IE:
15666 case BFD_RELOC_386_TLS_GOTIE:
15667 case BFD_RELOC_386_TLS_GOTDESC:
15668 case BFD_RELOC_X86_64_TLSGD:
15669 case BFD_RELOC_X86_64_TLSLD:
15670 case BFD_RELOC_X86_64_GOTTPOFF:
15671 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
15672 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
15673 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
15674 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
15675 value = 0; /* Fully resolved at runtime. No addend. */
15676 /* Fallthrough */
15677 case BFD_RELOC_386_TLS_LE:
15678 case BFD_RELOC_386_TLS_LDO_32:
15679 case BFD_RELOC_386_TLS_LE_32:
15680 case BFD_RELOC_X86_64_DTPOFF32:
15681 case BFD_RELOC_X86_64_DTPOFF64:
15682 case BFD_RELOC_X86_64_TPOFF32:
15683 case BFD_RELOC_X86_64_TPOFF64:
15684 S_SET_THREAD_LOCAL (fixP->fx_addsy);
15685 break;
15687 case BFD_RELOC_386_TLS_DESC_CALL:
15688 case BFD_RELOC_X86_64_TLSDESC_CALL:
15689 value = 0; /* Fully resolved at runtime. No addend. */
15690 S_SET_THREAD_LOCAL (fixP->fx_addsy);
15691 fixP->fx_done = 0;
15692 return;
15694 case BFD_RELOC_VTABLE_INHERIT:
15695 case BFD_RELOC_VTABLE_ENTRY:
15696 fixP->fx_done = 0;
15697 return;
15699 default:
15700 break;
15702 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
15704 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
15705 if (!object_64bit)
15706 value = extend_to_32bit_address (value);
15708 *valP = value;
15709 #endif /* !defined (TE_Mach) */
15711 /* Are we finished with this relocation now? */
15712 if (fixP->fx_addsy == NULL)
15714 fixP->fx_done = 1;
15715 switch (fixP->fx_r_type)
15717 case BFD_RELOC_X86_64_32S:
15718 fixP->fx_signed = 1;
15719 break;
15721 default:
15722 break;
15725 #if defined (OBJ_COFF) && defined (TE_PE)
15726 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
15728 fixP->fx_done = 0;
15729 /* Remember value for tc_gen_reloc. */
15730 fixP->fx_addnumber = value;
15731 /* Clear out the frag for now. */
15732 value = 0;
15734 #endif
15735 else if (use_rela_relocations)
15737 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
15738 fixP->fx_no_overflow = 1;
15739 /* Remember value for tc_gen_reloc. */
15740 fixP->fx_addnumber = value;
15741 value = 0;
15744 md_number_to_chars (p, value, fixP->fx_size);
15747 const char *
15748 md_atof (int type, char *litP, int *sizeP)
15750 /* This outputs the LITTLENUMs in REVERSE order;
15751 in accord with the little-endian 386. */
15752 return ieee_md_atof (type, litP, sizeP, false);
15755 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
15757 static char *
15758 output_invalid (int c)
15760 if (ISPRINT (c))
15761 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
15762 "'%c'", c);
15763 else
15764 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
15765 "(0x%x)", (unsigned char) c);
15766 return output_invalid_buf;
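/* E.g. output_invalid ('!') yields "'!'", while a non-printable byte
   such as 0x80 yields "(0x80)". */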
15769 /* Verify that @r can be used in the current context. */
15771 static bool check_register (const reg_entry *r)
15773 if (allow_pseudo_reg)
15774 return true;
15776 if (operand_type_all_zero (&r->reg_type))
15777 return false;
15779 if ((r->reg_type.bitfield.dword
15780 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
15781 || r->reg_type.bitfield.class == RegCR
15782 || r->reg_type.bitfield.class == RegDR)
15783 && !cpu_arch_flags.bitfield.cpui386)
15784 return false;
15786 if (r->reg_type.bitfield.class == RegTR
15787 && (flag_code == CODE_64BIT
15788 || !cpu_arch_flags.bitfield.cpui386
15789 || cpu_arch_isa_flags.bitfield.cpui586
15790 || cpu_arch_isa_flags.bitfield.cpui686))
15791 return false;
15793 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
15794 return false;
15796 if (!cpu_arch_flags.bitfield.cpuavx512f)
15798 if (r->reg_type.bitfield.zmmword
15799 || r->reg_type.bitfield.class == RegMask)
15800 return false;
15802 if (!cpu_arch_flags.bitfield.cpuavx)
15804 if (r->reg_type.bitfield.ymmword)
15805 return false;
15807 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
15808 return false;
15812 if (r->reg_type.bitfield.zmmword)
15814 if (vector_size < VSZ512)
15815 return false;
15817 /* Don't update pp when not dealing with insn operands. */
15818 switch (current_templates.start ? pp.encoding : encoding_evex)
15820 case encoding_default:
15821 case encoding_egpr:
15822 pp.encoding = encoding_evex512;
15823 break;
15824 case encoding_evex:
15825 case encoding_evex512:
15826 break;
15827 default:
15828 pp.encoding = encoding_error;
15829 break;
15833 if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
15834 return false;
15836 if (r->reg_type.bitfield.tmmword
15837 && (!cpu_arch_flags.bitfield.cpuamx_tile
15838 || flag_code != CODE_64BIT))
15839 return false;
15841 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
15842 return false;
15844 /* Don't allow the fake index register unless allow_index_reg is nonzero. */
15845 if (!allow_index_reg && r->reg_num == RegIZ)
15846 return false;
15848 /* Upper 16 vector registers are only available with VREX in 64bit
15849 mode, and require EVEX encoding. */
15850 if (r->reg_flags & RegVRex)
15852 if (!cpu_arch_flags.bitfield.cpuavx512f
15853 || flag_code != CODE_64BIT)
15854 return false;
15856 /* Don't update pp when not dealing with insn operands. */
15857 switch (current_templates.start ? pp.encoding : encoding_evex)
15859 case encoding_default:
15860 case encoding_egpr:
15861 case encoding_evex512:
15862 pp.encoding = encoding_evex;
15863 break;
15864 case encoding_evex:
15865 break;
15866 default:
15867 pp.encoding = encoding_error;
15868 break;
15872 if (r->reg_flags & RegRex2)
15874 if (!cpu_arch_flags.bitfield.cpuapx_f
15875 || flag_code != CODE_64BIT)
15876 return false;
15878 /* Don't update pp when not dealing with insn operands. */
15879 switch (current_templates.start ? pp.encoding : encoding_egpr)
15881 case encoding_default:
15882 pp.encoding = encoding_egpr;
15883 break;
15884 case encoding_egpr:
15885 case encoding_evex:
15886 case encoding_evex512:
15887 break;
15888 default:
15889 pp.encoding = encoding_error;
15890 break;
15894 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
15895 && (!cpu_arch_flags.bitfield.cpu64
15896 || r->reg_type.bitfield.class != RegCR
15897 || dot_insn ())
15898 && flag_code != CODE_64BIT)
15899 return false;
15901 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
15902 && !intel_syntax)
15903 return false;
15905 return true;
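/* For example, %zmm16 passes only when AVX512F is enabled, the
   configured vector_size allows 512-bit registers and, being one of
   the upper 16 (RegVRex) registers, the code is 64-bit; the APX
   extended GPRs %r16..%r31 (RegRex2) likewise require APX_F and
   64-bit mode. */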
15908 /* REG_STRING starts *before* REGISTER_PREFIX. */
15910 static const reg_entry *
15911 parse_real_register (const char *reg_string, char **end_op)
15913 const char *s = reg_string;
15914 char *p;
15915 char reg_name_given[MAX_REG_NAME_SIZE + 1];
15916 const reg_entry *r;
15918 /* Skip possible REGISTER_PREFIX and possible whitespace. */
15919 if (*s == REGISTER_PREFIX)
15920 ++s;
15922 if (is_space_char (*s))
15923 ++s;
15925 p = reg_name_given;
15926 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
15928 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
15929 return (const reg_entry *) NULL;
15930 s++;
15933 if (is_part_of_name (*s))
15934 return (const reg_entry *) NULL;
15936 *end_op = (char *) s;
15938 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
15940 /* Handle floating point regs, allowing spaces in the (i) part. */
15941 if (r == reg_st0)
15943 if (!cpu_arch_flags.bitfield.cpu8087
15944 && !cpu_arch_flags.bitfield.cpu287
15945 && !cpu_arch_flags.bitfield.cpu387
15946 && !allow_pseudo_reg)
15947 return (const reg_entry *) NULL;
15949 if (is_space_char (*s))
15950 ++s;
15951 if (*s == '(')
15953 ++s;
15954 if (is_space_char (*s))
15955 ++s;
15956 if (*s >= '0' && *s <= '7')
15958 int fpr = *s - '0';
15959 ++s;
15960 if (is_space_char (*s))
15961 ++s;
15962 if (*s == ')')
15964 *end_op = (char *) s + 1;
15965 know (r[fpr].reg_num == fpr);
15966 return r + fpr;
15969 /* We have "%st(" then garbage. */
15970 return (const reg_entry *) NULL;
15974 return r && check_register (r) ? r : NULL;
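/* For example, `%st(1)', `%st ( 1 )' and (with -mnaked-reg) `st(1)'
   all resolve to the same FP register entry, while `%st(8)' is
   rejected as "%st(" followed by garbage. */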
15977 /* REG_STRING starts *before* REGISTER_PREFIX. */
15979 static const reg_entry *
15980 parse_register (const char *reg_string, char **end_op)
15982 const reg_entry *r;
15984 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
15985 r = parse_real_register (reg_string, end_op);
15986 else
15987 r = NULL;
15988 if (!r)
15990 char *save = input_line_pointer;
15991 char *buf = xstrdup (reg_string), *name;
15992 symbolS *symbolP;
15994 input_line_pointer = buf;
15995 get_symbol_name (&name);
15996 symbolP = symbol_find (name);
15997 while (symbolP && symbol_equated_p (symbolP))
15999 const expressionS *e = symbol_get_value_expression(symbolP);
16001 if (e->X_add_number)
16002 break;
16003 symbolP = e->X_add_symbol;
16005 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
16007 const expressionS *e = symbol_get_value_expression (symbolP);
16009 if (e->X_op == O_register)
16011 know (e->X_add_number >= 0
16012 && (valueT) e->X_add_number < i386_regtab_size);
16013 r = i386_regtab + e->X_add_number;
16014 *end_op = (char *) reg_string + (input_line_pointer - buf);
16016 if (r && !check_register (r))
16018 as_bad (_("register '%s%s' cannot be used here"),
16019 register_prefix, r->reg_name);
16020 r = &bad_reg;
16023 input_line_pointer = save;
16024 free (buf);
16026 return r;
16029 int
16030 i386_parse_name (char *name, expressionS *e, char *nextcharP)
16032 const reg_entry *r = NULL;
16033 char *end = input_line_pointer;
16035 /* We only know the terminating character here. It being a double quote could
16036 be the closing one of a quoted symbol name, or an opening one from a
16037 following string (or another quoted symbol name). Since the latter can't
16038 be valid syntax for anything, bailing in either case is good enough. */
16039 if (*nextcharP == '"')
16040 return 0;
16042 *end = *nextcharP;
16043 if (*name == REGISTER_PREFIX || allow_naked_reg)
16044 r = parse_real_register (name, &input_line_pointer);
16045 if (r && end <= input_line_pointer)
16047 *nextcharP = *input_line_pointer;
16048 *input_line_pointer = 0;
16049 e->X_op = O_register;
16050 e->X_add_number = r - i386_regtab;
16051 return 1;
16053 input_line_pointer = end;
16054 *end = 0;
16055 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
16058 void
16059 md_operand (expressionS *e)
16061 char *end;
16062 const reg_entry *r;
16064 switch (*input_line_pointer)
16066 case REGISTER_PREFIX:
16067 r = parse_real_register (input_line_pointer, &end);
16068 if (r)
16070 e->X_op = O_register;
16071 e->X_add_number = r - i386_regtab;
16072 input_line_pointer = end;
16074 break;
16076 case '[':
16077 gas_assert (intel_syntax);
16078 end = input_line_pointer++;
16079 expression (e);
16080 if (*input_line_pointer == ']')
16082 ++input_line_pointer;
16083 e->X_op_symbol = make_expr_symbol (e);
16084 e->X_add_symbol = NULL;
16085 e->X_add_number = 0;
16086 e->X_op = O_index;
16088 else
16090 e->X_op = O_absent;
16091 input_line_pointer = end;
16093 break;
16097 #ifdef BFD64
16098 /* To maintain consistency with !BFD64 builds of gas, record whether any
16099 (binary) operator was involved in an expression. As expressions are
16100 evaluated in only 32 bits when !BFD64, we use this to decide whether to
16101 truncate results. */
16102 bool i386_record_operator (operatorT op,
16103 const expressionS *left,
16104 const expressionS *right)
16106 if (op == O_absent)
16107 return false;
16109 if (!left)
16111 /* Since the expression parser applies unary operators fine to bignum
16112 operands, we don't need to be concerned about respective operands not
16113 fitting in 32 bits. */
16114 if (right->X_op == O_constant && right->X_unsigned
16115 && !fits_in_unsigned_long (right->X_add_number))
16116 return false;
16118 /* This isn't entirely right: The pattern can also result when constant
16119 expressions are folded (e.g. 0xffffffff + 1). */
16120 else if ((left->X_op == O_constant && left->X_unsigned
16121 && !fits_in_unsigned_long (left->X_add_number))
16122 || (right->X_op == O_constant && right->X_unsigned
16123 && !fits_in_unsigned_long (right->X_add_number)))
16124 expr_mode = expr_large_value;
16126 if (expr_mode != expr_large_value)
16127 expr_mode = expr_operator_present;
16129 return false;
16131 #endif
16133 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16134 const char *md_shortopts = "kVQ:sqnO::";
16135 #else
16136 const char *md_shortopts = "qnO::";
16137 #endif
16139 #define OPTION_32 (OPTION_MD_BASE + 0)
16140 #define OPTION_64 (OPTION_MD_BASE + 1)
16141 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
16142 #define OPTION_MARCH (OPTION_MD_BASE + 3)
16143 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
16144 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
16145 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
16146 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
16147 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
16148 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
16149 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
16150 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
16151 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
16152 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
16153 #define OPTION_X32 (OPTION_MD_BASE + 14)
16154 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
16155 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
16156 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
16157 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
16158 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
16159 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
16160 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
16161 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
16162 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
16163 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
16164 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
16165 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
16166 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
16167 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
16168 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
16169 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
16170 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
16171 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
16172 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
16173 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
16175 struct option md_longopts[] =
16177 {"32", no_argument, NULL, OPTION_32},
16178 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
16179 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
16180 {"64", no_argument, NULL, OPTION_64},
16181 #endif
16182 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16183 {"x32", no_argument, NULL, OPTION_X32},
16184 {"mshared", no_argument, NULL, OPTION_MSHARED},
16185 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
16186 #endif
16187 {"divide", no_argument, NULL, OPTION_DIVIDE},
16188 {"march", required_argument, NULL, OPTION_MARCH},
16189 {"mtune", required_argument, NULL, OPTION_MTUNE},
16190 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
16191 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
16192 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
16193 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
16194 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
16195 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
16196 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
16197 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
16198 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
16199 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
16200 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
16201 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
16202 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
16203 # if defined (TE_PE) || defined (TE_PEP)
16204 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
16205 #endif
16206 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
16207 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
16208 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
16209 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
16210 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
16211 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
16212 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
16213 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
16214 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
16215 {"mlfence-before-indirect-branch", required_argument, NULL,
16216 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
16217 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
16218 {"mamd64", no_argument, NULL, OPTION_MAMD64},
16219 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
16220 {NULL, no_argument, NULL, 0}
16222 size_t md_longopts_size = sizeof (md_longopts);
16224 int
16225 md_parse_option (int c, const char *arg)
16227 unsigned int j;
16228 char *arch, *next, *saved, *type;
16230 switch (c)
16232 case 'n':
16233 optimize_align_code = 0;
16234 break;
16236 case 'q':
16237 quiet_warnings = 1;
16238 break;
16240 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16241 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
16242 should be emitted or not. FIXME: Not implemented. */
16243 case 'Q':
16244 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
16245 return 0;
16246 break;
16248 /* -V: SVR4 argument to print version ID. */
16249 case 'V':
16250 print_version_id ();
16251 break;
16253 /* -k: Ignore for FreeBSD compatibility. */
16254 case 'k':
16255 break;
16257 case 's':
16258 /* -s: On i386 Solaris, this tells the native assembler to use
16259 .stab instead of .stab.excl. We always use .stab anyhow. */
16260 break;
16262 case OPTION_MSHARED:
16263 shared = 1;
16264 break;
16266 case OPTION_X86_USED_NOTE:
16267 if (strcasecmp (arg, "yes") == 0)
16268 x86_used_note = 1;
16269 else if (strcasecmp (arg, "no") == 0)
16270 x86_used_note = 0;
16271 else
16272 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
16273 break;
16276 #endif
16277 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
16278 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
16279 case OPTION_64:
16281 const char **list, **l;
16283 list = bfd_target_list ();
16284 for (l = list; *l != NULL; l++)
16285 if (startswith (*l, "elf64-x86-64")
16286 || strcmp (*l, "coff-x86-64") == 0
16287 || strcmp (*l, "pe-x86-64") == 0
16288 || strcmp (*l, "pei-x86-64") == 0
16289 || strcmp (*l, "mach-o-x86-64") == 0)
16291 default_arch = "x86_64";
16292 break;
16294 if (*l == NULL)
16295 as_fatal (_("no compiled in support for x86_64"));
16296 free (list);
16298 break;
16299 #endif
16301 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16302 case OPTION_X32:
16303 if (IS_ELF)
16305 const char **list, **l;
16307 list = bfd_target_list ();
16308 for (l = list; *l != NULL; l++)
16309 if (startswith (*l, "elf32-x86-64"))
16311 default_arch = "x86_64:32";
16312 break;
16314 if (*l == NULL)
16315 as_fatal (_("no compiled in support for 32bit x86_64"));
16316 free (list);
16318 else
16319 as_fatal (_("32bit x86_64 is only supported for ELF"));
16320 break;
16321 #endif
16323 case OPTION_32:
16325 const char **list, **l;
16327 list = bfd_target_list ();
16328 for (l = list; *l != NULL; l++)
16329 if (strstr (*l, "-i386")
16330 || strstr (*l, "-go32"))
16332 default_arch = "i386";
16333 break;
16335 if (*l == NULL)
16336 as_fatal (_("no compiled in support for ix86"));
16337 free (list);
16339 break;
16341 case OPTION_DIVIDE:
16342 #ifdef SVR4_COMMENT_CHARS
16344 char *n, *t;
16345 const char *s;
16347 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
16348 t = n;
16349 for (s = i386_comment_chars; *s != '\0'; s++)
16350 if (*s != '/')
16351 *t++ = *s;
16352 *t = '\0';
16353 i386_comment_chars = n;
16355 #endif
16356 break;
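      /* Example (sketch): on SVR4-hosted configurations `--divide' removes
	 '/' from i386_comment_chars above, so

	     movl $100/4, %eax

	 performs the division instead of treating `/4, %eax' as a
	 comment.  */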
16358 case OPTION_MARCH:
16359 saved = xstrdup (arg);
16360 arch = saved;
16361 /* Allow -march=+nosse. */
16362 if (*arch == '+')
16363 arch++;
16366 char *vsz;
16368 if (*arch == '.')
16369 as_fatal (_("invalid -march= option: `%s'"), arg);
16370 next = strchr (arch, '+');
16371 if (next)
16372 *next++ = '\0';
16373 vsz = strchr (arch, '/');
16374 if (vsz)
16375 *vsz++ = '\0';
16376 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16378 if (vsz && cpu_arch[j].vsz != vsz_set)
16379 continue;
16381 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
16382 && strcmp (arch, cpu_arch[j].name) == 0)
16384 /* Processor. */
16385 if (! cpu_arch[j].enable.bitfield.cpui386)
16386 continue;
16388 cpu_arch_name = cpu_arch[j].name;
16389 free (cpu_sub_arch_name);
16390 cpu_sub_arch_name = NULL;
16391 cpu_arch_flags = cpu_arch[j].enable;
16392 cpu_arch_isa = cpu_arch[j].type;
16393 cpu_arch_isa_flags = cpu_arch[j].enable;
16394 if (!cpu_arch_tune_set)
16395 cpu_arch_tune = cpu_arch_isa;
16396 vector_size = VSZ_DEFAULT;
16397 break;
16399 else if (cpu_arch[j].type == PROCESSOR_NONE
16400 && strcmp (arch, cpu_arch[j].name) == 0
16401 && !cpu_flags_all_zero (&cpu_arch[j].enable))
16403 /* ISA extension. */
16404 isa_enable (j);
16406 switch (cpu_arch[j].vsz)
16408 default:
16409 break;
16411 case vsz_set:
16412 if (vsz)
16414 char *end;
16415 unsigned long val = strtoul (vsz, &end, 0);
16417 if (*end)
16418 val = 0;
16419 switch (val)
16421 case 512: vector_size = VSZ512; break;
16422 case 256: vector_size = VSZ256; break;
16423 case 128: vector_size = VSZ128; break;
16424 default:
16425 as_warn (_("Unrecognized vector size specifier ignored"));
16426 break;
16428 break;
16430 /* Fall through. */
16431 case vsz_reset:
16432 vector_size = VSZ_DEFAULT;
16433 break;
16436 break;
16440 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
16442 /* Disable an ISA extension. */
16443 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16444 if (cpu_arch[j].type == PROCESSOR_NONE
16445 && strcmp (arch + 2, cpu_arch[j].name) == 0)
16447 isa_disable (j);
16448 if (cpu_arch[j].vsz == vsz_set)
16449 vector_size = VSZ_DEFAULT;
16450 break;
16454 if (j >= ARRAY_SIZE (cpu_arch))
16455 as_fatal (_("invalid -march= option: `%s'"), arg);
16457 arch = next;
16459 while (next != NULL);
16460 free (saved);
16461 break;
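      /* Illustrative invocations for the parsing above (extension names are
	 assumptions to the extent they are present in cpu_arch):

	     as -march=corei7+avx2	processor plus ISA extension
	     as -march=+nosse4.1	extension-only adjustment
	     as -march=generic64+avx10.1/256
					'/' selects a vector size where the
					extension permits one (vsz_set)  */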
16463 case OPTION_MTUNE:
16464 if (*arg == '.')
16465 as_fatal (_("invalid -mtune= option: `%s'"), arg);
16466 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16468 if (cpu_arch[j].type != PROCESSOR_NONE
16469 && strcmp (arg, cpu_arch[j].name) == 0)
16471 cpu_arch_tune_set = 1;
16472 cpu_arch_tune = cpu_arch [j].type;
16473 break;
16476 if (j >= ARRAY_SIZE (cpu_arch))
16477 as_fatal (_("invalid -mtune= option: `%s'"), arg);
16478 break;
16480 case OPTION_MMNEMONIC:
16481 if (strcasecmp (arg, "att") == 0)
16482 intel_mnemonic = 0;
16483 else if (strcasecmp (arg, "intel") == 0)
16484 intel_mnemonic = 1;
16485 else
16486 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
16487 break;
16489 case OPTION_MSYNTAX:
16490 if (strcasecmp (arg, "att") == 0)
16491 _set_intel_syntax (0);
16492 else if (strcasecmp (arg, "intel") == 0)
16493 _set_intel_syntax (1);
16494 else
16495 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
16496 break;
16498 case OPTION_MINDEX_REG:
16499 allow_index_reg = 1;
16500 break;
16502 case OPTION_MNAKED_REG:
16503 allow_naked_reg = 1;
16504 register_prefix = "";
16505 break;
16507 case OPTION_MSSE2AVX:
16508 sse2avx = 1;
16509 break;
16511 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
16512 use_unaligned_vector_move = 1;
16513 break;
16515 case OPTION_MSSE_CHECK:
16516 if (strcasecmp (arg, "error") == 0)
16517 sse_check = check_error;
16518 else if (strcasecmp (arg, "warning") == 0)
16519 sse_check = check_warning;
16520 else if (strcasecmp (arg, "none") == 0)
16521 sse_check = check_none;
16522 else
16523 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
16524 break;
16526 case OPTION_MOPERAND_CHECK:
16527 if (strcasecmp (arg, "error") == 0)
16528 operand_check = check_error;
16529 else if (strcasecmp (arg, "warning") == 0)
16530 operand_check = check_warning;
16531 else if (strcasecmp (arg, "none") == 0)
16532 operand_check = check_none;
16533 else
16534 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
16535 break;
16537 case OPTION_MAVXSCALAR:
16538 if (strcasecmp (arg, "128") == 0)
16539 avxscalar = vex128;
16540 else if (strcasecmp (arg, "256") == 0)
16541 avxscalar = vex256;
16542 else
16543 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
16544 break;
16546 case OPTION_MVEXWIG:
16547 if (strcmp (arg, "0") == 0)
16548 vexwig = vexw0;
16549 else if (strcmp (arg, "1") == 0)
16550 vexwig = vexw1;
16551 else
16552 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
16553 break;
16555 case OPTION_MADD_BND_PREFIX:
16556 add_bnd_prefix = 1;
16557 break;
16559 case OPTION_MEVEXLIG:
16560 if (strcmp (arg, "128") == 0)
16561 evexlig = evexl128;
16562 else if (strcmp (arg, "256") == 0)
16563 evexlig = evexl256;
16564 else if (strcmp (arg, "512") == 0)
16565 evexlig = evexl512;
16566 else
16567 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
16568 break;
16570 case OPTION_MEVEXRCIG:
16571 if (strcmp (arg, "rne") == 0)
16572 evexrcig = rne;
16573 else if (strcmp (arg, "rd") == 0)
16574 evexrcig = rd;
16575 else if (strcmp (arg, "ru") == 0)
16576 evexrcig = ru;
16577 else if (strcmp (arg, "rz") == 0)
16578 evexrcig = rz;
16579 else
16580 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
16581 break;
16583 case OPTION_MEVEXWIG:
16584 if (strcmp (arg, "0") == 0)
16585 evexwig = evexw0;
16586 else if (strcmp (arg, "1") == 0)
16587 evexwig = evexw1;
16588 else
16589 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
16590 break;
16592 # if defined (TE_PE) || defined (TE_PEP)
16593 case OPTION_MBIG_OBJ:
16594 use_big_obj = 1;
16595 break;
16596 #endif
16598 case OPTION_MOMIT_LOCK_PREFIX:
16599 if (strcasecmp (arg, "yes") == 0)
16600 omit_lock_prefix = 1;
16601 else if (strcasecmp (arg, "no") == 0)
16602 omit_lock_prefix = 0;
16603 else
16604 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
16605 break;
16607 case OPTION_MFENCE_AS_LOCK_ADD:
16608 if (strcasecmp (arg, "yes") == 0)
16609 avoid_fence = 1;
16610 else if (strcasecmp (arg, "no") == 0)
16611 avoid_fence = 0;
16612 else
16613 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
16614 break;
16616 case OPTION_MLFENCE_AFTER_LOAD:
16617 if (strcasecmp (arg, "yes") == 0)
16618 lfence_after_load = 1;
16619 else if (strcasecmp (arg, "no") == 0)
16620 lfence_after_load = 0;
16621 else
16622 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
16623 break;
16625 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
16626 if (strcasecmp (arg, "all") == 0)
16628 lfence_before_indirect_branch = lfence_branch_all;
16629 if (lfence_before_ret == lfence_before_ret_none)
16630 lfence_before_ret = lfence_before_ret_shl;
16632 else if (strcasecmp (arg, "memory") == 0)
16633 lfence_before_indirect_branch = lfence_branch_memory;
16634 else if (strcasecmp (arg, "register") == 0)
16635 lfence_before_indirect_branch = lfence_branch_register;
16636 else if (strcasecmp (arg, "none") == 0)
16637 lfence_before_indirect_branch = lfence_branch_none;
16638 else
16639 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
16640 arg);
16641 break;
16643 case OPTION_MLFENCE_BEFORE_RET:
16644 if (strcasecmp (arg, "or") == 0)
16645 lfence_before_ret = lfence_before_ret_or;
16646 else if (strcasecmp (arg, "not") == 0)
16647 lfence_before_ret = lfence_before_ret_not;
16648 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
16649 lfence_before_ret = lfence_before_ret_shl;
16650 else if (strcasecmp (arg, "none") == 0)
16651 lfence_before_ret = lfence_before_ret_none;
16652 else
16653 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
16654 arg);
16655 break;
16657 case OPTION_MRELAX_RELOCATIONS:
16658 if (strcasecmp (arg, "yes") == 0)
16659 generate_relax_relocations = 1;
16660 else if (strcasecmp (arg, "no") == 0)
16661 generate_relax_relocations = 0;
16662 else
16663 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
16664 break;
16666 case OPTION_MALIGN_BRANCH_BOUNDARY:
16668 char *end;
16669 long int align = strtoul (arg, &end, 0);
16670 if (*end == '\0')
16672 if (align == 0)
16674 align_branch_power = 0;
16675 break;
16677 else if (align >= 16)
16679 int align_power;
16680 for (align_power = 0;
16681 (align & 1) == 0;
16682 align >>= 1, align_power++)
16683 continue;
16684 /* Limit alignment power to 31. */
16685 if (align == 1 && align_power < 32)
16687 align_branch_power = align_power;
16688 break;
16692 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
16694 break;
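      /* Worked example (sketch): `-malign-branch-boundary=32' passes the
	 checks above (32 >= 16 and a power of two) and sets
	 align_branch_power to 5, i.e. branches are kept within 32-byte
	 boundaries.  */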
16696 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
16698 char *end;
16699 int align = strtoul (arg, &end, 0);
16700 /* Some processors only support 5 prefixes. */
16701 if (*end == '\0' && align >= 0 && align < 6)
16703 align_branch_prefix_size = align;
16704 break;
16706 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
16707 arg);
16709 break;
16711 case OPTION_MALIGN_BRANCH:
16712 align_branch = 0;
16713 saved = xstrdup (arg);
16714 type = saved;
16717 next = strchr (type, '+');
16718 if (next)
16719 *next++ = '\0';
16720 if (strcasecmp (type, "jcc") == 0)
16721 align_branch |= align_branch_jcc_bit;
16722 else if (strcasecmp (type, "fused") == 0)
16723 align_branch |= align_branch_fused_bit;
16724 else if (strcasecmp (type, "jmp") == 0)
16725 align_branch |= align_branch_jmp_bit;
16726 else if (strcasecmp (type, "call") == 0)
16727 align_branch |= align_branch_call_bit;
16728 else if (strcasecmp (type, "ret") == 0)
16729 align_branch |= align_branch_ret_bit;
16730 else if (strcasecmp (type, "indirect") == 0)
16731 align_branch |= align_branch_indirect_bit;
16732 else
16733 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
16734 type = next;
16736 while (next != NULL);
16737 free (saved);
16738 break;
16740 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
16741 align_branch_power = 5;
16742 align_branch_prefix_size = 5;
16743 align_branch = (align_branch_jcc_bit
16744 | align_branch_fused_bit
16745 | align_branch_jmp_bit);
16746 break;
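      /* This option is shorthand; the case above is equivalent to (sketch):
	     -malign-branch-boundary=32 -malign-branch-prefix-size=5 \
	     -malign-branch=jcc+fused+jmp  */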
16748 case OPTION_MAMD64:
16749 isa64 = amd64;
16750 break;
16752 case OPTION_MINTEL64:
16753 isa64 = intel64;
16754 break;
16756 case 'O':
16757 if (arg == NULL)
16759 optimize = 1;
16760 /* Turn off -Os. */
16761 optimize_for_space = 0;
16763 else if (*arg == 's')
16765 optimize_for_space = 1;
16766 /* Turn on all encoding optimizations. */
16767 optimize = INT_MAX;
16769 else
16771 optimize = atoi (arg);
16772 /* Turn off -Os. */
16773 optimize_for_space = 0;
16775 break;
16777 default:
16778 return 0;
16780 return 1;
16783 #define MESSAGE_TEMPLATE \
16784 "                                                                                "
16786 static char *
16787 output_message (FILE *stream, char *p, char *message, char *start,
16788 int *left_p, const char *name, int len)
16790 int size = sizeof (MESSAGE_TEMPLATE);
16791 int left = *left_p;
16793 /* Reserve 2 spaces for ", " or ",\0" */
16794 left -= len + 2;
16796 /* Check if there is any room. */
16797 if (left >= 0)
16799 if (p != start)
16801 *p++ = ',';
16802 *p++ = ' ';
16804 p = mempcpy (p, name, len);
16806 else
16808 /* Output the current message now and start a new one. */
16809 *p++ = ',';
16810 *p = '\0';
16811 fprintf (stream, "%s\n", message);
16812 p = start;
16813 left = size - (start - message) - len - 2;
16815 gas_assert (left >= 0);
16817 p = mempcpy (p, name, len);
16820 *left_p = left;
16821 return p;
16824 static void
16825 show_arch (FILE *stream, int ext, int check)
16827 static char message[] = MESSAGE_TEMPLATE;
16828 char *start = message + 27;
16829 char *p;
16830 int size = sizeof (MESSAGE_TEMPLATE);
16831 int left;
16832 const char *name;
16833 int len;
16834 unsigned int j;
16836 p = start;
16837 left = size - (start - message);
16839 if (!ext && check)
16841 p = output_message (stream, p, message, start, &left,
16842 STRING_COMMA_LEN ("default"));
16843 p = output_message (stream, p, message, start, &left,
16844 STRING_COMMA_LEN ("push"));
16845 p = output_message (stream, p, message, start, &left,
16846 STRING_COMMA_LEN ("pop"));
16849 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16851 /* Should it be skipped? */
16852 if (cpu_arch [j].skip)
16853 continue;
16855 name = cpu_arch [j].name;
16856 len = cpu_arch [j].len;
16857 if (cpu_arch[j].type == PROCESSOR_NONE)
16859 /* It is an extension. Skip if we aren't asked to show it. */
16860 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
16861 continue;
16863 else if (ext)
16865 /* It is a processor. Skip if we only show extensions. */
16866 continue;
16868 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
16870 /* The processor cannot be selected (it lacks i386 support) - skip. */
16871 continue;
16874 p = output_message (stream, p, message, start, &left, name, len);
16877 /* Display disabled extensions. */
16878 if (ext)
16879 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16881 char *str;
16883 if (cpu_arch[j].type != PROCESSOR_NONE
16884 || !cpu_flags_all_zero (&cpu_arch[j].enable))
16885 continue;
16886 str = xasprintf ("no%s", cpu_arch[j].name);
16887 p = output_message (stream, p, message, start, &left, str,
16888 strlen (str));
16889 free (str);
16892 *p = '\0';
16893 fprintf (stream, "%s\n", message);
16896 void
16897 md_show_usage (FILE *stream)
16899 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16900 fprintf (stream, _("\
16901 -Qy, -Qn ignored\n\
16902 -V print assembler version number\n\
16903 -k ignored\n"));
16904 #endif
16905 fprintf (stream, _("\
16906 -n do not optimize code alignment\n\
16907 -O{012s} attempt some code optimizations\n\
16908 -q quieten some warnings\n"));
16909 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16910 fprintf (stream, _("\
16911 -s ignored\n"));
16912 #endif
16913 #ifdef BFD64
16914 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16915 fprintf (stream, _("\
16916 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
16917 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
16918 fprintf (stream, _("\
16919 --32/--64 generate 32bit/64bit object\n"));
16920 # endif
16921 #endif
16922 #ifdef SVR4_COMMENT_CHARS
16923 fprintf (stream, _("\
16924 --divide do not treat `/' as a comment character\n"));
16925 #else
16926 fprintf (stream, _("\
16927 --divide ignored\n"));
16928 #endif
16929 fprintf (stream, _("\
16930 -march=CPU[,+EXTENSION...]\n\
16931 generate code for CPU and EXTENSION, CPU is one of:\n"));
16932 show_arch (stream, 0, 1);
16933 fprintf (stream, _("\
16934 EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
16935 show_arch (stream, 1, 0);
16936 fprintf (stream, _("\
16937 -mtune=CPU optimize for CPU, CPU is one of:\n"));
16938 show_arch (stream, 0, 0);
16939 fprintf (stream, _("\
16940 -msse2avx encode SSE instructions with VEX prefix\n"));
16941 fprintf (stream, _("\
16942 -muse-unaligned-vector-move\n\
16943 encode aligned vector move as unaligned vector move\n"));
16944 fprintf (stream, _("\
16945 -msse-check=[none|error|warning] (default: none)\n\
16946 check SSE instructions\n"));
16947 fprintf (stream, _("\
16948 -moperand-check=[none|error|warning] (default: warning)\n\
16949 check operand combinations for validity\n"));
16950 fprintf (stream, _("\
16951 -mavxscalar=[128|256] (default: 128)\n\
16952 encode scalar AVX instructions with specific vector\n\
16953 length\n"));
16954 fprintf (stream, _("\
16955 -mvexwig=[0|1] (default: 0)\n\
16956 encode VEX instructions with specific VEX.W value\n\
16957 for VEX.W bit ignored instructions\n"));
16958 fprintf (stream, _("\
16959 -mevexlig=[128|256|512] (default: 128)\n\
16960 encode scalar EVEX instructions with specific vector\n\
16961 length\n"));
16962 fprintf (stream, _("\
16963 -mevexwig=[0|1] (default: 0)\n\
16964 encode EVEX instructions with specific EVEX.W value\n\
16965 for EVEX.W bit ignored instructions\n"));
16966 fprintf (stream, _("\
16967 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
16968 encode EVEX instructions with specific EVEX.RC value\n\
16969 for SAE-only ignored instructions\n"));
16970 fprintf (stream, _("\
16971 -mmnemonic=[att|intel] "));
16972 if (SYSV386_COMPAT)
16973 fprintf (stream, _("(default: att)\n"));
16974 else
16975 fprintf (stream, _("(default: intel)\n"));
16976 fprintf (stream, _("\
16977 use AT&T/Intel mnemonic (AT&T syntax only)\n"));
16978 fprintf (stream, _("\
16979 -msyntax=[att|intel] (default: att)\n\
16980 use AT&T/Intel syntax\n"));
16981 fprintf (stream, _("\
16982 -mindex-reg support pseudo index registers\n"));
16983 fprintf (stream, _("\
16984 -mnaked-reg don't require `%%' prefix for registers\n"));
16985 fprintf (stream, _("\
16986 -madd-bnd-prefix add BND prefix for all valid branches\n"));
16987 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16988 fprintf (stream, _("\
16989 -mshared disable branch optimization for shared code\n"));
16990 fprintf (stream, _("\
16991 -mx86-used-note=[no|yes] "));
16992 if (DEFAULT_X86_USED_NOTE)
16993 fprintf (stream, _("(default: yes)\n"));
16994 else
16995 fprintf (stream, _("(default: no)\n"));
16996 fprintf (stream, _("\
16997 generate x86 used ISA and feature properties\n"));
16998 #endif
16999 #if defined (TE_PE) || defined (TE_PEP)
17000 fprintf (stream, _("\
17001 -mbig-obj generate big object files\n"));
17002 #endif
17003 fprintf (stream, _("\
17004 -momit-lock-prefix=[no|yes] (default: no)\n\
17005 strip all lock prefixes\n"));
17006 fprintf (stream, _("\
17007 -mfence-as-lock-add=[no|yes] (default: no)\n\
17008 encode lfence, mfence and sfence as\n\
17009 lock addl $0x0, (%%{re}sp)\n"));
17010 fprintf (stream, _("\
17011 -mrelax-relocations=[no|yes] "));
17012 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
17013 fprintf (stream, _("(default: yes)\n"));
17014 else
17015 fprintf (stream, _("(default: no)\n"));
17016 fprintf (stream, _("\
17017 generate relax relocations\n"));
17018 fprintf (stream, _("\
17019 -malign-branch-boundary=NUM (default: 0)\n\
17020 align branches within NUM byte boundary\n"));
17021 fprintf (stream, _("\
17022 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
17023 TYPE is combination of jcc, fused, jmp, call, ret,\n\
17024 indirect\n\
17025 specify types of branches to align\n"));
17026 fprintf (stream, _("\
17027 -malign-branch-prefix-size=NUM (default: 5)\n\
17028 align branches with NUM prefixes per instruction\n"));
17029 fprintf (stream, _("\
17030 -mbranches-within-32B-boundaries\n\
17031 align branches within 32 byte boundary\n"));
17032 fprintf (stream, _("\
17033 -mlfence-after-load=[no|yes] (default: no)\n\
17034 generate lfence after load\n"));
17035 fprintf (stream, _("\
17036 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
17037 generate lfence before indirect near branch\n"));
17038 fprintf (stream, _("\
17039 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
17040 generate lfence before ret\n"));
17041 fprintf (stream, _("\
17042 -mamd64 accept only AMD64 ISA [default]\n"));
17043 fprintf (stream, _("\
17044 -mintel64 accept only Intel64 ISA\n"));
17047 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
17048 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
17049 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
17051 /* Pick the target format to use. */
17053 const char *
17054 i386_target_format (void)
17056 if (startswith (default_arch, "x86_64"))
17058 update_code_flag (CODE_64BIT, 1);
17059 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17060 if (default_arch[6] == '\0')
17061 x86_elf_abi = X86_64_ABI;
17062 else
17063 x86_elf_abi = X86_64_X32_ABI;
17064 #endif
17066 else if (!strcmp (default_arch, "i386"))
17067 update_code_flag (CODE_32BIT, 1);
17068 else if (!strcmp (default_arch, "iamcu"))
17070 update_code_flag (CODE_32BIT, 1);
17071 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
17073 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
17074 cpu_arch_name = "iamcu";
17075 free (cpu_sub_arch_name);
17076 cpu_sub_arch_name = NULL;
17077 cpu_arch_flags = iamcu_flags;
17078 cpu_arch_isa = PROCESSOR_IAMCU;
17079 cpu_arch_isa_flags = iamcu_flags;
17080 if (!cpu_arch_tune_set)
17081 cpu_arch_tune = PROCESSOR_IAMCU;
17083 else if (cpu_arch_isa != PROCESSOR_IAMCU)
17084 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
17085 cpu_arch_name);
17087 else
17088 as_fatal (_("unknown architecture"));
17090 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17091 if (IS_ELF && flag_synth_cfi && x86_elf_abi != X86_64_ABI)
17092 as_fatal (_("SCFI is not supported for this ABI"));
17093 #endif
17095 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
17096 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
17098 switch (OUTPUT_FLAVOR)
17100 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
17101 case bfd_target_aout_flavour:
17102 return AOUT_TARGET_FORMAT;
17103 #endif
17104 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
17105 # if defined (TE_PE) || defined (TE_PEP)
17106 case bfd_target_coff_flavour:
17107 if (flag_code == CODE_64BIT)
17109 object_64bit = 1;
17110 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
17112 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
17113 # elif defined (TE_GO32)
17114 case bfd_target_coff_flavour:
17115 return "coff-go32";
17116 # else
17117 case bfd_target_coff_flavour:
17118 return "coff-i386";
17119 # endif
17120 #endif
17121 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
17122 case bfd_target_elf_flavour:
17124 const char *format;
17126 switch (x86_elf_abi)
17128 default:
17129 format = ELF_TARGET_FORMAT;
17130 #ifndef TE_SOLARIS
17131 tls_get_addr = "___tls_get_addr";
17132 #endif
17133 break;
17134 case X86_64_ABI:
17135 use_rela_relocations = 1;
17136 object_64bit = 1;
17137 #ifndef TE_SOLARIS
17138 tls_get_addr = "__tls_get_addr";
17139 #endif
17140 format = ELF_TARGET_FORMAT64;
17141 break;
17142 case X86_64_X32_ABI:
17143 use_rela_relocations = 1;
17144 object_64bit = 1;
17145 #ifndef TE_SOLARIS
17146 tls_get_addr = "__tls_get_addr";
17147 #endif
17148 disallow_64bit_reloc = 1;
17149 format = ELF_TARGET_FORMAT32;
17150 break;
17152 if (cpu_arch_isa == PROCESSOR_IAMCU)
17154 if (x86_elf_abi != I386_ABI)
17155 as_fatal (_("Intel MCU is 32bit only"));
17156 return ELF_TARGET_IAMCU_FORMAT;
17158 else
17159 return format;
17161 #endif
17162 #if defined (OBJ_MACH_O)
17163 case bfd_target_mach_o_flavour:
17164 if (flag_code == CODE_64BIT)
17166 use_rela_relocations = 1;
17167 object_64bit = 1;
17168 return "mach-o-x86-64";
17170 else
17171 return "mach-o-i386";
17172 #endif
17173 default:
17174 abort ();
17175 return NULL;
17179 #endif /* OBJ_MAYBE_ more than one */
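/* Example (sketch): with `--x32' on an ELF configuration, default_arch is
   "x86_64:32", so i386_target_format () above selects ELF_TARGET_FORMAT32
   with x86_elf_abi == X86_64_X32_ABI and 64-bit relocations disallowed.  */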
17181 symbolS *
17182 md_undefined_symbol (char *name)
17184 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
17185 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
17186 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
17187 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
17189 if (!GOT_symbol)
17191 if (symbol_find (name))
17192 as_bad (_("GOT already in symbol table"));
17193 GOT_symbol = symbol_new (name, undefined_section,
17194 &zero_address_frag, 0);
17196 return GOT_symbol;
17198 return 0;
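/* Example (sketch): the first reference to _GLOBAL_OFFSET_TABLE_, e.g.

       addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   in 32-bit PIC code, reaches md_undefined_symbol above, which creates
   GOT_symbol on demand so that later fixups can recognize GOT-relative
   expressions.  */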
17201 #if defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT)
17202 /* Round up a section size to the appropriate boundary. */
17204 valueT
17205 md_section_align (segT segment, valueT size)
17207 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
17209 /* For a.out, force the section size to be aligned. If we don't do
17210 this, BFD will align it for us, but it will not write out the
17211 final bytes of the section. This may be a bug in BFD, but it is
17212 easier to fix it here since that is how the other a.out targets
17213 work. */
17214 int align;
17216 align = bfd_section_alignment (segment);
17217 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
17220 return size;
17222 #endif
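/* Worked example for md_section_align (sketch): with a section alignment
   power of 2 (4-byte alignment) and a size of 10, the a.out path above
   computes (10 + 3) & ~3 == 12.  */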
17224 /* On the i386, PC-relative offsets are relative to the start of the
17225 next instruction. That is, the address of the offset, plus its
17226 size, since the offset is always the last part of the insn. */
17228 long
17229 md_pcrel_from (fixS *fixP)
17231 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
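/* Worked example (hypothetical numbers): for a rel32 field at fr_address
   0x100 with fx_where 1 and fx_size 4, md_pcrel_from returns 0x105 - the
   address of the next instruction, which is what x86 PC-relative
   displacements are measured from.  */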
17234 #ifdef OBJ_AOUT
17236 static void
17237 s_bss (int ignore ATTRIBUTE_UNUSED)
17239 int temp;
17241 temp = get_absolute_expression ();
17242 subseg_set (bss_section, (subsegT) temp);
17243 demand_empty_rest_of_line ();
17246 #endif
17248 /* Remember that a constant directive was last emitted, so the branch-alignment machinery can tell data from instructions. */
17250 void
17251 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
17253 struct last_insn *last_insn
17254 = &seg_info(now_seg)->tc_segment_info_data.last_insn;
17256 if (bfd_section_flags (now_seg) & SEC_CODE)
17258 last_insn->kind = last_insn_directive;
17259 last_insn->name = "constant directive";
17260 last_insn->file = as_where (&last_insn->line);
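/* Example effect (sketch): in a code section,

       .byte	0x90
       jmp	target

   the .byte is recorded above as a "constant directive", letting the
   branch-alignment machinery know that the bytes preceding the branch came
   from data rather than from an instruction.  */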
17264 int
17265 i386_validate_fix (fixS *fixp)
17267 if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
17269 reloc_howto_type *howto;
17271 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
17272 as_bad_where (fixp->fx_file, fixp->fx_line,
17273 _("invalid %s relocation against register"),
17274 howto ? howto->name : "<unknown>");
17275 return 0;
17278 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17279 if (fixp->fx_r_type == BFD_RELOC_SIZE32
17280 || fixp->fx_r_type == BFD_RELOC_SIZE64)
17281 return IS_ELF && fixp->fx_addsy
17282 && (!S_IS_DEFINED (fixp->fx_addsy)
17283 || S_IS_EXTERNAL (fixp->fx_addsy));
17285 /* BFD_RELOC_X86_64_GOTTPOFF:
17286 1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTTPOFF
17287 2. fx_tcbit2 -> BFD_RELOC_X86_64_CODE_6_GOTTPOFF
17288 BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17289 1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
17290 BFD_RELOC_32_PCREL:
17291 1. fx_tcbit -> BFD_RELOC_X86_64_GOTPCRELX
17292 2. fx_tcbit2 -> BFD_RELOC_X86_64_REX_GOTPCRELX
17293 3. fx_tcbit3 -> BFD_RELOC_X86_64_CODE_4_GOTPCRELX
17294 4. else -> BFD_RELOC_X86_64_GOTPCREL
17296 if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF)
17298 if (fixp->fx_tcbit)
17299 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTTPOFF;
17300 else if (fixp->fx_tcbit2)
17301 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_6_GOTTPOFF;
17303 else if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
17304 && fixp->fx_tcbit)
17305 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC;
17306 #endif
17308 if (fixp->fx_subsy)
17310 if (fixp->fx_subsy == GOT_symbol)
17312 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
17314 if (!object_64bit)
17315 abort ();
17316 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17317 if (fixp->fx_tcbit)
17318 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCRELX;
17319 else if (fixp->fx_tcbit2)
17320 fixp->fx_r_type = BFD_RELOC_X86_64_REX_GOTPCRELX;
17321 else if (fixp->fx_tcbit3)
17322 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPCRELX;
17323 else
17324 #endif
17325 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
17327 else
17329 if (!object_64bit)
17330 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
17331 else
17332 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
17334 fixp->fx_subsy = 0;
17337 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17338 else
17340 /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
17341 to section. Since PLT32 relocation must be against symbols,
17342 turn such PLT32 relocation into PC32 relocation. */
17343 if (fixp->fx_addsy
17344 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
17345 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
17346 && symbol_section_p (fixp->fx_addsy))
17347 fixp->fx_r_type = BFD_RELOC_32_PCREL;
17348 if (!object_64bit)
17350 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
17351 && fixp->fx_tcbit2)
17352 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
17355 #endif
17357 return 1;
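/* Example (sketch): for

       movq	foo@GOTPCREL(%rip), %rax

   the REX-prefixed load is flagged via fx_tcbit2, so the fixup above is
   rewritten to BFD_RELOC_X86_64_REX_GOTPCRELX, allowing the linker to relax
   the load to a lea when foo turns out to be local.  */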
17360 arelent *
17361 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
17363 arelent *rel;
17364 bfd_reloc_code_real_type code;
17366 switch (fixp->fx_r_type)
17368 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17369 symbolS *sym;
17371 case BFD_RELOC_SIZE32:
17372 case BFD_RELOC_SIZE64:
17373 if (fixp->fx_addsy
17374 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
17375 && (!fixp->fx_subsy
17376 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
17377 sym = fixp->fx_addsy;
17378 else if (fixp->fx_subsy
17379 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
17380 && (!fixp->fx_addsy
17381 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
17382 sym = fixp->fx_subsy;
17383 else
17384 sym = NULL;
17385 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
17387 /* Resolve size relocation against local symbol to size of
17388 the symbol plus addend. */
17389 valueT value = S_GET_SIZE (sym);
17391 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
17392 value = bfd_section_size (S_GET_SEGMENT (sym));
17393 if (sym == fixp->fx_subsy)
17395 value = -value;
17396 if (fixp->fx_addsy)
17397 value += S_GET_VALUE (fixp->fx_addsy);
17399 else if (fixp->fx_subsy)
17400 value -= S_GET_VALUE (fixp->fx_subsy);
17401 value += fixp->fx_offset;
17402 if (fixp->fx_r_type == BFD_RELOC_SIZE32
17403 && object_64bit
17404 && !fits_in_unsigned_long (value))
17405 as_bad_where (fixp->fx_file, fixp->fx_line,
17406 _("symbol size computation overflow"));
17407 fixp->fx_addsy = NULL;
17408 fixp->fx_subsy = NULL;
17409 md_apply_fix (fixp, (valueT *) &value, NULL);
17410 return NULL;
17412 if (!fixp->fx_addsy || fixp->fx_subsy)
17414 as_bad_where (fixp->fx_file, fixp->fx_line,
17415 "unsupported expression involving @size");
17416 return NULL;
17418 #endif
17419 /* Fall through. */
17421 case BFD_RELOC_X86_64_PLT32:
17422 case BFD_RELOC_X86_64_GOT32:
17423 case BFD_RELOC_X86_64_GOTPCREL:
17424 case BFD_RELOC_X86_64_GOTPCRELX:
17425 case BFD_RELOC_X86_64_REX_GOTPCRELX:
17426 case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
17427 case BFD_RELOC_386_PLT32:
17428 case BFD_RELOC_386_GOT32:
17429 case BFD_RELOC_386_GOT32X:
17430 case BFD_RELOC_386_GOTOFF:
17431 case BFD_RELOC_386_GOTPC:
17432 case BFD_RELOC_386_TLS_GD:
17433 case BFD_RELOC_386_TLS_LDM:
17434 case BFD_RELOC_386_TLS_LDO_32:
17435 case BFD_RELOC_386_TLS_IE_32:
17436 case BFD_RELOC_386_TLS_IE:
17437 case BFD_RELOC_386_TLS_GOTIE:
17438 case BFD_RELOC_386_TLS_LE_32:
17439 case BFD_RELOC_386_TLS_LE:
17440 case BFD_RELOC_386_TLS_GOTDESC:
17441 case BFD_RELOC_386_TLS_DESC_CALL:
17442 case BFD_RELOC_X86_64_TLSGD:
17443 case BFD_RELOC_X86_64_TLSLD:
17444 case BFD_RELOC_X86_64_DTPOFF32:
17445 case BFD_RELOC_X86_64_DTPOFF64:
17446 case BFD_RELOC_X86_64_GOTTPOFF:
17447 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
17448 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
17449 case BFD_RELOC_X86_64_TPOFF32:
17450 case BFD_RELOC_X86_64_TPOFF64:
17451 case BFD_RELOC_X86_64_GOTOFF64:
17452 case BFD_RELOC_X86_64_GOTPC32:
17453 case BFD_RELOC_X86_64_GOT64:
17454 case BFD_RELOC_X86_64_GOTPCREL64:
17455 case BFD_RELOC_X86_64_GOTPC64:
17456 case BFD_RELOC_X86_64_GOTPLT64:
17457 case BFD_RELOC_X86_64_PLTOFF64:
17458 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17459 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
17460 case BFD_RELOC_X86_64_TLSDESC_CALL:
17461 case BFD_RELOC_RVA:
17462 case BFD_RELOC_VTABLE_ENTRY:
17463 case BFD_RELOC_VTABLE_INHERIT:
17464 #ifdef TE_PE
17465 case BFD_RELOC_32_SECREL:
17466 case BFD_RELOC_16_SECIDX:
17467 #endif
17468 code = fixp->fx_r_type;
17469 break;
17470 case BFD_RELOC_X86_64_32S:
17471 if (!fixp->fx_pcrel)
17473 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
17474 code = fixp->fx_r_type;
17475 break;
17477 /* Fall through. */
17478 default:
17479 if (fixp->fx_pcrel)
17481 switch (fixp->fx_size)
17483 default:
17484 as_bad_where (fixp->fx_file, fixp->fx_line,
17485 _("can not do %d byte pc-relative relocation"),
17486 fixp->fx_size);
17487 code = BFD_RELOC_32_PCREL;
17488 break;
17489 case 1: code = BFD_RELOC_8_PCREL; break;
17490 case 2: code = BFD_RELOC_16_PCREL; break;
17491 case 4: code = BFD_RELOC_32_PCREL; break;
17492 #ifdef BFD64
17493 case 8: code = BFD_RELOC_64_PCREL; break;
17494 #endif
17497 else
17499 switch (fixp->fx_size)
17501 default:
17502 as_bad_where (fixp->fx_file, fixp->fx_line,
17503 _("can not do %d byte relocation"),
17504 fixp->fx_size);
17505 code = BFD_RELOC_32;
17506 break;
17507 case 1: code = BFD_RELOC_8; break;
17508 case 2: code = BFD_RELOC_16; break;
17509 case 4: code = BFD_RELOC_32; break;
17510 #ifdef BFD64
17511 case 8: code = BFD_RELOC_64; break;
17512 #endif
17515 break;
17518 if ((code == BFD_RELOC_32
17519 || code == BFD_RELOC_32_PCREL
17520 || code == BFD_RELOC_X86_64_32S)
17521 && GOT_symbol
17522 && fixp->fx_addsy == GOT_symbol)
17524 if (!object_64bit)
17525 code = BFD_RELOC_386_GOTPC;
17526 else
17527 code = BFD_RELOC_X86_64_GOTPC32;
17529 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
17530 && GOT_symbol
17531 && fixp->fx_addsy == GOT_symbol)
17533 code = BFD_RELOC_X86_64_GOTPC64;
17536 rel = XNEW (arelent);
17537 rel->sym_ptr_ptr = XNEW (asymbol *);
17538 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
17540 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
17542 if (!use_rela_relocations)
17544 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
17545 vtable entry to be used in the relocation's section offset. */
17546 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
17547 rel->address = fixp->fx_offset;
17548 #if defined (OBJ_COFF) && defined (TE_PE)
17549 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
17550 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
17551 else
17552 #endif
17553 rel->addend = 0;
17555 /* Use the rela in 64bit mode. */
17556 else
17558 if (disallow_64bit_reloc)
17559 switch (code)
17561 case BFD_RELOC_X86_64_DTPOFF64:
17562 case BFD_RELOC_X86_64_TPOFF64:
17563 case BFD_RELOC_64_PCREL:
17564 case BFD_RELOC_X86_64_GOTOFF64:
17565 case BFD_RELOC_X86_64_GOT64:
17566 case BFD_RELOC_X86_64_GOTPCREL64:
17567 case BFD_RELOC_X86_64_GOTPC64:
17568 case BFD_RELOC_X86_64_GOTPLT64:
17569 case BFD_RELOC_X86_64_PLTOFF64:
17570 as_bad_where (fixp->fx_file, fixp->fx_line,
17571 _("cannot represent relocation type %s in x32 mode"),
17572 bfd_get_reloc_code_name (code));
17573 break;
17574 default:
17575 break;
17578 if (!fixp->fx_pcrel)
17579 rel->addend = fixp->fx_offset;
17580 else
17581 switch (code)
17583 case BFD_RELOC_X86_64_PLT32:
17584 case BFD_RELOC_X86_64_GOT32:
17585 case BFD_RELOC_X86_64_GOTPCREL:
17586 case BFD_RELOC_X86_64_GOTPCRELX:
17587 case BFD_RELOC_X86_64_REX_GOTPCRELX:
17588 case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
17589 case BFD_RELOC_X86_64_TLSGD:
17590 case BFD_RELOC_X86_64_TLSLD:
17591 case BFD_RELOC_X86_64_GOTTPOFF:
17592 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
17593 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
17594 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17595 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
17596 case BFD_RELOC_X86_64_TLSDESC_CALL:
17597 rel->addend = fixp->fx_offset - fixp->fx_size;
17598 break;
17599 default:
17600 rel->addend = (section->vma
17601 - fixp->fx_size
17602 + fixp->fx_addnumber
17603 + md_pcrel_from (fixp));
17604 break;
17608 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
17609 if (rel->howto == NULL)
17611 as_bad_where (fixp->fx_file, fixp->fx_line,
17612 _("cannot represent relocation type %s"),
17613 bfd_get_reloc_code_name (code));
17614 /* Set howto to a garbage value so that we can keep going. */
17615 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
17616 gas_assert (rel->howto != NULL);
17619 return rel;
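/* Example (sketch): a 64-bit `call foo' produces R_X86_64_PLT32 with addend
   fx_offset - fx_size == -4 in the rela path above, since the displacement
   is measured from the end of the 5-byte instruction while the relocation
   applies to its last 4 bytes.  */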
17622 #include "tc-i386-intel.c"
17624 void
17625 tc_x86_parse_to_dw2regnum (expressionS *exp)
17627 int saved_naked_reg;
17628 char saved_register_dot;
17630 saved_naked_reg = allow_naked_reg;
17631 allow_naked_reg = 1;
17632 saved_register_dot = register_chars['.'];
17633 register_chars['.'] = '.';
17634 allow_pseudo_reg = 1;
17635 expression_and_evaluate (exp);
17636 allow_pseudo_reg = 0;
17637 register_chars['.'] = saved_register_dot;
17638 allow_naked_reg = saved_naked_reg;
17640 if (exp->X_op == O_register && exp->X_add_number >= 0)
17642 exp->X_op = O_illegal;
17643 if ((addressT) exp->X_add_number < i386_regtab_size)
17645 exp->X_add_number = i386_regtab[exp->X_add_number]
17646 .dw2_regnum[object_64bit];
17647 if (exp->X_add_number != Dw2Inval)
17648 exp->X_op = O_constant;
17653 void
17654 tc_x86_frame_initial_instructions (void)
17656 cfi_add_CFA_def_cfa (object_64bit ? REG_SP : 4, -x86_cie_data_alignment);
17657 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
17660 int
17661 x86_dwarf2_addr_size (void)
17663 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
17664 if (x86_elf_abi == X86_64_X32_ABI)
17665 return 4;
17666 #endif
17667 return bfd_arch_bits_per_address (stdoutput) / 8;
17670 #ifdef TE_PE
17671 void
17672 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
17674 expressionS exp;
17676 exp.X_op = O_secrel;
17677 exp.X_add_symbol = symbol;
17678 exp.X_add_number = 0;
17679 emit_expr (&exp, size);
17681 #endif
17683 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17684 int
17685 i386_elf_section_type (const char *str, size_t len)
17687 if (flag_code == CODE_64BIT
17688 && len == sizeof ("unwind") - 1
17689 && startswith (str, "unwind"))
17690 return SHT_X86_64_UNWIND;
17692 return -1;
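/* Example (sketch): in 64-bit code,

       .section .eh_frame,"a",@unwind

   resolves the "unwind" type string above to SHT_X86_64_UNWIND.  */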
17695 void
17696 i386_elf_section_change_hook (void)
17698 struct i386_segment_info *info = &seg_info(now_seg)->tc_segment_info_data;
17699 struct i386_segment_info *curr, *prev;
17701 if (info->subseg == now_subseg)
17702 return;
17704 /* Find the (or make a) list entry to save state into. */
17705 for (prev = info; (curr = prev->next) != NULL; prev = curr)
17706 if (curr->subseg == info->subseg)
17707 break;
17708 if (!curr)
17710 curr = notes_alloc (sizeof (*curr));
17711 curr->subseg = info->subseg;
17712 curr->next = NULL;
17713 prev->next = curr;
17715 curr->last_insn = info->last_insn;
17717 /* Find the list entry to load state from. */
17718 for (curr = info->next; curr; curr = curr->next)
17719 if (curr->subseg == now_subseg)
17720 break;
17721 if (curr)
17722 info->last_insn = curr->last_insn;
17723 else
17724 memset (&info->last_insn, 0, sizeof (info->last_insn));
17725 info->subseg = now_subseg;
17728 #ifdef TE_SOLARIS
17729 void
17730 i386_solaris_fix_up_eh_frame (segT sec)
17732 if (flag_code == CODE_64BIT)
17733 elf_section_type (sec) = SHT_X86_64_UNWIND;
17735 #endif
17737 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
17739 bfd_vma
17740 x86_64_section_letter (int letter, const char **ptr_msg)
17742 if (flag_code == CODE_64BIT)
17744 if (letter == 'l')
17745 return SHF_X86_64_LARGE;
17747 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
17749 else
17750 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
17751 return -1;
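/* Example (sketch): in 64-bit code,

       .section .lrodata,"awl",@progbits

   the 'l' flag is translated above into SHF_X86_64_LARGE.  */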
17754 static void
17755 handle_large_common (int small ATTRIBUTE_UNUSED)
17757 if (flag_code != CODE_64BIT)
17759 s_comm_internal (0, elf_common_parse);
17760 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
17762 else
17764 static segT lbss_section;
17765 asection *saved_com_section_ptr = elf_com_section_ptr;
17766 asection *saved_bss_section = bss_section;
17768 if (lbss_section == NULL)
17770 flagword applicable;
17771 segT seg = now_seg;
17772 subsegT subseg = now_subseg;
17774 /* The .lbss section is for local .largecomm symbols. */
17775 lbss_section = subseg_new (".lbss", 0);
17776 applicable = bfd_applicable_section_flags (stdoutput);
17777 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
17778 seg_info (lbss_section)->bss = 1;
17780 subseg_set (seg, subseg);
17783 elf_com_section_ptr = &_bfd_elf_large_com_section;
17784 bss_section = lbss_section;
17786 s_comm_internal (0, elf_common_parse);
17788 elf_com_section_ptr = saved_com_section_ptr;
17789 bss_section = saved_bss_section;
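/* Example (sketch; operand forms follow the usual .comm syntax): in 64-bit
   code,

       .largecomm	buf, 4096, 32

   allocates buf as a large common symbol; if it remains local it is placed
   in the .lbss section created above.  */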
17792 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */