Support x86 Intel MSR_IMM
[binutils-gdb.git] / gas / config / tc-i386.c
blob ab4e57b95e4cf86e464849dd13eb3d7019ac67f3
/* tc-i386.c -- Assemble code for the Intel 80386
   Copyright (C) 1989-2024 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Intel 80386 machine specific gas.
   Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
   x86_64 support by Jan Hubicka (jh@suse.cz)
   VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
   Bugs & suggestions are completely welcome.  This is free software.
   Please help us make it better.  */

#include "as.h"
#include "safe-ctype.h"
#include "subsegs.h"
#include "dwarf2dbg.h"
#include "dw2gencfi.h"
#include "scfi.h"
#include "gen-sframe.h"
#include "sframe.h"
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
#include "opcodes/i386-mnem.h"
#include <limits.h>

#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT is really an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6	/* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */
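/* Illustrative example (not part of the original source): how the prefix
   slots above determine emission order.  For a LOCK'ed add with an FS
   segment override, assembly would record

     i.prefix[SEG_PREFIX]  = 0x64;    fs override
     i.prefix[LOCK_PREFIX] = 0xf0;    lock

   and, per the comment above, output walks the prefix array from slot 0
   upward, so the 0x64 byte is emitted before 0xf0, matching the
   preferred SEG/ADDR/DATA/REP/LOCK order.  */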
/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX  'q'

#define END_OF_INSN '\0'

#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding

/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;

/* x86 arch names, types and features */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len:8;		/* arch string length */
  bool skip:1;			/* show_arch should skip this. */
  enum processor_type type;	/* arch type */
  enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
  i386_cpu_flags enable;	/* cpu feature enable flags */
  i386_cpu_flags disable;	/* cpu feature disable flags */
}
arch_entry;

/* Modes for parse_insn() to operate in.  */
enum parse_mode {
  parse_all,
  parse_prefix,
  parse_pseudo_prefix,
};

static void update_code_flag (int, int);
static void s_insn (int);
static void s_noopt (int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (const char *, char **);
static const char *parse_insn (const char *, char *, enum parse_mode);
static char *parse_operands (char *, const char *);
static void copy_operand (unsigned int, unsigned int);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum i386_flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (const insn_template *);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (const struct last_insn *);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifdef OBJ_AOUT
static void s_bss (int);
#endif
#ifdef OBJ_ELF
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif

static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;

/* VEX prefix.  */
typedef struct
{
  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;

/* 'md_assemble ()' gathers together information and puts it into an
   i386_insn.  */

union i386_op
{
  expressionS *disps;
  expressionS *imms;
  const reg_entry *regs;
};

enum i386_error
{
  no_error, /* Must be first.  */
  operand_size_mismatch,
  operand_type_mismatch,
  register_type_mismatch,
  number_of_operands_mismatch,
  invalid_instruction_suffix,
  bad_imm4,
  unsupported_with_intel_mnemonic,
  unsupported_syntax,
  unsupported_EGPR_for_addressing,
  unsupported_nf,
  unsupported,
  unsupported_on_arch,
  unsupported_64bit,
  no_vex_encoding,
  no_evex_encoding,
  invalid_sib_address,
  invalid_vsib_address,
  invalid_vector_register_set,
  invalid_tmm_register_set,
  invalid_dest_and_src_register_set,
  invalid_dest_register_set,
  invalid_pseudo_prefix,
  unsupported_vector_index_register,
  unsupported_broadcast,
  broadcast_needed,
  unsupported_masking,
  mask_not_on_destination,
  no_default_mask,
  unsupported_rc_sae,
  unsupported_vector_size,
  unsupported_rsp_register,
  internal_error,
};

#ifdef OBJ_ELF
enum x86_tls_error_type
{
  x86_tls_error_continue,
  x86_tls_error_none,
  x86_tls_error_insn,
  x86_tls_error_opcode,
  x86_tls_error_sib,
  x86_tls_error_no_base_reg,
  x86_tls_error_require_no_base_index_reg,
  x86_tls_error_base_reg,
  x86_tls_error_index_ebx,
  x86_tls_error_eax,
  x86_tls_error_RegA,
  x86_tls_error_ebx,
  x86_tls_error_rip,
  x86_tls_error_dest_eax,
  x86_tls_error_dest_rdi,
  x86_tls_error_scale_factor,
  x86_tls_error_base_reg_size,
  x86_tls_error_dest_32bit_reg_size,
  x86_tls_error_dest_64bit_reg_size,
  x86_tls_error_dest_32bit_or_64bit_reg_size
};
#endif

struct _i386_insn
{
  /* TM holds the template for the insn we're currently assembling.  */
  insn_template tm;

  /* SUFFIX holds the instruction size suffix for byte, word, dword
     or qword, if given.  */
  char suffix;

  /* OPCODE_LENGTH holds the number of base opcode bytes.  */
  unsigned char opcode_length;

  /* OPERANDS gives the number of given operands.  */
  unsigned int operands;

  /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
     of given register, displacement, memory operands and immediate
     operands.  */
  unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

  /* TYPES [i] is the type (see above #defines) which tells us how to
     use OP[i] for the corresponding operand.  */
  i386_operand_type types[MAX_OPERANDS];

  /* Displacement expression, immediate expression, or register for each
     operand.  */
  union i386_op op[MAX_OPERANDS];

  /* Flags for operands.  */
  unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2
#define Operand_Signed 4 /* .insn only */

  /* Relocation type for operand */
  enum bfd_reloc_code_real reloc[MAX_OPERANDS];

  /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
     the base index byte below.  */
  const reg_entry *base_reg;
  const reg_entry *index_reg;
  unsigned int log2_scale_factor;

  /* SEG gives the seg_entries of this insn.  They are zero unless
     explicit segment overrides are given.  */
  const reg_entry *seg[2];

  /* PREFIX holds all the given prefix opcodes (usually null).
     PREFIXES is the number of prefix opcodes.  */
  unsigned int prefixes;
  unsigned char prefix[MAX_PREFIXES];

  /* .insn allows for reserved opcode spaces.  */
  unsigned char insn_opcode_space;

  /* .insn also allows (requires) specifying immediate size.  */
  unsigned char imm_bits[MAX_OPERANDS];

  /* Register is in low 3 bits of opcode.  */
  bool short_form;

  /* The operand to a branch insn indicates an absolute branch.  */
  bool jumpabsolute;

  /* The operand to a branch insn indicates a far branch.  */
  bool far_branch;

  /* There is a memory operand of (%dx) which should only be used
     with input/output instructions.  */
  bool input_output_operand;

  /* Extended states.  */
  enum
    {
      /* Use MMX state.  */
      xstate_mmx = 1 << 0,
      /* Use XMM state.  */
      xstate_xmm = 1 << 1,
      /* Use YMM state.  */
      xstate_ymm = 1 << 2 | xstate_xmm,
      /* Use ZMM state.  */
      xstate_zmm = 1 << 3 | xstate_ymm,
      /* Use TMM state.  */
      xstate_tmm = 1 << 4,
      /* Use MASK state.  */
      xstate_mask = 1 << 5
    } xstate;

  /* Has GOTPC or TLS relocation.  */
  bool has_gotpc_tls_reloc;

  /* Has relocation entry from the gotrel array.  */
  bool has_gotrel;

  /* RM and SIB are the modrm byte and the sib byte where the
     addressing modes of this insn are encoded.  */
  modrm_byte rm;
  rex_byte rex;
  rex_byte vrex;
  rex_byte rex2;
  sib_byte sib;
  vex_prefix vex;

  /* Masking attributes.

     The struct describes masking, applied to OPERAND in the instruction.
     REG is a pointer to the corresponding mask register.  ZEROING tells
     whether merging or zeroing mask is used.  */
  struct Mask_Operation
  {
    const reg_entry *reg;
    unsigned int zeroing;
    /* The operand where this operation is associated.  */
    unsigned int operand;
  } mask;

  /* Rounding control and SAE attributes.  */
  struct RC_Operation
  {
    enum rc_type
      {
	rc_none = -1,
	rne,
	rd,
	ru,
	rz,
	saeonly
      } type;
    /* In Intel syntax the operand modifier form is supposed to be used, but
       we continue to accept the immediate forms as well.  */
    bool modifier;
  } rounding;

  /* Broadcasting attributes.

     The struct describes broadcasting, applied to OPERAND.  TYPE
     expresses the broadcast factor.  */
  struct Broadcast_Operation
  {
    /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
    unsigned int type;

    /* Index of broadcasted operand.  */
    unsigned int operand;

    /* Number of bytes to broadcast.  */
    unsigned int bytes;
  } broadcast;

  /* Compressed disp8*N attribute.  */
  unsigned int memshift;

  /* SCC = EVEX.[SC3,SC2,SC1,SC0].  */
  unsigned int scc;

  /* Store 4 bits of EVEX.[OF,SF,ZF,CF].  */
#define OSZC_CF 1
#define OSZC_ZF 2
#define OSZC_SF 4
#define OSZC_OF 8
  unsigned int oszc_flags;

  /* Invert the condition encoded in a base opcode.  */
  bool invert_cond;

  /* REP prefix.  */
  const char *rep_prefix;

  /* HLE prefix.  */
  const char *hle_prefix;

  /* Have BND prefix.  */
  const char *bnd_prefix;

  /* Have NOTRACK prefix.  */
  const char *notrack_prefix;

  /* Error message.  */
  enum i386_error error;
};

typedef struct _i386_insn i386_insn;

/* Pseudo-prefix recording state, separate from i386_insn.  */
static struct pseudo_prefixes {
  /* How to encode instructions.  */
  enum {
    encoding_default = 0,
    encoding_vex,
    encoding_vex3,
    encoding_egpr, /* REX2 or EVEX.  */
    encoding_evex,
    encoding_evex512,
    encoding_error
  } encoding;

  /* Prefer load or store in encoding.  */
  enum {
    dir_encoding_default = 0,
    dir_encoding_load,
    dir_encoding_store,
    dir_encoding_swap
  } dir_encoding;

  /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
  enum {
    disp_encoding_default = 0,
    disp_encoding_8bit,
    disp_encoding_16bit,
    disp_encoding_32bit
  } disp_encoding;

  /* Prefer the REX byte in encoding.  */
  bool rex_encoding;

  /* Prefer the REX2 prefix in encoding.  */
  bool rex2_encoding;

  /* No CSPAZO flags update.  */
  bool has_nf;

  /* Disable instruction size optimization.  */
  bool no_optimize;
} pp;
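/* Illustrative note (not part of the original source): these fields are
   what the {pseudo} prefixes understood by parse_insn() record, assuming
   the pseudo-prefix spellings documented for gas:

     {vex} {vex3} {evex}   -> pp.encoding
     {load} {store}        -> pp.dir_encoding
     {disp8} {disp32}      -> pp.disp_encoding
     {rex}                 -> pp.rex_encoding
     {rex2}                -> pp.rex2_encoding
     {nf}                  -> pp.has_nf
     {nooptimize}          -> pp.no_optimize

   For example "{load} mov %rax, %rbx" asks for the load form (opcode
   0x8b) instead of the default store form (0x89) of the
   register-to-register move.  */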
/* Link RC type with corresponding string, that'll be looked for in
   asm.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

static const struct RC_name RC_NamesTable[] =
{
  {  rne, STRING_COMMA_LEN ("rn-sae") },
  {  rd,  STRING_COMMA_LEN ("rd-sae") },
  {  ru,  STRING_COMMA_LEN ("ru-sae") },
  {  rz,  STRING_COMMA_LEN ("rz-sae") },
  {  saeonly,  STRING_COMMA_LEN ("sae") },
};
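/* Minimal lookup sketch (illustrative, not part of the original file;
   lookup_rc_type_example is a hypothetical helper): how a parser might
   resolve an "{rz-sae}" operand modifier against the table above.
   STRING_COMMA_LEN expands to the string literal plus its length, which
   is why each entry carries both NAME and LEN.  */

static enum rc_type
lookup_rc_type_example (const char *str, size_t len)
{
  unsigned int j;

  for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
    if (len == RC_NamesTable[j].len
	&& memcmp (str, RC_NamesTable[j].name, len) == 0)
      return RC_NamesTable[j].type;
  return rc_none;
}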
/* To be indexed by segment register number.  */
static const unsigned char i386_seg_prefixes[] = {
  ES_PREFIX_OPCODE,
  CS_PREFIX_OPCODE,
  SS_PREFIX_OPCODE,
  DS_PREFIX_OPCODE,
  FS_PREFIX_OPCODE,
  GS_PREFIX_OPCODE
};

/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-(["
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;

#if (defined (OBJ_ELF)					\
     && !defined (TE_GNU)				\
     && !defined (TE_LINUX)				\
     && !defined (TE_Haiku)				\
     && !defined (TE_FreeBSD)				\
     && !defined (TE_DragonFly)				\
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or 0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";

/* Tables for lexical analysis.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];

/* Lexical macros.  */
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')

/* All non-digit non-letter characters that may occur in an operand and
   which aren't already in extra_symbol_chars[].  */
static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]{}";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
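/* Usage sketch (illustrative, not part of the original file): the macro
   pair above lets the parser temporarily NUL-terminate a sub-field of the
   caller's string and put the smashed character back afterwards, e.g.:

     char *end = strchr (operand, ',');
     if (end)
       {
	 END_STRING_AND_SAVE (end);      terminate at the comma
	 ...parse `operand' as a plain C string...
	 RESTORE_END_STRING (end);       restore the comma
       }

   Saves and restores must nest properly, since both sides go through the
   shared save_stack[].  */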
/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static templates current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on.  */
static int this_operand = -1;

/* Are we processing a .insn directive?  */
#define dot_insn() (i.tm.mnem_off == MN__insn)

enum i386_flag_code i386_flag_code;
#define flag_code i386_flag_code /* Permit to continue using original name.  */
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#ifdef OBJ_ELF

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#ifdef OBJ_ELF
/* 1 if generating code for a shared library.  */
static int shared = 0;

unsigned int x86_sframe_cfa_sp_reg;
/* The other CFA base register for SFrame stack trace info.  */
unsigned int x86_sframe_cfa_fp_reg;

static ginsnS *x86_ginsn_new (const symbolS *, enum ginsn_gen_mode);
#endif

/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
{
  lfence_branch_none = 0,
  lfence_branch_register,
  lfence_branch_memory,
  lfence_branch_all
}
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
{
  lfence_before_ret_none = 0,
  lfence_before_ret_not,
  lfence_before_ret_or,
  lfence_before_ret_shl
}
lfence_before_ret;

/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

/* 1 if the assembler should check tls relocation.  */
static bool tls_check = DEFAULT_X86_TLS_CHECK;

static enum check_kind
{
  check_none = 0,
  check_warning,
  check_error
}
sse_check, operand_check = check_warning;

/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
{
  align_branch_none = 0,
  align_branch_jcc = 1,
  align_branch_fused = 2,
  align_branch_jmp = 3,
  align_branch_call = 4,
  align_branch_indirect = 5,
  align_branch_ret = 6
};

/* Type bits of branches to align.  */
enum align_branch_bit
{
  align_branch_jcc_bit = 1 << align_branch_jcc,
  align_branch_fused_bit = 1 << align_branch_fused,
  align_branch_jmp_bit = 1 << align_branch_jmp,
  align_branch_call_bit = 1 << align_branch_call,
  align_branch_indirect_bit = 1 << align_branch_indirect,
  align_branch_ret_bit = 1 << align_branch_ret
};

static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
{
  mf_jcc_jo = 0,  /* base opcode 0x70  */
  mf_jcc_jc,      /* base opcode 0x72  */
  mf_jcc_je,      /* base opcode 0x74  */
  mf_jcc_jna,     /* base opcode 0x76  */
  mf_jcc_js,      /* base opcode 0x78  */
  mf_jcc_jp,      /* base opcode 0x7a  */
  mf_jcc_jl,      /* base opcode 0x7c  */
  mf_jcc_jle,     /* base opcode 0x7e  */
};

/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
{
  mf_cmp_test_and, /* test/cmp */
  mf_cmp_alu_cmp,  /* add/sub/cmp */
  mf_cmp_incdec    /* inc/dec */
};

/* The maximum padding size for fused jcc.  CMP like instruction can
   be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* ISA extensions available in 64-bit mode only.  */
static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   larger than a byte offset.  */
static bool no_cond_jump_promotion = false;

/* This will be set from an expression parser hook if there's any
   applicable operator involved in an expression.  */
static enum {
  expr_operator_none,
  expr_operator_present,
  expr_large_value,
} expr_mode;

/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode aligned vector move as unaligned vector move.  */
static unsigned int use_unaligned_vector_move;

/* Maximum permitted vector size.  */
#define VSZ128 0
#define VSZ256 1
#define VSZ512 2
#define VSZ_DEFAULT VSZ512
static unsigned int vector_size = VSZ_DEFAULT;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
{
  vex128 = 0,
  vex256
} avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
{
  vexw0 = 0,
  vexw1
} vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
{
  evexl128 = 0,
  evexl256,
  evexl512
} evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
{
  evexw0 = 0,
  evexw1
} evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;

/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16	1
#define SMALL	0
#define SMALL16 (SMALL | CODE16)
#define BIG	2
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
  ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
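/* Worked example (illustrative, not part of the original file): the relax
   substate packs the jump type in the upper bits and the displacement
   size in the low two bits.  For a conditional jump with a 32-bit
   displacement:

     ENCODE_RELAX_STATE (COND_JUMP, BIG)
       == (relax_substateT) ((1 << 2) | 2) == 6

   and decoding recovers the pieces:

     TYPE_FROM_RELAX_STATE (6)      == 1   (COND_JUMP)
     DISP_SIZE_FROM_RELAX_STATE (6) == 4   (four displacement bytes)  */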
/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (The top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};

#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }
#define VECARCH(n, e, d, v) \
  { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
    CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
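/* Expansion sketch (illustrative, not part of the original file): with
   the macros above, a table entry such as

     ARCH (i386, I386, 386, false)

   expands to roughly

     { "i386", sizeof ("i386") - 1, false, PROCESSOR_I386, vsz_none,
       CPU_386_FLAGS, CPU_NONE_FLAGS }

   i.e. STRING_COMMA_LEN supplies both the name string and its length,
   SUBARCH entries use PROCESSOR_NONE, and VECARCH entries additionally
   select vector-size handling via the vsz_* enumerators.  */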
static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (i686, I686, 686, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (znver5, ZNVER, ZNVER5, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  VECARCH (avx, AVX, ANY_AVX, reset),
  VECARCH (avx2, AVX2, ANY_AVX2, reset),
  VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
  VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
  VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
  VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
  VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
  VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
  VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
  SUBARCH (monitor, MONITOR, MONITOR, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
  SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
  VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
  VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
  VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
  VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
  VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
  VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
  VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
  VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  VECARCH (vaes, VAES, ANY_VAES, reset),
  VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
  VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
	   ANY_AVX512_VP2INTERSECT, reset),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
  VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
  SUBARCH (fred, FRED, ANY_FRED, false),
  SUBARCH (lkgs, LKGS, ANY_LKGS, false),
  VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
  VECARCH (sha512, SHA512, ANY_SHA512, reset),
  VECARCH (sm3, SM3, ANY_SM3, reset),
  VECARCH (sm4, SM4, ANY_SM4, reset),
  SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
  VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
  SUBARCH (user_msr, USER_MSR, USER_MSR, false),
  SUBARCH (apx_f, APX_F, APX_F, false),
  VECARCH (avx10.2, AVX10_2, ANY_AVX10_2, set),
  SUBARCH (gmi, GMI, GMI, false),
  SUBARCH (msr_imm, MSR_IMM, MSR_IMM, false),
};

#undef SUBARCH
#undef ARCH

#ifdef I386COFF
/* Like s_lcomm_internal in gas/read.c but the alignment string
   is allowed to be optional.  */

static symbolS *
pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
{
  addressT align = 0;

  SKIP_WHITESPACE ();

  if (needs_align
      && *input_line_pointer == ',')
    {
      align = parse_align (needs_align - 1);

      if (align == (addressT) -1)
	return NULL;
    }
  else
    {
      if (size >= 8)
	align = 3;
      else if (size >= 4)
	align = 2;
      else if (size >= 2)
	align = 1;
      else
	align = 0;
    }

  bss_alloc (symbolP, size, align);
  return symbolP;
}

static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
#endif

const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifdef OBJ_AOUT
  {"bss", s_bss, 0},
#endif
#ifdef I386COFF
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"insn", s_insn, 0},
  {"noopt", s_noopt, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#ifdef OBJ_ELF
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};

/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;

#if (defined (OBJ_ELF) || defined (OBJ_MACH_O) || defined (TE_PE))
static const struct
{
  const char *str;
  unsigned int len;
  const enum bfd_reloc_code_real rel[2];
  const i386_operand_type types64;
  bool need_GOT_symbol;
}
gotrel[] =
{
#define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
#define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
  { .imm64 = 1, .disp64 = 1 } }

#ifndef TE_PE
#ifdef OBJ_ELF
  { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
				      BFD_RELOC_SIZE32 },
    { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
#endif
  { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
				     BFD_RELOC_X86_64_PLTOFF64 },
    { .bitfield = { .imm64 = 1 } }, true },
  { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
				     BFD_RELOC_X86_64_PLT32 },
    OPERAND_TYPE_IMM32_32S_DISP32, false },
  { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
				     BFD_RELOC_X86_64_GOTPLT64 },
    OPERAND_TYPE_IMM64_DISP64, true },
  { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
				     BFD_RELOC_X86_64_GOTOFF64 },
    OPERAND_TYPE_IMM64_DISP64, true },
  { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
				     BFD_RELOC_X86_64_GOTPCREL },
    OPERAND_TYPE_IMM32_32S_DISP32, true },
  { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
				     BFD_RELOC_X86_64_TLSGD },
    OPERAND_TYPE_IMM32_32S_DISP32, true },
  { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
				     _dummy_first_bfd_reloc_code_real },
    OPERAND_TYPE_NONE, true },
  { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
				     BFD_RELOC_X86_64_TLSLD },
    OPERAND_TYPE_IMM32_32S_DISP32, true },
  { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
				     BFD_RELOC_X86_64_GOTTPOFF },
    OPERAND_TYPE_IMM32_32S_DISP32, true },
  { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
				     BFD_RELOC_X86_64_TPOFF32 },
    OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
  { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
				     _dummy_first_bfd_reloc_code_real },
    OPERAND_TYPE_NONE, true },
  { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
				     BFD_RELOC_X86_64_DTPOFF32 },
    OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
  { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
				     _dummy_first_bfd_reloc_code_real },
    OPERAND_TYPE_NONE, true },
  { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
				     _dummy_first_bfd_reloc_code_real },
    OPERAND_TYPE_NONE, true },
  { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
				     BFD_RELOC_X86_64_GOT32 },
    OPERAND_TYPE_IMM32_32S_64_DISP32, true },
  { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
				     BFD_RELOC_X86_64_GOTPC32_TLSDESC },
    OPERAND_TYPE_IMM32_32S_DISP32, true },
  { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
				     BFD_RELOC_X86_64_TLSDESC_CALL },
    OPERAND_TYPE_IMM32_32S_DISP32, true },
#else /* TE_PE */
  { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
				     BFD_RELOC_32_SECREL },
    OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
#endif
};

#undef OPERAND_TYPE_IMM32_32S_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
#undef OPERAND_TYPE_IMM64_DISP64

#endif
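/* Usage note (illustrative, not part of the original file): the strings
   in gotrel[] are the @-suffixes recognized after a symbol name, with
   rel[0] selecting the i386 relocation and rel[1] the x86-64 one, e.g.
   in AT&T syntax:

     call  func@PLT                      PLT32 relocation
     movq  sym@GOTPCREL(%rip), %rax      BFD_RELOC_X86_64_GOTPCREL

   Entries whose 64-bit slot is _dummy_first_bfd_reloc_code_real (such
   as INDNTPOFF) are valid only for 32-bit output, and vice versa for
   entries like PLTOFF whose 32-bit slot is the dummy.  */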
/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};				/* nop			*/
static const unsigned char f32_2[] =
  {0x66,0x90};				/* xchg %ax,%ax		*/
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
#define f32_4 (f32_5 + 1)		/* leal 0(%esi,%eiz),%esi */
static const unsigned char f32_5[] =
  {0x2e,0x8d,0x74,0x26,0x00};		/* leal %cs:0(%esi,%eiz),%esi */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
#define f32_7 (f32_8 + 1)		/* leal 0L(%esi,%eiz),%esi */
static const unsigned char f32_8[] =
  {0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal %cs:0L(%esi,%eiz),%esi */
static const unsigned char f64_3[] =
  {0x48,0x89,0xf6};			/* mov %rsi,%rsi	*/
static const unsigned char f64_4[] =
  {0x48,0x8d,0x76,0x00};		/* lea 0(%rsi),%rsi	*/
#define f64_5 (f64_6 + 1)		/* lea 0(%rsi,%riz),%rsi */
static const unsigned char f64_6[] =
  {0x2e,0x48,0x8d,0x74,0x26,0x00};	/* lea %cs:0(%rsi,%riz),%rsi */
static const unsigned char f64_7[] =
  {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00};	/* lea 0L(%rsi),%rsi	*/
#define f64_8 (f64_9 + 1)		/* lea 0L(%rsi,%riz),%rsi */
static const unsigned char f64_9[] =
  {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
#define f16_2 (f64_3 + 1)		/* mov %si,%si		*/
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
#define f16_4 (f16_5 + 1)		/* lea 0W(%si),%si	*/
static const unsigned char f16_5[] =
  {0x2e,0x8d,0xb4,0x00,0x00};		/* lea %cs:0W(%si),%si	*/
static const unsigned char jump_disp8[] =
  {0xeb};				/* jmp disp8		*/
static const unsigned char jump32_disp32[] =
  {0xe9};				/* jmp disp32		*/
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};				/* jmp disp32		*/
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8
};
/* 64-bit NOPs patterns.  */
static const unsigned char *const f64_patt[] = {
  f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f16_2, f16_3, f16_4, f16_5
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
#define alt_5 (alt_6 + 1)
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
#define alt_8 (alt_9 + 1)
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
#define alt_10 (alt_11 + 1)
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
#define alt64_9 (alt64_15 + 6)		/* nopq 0L(%rax,%rax,1)	*/
#define alt64_10 (alt64_15 + 5)		/* cs nopq 0L(%rax,%rax,1) */
/* data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_11 (alt64_15 + 4)
/* data16 data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_12 (alt64_15 + 3)
/* data16 data16 data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_13 (alt64_15 + 2)
/* data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1) */
#define alt64_14 (alt64_15 + 1)
/* data16 data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1) */
static const unsigned char alt64_15[] =
  {0x66,0x66,0x66,0x66,0x66,0x2e,0x48,
   0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* Long 64-bit NOPs patterns.  */
static const unsigned char *const alt64_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt64_9, alt64_10, alt64_11, alt64_12, alt64_13, alt64_14, alt64_15
};
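/* Note (illustrative, not part of the original file): the #define'd
   pattern names alias the tail of a longer pattern instead of storing a
   separate byte array.  E.g. f32_4 is (f32_5 + 1): dropping the leading
   0x2e CS-override byte from the 5-byte
     {0x2e,0x8d,0x74,0x26,0x00}   leal %cs:0(%esi,%eiz),%esi
   leaves the valid 4-byte
     {0x8d,0x74,0x26,0x00}        leal 0(%esi,%eiz),%esi
   so each *_patt[] table can still be indexed by (size - 1).  The
   alt64_* aliases peel data16/cs prefix bytes off alt64_15 the same
   way.  */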
/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)
{
  /* Place the longer NOP first.  */
  int last;
  int offset;
  const unsigned char *nops;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  nops = patt[max_single_nop_size - 1];
  last = count % max_single_nop_size;

  count -= last;
  for (offset = 0; offset < count; offset += max_single_nop_size)
    memcpy (where + offset, nops, max_single_nop_size);

  if (last)
    {
      nops = patt[last - 1];
      memcpy (where + offset, nops, last);
    }
}
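/* Usage sketch (illustrative, not part of the original file): padding a
   10-byte gap in 32-bit code with at most 8-byte NOPs:

     i386_output_nops (frag_ptr, f32_patt, 10, ARRAY_SIZE (f32_patt));

   Since 10 % 8 == 2, the loop writes f32_patt[7] (the 8-byte pattern)
   once and the 2-byte remainder is covered by f32_patt[1]
   (xchg %ax,%ax).  */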
static INLINE int
fits_in_imm7 (offsetT num)
{
  return (num & 0x7f) == num;
}

static INLINE int
fits_in_imm31 (offsetT num)
{
  return (num & 0x7fffffff) == num;
}

/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit.  When -mtune= is used:

     1. For PROCESSOR_I?86, PROCESSOR_PENTIUM, PROCESSOR_IAMCU, and
     PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt/f64_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.  */

  if (fragP->tc_frag_data.code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used, unless
	     explicitly disabled.  */
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isanop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	      if (fragP->tc_frag_data.cpunop)
		{
		  if (fragP->tc_frag_data.code == CODE_64BIT)
		    patt = alt64_patt;
		  else
		    patt = alt_patt;
		}
	      break;

	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      if (fragP->tc_frag_data.cpunop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_I686:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    default:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isanop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_NONE:
	      abort ();
	    }
	}

      if (patt != alt_patt && patt != alt64_patt)
	{
	  max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
						 : ARRAY_SIZE (f64_patt);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = patt == alt_patt
				? ARRAY_SIZE (alt_patt)
				: ARRAY_SIZE (alt64_patt);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  /* Emit a plain NOP first when the last thing we saw may not have been
     a proper instruction (e.g. a stand-alone prefix or .byte).  */
  if (!fragP->tc_frag_data.last_insn_normal)
    {
      *where++ = 0x90;
      --count;
    }

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}
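/* Worked example (illustrative, not part of the original file): with
   alt_patt (max_single_nop_size 11, max_number_of_nops 7), a request
   for 70 bytes of padding needs only 70 / 11 == 6 NOPs, which is below
   the threshold, so plain NOPs are emitted.  With f32_patt (max size 8,
   limit 2), a request for 40 bytes gives 40 / 8 == 5 > 2, so a jump
   over the padding is emitted instead: disp = 40 - 2 = 38 fits in
   imm7, yielding a 2-byte "jmp disp8" followed by 38 bytes of NOPs the
   CPU never executes.  */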
1782 static INLINE int
1783 operand_type_all_zero (const union i386_operand_type *x)
1785 switch (ARRAY_SIZE(x->array))
1787 case 3:
1788 if (x->array[2])
1789 return 0;
1790 /* Fall through. */
1791 case 2:
1792 if (x->array[1])
1793 return 0;
1794 /* Fall through. */
1795 case 1:
1796 return !x->array[0];
1797 default:
1798 abort ();
1802 static INLINE void
1803 operand_type_set (union i386_operand_type *x, unsigned int v)
1805 switch (ARRAY_SIZE(x->array))
1807 case 3:
1808 x->array[2] = v;
1809 /* Fall through. */
1810 case 2:
1811 x->array[1] = v;
1812 /* Fall through. */
1813 case 1:
1814 x->array[0] = v;
1815 /* Fall through. */
1816 break;
1817 default:
1818 abort ();
1821 x->bitfield.class = ClassNone;
1822 x->bitfield.instance = InstanceNone;
1825 static INLINE int
1826 operand_type_equal (const union i386_operand_type *x,
1827 const union i386_operand_type *y)
1829 switch (ARRAY_SIZE(x->array))
1831 case 3:
1832 if (x->array[2] != y->array[2])
1833 return 0;
1834 /* Fall through. */
1835 case 2:
1836 if (x->array[1] != y->array[1])
1837 return 0;
1838 /* Fall through. */
1839 case 1:
1840 return x->array[0] == y->array[0];
1841 break;
1842 default:
1843 abort ();
1847 static INLINE bool
1848 _is_cpu (const i386_cpu_attr *a, enum i386_cpu cpu)
1850 switch (cpu)
1852 case Cpu287: return a->bitfield.cpu287;
1853 case Cpu387: return a->bitfield.cpu387;
1854 case Cpu3dnow: return a->bitfield.cpu3dnow;
1855 case Cpu3dnowA: return a->bitfield.cpu3dnowa;
1856 case CpuAVX: return a->bitfield.cpuavx;
1857 case CpuHLE: return a->bitfield.cpuhle;
1858 case CpuAVX512F: return a->bitfield.cpuavx512f;
1859 case CpuAVX512VL: return a->bitfield.cpuavx512vl;
1860 case CpuAPX_F: return a->bitfield.cpuapx_f;
1861 case Cpu64: return a->bitfield.cpu64;
1862 case CpuNo64: return a->bitfield.cpuno64;
1863 default:
1864 gas_assert (cpu < CpuAttrEnums);
1866 return a->bitfield.isa == cpu + 1u;
1869 static INLINE bool
1870 is_cpu (const insn_template *t, enum i386_cpu cpu)
1872 return _is_cpu(&t->cpu, cpu);
1875 static INLINE bool
1876 maybe_cpu (const insn_template *t, enum i386_cpu cpu)
1878 return _is_cpu(&t->cpu_any, cpu);
1881 static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
1883 const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
1884 i386_cpu_flags f = { .array[0] = 0 };
1886 switch (ARRAY_SIZE (a.array))
1888 case 1:
1889 f.array[CpuAttrEnums / bps]
1890 #ifndef WORDS_BIGENDIAN
1891 |= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
1892 #else
1893 |= (a.array[0] << CpuIsaBits) >> (CpuAttrEnums % bps);
1894 #endif
1895 if (CpuMax / bps > CpuAttrEnums / bps)
1896 f.array[CpuAttrEnums / bps + 1]
1897 #ifndef WORDS_BIGENDIAN
1898 = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
1899 #else
1900 = (a.array[0] << CpuIsaBits) << (bps - CpuAttrEnums % bps);
1901 #endif
1902 break;
1904 default:
1905 abort ();
1908 if (a.bitfield.isa)
1909 #ifndef WORDS_BIGENDIAN
1910 f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
1911 #else
1912 f.array[(a.bitfield.isa - 1) / bps] |= 1u << (~(a.bitfield.isa - 1) % bps);
1913 #endif
1915 return f;
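/* Schematically: the feature bits stored above the CpuIsaBits-wide ISA
   field of the attribute word are shifted into the flag vector starting at
   bit CpuAttrEnums, and a non-zero ISA enum additionally sets the single
   flag bit it denotes - e.g. an attribute whose isa field holds Cpu386 + 1
   yields a flag vector with (just) the Cpu386 bit set.  */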
1918 static INLINE int
1919 cpu_flags_all_zero (const union i386_cpu_flags *x)
1921 switch (ARRAY_SIZE(x->array))
1923 case 5:
1924 if (x->array[4])
1925 return 0;
1926 /* Fall through. */
1927 case 4:
1928 if (x->array[3])
1929 return 0;
1930 /* Fall through. */
1931 case 3:
1932 if (x->array[2])
1933 return 0;
1934 /* Fall through. */
1935 case 2:
1936 if (x->array[1])
1937 return 0;
1938 /* Fall through. */
1939 case 1:
1940 return !x->array[0];
1941 default:
1942 abort ();
1946 static INLINE int
1947 cpu_flags_equal (const union i386_cpu_flags *x,
1948 const union i386_cpu_flags *y)
1950 switch (ARRAY_SIZE(x->array))
1952 case 5:
1953 if (x->array[4] != y->array[4])
1954 return 0;
1955 /* Fall through. */
1956 case 4:
1957 if (x->array[3] != y->array[3])
1958 return 0;
1959 /* Fall through. */
1960 case 3:
1961 if (x->array[2] != y->array[2])
1962 return 0;
1963 /* Fall through. */
1964 case 2:
1965 if (x->array[1] != y->array[1])
1966 return 0;
1967 /* Fall through. */
1968 case 1:
1969 return x->array[0] == y->array[0];
1970 break;
1971 default:
1972 abort ();
1976 static INLINE int
1977 cpu_flags_check_cpu64 (const insn_template *t)
1979 return flag_code == CODE_64BIT
1980 ? !t->cpu.bitfield.cpuno64
1981 : !t->cpu.bitfield.cpu64;
1984 static INLINE i386_cpu_flags
1985 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1987 switch (ARRAY_SIZE (x.array))
1989 case 5:
1990 x.array [4] &= y.array [4];
1991 /* Fall through. */
1992 case 4:
1993 x.array [3] &= y.array [3];
1994 /* Fall through. */
1995 case 3:
1996 x.array [2] &= y.array [2];
1997 /* Fall through. */
1998 case 2:
1999 x.array [1] &= y.array [1];
2000 /* Fall through. */
2001 case 1:
2002 x.array [0] &= y.array [0];
2003 break;
2004 default:
2005 abort ();
2007 return x;
2010 static INLINE i386_cpu_flags
2011 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
2013 switch (ARRAY_SIZE (x.array))
2015 case 5:
2016 x.array [4] |= y.array [4];
2017 /* Fall through. */
2018 case 4:
2019 x.array [3] |= y.array [3];
2020 /* Fall through. */
2021 case 3:
2022 x.array [2] |= y.array [2];
2023 /* Fall through. */
2024 case 2:
2025 x.array [1] |= y.array [1];
2026 /* Fall through. */
2027 case 1:
2028 x.array [0] |= y.array [0];
2029 break;
2030 default:
2031 abort ();
2033 return x;
2036 static INLINE i386_cpu_flags
2037 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
2039 switch (ARRAY_SIZE (x.array))
2041 case 5:
2042 x.array [4] &= ~y.array [4];
2043 /* Fall through. */
2044 case 4:
2045 x.array [3] &= ~y.array [3];
2046 /* Fall through. */
2047 case 3:
2048 x.array [2] &= ~y.array [2];
2049 /* Fall through. */
2050 case 2:
2051 x.array [1] &= ~y.array [1];
2052 /* Fall through. */
2053 case 1:
2054 x.array [0] &= ~y.array [0];
2055 break;
2056 default:
2057 abort ();
2059 return x;
2062 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
2064 static INLINE bool need_evex_encoding (const insn_template *t)
2066 return pp.encoding == encoding_evex
2067 || pp.encoding == encoding_evex512
2068 || pp.has_nf
2069 || (t->opcode_modifier.vex && pp.encoding == encoding_egpr)
2070 || i.mask.reg;
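/* E.g. (illustrative) "{evex} vaddps %xmm0, %xmm1, %xmm2" and the masked
   "vaddps %xmm0, %xmm1, %xmm2{%k1}" both force the EVEX form of a dual
   VEX/EVEX template, while plain "vaddps %xmm0, %xmm1, %xmm2" may still
   use VEX.  */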
2073 #define CPU_FLAGS_ARCH_MATCH 0x1
2074 #define CPU_FLAGS_64BIT_MATCH 0x2
2076 #define CPU_FLAGS_PERFECT_MATCH \
2077 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
2079 static INLINE bool set_oszc_flags (unsigned int oszc_shift)
2081 if (i.oszc_flags & oszc_shift)
2083 as_bad (_("same oszc flag used twice"));
2084 return false;
2086 i.oszc_flags |= oszc_shift;
2087 return true;
2090 /* Handle SCC OSZC flags. */
2092 static int
2093 check_Scc_OszcOperations (const char *l)
2095 const char *suffix_string = l;
2097 while (is_space_char (*suffix_string))
2098 suffix_string++;
2100 /* If {oszc flags} is absent, just return. */
2101 if (*suffix_string != '{')
2102 return 0;
2104 /* Skip '{'. */
2105 suffix_string++;
2107 /* Parse 'dfv='. */
2108 while (is_space_char (*suffix_string))
2109 suffix_string++;
2111 if (strncasecmp (suffix_string, "dfv", 3) == 0)
2112 suffix_string += 3;
2113 else
2115 as_bad (_("unrecognized pseudo-suffix"));
2116 return -1;
2119 while (is_space_char (*suffix_string))
2120 suffix_string++;
2122 if (*suffix_string == '=')
2123 suffix_string++;
2124 else
2126 as_bad (_("unrecognized pseudo-suffix"));
2127 return -1;
2130 /* Parse 'of, sf, zf, cf}'. */
2131 while (*suffix_string)
2133 while (is_space_char (*suffix_string))
2134 suffix_string++;
2136 /* Return for '{dfv=}'. */
2137 if (*suffix_string == '}')
2138 return suffix_string + 1 - l;
2140 if (strncasecmp (suffix_string, "of", 2) == 0)
2142 if (!set_oszc_flags (OSZC_OF))
2143 return -1;
2145 else if (strncasecmp (suffix_string, "sf", 2) == 0)
2147 if (!set_oszc_flags (OSZC_SF))
2148 return -1;
2150 else if (strncasecmp (suffix_string, "zf", 2) == 0)
2152 if (!set_oszc_flags (OSZC_ZF))
2153 return -1;
2155 else if (strncasecmp (suffix_string, "cf", 2) == 0)
2157 if (!set_oszc_flags (OSZC_CF))
2158 return -1;
2160 else
2162 as_bad (_("unrecognized oszc flags or illegal `,' in pseudo-suffix"));
2163 return -1;
2166 suffix_string += 2;
2168 while (is_space_char (*suffix_string))
2169 suffix_string++;
2171 if (*suffix_string == '}')
2172 return ++suffix_string - l;
2174 if (*suffix_string != ',')
2175 break;
2176 suffix_string ++;
2179 as_bad (_("missing `}' or `,' in pseudo-suffix"));
2180 return -1;
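/* The pseudo-suffix recognized above is the APX SCC "default flag value"
   specifier, e.g. "{dfv=of,cf}" or the empty "{dfv=}"; each of of/sf/zf/cf
   may appear at most once, in any order.  */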
2183 /* Return CPU flags match bits. */
2185 static int
2186 cpu_flags_match (const insn_template *t)
2188 i386_cpu_flags cpu, active, all = cpu_flags_from_attr (t->cpu);
2189 i386_cpu_flags any = cpu_flags_from_attr (t->cpu_any);
2190 int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;
2192 all.bitfield.cpu64 = 0;
2193 all.bitfield.cpuno64 = 0;
2194 gas_assert (!any.bitfield.cpu64);
2195 gas_assert (!any.bitfield.cpuno64);
2197 if (cpu_flags_all_zero (&all) && cpu_flags_all_zero (&any))
2199 /* This instruction is available on all archs. */
2200 return match | CPU_FLAGS_ARCH_MATCH;
2203 /* This instruction is available only on some archs. */
2205 /* Dual VEX/EVEX templates may need stripping of one of the flags. */
2206 if (t->opcode_modifier.vex && t->opcode_modifier.evex)
2208 /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
2209 know that EVEX encoding will be needed. */
2210 if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
2211 && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
2213 if (need_evex_encoding (t))
2215 any.bitfield.cpuavx = 0;
2216 any.bitfield.cpuavx2 = 0;
2217 any.bitfield.cpufma = 0;
2219 /* need_evex_encoding(t) isn't reliable until operands have been
2220 parsed. */
2221 else if (i.operands)
2223 any.bitfield.cpuavx512f = 0;
2224 any.bitfield.cpuavx512vl = 0;
2228 /* Dual non-APX/APX templates need massaging from what APX_F() in the
2229 opcode table has produced. While the direct transformation of the
2230 incoming cpuid&(cpuid|APX_F) would be to cpuid&(cpuid) / cpuid&(APX_F)
2231 respectively, it's cheaper to move to just cpuid / cpuid&APX_F
2232 instead. */
2233 if (any.bitfield.cpuapx_f
2234 && (any.bitfield.cpubmi || any.bitfield.cpubmi2
2235 || any.bitfield.cpuavx512f || any.bitfield.cpuavx512bw
2236 || any.bitfield.cpuavx512dq || any.bitfield.cpuamx_tile
2237 || any.bitfield.cpucmpccxadd || any.bitfield.cpuuser_msr
2238 || any.bitfield.cpumsr_imm))
2240 /* These checks (verifying that APX_F() was properly used in the
2241 opcode table entry) make sure there's no need for an "else" to
2242 the "if()" below. */
2243 gas_assert (!cpu_flags_all_zero (&all));
2244 cpu = cpu_flags_and (all, any);
2245 gas_assert (cpu_flags_equal (&cpu, &all));
2247 if (need_evex_encoding (t))
2248 all = any;
2250 memset (&any, 0, sizeof (any));
2254 if (flag_code != CODE_64BIT)
2255 active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
2256 else
2257 active = cpu_arch_flags;
2258 cpu = cpu_flags_and (all, active);
2259 if (cpu_flags_equal (&cpu, &all))
2261 /* AVX and AVX2 present at the same time express an operand size
2262 dependency - strip AVX2 for the purposes here. The operand size
2263 dependent check occurs in check_vecOperands(). */
2264 if (any.bitfield.cpuavx && any.bitfield.cpuavx2)
2265 any.bitfield.cpuavx2 = 0;
2267 cpu = cpu_flags_and (any, active);
2268 if (cpu_flags_all_zero (&any) || !cpu_flags_all_zero (&cpu))
2269 match |= CPU_FLAGS_ARCH_MATCH;
2271 return match;
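/* E.g. (illustrative) for a template requiring AVX2, assembled in 64-bit
   mode: with AVX2 enabled the result is CPU_FLAGS_PERFECT_MATCH; with AVX2
   disabled only CPU_FLAGS_64BIT_MATCH remains set.  */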
2274 static INLINE i386_operand_type
2275 operand_type_and (i386_operand_type x, i386_operand_type y)
2277 if (x.bitfield.class != y.bitfield.class)
2278 x.bitfield.class = ClassNone;
2279 if (x.bitfield.instance != y.bitfield.instance)
2280 x.bitfield.instance = InstanceNone;
2282 switch (ARRAY_SIZE (x.array))
2284 case 3:
2285 x.array [2] &= y.array [2];
2286 /* Fall through. */
2287 case 2:
2288 x.array [1] &= y.array [1];
2289 /* Fall through. */
2290 case 1:
2291 x.array [0] &= y.array [0];
2292 break;
2293 default:
2294 abort ();
2296 return x;
2299 static INLINE i386_operand_type
2300 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2302 gas_assert (y.bitfield.class == ClassNone);
2303 gas_assert (y.bitfield.instance == InstanceNone);
2305 switch (ARRAY_SIZE (x.array))
2307 case 3:
2308 x.array [2] &= ~y.array [2];
2309 /* Fall through. */
2310 case 2:
2311 x.array [1] &= ~y.array [1];
2312 /* Fall through. */
2313 case 1:
2314 x.array [0] &= ~y.array [0];
2315 break;
2316 default:
2317 abort ();
2319 return x;
2322 static INLINE i386_operand_type
2323 operand_type_or (i386_operand_type x, i386_operand_type y)
2325 gas_assert (x.bitfield.class == ClassNone ||
2326 y.bitfield.class == ClassNone ||
2327 x.bitfield.class == y.bitfield.class);
2328 gas_assert (x.bitfield.instance == InstanceNone ||
2329 y.bitfield.instance == InstanceNone ||
2330 x.bitfield.instance == y.bitfield.instance);
2332 switch (ARRAY_SIZE (x.array))
2334 case 3:
2335 x.array [2] |= y.array [2];
2336 /* Fall through. */
2337 case 2:
2338 x.array [1] |= y.array [1];
2339 /* Fall through. */
2340 case 1:
2341 x.array [0] |= y.array [0];
2342 break;
2343 default:
2344 abort ();
2346 return x;
2349 static INLINE i386_operand_type
2350 operand_type_xor (i386_operand_type x, i386_operand_type y)
2352 gas_assert (y.bitfield.class == ClassNone);
2353 gas_assert (y.bitfield.instance == InstanceNone);
2355 switch (ARRAY_SIZE (x.array))
2357 case 3:
2358 x.array [2] ^= y.array [2];
2359 /* Fall through. */
2360 case 2:
2361 x.array [1] ^= y.array [1];
2362 /* Fall through. */
2363 case 1:
2364 x.array [0] ^= y.array [0];
2365 break;
2366 default:
2367 abort ();
2369 return x;
2372 static const i386_operand_type anydisp = {
2373 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
2376 enum operand_type
2378 reg,
2379 imm,
2380 disp,
2381 anymem
2384 static INLINE int
2385 operand_type_check (i386_operand_type t, enum operand_type c)
2387 switch (c)
2389 case reg:
2390 return t.bitfield.class == Reg;
2392 case imm:
2393 return (t.bitfield.imm8
2394 || t.bitfield.imm8s
2395 || t.bitfield.imm16
2396 || t.bitfield.imm32
2397 || t.bitfield.imm32s
2398 || t.bitfield.imm64);
2400 case disp:
2401 return (t.bitfield.disp8
2402 || t.bitfield.disp16
2403 || t.bitfield.disp32
2404 || t.bitfield.disp64);
2406 case anymem:
2407 return (t.bitfield.disp8
2408 || t.bitfield.disp16
2409 || t.bitfield.disp32
2410 || t.bitfield.disp64
2411 || t.bitfield.baseindex);
2413 default:
2414 abort ();
2417 return 0;
2420 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2421 between operand GIVEN and operand WANTED for instruction template T. */
2423 static INLINE int
2424 match_operand_size (const insn_template *t, unsigned int wanted,
2425 unsigned int given)
2427 return !((i.types[given].bitfield.byte
2428 && !t->operand_types[wanted].bitfield.byte)
2429 || (i.types[given].bitfield.word
2430 && !t->operand_types[wanted].bitfield.word)
2431 || (i.types[given].bitfield.dword
2432 && !t->operand_types[wanted].bitfield.dword)
2433 || (i.types[given].bitfield.qword
2434 && (!t->operand_types[wanted].bitfield.qword
2435 /* Don't allow 64-bit (memory) operands outside of 64-bit
2436 mode, when they're used where a 64-bit GPR could also
2437 be used. Checking is needed for Intel Syntax only. */
2438 || (intel_syntax
2439 && flag_code != CODE_64BIT
2440 && (t->operand_types[wanted].bitfield.class == Reg
2441 || t->operand_types[wanted].bitfield.class == Accum
2442 || t->opcode_modifier.isstring))))
2443 || (i.types[given].bitfield.tbyte
2444 && !t->operand_types[wanted].bitfield.tbyte));
2447 /* Return 1 if there is no conflict in SIMD register between operand
2448 GIVEN and operand WANTED for instruction template T. */
2450 static INLINE int
2451 match_simd_size (const insn_template *t, unsigned int wanted,
2452 unsigned int given)
2454 return !((i.types[given].bitfield.xmmword
2455 && !t->operand_types[wanted].bitfield.xmmword)
2456 || (i.types[given].bitfield.ymmword
2457 && !t->operand_types[wanted].bitfield.ymmword)
2458 || (i.types[given].bitfield.zmmword
2459 && !t->operand_types[wanted].bitfield.zmmword)
2460 || (i.types[given].bitfield.tmmword
2461 && !t->operand_types[wanted].bitfield.tmmword));
2464 /* Return 1 if there is no conflict in any size between operand GIVEN
2465 and operand WANTED for instruction template T. */
2467 static INLINE int
2468 match_mem_size (const insn_template *t, unsigned int wanted,
2469 unsigned int given)
2471 return (match_operand_size (t, wanted, given)
2472 && !((i.types[given].bitfield.unspecified
2473 && !i.broadcast.type
2474 && !i.broadcast.bytes
2475 && !t->operand_types[wanted].bitfield.unspecified)
2476 || (i.types[given].bitfield.fword
2477 && !t->operand_types[wanted].bitfield.fword)
2478 /* For scalar opcode templates to allow register and memory
2479 operands at the same time, some special casing is needed
2480 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2481 down-conversion vpmov*. */
2482 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2483 && t->operand_types[wanted].bitfield.byte
2484 + t->operand_types[wanted].bitfield.word
2485 + t->operand_types[wanted].bitfield.dword
2486 + t->operand_types[wanted].bitfield.qword
2487 > !!t->opcode_modifier.broadcast)
2488 ? (i.types[given].bitfield.xmmword
2489 || i.types[given].bitfield.ymmword
2490 || i.types[given].bitfield.zmmword)
2491 : !match_simd_size(t, wanted, given))));
2494 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2495 operands for instruction template T, and it has MATCH_REVERSE set if there
2496 is no size conflict on any operands for the template with operands reversed
2497 (and the template allows for reversing in the first place). */
2499 #define MATCH_STRAIGHT 1
2500 #define MATCH_REVERSE 2
2502 static INLINE unsigned int
2503 operand_size_match (const insn_template *t)
2505 unsigned int j, match = MATCH_STRAIGHT;
2507 /* Don't check non-absolute jump instructions. */
2508 if (t->opcode_modifier.jump
2509 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2510 return match;
2512 /* Check memory and accumulator operand size. */
2513 for (j = 0; j < i.operands; j++)
2515 if (i.types[j].bitfield.class != Reg
2516 && i.types[j].bitfield.class != RegSIMD
2517 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2518 continue;
2520 if (t->operand_types[j].bitfield.class == Reg
2521 && !match_operand_size (t, j, j))
2523 match = 0;
2524 break;
2527 if (t->operand_types[j].bitfield.class == RegSIMD
2528 && !match_simd_size (t, j, j))
2530 match = 0;
2531 break;
2534 if (t->operand_types[j].bitfield.instance == Accum
2535 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2537 match = 0;
2538 break;
2541 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2543 match = 0;
2544 break;
2548 if (!t->opcode_modifier.d)
2549 return match;
2551 /* Check reverse. */
2552 gas_assert (i.operands >= 2);
2554 for (j = 0; j < i.operands; j++)
2556 unsigned int given = i.operands - j - 1;
2558 /* For FMA4 and XOP insns VEX.W controls just the first two
2559 register operands. And APX_F insns just swap the two source operands,
2560 with the 3rd one being the destination. */
2561 if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP)
2562 || is_cpu (t, CpuAPX_F))
2563 given = j < 2 ? 1 - j : j;
2565 if (t->operand_types[j].bitfield.class == Reg
2566 && !match_operand_size (t, j, given))
2567 return match;
2569 if (t->operand_types[j].bitfield.class == RegSIMD
2570 && !match_simd_size (t, j, given))
2571 return match;
2573 if (t->operand_types[j].bitfield.instance == Accum
2574 && (!match_operand_size (t, j, given)
2575 || !match_simd_size (t, j, given)))
2576 return match;
2578 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2579 return match;
2582 return match | MATCH_REVERSE;
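/* E.g. (illustrative) AT&T "mov %eax, %ebx" against the D-marked reg,reg
   template fits in both operand orders, so MATCH_STRAIGHT | MATCH_REVERSE
   is returned; a template without the D modifier can yield at most
   MATCH_STRAIGHT.  */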
2585 static INLINE int
2586 operand_type_match (i386_operand_type overlap,
2587 i386_operand_type given)
2589 i386_operand_type temp = overlap;
2591 temp.bitfield.unspecified = 0;
2592 temp.bitfield.byte = 0;
2593 temp.bitfield.word = 0;
2594 temp.bitfield.dword = 0;
2595 temp.bitfield.fword = 0;
2596 temp.bitfield.qword = 0;
2597 temp.bitfield.tbyte = 0;
2598 temp.bitfield.xmmword = 0;
2599 temp.bitfield.ymmword = 0;
2600 temp.bitfield.zmmword = 0;
2601 temp.bitfield.tmmword = 0;
2602 if (operand_type_all_zero (&temp))
2603 goto mismatch;
2605 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2606 return 1;
2608 mismatch:
2609 i.error = operand_type_mismatch;
2610 return 0;
2613 /* If given types g0 and g1 are registers they must be of the same type
2614 unless the expected operand type register overlap is null.
2615 Intel syntax sized memory operands are also checked here. */
2617 static INLINE int
2618 operand_type_register_match (i386_operand_type g0,
2619 i386_operand_type t0,
2620 i386_operand_type g1,
2621 i386_operand_type t1)
2623 if (g0.bitfield.class != Reg
2624 && g0.bitfield.class != RegSIMD
2625 && (g0.bitfield.unspecified
2626 || !operand_type_check (g0, anymem)))
2627 return 1;
2629 if (g1.bitfield.class != Reg
2630 && g1.bitfield.class != RegSIMD
2631 && (g1.bitfield.unspecified
2632 || !operand_type_check (g1, anymem)))
2633 return 1;
2635 if (g0.bitfield.byte == g1.bitfield.byte
2636 && g0.bitfield.word == g1.bitfield.word
2637 && g0.bitfield.dword == g1.bitfield.dword
2638 && g0.bitfield.qword == g1.bitfield.qword
2639 && g0.bitfield.xmmword == g1.bitfield.xmmword
2640 && g0.bitfield.ymmword == g1.bitfield.ymmword
2641 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2642 return 1;
2644 /* If expectations overlap in no more than a single size, all is fine. */
2645 g0 = operand_type_and (t0, t1);
2646 if (g0.bitfield.byte
2647 + g0.bitfield.word
2648 + g0.bitfield.dword
2649 + g0.bitfield.qword
2650 + g0.bitfield.xmmword
2651 + g0.bitfield.ymmword
2652 + g0.bitfield.zmmword <= 1)
2653 return 1;
2655 i.error = register_type_mismatch;
2657 return 0;
2660 static INLINE unsigned int
2661 register_number (const reg_entry *r)
2663 unsigned int nr = r->reg_num;
2665 if (r->reg_flags & RegRex)
2666 nr += 8;
2668 if (r->reg_flags & (RegVRex | RegRex2))
2669 nr += 16;
2671 return nr;
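/* E.g. %eax -> 0, %r9 -> 1 + 8 == 9 (RegRex), and %xmm17 -> 1 + 16 == 17
   (RegVRex), i.e. the architectural register number.  */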
2674 static INLINE unsigned int
2675 mode_from_disp_size (i386_operand_type t)
2677 if (t.bitfield.disp8)
2678 return 1;
2679 else if (t.bitfield.disp16
2680 || t.bitfield.disp32)
2681 return 2;
2682 else
2683 return 0;
2686 static INLINE int
2687 fits_in_signed_byte (addressT num)
2689 return num + 0x80 <= 0xff;
2692 static INLINE int
2693 fits_in_unsigned_byte (addressT num)
2695 return num <= 0xff;
2698 static INLINE int
2699 fits_in_unsigned_word (addressT num)
2701 return num <= 0xffff;
2704 static INLINE int
2705 fits_in_signed_word (addressT num)
2707 return num + 0x8000 <= 0xffff;
2710 static INLINE int
2711 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2713 #ifndef BFD64
2714 return 1;
2715 #else
2716 return num + 0x80000000 <= 0xffffffff;
2717 #endif
2718 } /* fits_in_signed_long() */
2720 static INLINE int
2721 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2723 #ifndef BFD64
2724 return 1;
2725 #else
2726 return num <= 0xffffffff;
2727 #endif
2728 } /* fits_in_unsigned_long() */
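/* The signedness checks above rely on unsigned wraparound: for bytes,
   num + 0x80 <= 0xff accepts exactly the values in [-0x80, 0x7f] - e.g.
   (addressT) -128 wraps to 0 and passes, while +128 becomes 0x100 and
   fails.  */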
2730 static INLINE valueT extend_to_32bit_address (addressT num)
2732 #ifdef BFD64
2733 if (fits_in_unsigned_long(num))
2734 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2736 if (!fits_in_signed_long (num))
2737 return num & 0xffffffff;
2738 #endif
2740 return num;
2743 static INLINE int
2744 fits_in_disp8 (offsetT num)
2746 int shift = i.memshift;
2747 unsigned int mask;
2749 if (shift == -1)
2750 abort ();
2752 mask = (1 << shift) - 1;
2754 /* Return 0 if NUM isn't properly aligned. */
2755 if ((num & mask))
2756 return 0;
2758 /* Check if NUM will fit in 8bit after shift. */
2759 return fits_in_signed_byte (num >> shift);
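/* This implements the EVEX compressed displacement (disp8*N): e.g. with
   i.memshift == 6 (a 64-byte operand) a displacement of 0x40 is aligned
   and 0x40 >> 6 == 1 fits in a signed byte, so disp8 is usable, while
   0x44 is rejected as misaligned.  (Values illustrative.)  */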
2762 static INLINE int
2763 fits_in_imm4 (offsetT num)
2765 /* Despite the name, check for imm3 if we're dealing with EVEX. */
2766 return (num & (pp.encoding != encoding_evex
2767 && pp.encoding != encoding_egpr ? 0xf : 7)) == num;
2770 static i386_operand_type
2771 smallest_imm_type (offsetT num)
2773 i386_operand_type t;
2775 operand_type_set (&t, 0);
2776 t.bitfield.imm64 = 1;
2778 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2780 /* This code is disabled on the 486 because all the Imm1 forms
2781 in the opcode table are slower on the i486. They're the
2782 versions with the implicitly specified single-position
2783 displacement, which has another syntax if you really want to
2784 use that form. */
2785 t.bitfield.imm1 = 1;
2786 t.bitfield.imm8 = 1;
2787 t.bitfield.imm8s = 1;
2788 t.bitfield.imm16 = 1;
2789 t.bitfield.imm32 = 1;
2790 t.bitfield.imm32s = 1;
2792 else if (fits_in_signed_byte (num))
2794 if (fits_in_unsigned_byte (num))
2795 t.bitfield.imm8 = 1;
2796 t.bitfield.imm8s = 1;
2797 t.bitfield.imm16 = 1;
2798 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2799 t.bitfield.imm32 = 1;
2800 t.bitfield.imm32s = 1;
2802 else if (fits_in_unsigned_byte (num))
2804 t.bitfield.imm8 = 1;
2805 t.bitfield.imm16 = 1;
2806 t.bitfield.imm32 = 1;
2807 t.bitfield.imm32s = 1;
2809 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2811 t.bitfield.imm16 = 1;
2812 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2813 t.bitfield.imm32 = 1;
2814 t.bitfield.imm32s = 1;
2816 else if (fits_in_signed_long (num))
2818 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2819 t.bitfield.imm32 = 1;
2820 t.bitfield.imm32s = 1;
2822 else if (fits_in_unsigned_long (num))
2823 t.bitfield.imm32 = 1;
2825 return t;
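/* E.g. (note that imm64 is always set on entry): num == 0x90 additionally
   allows imm8/imm16/imm32/imm32s but not imm8s, while in 64-bit code
   num == 0x123456789 keeps only imm64.  */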
2828 static offsetT
2829 offset_in_range (offsetT val, int size)
2831 addressT mask;
2833 switch (size)
2835 case 1: mask = ((addressT) 1 << 8) - 1; break;
2836 case 2: mask = ((addressT) 1 << 16) - 1; break;
2837 #ifdef BFD64
2838 case 4: mask = ((addressT) 1 << 32) - 1; break;
2839 #endif
2840 case sizeof (val): return val;
2841 default: abort ();
2844 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2845 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2846 (uint64_t) val, (uint64_t) (val & mask));
2848 return val & mask;
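/* E.g. offset_in_range (0x1234, 1) warns "0x1234 shortened to 0x34" and
   returns 0x34, whereas offset_in_range (-1, 1) silently returns 0xff:
   values that are merely sign extensions of an in-range value are
   deliberately not warned about.  */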
2851 static INLINE const char *insn_name (const insn_template *t)
2853 return &i386_mnemonics[t->mnem_off];
2856 enum PREFIX_GROUP
2858 PREFIX_EXIST = 0,
2859 PREFIX_LOCK,
2860 PREFIX_REP,
2861 PREFIX_DS,
2862 PREFIX_OTHER
2865 /* Returns
2866 a. PREFIX_EXIST if attempting to add a prefix where one from the
2867 same class already exists.
2868 b. PREFIX_LOCK if lock prefix is added.
2869 c. PREFIX_REP if rep/repne prefix is added.
2870 d. PREFIX_DS if ds prefix is added.
2871 e. PREFIX_OTHER if other prefix is added.
2874 static enum PREFIX_GROUP
2875 add_prefix (unsigned int prefix)
2877 enum PREFIX_GROUP ret = PREFIX_OTHER;
2878 unsigned int q;
2880 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2881 && flag_code == CODE_64BIT)
2883 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2884 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2885 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2886 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2887 ret = PREFIX_EXIST;
2888 q = REX_PREFIX;
2890 else
2892 switch (prefix)
2894 default:
2895 abort ();
2897 case DS_PREFIX_OPCODE:
2898 ret = PREFIX_DS;
2899 /* Fall through. */
2900 case CS_PREFIX_OPCODE:
2901 case ES_PREFIX_OPCODE:
2902 case FS_PREFIX_OPCODE:
2903 case GS_PREFIX_OPCODE:
2904 case SS_PREFIX_OPCODE:
2905 q = SEG_PREFIX;
2906 break;
2908 case REPNE_PREFIX_OPCODE:
2909 case REPE_PREFIX_OPCODE:
2910 q = REP_PREFIX;
2911 ret = PREFIX_REP;
2912 break;
2914 case LOCK_PREFIX_OPCODE:
2915 q = LOCK_PREFIX;
2916 ret = PREFIX_LOCK;
2917 break;
2919 case FWAIT_OPCODE:
2920 q = WAIT_PREFIX;
2921 break;
2923 case ADDR_PREFIX_OPCODE:
2924 q = ADDR_PREFIX;
2925 break;
2927 case DATA_PREFIX_OPCODE:
2928 q = DATA_PREFIX;
2929 break;
2931 if (i.prefix[q] != 0)
2932 ret = PREFIX_EXIST;
2935 if (ret)
2937 if (!i.prefix[q])
2938 ++i.prefixes;
2939 i.prefix[q] |= prefix;
2941 else
2942 as_bad (_("same type of prefix used twice"));
2944 return ret;
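/* E.g. add_prefix (LOCK_PREFIX_OPCODE) records the prefix and returns
   PREFIX_LOCK; calling it a second time for the same insn yields
   PREFIX_EXIST together with a "same type of prefix used twice" error.
   In 64-bit mode REX prefixes with disjoint bits are instead OR-ed
   together.  */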
2947 static void
2948 update_code_flag (int value, int check)
2950 PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
2952 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64 )
2954 as_error (_("64bit mode not supported on `%s'."),
2955 cpu_arch_name ? cpu_arch_name : default_arch);
2956 return;
2959 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2961 as_error (_("32bit mode not supported on `%s'."),
2962 cpu_arch_name ? cpu_arch_name : default_arch);
2963 return;
2966 flag_code = (enum flag_code) value;
2968 stackop_size = '\0';
2971 static void
2972 set_code_flag (int value)
2974 update_code_flag (value, 0);
2977 static void
2978 set_16bit_gcc_code_flag (int new_code_flag)
2980 flag_code = (enum flag_code) new_code_flag;
2981 if (flag_code != CODE_16BIT)
2982 abort ();
2983 stackop_size = LONG_MNEM_SUFFIX;
2986 static void
2987 _set_intel_syntax (int syntax_flag)
2989 intel_syntax = syntax_flag;
2991 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2993 register_prefix = allow_naked_reg ? "" : "%";
2996 static void
2997 set_intel_syntax (int syntax_flag)
2999 /* Find out if register prefixing is specified. */
3000 int ask_naked_reg = 0;
3002 SKIP_WHITESPACE ();
3003 if (!is_end_of_line[(unsigned char) *input_line_pointer])
3005 char *string;
3006 int e = get_symbol_name (&string);
3008 if (strcmp (string, "prefix") == 0)
3009 ask_naked_reg = 1;
3010 else if (strcmp (string, "noprefix") == 0)
3011 ask_naked_reg = -1;
3012 else
3013 as_bad (_("bad argument to syntax directive."));
3014 (void) restore_line_pointer (e);
3016 demand_empty_rest_of_line ();
3018 if (ask_naked_reg == 0)
3019 allow_naked_reg = (syntax_flag
3020 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
3021 else
3022 allow_naked_reg = (ask_naked_reg < 0);
3024 _set_intel_syntax (syntax_flag);
3027 static void
3028 set_intel_mnemonic (int mnemonic_flag)
3030 intel_mnemonic = mnemonic_flag;
3033 static void
3034 set_allow_index_reg (int flag)
3036 allow_index_reg = flag;
3039 static void
3040 set_check (int what)
3042 enum check_kind *kind;
3043 const char *str;
3045 if (what)
3047 kind = &operand_check;
3048 str = "operand";
3050 else
3052 kind = &sse_check;
3053 str = "sse";
3056 SKIP_WHITESPACE ();
3058 if (!is_end_of_line[(unsigned char) *input_line_pointer])
3060 char *string;
3061 int e = get_symbol_name (&string);
3063 if (strcmp (string, "none") == 0)
3064 *kind = check_none;
3065 else if (strcmp (string, "warning") == 0)
3066 *kind = check_warning;
3067 else if (strcmp (string, "error") == 0)
3068 *kind = check_error;
3069 else
3070 as_bad (_("bad argument to %s_check directive."), str);
3071 (void) restore_line_pointer (e);
3073 else
3074 as_bad (_("missing argument for %s_check directive"), str);
3076 demand_empty_rest_of_line ();
3079 static void
3080 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
3081 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
3083 /* Intel MCU is only supported on ELF. */
3084 #ifdef OBJ_ELF
3085 static const char *arch;
3087 if (!arch)
3089 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
3090 use default_arch. */
3091 arch = cpu_arch_name;
3092 if (!arch)
3093 arch = default_arch;
3096 /* If we are targeting Intel MCU, we must enable it. */
3097 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
3098 == new_flag.bitfield.cpuiamcu)
3099 return;
3101 as_bad (_("`%s' is not supported on `%s'"), name, arch);
3102 #endif
3105 static void
3106 extend_cpu_sub_arch_name (const char *pfx, const char *name)
3108 if (cpu_sub_arch_name)
3109 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
3110 pfx, name, (const char *) NULL);
3111 else
3112 cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
3115 static void isa_enable (unsigned int idx)
3117 i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
3119 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
3121 extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
3122 cpu_arch_flags = flags;
3125 cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
3128 static void isa_disable (unsigned int idx)
3130 i386_cpu_flags flags
3131 = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
3133 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
3135 extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
3136 cpu_arch_flags = flags;
3139 cpu_arch_isa_flags
3140 = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
3143 static void
3144 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
3146 typedef struct arch_stack_entry
3148 const struct arch_stack_entry *prev;
3149 const char *name;
3150 char *sub_name;
3151 i386_cpu_flags flags;
3152 i386_cpu_flags isa_flags;
3153 enum processor_type isa;
3154 enum flag_code flag_code;
3155 unsigned int vector_size;
3156 char stackop_size;
3157 bool no_cond_jump_promotion;
3158 } arch_stack_entry;
3159 static const arch_stack_entry *arch_stack_top;
3160 char *s;
3161 int e;
3162 const char *string;
3163 unsigned int j = 0;
3165 SKIP_WHITESPACE ();
3167 if (is_end_of_line[(unsigned char) *input_line_pointer])
3169 as_bad (_("missing cpu architecture"));
3170 input_line_pointer++;
3171 return;
3174 e = get_symbol_name (&s);
3175 string = s;
3177 if (strcmp (string, "push") == 0)
3179 arch_stack_entry *top = XNEW (arch_stack_entry);
3181 top->name = cpu_arch_name;
3182 if (cpu_sub_arch_name)
3183 top->sub_name = xstrdup (cpu_sub_arch_name);
3184 else
3185 top->sub_name = NULL;
3186 top->flags = cpu_arch_flags;
3187 top->isa = cpu_arch_isa;
3188 top->isa_flags = cpu_arch_isa_flags;
3189 top->flag_code = flag_code;
3190 top->vector_size = vector_size;
3191 top->stackop_size = stackop_size;
3192 top->no_cond_jump_promotion = no_cond_jump_promotion;
3194 top->prev = arch_stack_top;
3195 arch_stack_top = top;
3197 (void) restore_line_pointer (e);
3198 demand_empty_rest_of_line ();
3199 return;
3202 if (strcmp (string, "pop") == 0)
3204 const arch_stack_entry *top = arch_stack_top;
3206 if (!top)
3208 as_bad (_(".arch stack is empty"));
3209 restore_bad:
3210 (void) restore_line_pointer (e);
3211 ignore_rest_of_line ();
3212 return;
3215 if (top->flag_code != flag_code
3216 || top->stackop_size != stackop_size)
3218 static const unsigned int bits[] = {
3219 [CODE_16BIT] = 16,
3220 [CODE_32BIT] = 32,
3221 [CODE_64BIT] = 64,
3224 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
3225 bits[top->flag_code],
3226 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
3227 goto restore_bad;
3230 arch_stack_top = top->prev;
3232 cpu_arch_name = top->name;
3233 free (cpu_sub_arch_name);
3234 cpu_sub_arch_name = top->sub_name;
3235 cpu_arch_flags = top->flags;
3236 cpu_arch_isa = top->isa;
3237 cpu_arch_isa_flags = top->isa_flags;
3238 vector_size = top->vector_size;
3239 no_cond_jump_promotion = top->no_cond_jump_promotion;
3241 XDELETE (top);
3243 (void) restore_line_pointer (e);
3244 demand_empty_rest_of_line ();
3245 return;
3248 if (strcmp (string, "default") == 0)
3250 if (strcmp (default_arch, "iamcu") == 0)
3251 string = default_arch;
3252 else
3254 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
3256 cpu_arch_name = NULL;
3257 free (cpu_sub_arch_name);
3258 cpu_sub_arch_name = NULL;
3259 cpu_arch_flags = cpu_unknown_flags;
3260 cpu_arch_isa = PROCESSOR_UNKNOWN;
3261 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
3262 if (!cpu_arch_tune_set)
3263 cpu_arch_tune = PROCESSOR_UNKNOWN;
3265 vector_size = VSZ_DEFAULT;
3267 j = ARRAY_SIZE (cpu_arch) + 1;
3271 for (; j < ARRAY_SIZE (cpu_arch); j++)
3273 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
3274 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
3276 if (*string != '.')
3278 check_cpu_arch_compatible (string, cpu_arch[j].enable);
3280 if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64 )
3282 as_bad (_("64bit mode not supported on `%s'."),
3283 cpu_arch[j].name);
3284 goto restore_bad;
3287 if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
3289 as_bad (_("32bit mode not supported on `%s'."),
3290 cpu_arch[j].name);
3291 goto restore_bad;
3294 cpu_arch_name = cpu_arch[j].name;
3295 free (cpu_sub_arch_name);
3296 cpu_sub_arch_name = NULL;
3297 cpu_arch_flags = cpu_arch[j].enable;
3298 cpu_arch_isa = cpu_arch[j].type;
3299 cpu_arch_isa_flags = cpu_arch[j].enable;
3300 if (!cpu_arch_tune_set)
3301 cpu_arch_tune = cpu_arch_isa;
3303 vector_size = VSZ_DEFAULT;
3305 pre_386_16bit_warned = false;
3306 break;
3309 if (cpu_flags_all_zero (&cpu_arch[j].enable))
3310 continue;
3312 isa_enable (j);
3314 (void) restore_line_pointer (e);
3316 switch (cpu_arch[j].vsz)
3318 default:
3319 break;
3321 case vsz_set:
3322 #ifdef SVR4_COMMENT_CHARS
3323 if (*input_line_pointer == ':' || *input_line_pointer == '/')
3324 #else
3325 if (*input_line_pointer == '/')
3326 #endif
3328 ++input_line_pointer;
3329 switch (get_absolute_expression ())
3331 case 512: vector_size = VSZ512; break;
3332 case 256: vector_size = VSZ256; break;
3333 case 128: vector_size = VSZ128; break;
3334 default:
3335 as_bad (_("Unrecognized vector size specifier"));
3336 ignore_rest_of_line ();
3337 return;
3339 break;
3341 /* Fall through. */
3342 case vsz_reset:
3343 vector_size = VSZ_DEFAULT;
3344 break;
3347 demand_empty_rest_of_line ();
3348 return;
3352 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
3354 /* Disable an ISA extension. */
3355 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
3356 if (cpu_arch[j].type == PROCESSOR_NONE
3357 && strcmp (string + 3, cpu_arch[j].name) == 0)
3359 isa_disable (j);
3361 if (cpu_arch[j].vsz == vsz_set)
3362 vector_size = VSZ_DEFAULT;
3364 (void) restore_line_pointer (e);
3365 demand_empty_rest_of_line ();
3366 return;
3370 if (j == ARRAY_SIZE (cpu_arch))
3372 as_bad (_("no such architecture: `%s'"), string);
3373 goto restore_bad;
3376 no_cond_jump_promotion = 0;
3377 if (restore_line_pointer (e) == ','
3378 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
3380 ++input_line_pointer;
3381 e = get_symbol_name (&s);
3382 string = s;
3384 if (strcmp (string, "nojumps") == 0)
3385 no_cond_jump_promotion = 1;
3386 else if (strcmp (string, "jumps") != 0)
3388 as_bad (_("no such architecture modifier: `%s'"), string);
3389 goto restore_bad;
3392 (void) restore_line_pointer (e);
3395 demand_empty_rest_of_line ();
3398 enum bfd_architecture
3399 i386_arch (void)
3401 if (cpu_arch_isa == PROCESSOR_IAMCU)
3403 if (!IS_ELF || flag_code == CODE_64BIT)
3404 as_fatal (_("Intel MCU is 32bit ELF only"));
3405 return bfd_arch_iamcu;
3407 else
3408 return bfd_arch_i386;
3411 unsigned long
3412 i386_mach (void)
3414 if (startswith (default_arch, "x86_64"))
3416 if (default_arch[6] == '\0')
3417 return bfd_mach_x86_64;
3418 else
3419 return bfd_mach_x64_32;
3421 else if (!strcmp (default_arch, "i386")
3422 || !strcmp (default_arch, "iamcu"))
3424 if (cpu_arch_isa == PROCESSOR_IAMCU)
3426 if (!IS_ELF)
3427 as_fatal (_("Intel MCU is 32bit ELF only"));
3428 return bfd_mach_i386_iamcu;
3430 else
3431 return bfd_mach_i386_i386;
3433 else
3434 as_fatal (_("unknown architecture"));
3437 #include "opcodes/i386-tbl.h"
3439 static void
3440 op_lookup (const char *mnemonic)
3442 i386_op_off_t *pos = str_hash_find (op_hash, mnemonic);
3444 if (pos != NULL)
3446 current_templates.start = &i386_optab[pos[0]];
3447 current_templates.end = &i386_optab[pos[1]];
3449 else
3450 current_templates.end = current_templates.start = NULL;
3453 void
3454 md_begin (void)
3456 /* Make sure possible padding space is clear. */
3457 memset (&pp, 0, sizeof (pp));
3459 /* Initialize op_hash hash table. */
3460 op_hash = str_htab_create ();
3463 const i386_op_off_t *cur = i386_op_sets;
3464 const i386_op_off_t *end = cur + ARRAY_SIZE (i386_op_sets) - 1;
3466 for (; cur < end; ++cur)
3467 if (str_hash_insert (op_hash, insn_name (&i386_optab[*cur]), cur, 0))
3468 as_fatal (_("duplicate %s"), insn_name (&i386_optab[*cur]));
3471 /* Initialize reg_hash hash table. */
3472 reg_hash = str_htab_create ();
3474 const reg_entry *regtab;
3475 unsigned int regtab_size = i386_regtab_size;
3477 for (regtab = i386_regtab; regtab_size--; regtab++)
3479 switch (regtab->reg_type.bitfield.class)
3481 case Reg:
3482 if (regtab->reg_type.bitfield.dword)
3484 if (regtab->reg_type.bitfield.instance == Accum)
3485 reg_eax = regtab;
3487 else if (regtab->reg_type.bitfield.tbyte)
3489 /* There's no point inserting st(<N>) in the hash table, as
3490 parentheses aren't included in register_chars[] anyway. */
3491 if (regtab->reg_type.bitfield.instance != Accum)
3492 continue;
3493 reg_st0 = regtab;
3495 break;
3497 case SReg:
3498 switch (regtab->reg_num)
3500 case 0: reg_es = regtab; break;
3501 case 2: reg_ss = regtab; break;
3502 case 3: reg_ds = regtab; break;
3504 break;
3506 case RegMask:
3507 if (!regtab->reg_num)
3508 reg_k0 = regtab;
3509 break;
3512 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3513 as_fatal (_("duplicate %s"), regtab->reg_name);
3517 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3519 int c;
3520 const char *p;
3522 for (c = 0; c < 256; c++)
3524 if (ISDIGIT (c) || ISLOWER (c))
3526 mnemonic_chars[c] = c;
3527 register_chars[c] = c;
3528 operand_chars[c] = c;
3530 else if (ISUPPER (c))
3532 mnemonic_chars[c] = TOLOWER (c);
3533 register_chars[c] = mnemonic_chars[c];
3534 operand_chars[c] = c;
3536 #ifdef SVR4_COMMENT_CHARS
3537 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3538 operand_chars[c] = c;
3539 #endif
3541 if (c >= 128)
3542 operand_chars[c] = c;
3545 mnemonic_chars['_'] = '_';
3546 mnemonic_chars['-'] = '-';
3547 mnemonic_chars['.'] = '.';
3549 for (p = extra_symbol_chars; *p != '\0'; p++)
3550 operand_chars[(unsigned char) *p] = *p;
3551 for (p = operand_special_chars; *p != '\0'; p++)
3552 operand_chars[(unsigned char) *p] = *p;
3555 if (object_64bit)
3557 #if defined (OBJ_COFF) && defined (TE_PE)
3558 x86_dwarf2_return_column = 32;
3559 #else
3560 x86_dwarf2_return_column = 16;
3561 #endif
3562 x86_cie_data_alignment = -8;
3563 #ifdef OBJ_ELF
3564 x86_sframe_cfa_sp_reg = REG_SP;
3565 x86_sframe_cfa_fp_reg = REG_FP;
3566 #endif
3568 else
3570 x86_dwarf2_return_column = 8;
3571 x86_cie_data_alignment = -4;
3574 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3575 can be turned into BRANCH_PREFIX frag. */
3576 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3577 abort ();
3580 void
3581 i386_print_statistics (FILE *file)
3583 htab_print_statistics (file, "i386 opcode", op_hash);
3584 htab_print_statistics (file, "i386 register", reg_hash);
3587 void
3588 i386_md_end (void)
3590 htab_delete (op_hash);
3591 htab_delete (reg_hash);
3594 #ifdef DEBUG386
3596 /* Debugging routines for md_assemble. */
3597 static void pte (insn_template *);
3598 static void pt (i386_operand_type);
3599 static void pe (expressionS *);
3600 static void ps (symbolS *);
3602 static void
3603 pi (const char *line, i386_insn *x)
3605 unsigned int j;
3607 fprintf (stdout, "%s: template ", line);
3608 pte (&x->tm);
3609 fprintf (stdout, " address: base %s index %s scale %x\n",
3610 x->base_reg ? x->base_reg->reg_name : "none",
3611 x->index_reg ? x->index_reg->reg_name : "none",
3612 x->log2_scale_factor);
3613 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3614 x->rm.mode, x->rm.reg, x->rm.regmem);
3615 fprintf (stdout, " sib: base %x index %x scale %x\n",
3616 x->sib.base, x->sib.index, x->sib.scale);
3617 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3618 (x->rex & REX_W) != 0,
3619 (x->rex & REX_R) != 0,
3620 (x->rex & REX_X) != 0,
3621 (x->rex & REX_B) != 0);
3622 for (j = 0; j < x->operands; j++)
3624 fprintf (stdout, " #%d: ", j + 1);
3625 pt (x->types[j]);
3626 fprintf (stdout, "\n");
3627 if (x->types[j].bitfield.class == Reg
3628 || x->types[j].bitfield.class == RegMMX
3629 || x->types[j].bitfield.class == RegSIMD
3630 || x->types[j].bitfield.class == RegMask
3631 || x->types[j].bitfield.class == SReg
3632 || x->types[j].bitfield.class == RegCR
3633 || x->types[j].bitfield.class == RegDR
3634 || x->types[j].bitfield.class == RegTR
3635 || x->types[j].bitfield.class == RegBND)
3636 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3637 if (operand_type_check (x->types[j], imm))
3638 pe (x->op[j].imms);
3639 if (operand_type_check (x->types[j], disp))
3640 pe (x->op[j].disps);
3644 static void
3645 pte (insn_template *t)
3647 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3648 static const char *const opc_spc[] = {
3649 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3650 "XOP08", "XOP09", "XOP0A",
3652 unsigned int j;
3654 fprintf (stdout, " %d operands ", t->operands);
3655 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3656 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3657 if (opc_spc[t->opcode_space])
3658 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3659 fprintf (stdout, "opcode %x ", t->base_opcode);
3660 if (t->extension_opcode != None)
3661 fprintf (stdout, "ext %x ", t->extension_opcode);
3662 if (t->opcode_modifier.d)
3663 fprintf (stdout, "D");
3664 if (t->opcode_modifier.w)
3665 fprintf (stdout, "W");
3666 fprintf (stdout, "\n");
3667 for (j = 0; j < t->operands; j++)
3669 fprintf (stdout, " #%d type ", j + 1);
3670 pt (t->operand_types[j]);
3671 fprintf (stdout, "\n");
3675 static void
3676 pe (expressionS *e)
3678 fprintf (stdout, " operation %d\n", e->X_op);
3679 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3680 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3681 if (e->X_add_symbol)
3683 fprintf (stdout, " add_symbol ");
3684 ps (e->X_add_symbol);
3685 fprintf (stdout, "\n");
3687 if (e->X_op_symbol)
3689 fprintf (stdout, " op_symbol ");
3690 ps (e->X_op_symbol);
3691 fprintf (stdout, "\n");
3695 static void
3696 ps (symbolS *s)
3698 fprintf (stdout, "%s type %s%s",
3699 S_GET_NAME (s),
3700 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3701 segment_name (S_GET_SEGMENT (s)));
3704 static struct type_name
3706 i386_operand_type mask;
3707 const char *name;
3709 const type_names[] =
3711 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3712 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3713 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3714 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3715 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3716 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3717 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3718 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3719 { { .bitfield = { .imm8 = 1 } }, "i8" },
3720 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3721 { { .bitfield = { .imm16 = 1 } }, "i16" },
3722 { { .bitfield = { .imm32 = 1 } }, "i32" },
3723 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3724 { { .bitfield = { .imm64 = 1 } }, "i64" },
3725 { { .bitfield = { .imm1 = 1 } }, "i1" },
3726 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3727 { { .bitfield = { .disp8 = 1 } }, "d8" },
3728 { { .bitfield = { .disp16 = 1 } }, "d16" },
3729 { { .bitfield = { .disp32 = 1 } }, "d32" },
3730 { { .bitfield = { .disp64 = 1 } }, "d64" },
3731 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3732 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3733 { { .bitfield = { .class = RegCR } }, "control reg" },
3734 { { .bitfield = { .class = RegTR } }, "test reg" },
3735 { { .bitfield = { .class = RegDR } }, "debug reg" },
3736 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3737 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3738 { { .bitfield = { .class = SReg } }, "SReg" },
3739 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3740 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3741 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3742 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3743 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3744 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3747 static void
3748 pt (i386_operand_type t)
3750 unsigned int j;
3751 i386_operand_type a;
3753 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3755 a = operand_type_and (t, type_names[j].mask);
3756 if (operand_type_equal (&a, &type_names[j].mask))
3757 fprintf (stdout, "%s, ", type_names[j].name);
3759 fflush (stdout);
3762 #endif /* DEBUG386 */
3764 static bfd_reloc_code_real_type
3765 reloc (unsigned int size,
3766 int pcrel,
3767 int sign,
3768 bfd_reloc_code_real_type other)
3770 if (other != NO_RELOC)
3772 reloc_howto_type *rel;
3774 if (size == 8)
3775 switch (other)
3777 case BFD_RELOC_X86_64_GOT32:
3778 return BFD_RELOC_X86_64_GOT64;
3779 break;
3780 case BFD_RELOC_X86_64_GOTPLT64:
3781 return BFD_RELOC_X86_64_GOTPLT64;
3782 break;
3783 case BFD_RELOC_X86_64_PLTOFF64:
3784 return BFD_RELOC_X86_64_PLTOFF64;
3785 break;
3786 case BFD_RELOC_X86_64_GOTPC32:
3787 other = BFD_RELOC_X86_64_GOTPC64;
3788 break;
3789 case BFD_RELOC_X86_64_GOTPCREL:
3790 other = BFD_RELOC_X86_64_GOTPCREL64;
3791 break;
3792 case BFD_RELOC_X86_64_TPOFF32:
3793 other = BFD_RELOC_X86_64_TPOFF64;
3794 break;
3795 case BFD_RELOC_X86_64_DTPOFF32:
3796 other = BFD_RELOC_X86_64_DTPOFF64;
3797 break;
3798 default:
3799 break;
3802 #ifdef OBJ_ELF
3803 if (other == BFD_RELOC_SIZE32)
3805 if (size == 8)
3806 other = BFD_RELOC_SIZE64;
3807 if (pcrel)
3809 as_bad (_("there are no pc-relative size relocations"));
3810 return NO_RELOC;
3813 #endif
3815 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3816 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3817 sign = -1;
3819 rel = bfd_reloc_type_lookup (stdoutput, other);
3820 if (!rel)
3821 as_bad (_("unknown relocation (%u)"), other);
3822 else if (size != bfd_get_reloc_size (rel))
3823 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3824 bfd_get_reloc_size (rel),
3825 size);
3826 else if (pcrel && !rel->pc_relative)
3827 as_bad (_("non-pc-relative relocation for pc-relative field"));
3828 else if ((rel->complain_on_overflow == complain_overflow_signed
3829 && !sign)
3830 || (rel->complain_on_overflow == complain_overflow_unsigned
3831 && sign > 0))
3832 as_bad (_("relocated field and relocation type differ in signedness"));
3833 else
3834 return other;
3835 return NO_RELOC;
3838 if (pcrel)
3840 if (!sign)
3841 as_bad (_("there are no unsigned pc-relative relocations"));
3842 switch (size)
3844 case 1: return BFD_RELOC_8_PCREL;
3845 case 2: return BFD_RELOC_16_PCREL;
3846 case 4: return BFD_RELOC_32_PCREL;
3847 case 8: return BFD_RELOC_64_PCREL;
3849 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3851 else
3853 if (sign > 0)
3854 switch (size)
3856 case 4: return BFD_RELOC_X86_64_32S;
3858 else
3859 switch (size)
3861 case 1: return BFD_RELOC_8;
3862 case 2: return BFD_RELOC_16;
3863 case 4: return BFD_RELOC_32;
3864 case 8: return BFD_RELOC_64;
3866 as_bad (_("cannot do %s %u byte relocation"),
3867 sign > 0 ? "signed" : "unsigned", size);
3870 return NO_RELOC;
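/* E.g. reloc (4, 1, 1, NO_RELOC) -> BFD_RELOC_32_PCREL and
   reloc (4, 0, 1, NO_RELOC) -> BFD_RELOC_X86_64_32S; an unsigned
   pc-relative request is diagnosed but still resolved by size.  */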
3873 #ifdef OBJ_ELF
3874 /* Here we decide which fixups can be adjusted to make them relative to
3875 the beginning of the section instead of the symbol. Basically we need
3876 to make sure that the dynamic relocations are done correctly, so in
3877 some cases we force the original symbol to be used. */
3880 tc_i386_fix_adjustable (fixS *fixP)
3882 /* Don't adjust pc-relative references to merge sections in 64-bit
3883 mode. */
3884 if (use_rela_relocations
3885 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3886 && fixP->fx_pcrel)
3887 return 0;
3889 /* The x86_64 GOTPCREL relocations are represented as 32-bit PC-relative
3890 relocations and changed later by validate_fix. */
3891 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3892 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3893 return 0;
3895 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3896 for size relocations. */
3897 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3898 || fixP->fx_r_type == BFD_RELOC_SIZE64
3899 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3900 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3901 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3902 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3903 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3904 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3905 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3906 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3907 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3908 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3909 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3910 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3911 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3912 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3913 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3914 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3915 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3916 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPCRELX
3917 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3918 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3919 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3920 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3921 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3922 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTTPOFF
3923 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTTPOFF
3924 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3925 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3926 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3927 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT64
3928 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3929 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
3930 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3931 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3932 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3933 return 0;
3934 /* Resolve PLT32 relocation against local symbol to section only for
3935 PC-relative relocations. */
3936 if (fixP->fx_r_type == BFD_RELOC_386_PLT32
3937 || fixP->fx_r_type == BFD_RELOC_X86_64_PLT32)
3938 return fixP->fx_pcrel;
3939 return 1;
3941 #endif
3943 static INLINE bool
3944 want_disp32 (const insn_template *t)
3946 return flag_code != CODE_64BIT
3947 || i.prefix[ADDR_PREFIX]
3948 || ((t->mnem_off == MN_lea
3949 || (i.tm.base_opcode == 0x8d && i.tm.opcode_space == SPACE_BASE))
3950 && (!i.types[1].bitfield.qword
3951 || t->opcode_modifier.size == SIZE32));
3954 static int
3955 intel_float_operand (const char *mnemonic)
3957 /* Note that the value returned is meaningful only for opcodes with (memory)
3958 operands, hence the code here is free to improperly handle opcodes that
3959 have no operands (for better performance and smaller code). */
3961 if (mnemonic[0] != 'f')
3962 return 0; /* non-math */
3964 switch (mnemonic[1])
3966 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3967 the fs segment override prefix are not currently handled, because
3968 no call path can make opcodes without operands get here */
3969 case 'i':
3970 return 2 /* integer op */;
3971 case 'l':
3972 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3973 return 3; /* fldcw/fldenv */
3974 break;
3975 case 'n':
3976 if (mnemonic[2] != 'o' /* fnop */)
3977 return 3; /* non-waiting control op */
3978 break;
3979 case 'r':
3980 if (mnemonic[2] == 's')
3981 return 3; /* frstor/frstpm */
3982 break;
3983 case 's':
3984 if (mnemonic[2] == 'a')
3985 return 3; /* fsave */
3986 if (mnemonic[2] == 't')
3988 switch (mnemonic[3])
3990 case 'c': /* fstcw */
3991 case 'd': /* fstdw */
3992 case 'e': /* fstenv */
3993 case 's': /* fsts[gw] */
3994 return 3;
3997 break;
3998 case 'x':
3999 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
4000 return 0; /* fxsave/fxrstor are not really math ops */
4001 break;
4004 return 1;
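/* So e.g. "fadd" -> 1 (plain FPU op), "fiadd" -> 2 (integer operand),
   "fldcw" and "fnsave" -> 3 (control op), and "fxsave" -> 0 (not a math
   op).  */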
4007 static INLINE void
4008 install_template (const insn_template *t)
4010 unsigned int l;
4012 i.tm = *t;
4014 /* Dual VEX/EVEX templates need stripping one of the possible variants. */
4015 if (t->opcode_modifier.vex && t->opcode_modifier.evex)
4017 if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
4018 || maybe_cpu (t, CpuFMA))
4019 && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
4021 if (need_evex_encoding (t))
4023 i.tm.opcode_modifier.vex = 0;
4024 i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
4025 i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
4027 else
4029 i.tm.opcode_modifier.evex = 0;
4030 if (i.tm.cpu_any.bitfield.cpuavx)
4031 i.tm.cpu.bitfield.cpuavx = 1;
4032 else if (!i.tm.cpu.bitfield.isa)
4033 i.tm.cpu.bitfield.isa = i.tm.cpu_any.bitfield.isa;
4034 else
4035 gas_assert (i.tm.cpu.bitfield.isa == i.tm.cpu_any.bitfield.isa);
4039 if ((maybe_cpu (t, CpuCMPCCXADD) || maybe_cpu (t, CpuAMX_TILE)
4040 || maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512DQ)
4041 || maybe_cpu (t, CpuAVX512BW) || maybe_cpu (t, CpuBMI)
4042 || maybe_cpu (t, CpuBMI2) || maybe_cpu (t, CpuUSER_MSR)
4043 || maybe_cpu (t, CpuMSR_IMM))
4044 && maybe_cpu (t, CpuAPX_F))
4046 if (need_evex_encoding (t))
4047 i.tm.opcode_modifier.vex = 0;
4048 else
4049 i.tm.opcode_modifier.evex = 0;
4053 /* For CCMP and CTEST the template has EVEX.SCC in base_opcode. Move it out of
4054 there, then adjust base_opcode to recover its normal meaning. */
4055 if (i.tm.opcode_modifier.operandconstraint == SCC)
4057 /* Get EVEX.SCC value from the lower 4 bits of base_opcode. */
4058 i.scc = i.tm.base_opcode & 0xf;
4059 i.tm.base_opcode >>= 8;
4062 /* For CMOVcc having undergone NDD-to-legacy optimization with its source
4063 operands being swapped, we need to invert the encoded condition. */
4064 if (i.invert_cond)
4065 i.tm.base_opcode ^= 1;
4067 /* Note that for pseudo prefixes this produces a length of 1. But for them
4068 the length isn't interesting at all. */
4069 for (l = 1; l < 4; ++l)
4070 if (!(i.tm.base_opcode >> (8 * l)))
4071 break;
4073 i.opcode_length = l;
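/* Example (editorial sketch): a one-byte base_opcode such as 0x90 yields
   i.opcode_length == 1, while a two-byte value such as 0x38c8 yields 2.
   The opcode space (0F, 0F38, ...) is tracked separately in opcode_space
   and doesn't enter into this length.  */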
4076 /* Build the VEX prefix. */
4078 static void
4079 build_vex_prefix (const insn_template *t)
4081 unsigned int register_specifier;
4082 unsigned int vector_length;
4083 bool w;
4085 /* Check register specifier. */
4086 if (i.vex.register_specifier)
4088 register_specifier =
4089 ~register_number (i.vex.register_specifier) & 0xf;
4090 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
4092 else
4093 register_specifier = 0xf;
4095 /* Use 2-byte VEX prefix by swapping destination and source operand
4096 if there is more than one register operand. */
4097 if (i.reg_operands > 1
4098 && pp.encoding != encoding_vex3
4099 && pp.dir_encoding == dir_encoding_default
4100 && i.operands == i.reg_operands
4101 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
4102 && i.tm.opcode_space == SPACE_0F
4103 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
4104 && i.rex == REX_B)
4106 unsigned int xchg;
4108 swap_2_operands (0, i.operands - 1);
4110 gas_assert (i.rm.mode == 3);
4112 i.rex = REX_R;
4113 xchg = i.rm.regmem;
4114 i.rm.regmem = i.rm.reg;
4115 i.rm.reg = xchg;
4117 if (i.tm.opcode_modifier.d)
4118 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
4119 ? Opcode_ExtD : Opcode_SIMD_IntD;
4120 else /* Use the next insn. */
4121 install_template (&t[1]);
4124 /* Use 2-byte VEX prefix by swapping commutative source operands if there
4125 are no memory operands and at least 3 register ones. */
4126 if (i.reg_operands >= 3
4127 && pp.encoding != encoding_vex3
4128 && i.reg_operands == i.operands - i.imm_operands
4129 && i.tm.opcode_modifier.vex
4130 && i.tm.opcode_modifier.commutative
4131 /* .commutative aliases .staticrounding; disambiguate. */
4132 && !i.tm.opcode_modifier.sae
4133 && (i.tm.opcode_modifier.sse2avx
4134 || (optimize > 1 && !pp.no_optimize))
4135 && i.rex == REX_B
4136 && i.vex.register_specifier
4137 && !(i.vex.register_specifier->reg_flags & RegRex))
4139 unsigned int xchg = i.operands - i.reg_operands;
4141 gas_assert (i.tm.opcode_space == SPACE_0F);
4142 gas_assert (!i.tm.opcode_modifier.sae);
4143 gas_assert (operand_type_equal (&i.types[i.operands - 2],
4144 &i.types[i.operands - 3]));
4145 gas_assert (i.rm.mode == 3);
4147 swap_2_operands (xchg, xchg + 1);
4149 i.rex = 0;
4150 xchg = i.rm.regmem | 8;
4151 i.rm.regmem = ~register_specifier & 0xf;
4152 gas_assert (!(i.rm.regmem & 8));
4153 i.vex.register_specifier += xchg - i.rm.regmem;
4154 register_specifier = ~xchg & 0xf;
4157 if (i.tm.opcode_modifier.vex == VEXScalar)
4158 vector_length = avxscalar;
4159 else if (i.tm.opcode_modifier.vex == VEX256)
4160 vector_length = 1;
4161 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
4162 vector_length = 0;
4163 else
4165 unsigned int op;
4167 /* Determine vector length from the last multi-length vector
4168 operand. */
4169 vector_length = 0;
4170 for (op = t->operands; op--;)
4171 if (t->operand_types[op].bitfield.xmmword
4172 && t->operand_types[op].bitfield.ymmword
4173 && i.types[op].bitfield.ymmword)
4175 vector_length = 1;
4176 break;
4180 /* Check the REX.W bit and VEXW. */
4181 if (i.tm.opcode_modifier.vexw == VEXWIG)
4182 w = vexwig == vexw1 || (i.rex & REX_W);
4183 else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
4184 w = i.tm.opcode_modifier.vexw == VEXW1;
4185 else
4186 w = flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1;
4188 /* Use 2-byte VEX prefix if possible. */
4189 if (w == 0
4190 && pp.encoding != encoding_vex3
4191 && i.tm.opcode_space == SPACE_0F
4192 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
4194 /* 2-byte VEX prefix. */
4195 bool r;
4197 i.vex.length = 2;
4198 i.vex.bytes[0] = 0xc5;
4200 /* Check the REX.R bit. */
4201 r = !(i.rex & REX_R);
4202 i.vex.bytes[1] = (r << 7
4203 | register_specifier << 3
4204 | vector_length << 2
4205 | i.tm.opcode_modifier.opcodeprefix);
4207 else
4209 /* 3-byte VEX prefix. */
4210 i.vex.length = 3;
4212 switch (i.tm.opcode_space)
4214 case SPACE_0F:
4215 case SPACE_0F38:
4216 case SPACE_0F3A:
4217 case SPACE_MAP7:
4218 i.vex.bytes[0] = 0xc4;
4219 break;
4220 case SPACE_XOP08:
4221 case SPACE_XOP09:
4222 case SPACE_XOP0A:
4223 i.vex.bytes[0] = 0x8f;
4224 break;
4225 default:
4226 abort ();
4229 /* The high 3 bits of the second VEX byte are the one's complement
4230 of the RXB bits from REX. */
4231 i.vex.bytes[1] = ((~i.rex & 7) << 5)
4232 | (!dot_insn () ? i.tm.opcode_space
4233 : i.insn_opcode_space);
4235 i.vex.bytes[2] = (w << 7
4236 | register_specifier << 3
4237 | vector_length << 2
4238 | i.tm.opcode_modifier.opcodeprefix);
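/* Worked example (editorial sketch): with no REX bits set, no register
   specifier (vvvv therefore 1111), 128-bit vector length, and no opcode
   prefix, as for `vzeroupper', the 2-byte form above yields 0xc5 followed
   by (1 << 7) | (0xf << 3) | (0 << 2) | 0 == 0xf8, giving the familiar
   c5 f8 77 encoding once the opcode byte is appended.  */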
4242 static INLINE bool
4243 is_any_vex_encoding (const insn_template *t)
4245 return t->opcode_modifier.vex || t->opcode_modifier.evex;
4248 /* We can use this function only when the current encoding is EVEX. */
4249 static INLINE bool
4250 is_apx_evex_encoding (void)
4252 return i.rex2 || i.tm.opcode_space == SPACE_MAP4 || pp.has_nf
4253 || (i.vex.register_specifier
4254 && (i.vex.register_specifier->reg_flags & RegRex2));
4257 static INLINE bool
4258 is_apx_rex2_encoding (void)
4260 return i.rex2 || pp.rex2_encoding
4261 || i.tm.opcode_modifier.rex2;
4264 static unsigned int
4265 get_broadcast_bytes (const insn_template *t, bool diag)
4267 unsigned int op, bytes;
4268 const i386_operand_type *types;
4270 if (i.broadcast.type)
4271 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
4273 gas_assert (intel_syntax);
4275 for (op = 0; op < t->operands; ++op)
4276 if (t->operand_types[op].bitfield.baseindex)
4277 break;
4279 gas_assert (op < t->operands);
4281 if (t->opcode_modifier.evex != EVEXDYN)
4282 switch (i.broadcast.bytes)
4284 case 1:
4285 if (t->operand_types[op].bitfield.word)
4286 return 2;
4287 /* Fall through. */
4288 case 2:
4289 if (t->operand_types[op].bitfield.dword)
4290 return 4;
4291 /* Fall through. */
4292 case 4:
4293 if (t->operand_types[op].bitfield.qword)
4294 return 8;
4295 /* Fall through. */
4296 case 8:
4297 if (t->operand_types[op].bitfield.xmmword)
4298 return 16;
4299 if (t->operand_types[op].bitfield.ymmword)
4300 return 32;
4301 if (t->operand_types[op].bitfield.zmmword)
4302 return 64;
4303 /* Fall through. */
4304 default:
4305 abort ();
4308 gas_assert (op + 1 < t->operands);
4310 if (t->operand_types[op + 1].bitfield.xmmword
4311 + t->operand_types[op + 1].bitfield.ymmword
4312 + t->operand_types[op + 1].bitfield.zmmword > 1)
4314 types = &i.types[op + 1];
4315 diag = false;
4317 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
4318 types = &t->operand_types[op];
4320 if (types->bitfield.zmmword)
4321 bytes = 64;
4322 else if (types->bitfield.ymmword)
4323 bytes = 32;
4324 else
4325 bytes = 16;
4327 if (diag)
4328 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
4329 insn_name (t), bytes * 8);
4331 return bytes;
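/* Example (editorial sketch, assuming the usual DWORD_BROADCAST value of 3):
   for an AT&T-style `{1to16}' dword broadcast i.broadcast.type is 16, so
   the first return above computes (1 << (3 - 1)) * 16 == 64 bytes, i.e. a
   full zmm-wide source.  */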
4334 /* Build the EVEX prefix. */
4336 static void
4337 build_evex_prefix (void)
4339 unsigned int register_specifier;
4340 bool w, u;
4341 rex_byte vrex_used = 0;
4343 /* Check register specifier. */
4344 if (i.vex.register_specifier)
4346 gas_assert ((i.vrex & REX_X) == 0);
4348 register_specifier = i.vex.register_specifier->reg_num;
4349 if ((i.vex.register_specifier->reg_flags & RegRex))
4350 register_specifier += 8;
4351 /* The upper 16 registers are encoded in the fourth byte of the
4352 EVEX prefix. */
4353 if (!(i.vex.register_specifier->reg_flags & RegVRex))
4354 i.vex.bytes[3] = 0x8;
4355 register_specifier = ~register_specifier & 0xf;
4357 else
4359 register_specifier = 0xf;
4361 /* Encode upper 16 vector index register in the fourth byte of
4362 the EVEX prefix. */
4363 if (!(i.vrex & REX_X))
4364 i.vex.bytes[3] = 0x8;
4365 else
4366 vrex_used |= REX_X;
4369 /* 4 byte EVEX prefix. */
4370 i.vex.length = 4;
4371 i.vex.bytes[0] = 0x62;
4373 /* The high 3 bits of the second EVEX byte are the one's complement of the
4374 RXB bits from REX. */
4375 gas_assert (i.tm.opcode_space >= SPACE_0F);
4376 gas_assert (i.tm.opcode_space <= SPACE_MAP7);
4377 i.vex.bytes[1] = ((~i.rex & 7) << 5)
4378 | (!dot_insn () ? i.tm.opcode_space
4379 : i.insn_opcode_space);
4381 /* The fifth bit of the second EVEX byte is the one's complement of the
4382 REX_R bit in VREX. */
4383 if (!(i.vrex & REX_R))
4384 i.vex.bytes[1] |= 0x10;
4385 else
4386 vrex_used |= REX_R;
4388 if ((i.reg_operands + i.imm_operands) == i.operands)
4390 /* When all operands are registers, the REX_X bit in REX is not
4391 used. We reuse it to encode the upper 16 registers, which is
4392 indicated by the REX_B bit in VREX. The REX_X bit is encoded
4393 as its one's complement. */
4394 if ((i.vrex & REX_B))
4396 vrex_used |= REX_B;
4397 i.vex.bytes[1] &= ~0x40;
4401 /* EVEX instructions shouldn't need the REX prefix. */
4402 i.vrex &= ~vrex_used;
4403 gas_assert (i.vrex == 0);
4405 /* Check the REX.W bit and VEXW. */
4406 if (i.tm.opcode_modifier.vexw == VEXWIG)
4407 w = evexwig == evexw1 || (i.rex & REX_W);
4408 else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
4409 w = i.tm.opcode_modifier.vexw == VEXW1;
4410 else
4411 w = flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1;
4413 if (i.tm.opcode_modifier.evex == EVEXDYN)
4415 unsigned int op;
4417 /* Determine vector length from the last multi-length vector operand. */
4418 for (op = i.operands; op--;)
4419 if (i.tm.operand_types[op].bitfield.xmmword
4420 + i.tm.operand_types[op].bitfield.ymmword
4421 + i.tm.operand_types[op].bitfield.zmmword > 1)
4423 if (i.types[op].bitfield.zmmword)
4425 i.tm.opcode_modifier.evex = EVEX512;
4426 break;
4428 else if (i.types[op].bitfield.ymmword)
4430 i.tm.opcode_modifier.evex = EVEX256;
4431 break;
4433 else if (i.types[op].bitfield.xmmword)
4435 i.tm.opcode_modifier.evex = EVEX128;
4436 break;
4438 else if ((i.broadcast.type || i.broadcast.bytes)
4439 && op == i.broadcast.operand)
4441 switch (get_broadcast_bytes (&i.tm, true))
4443 case 64:
4444 i.tm.opcode_modifier.evex = EVEX512;
4445 break;
4446 case 32:
4447 i.tm.opcode_modifier.evex = EVEX256;
4448 break;
4449 case 16:
4450 i.tm.opcode_modifier.evex = EVEX128;
4451 break;
4452 default:
4453 abort ();
4455 break;
4459 if (op >= MAX_OPERANDS)
4460 abort ();
4463 u = i.rounding.type == rc_none || i.tm.opcode_modifier.evex != EVEX256;
4465 /* The third byte of the EVEX prefix. */
4466 i.vex.bytes[2] = ((w << 7)
4467 | (register_specifier << 3)
4468 | (u << 2)
4469 | i.tm.opcode_modifier.opcodeprefix);
4471 /* The fourth byte of the EVEX prefix. */
4472 /* The zeroing-masking bit. */
4473 if (i.mask.reg && i.mask.zeroing)
4474 i.vex.bytes[3] |= 0x80;
4476 /* Don't always set the broadcast bit if there is no RC. */
4477 if (i.rounding.type == rc_none)
4479 /* Encode the vector length. */
4480 unsigned int vec_length;
4482 switch (i.tm.opcode_modifier.evex)
4484 case EVEXLIG: /* LL' is ignored */
4485 vec_length = evexlig << 5;
4486 break;
4487 case EVEX128:
4488 vec_length = 0 << 5;
4489 break;
4490 case EVEX256:
4491 vec_length = 1 << 5;
4492 break;
4493 case EVEX512:
4494 vec_length = 2 << 5;
4495 break;
4496 case EVEX_L3:
4497 if (dot_insn ())
4499 vec_length = 3 << 5;
4500 break;
4502 /* Fall through. */
4503 default:
4504 abort ();
4505 break;
4507 i.vex.bytes[3] |= vec_length;
4508 /* Encode the broadcast bit. */
4509 if (i.broadcast.type || i.broadcast.bytes)
4510 i.vex.bytes[3] |= 0x10;
4512 else if (i.rounding.type != saeonly)
4513 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
4514 else
4515 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
4517 if (i.mask.reg)
4518 i.vex.bytes[3] |= i.mask.reg->reg_num;
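/* Worked example (editorial sketch): for EVEX.512.0F.W0 58 /r, i.e.
   `vaddps %zmm2, %zmm1, %zmm0', the bytes built above come out as
   62 f1 74 48: P0 carries the inverted RXBR' bits (1111) over map 001,
   P1 has W == 0, inverted vvvv for %zmm1 (1110), the fixed bit, and
   pp == 00, and P2 has L'L == 10 for 512-bit length, V' set, and
   aaa == 000.  */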
4521 /* Build the 2-byte REX2 prefix.
4522 | D5h |
4523 | m | R4 X4 B4 | W R X B |
4525 REX2 reuses i.vex, as both encode i.tm.opcode_space in their prefixes.
4527 static void
4528 build_rex2_prefix (void)
4530 i.vex.length = 2;
4531 i.vex.bytes[0] = 0xd5;
4532 /* For the W R X B bits, the variables of rex prefix will be reused. */
4533 i.vex.bytes[1] = ((i.tm.opcode_space << 7)
4534 | (i.rex2 << 4)
4535 | ((i.rex | i.prefix[REX_PREFIX]) & 0xf));
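/* Worked example (editorial sketch): `addq %r16, %rax' in the legacy map,
   encoded as 01 /r with %r16 in the reg field, needs R4 for %r16 and
   REX.W for the 64-bit operation, so the prefix above comes out as d5 48:
   m == 0, R4 X4 B4 == 100, W R X B == 1000, followed by 01 c0.  */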
4538 /* Build the 4-byte EVEX prefix for an EVEX insn:
4539 | 62h |
4540 | `R`X`B`R' | B'mmm |
4541 | W | v`v`v`v | `x' | pp |
4542 | z| L'L | b | `v | aaa |
4544 static bool
4545 build_apx_evex_prefix (void)
4547 /* To mimic behavior for legacy insns, transform use of DATA16 and REX64 into
4548 their embedded-prefix representations. */
4549 if (i.tm.opcode_space == SPACE_MAP4)
4551 if (i.prefix[DATA_PREFIX])
4553 if (i.tm.opcode_modifier.opcodeprefix)
4555 as_bad (i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66
4556 ? _("same type of prefix used twice")
4557 : _("conflicting use of `data16' prefix"));
4558 return false;
4560 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
4561 i.prefix[DATA_PREFIX] = 0;
4563 if (i.prefix[REX_PREFIX] & REX_W)
4565 if (i.suffix == QWORD_MNEM_SUFFIX)
4567 as_bad (_("same type of prefix used twice"));
4568 return false;
4570 i.tm.opcode_modifier.vexw = VEXW1;
4571 i.prefix[REX_PREFIX] = 0;
4575 build_evex_prefix ();
4576 if (i.rex2 & REX_R)
4577 i.vex.bytes[1] &= ~0x10;
4578 if (i.rex2 & REX_B)
4579 i.vex.bytes[1] |= 0x08;
4580 if (i.rex2 & REX_X)
4582 gas_assert (i.rm.mode != 3);
4583 i.vex.bytes[2] &= ~0x04;
4585 if (i.vex.register_specifier
4586 && i.vex.register_specifier->reg_flags & RegRex2)
4587 i.vex.bytes[3] &= ~0x08;
4589 /* Encode the NDD bit of the instruction promoted from the legacy
4590 space. ZU shares the same bit with NDD. */
4591 if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_MAP4)
4592 || i.tm.opcode_modifier.operandconstraint == ZERO_UPPER)
4593 i.vex.bytes[3] |= 0x10;
4595 /* Encode SCC and oszc flags bits. */
4596 if (i.tm.opcode_modifier.operandconstraint == SCC)
4598 /* The default value of vvvv is 1111 and needs to be cleared. */
4599 i.vex.bytes[2] &= ~0x78;
4600 i.vex.bytes[2] |= (i.oszc_flags << 3);
4601 /* ND and aaa bits should be 0. */
4602 know (!(i.vex.bytes[3] & 0x17));
4603 /* The default value of V' is 1 and needs to be cleared. */
4604 i.vex.bytes[3] = (i.vex.bytes[3] & ~0x08) | i.scc;
4607 /* Encode the NF bit. */
4608 if (pp.has_nf || i.tm.opcode_modifier.operandconstraint == EVEX_NF)
4609 i.vex.bytes[3] |= 0x04;
4611 return true;
4614 static void establish_rex (void)
4616 /* Note that legacy encodings have at most 2 non-immediate operands. */
4617 unsigned int first = i.imm_operands;
4618 unsigned int last = i.operands > first ? i.operands - first - 1 : first;
4620 /* Respect a user-specified REX prefix. */
4621 i.rex |= i.prefix[REX_PREFIX] & REX_OPCODE;
4623 /* For 8 bit RegRex64 registers without a prefix, we need an empty REX prefix. */
4624 if (((i.types[first].bitfield.class == Reg
4625 && (i.op[first].regs->reg_flags & RegRex64) != 0)
4626 || (i.types[last].bitfield.class == Reg
4627 && (i.op[last].regs->reg_flags & RegRex64) != 0))
4628 && !is_apx_rex2_encoding () && !is_any_vex_encoding (&i.tm))
4629 i.rex |= REX_OPCODE;
4631 /* For REX/REX2/EVEX prefix instructions, we need to convert old registers
4632 (AL, CL, DL and BL) to new ones (AXL, CXL, DXL and BXL) and reject AH,
4633 CH, DH and BH. */
4634 if (i.rex || i.rex2 || i.tm.opcode_modifier.evex)
4636 for (unsigned int x = first; x <= last; x++)
4638 /* Look for 8 bit operand that uses old registers. */
4639 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4640 && !(i.op[x].regs->reg_flags & (RegRex | RegRex2 | RegRex64)))
4642 /* If it is a "hi" register, give up. */
4643 if (i.op[x].regs->reg_num > 3)
4644 as_bad (_("can't encode register '%s%s' in an "
4645 "instruction requiring %s prefix"),
4646 register_prefix, i.op[x].regs->reg_name,
4647 i.tm.opcode_modifier.evex ? "EVEX" : "REX/REX2");
4649 /* Otherwise it is equivalent to the extended register.
4650 Since the encoding doesn't change this is merely
4651 cosmetic cleanup for debug output. */
4652 i.op[x].regs += 8;
4657 if (i.rex == 0 && i.rex2 == 0 && (pp.rex_encoding || pp.rex2_encoding))
4659 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
4660 that uses a legacy register. If it is a "hi" register, don't add
4661 a REX or REX2 prefix. */
4662 unsigned int x;
4664 for (x = first; x <= last; x++)
4665 if (i.types[x].bitfield.class == Reg
4666 && i.types[x].bitfield.byte
4667 && !(i.op[x].regs->reg_flags & (RegRex | RegRex2 | RegRex64))
4668 && i.op[x].regs->reg_num > 3)
4670 pp.rex_encoding = false;
4671 pp.rex2_encoding = false;
4672 break;
4675 if (pp.rex_encoding)
4676 i.rex = REX_OPCODE;
4679 if (is_apx_rex2_encoding ())
4681 /* Most prefixes are not permitted with JMPABS. */
4682 if (i.tm.mnem_off == MN_jmpabs)
4684 if (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
4686 as_bad (_("size override not allowed with `%s'"),
4687 insn_name (&i.tm));
4688 i.prefix[DATA_PREFIX] = 0;
4689 i.prefix[REX_PREFIX] &= ~REX_W;
4691 if (i.prefix[ADDR_PREFIX])
4693 as_bad (_("address override not allowed with `%s'"),
4694 insn_name (&i.tm));
4695 i.prefix[ADDR_PREFIX] = 0;
4699 build_rex2_prefix ();
4700 /* The individual REX.RXBW bits got consumed. */
4701 i.rex &= REX_OPCODE;
4702 i.prefix[REX_PREFIX] = 0;
4704 else if (i.rex != 0)
4705 add_prefix (REX_OPCODE | i.rex);
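/* Example (editorial sketch): `movb %sil, %al' sets no REX payload bits,
   yet %sil is only addressable when a REX prefix is present, so the
   RegRex64 check above forces an empty REX and the encoding becomes
   40 88 f0.  */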
4708 static void
4709 process_immext (void)
4711 expressionS *exp;
4713 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4714 which is coded in the same place as an 8-bit immediate field
4715 would be. Here we fake an 8-bit immediate operand from the
4716 opcode suffix stored in tm.extension_opcode.
4718 AVX instructions also use this encoding, for some
4719 3-operand instructions. */
4721 gas_assert (i.imm_operands <= 1
4722 && (i.operands <= 2
4723 || (is_any_vex_encoding (&i.tm)
4724 && i.operands <= 4)));
4726 exp = &im_expressions[i.imm_operands++];
4727 i.op[i.operands].imms = exp;
4728 i.types[i.operands].bitfield.imm8 = 1;
4729 i.operands++;
4730 exp->X_op = O_constant;
4731 exp->X_add_number = i.tm.extension_opcode;
4732 i.tm.extension_opcode = None;
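/* Example (editorial sketch): the 3DNow! insn `pfadd %mm1, %mm0' is
   0f 0f /r with suffix byte 0x9e; the code above turns that suffix into a
   faked imm8 operand, so that the generic immediate machinery emits it
   after the ModR/M byte.  */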
4736 static int
4737 check_hle (void)
4739 switch (i.tm.opcode_modifier.prefixok)
4741 default:
4742 abort ();
4743 case PrefixLock:
4744 case PrefixNone:
4745 case PrefixNoTrack:
4746 case PrefixRep:
4747 as_bad (_("invalid instruction `%s' after `%s'"),
4748 insn_name (&i.tm), i.hle_prefix);
4749 return 0;
4750 case PrefixHLELock:
4751 if (i.prefix[LOCK_PREFIX])
4752 return 1;
4753 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4754 return 0;
4755 case PrefixHLEAny:
4756 return 1;
4757 case PrefixHLERelease:
4758 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4760 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4761 insn_name (&i.tm));
4762 return 0;
4764 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4766 as_bad (_("memory destination needed for instruction `%s'"
4767 " after `xrelease'"), insn_name (&i.tm));
4768 return 0;
4770 return 1;
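/* Examples (editorial sketch): `xacquire lock addl $1, (%rax)' passes the
   PrefixHLELock case since `lock' is present, while `xacquire addl $1,
   (%rax)' is rejected with "missing `lock'".  `xrelease movl %eax,
   (%rbx)' passes the PrefixHLERelease case because the destination is a
   memory operand.  */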
4774 /* Helper for optimization (running ahead of process_suffix()), to make sure we
4775 convert only well-formed insns. @OP is the sized operand to cross check
4776 against (typically a register). Checking against a single operand typically
4777 suffices, as match_template() has already honored CheckOperandSize. */
4779 static bool is_plausible_suffix (unsigned int op)
4781 return !i.suffix
4782 || (i.suffix == BYTE_MNEM_SUFFIX && i.types[op].bitfield.byte)
4783 || (i.suffix == WORD_MNEM_SUFFIX && i.types[op].bitfield.word)
4784 || (i.suffix == LONG_MNEM_SUFFIX && i.types[op].bitfield.dword)
4785 || (i.suffix == QWORD_MNEM_SUFFIX && i.types[op].bitfield.qword);
4788 /* Encode aligned vector move as unaligned vector move. */
4790 static void
4791 encode_with_unaligned_vector_move (void)
4793 switch (i.tm.base_opcode)
4795 case 0x28: /* Load instructions. */
4796 case 0x29: /* Store instructions. */
4797 /* movaps/movapd/vmovaps/vmovapd. */
4798 if (i.tm.opcode_space == SPACE_0F
4799 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4800 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4801 break;
4802 case 0x6f: /* Load instructions. */
4803 case 0x7f: /* Store instructions. */
4804 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4805 if (i.tm.opcode_space == SPACE_0F
4806 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4807 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4808 break;
4809 default:
4810 break;
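/* Example (editorial sketch): `movaps %xmm0, (%rax)' (0f 29 /r) becomes
   `movups' (0f 11 /r), and `vmovdqa %xmm0, (%rax)' (66 0f 7f) becomes
   `vmovdqu' (f3 0f 7f): same operands, same size, but no alignment fault
   on an unaligned address.  */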
4814 /* Try the shortest encoding by shortening operand size. */
4816 static void
4817 optimize_encoding (void)
4819 unsigned int j;
4821 if (i.tm.mnem_off == MN_lea)
4823 /* Optimize: -O:
4824 lea symbol, %rN -> mov $symbol, %rN
4825 lea (%rM), %rN -> mov %rM, %rN
4826 lea (,%rM,1), %rN -> mov %rM, %rN
4828 and in 32-bit mode for 16-bit addressing
4830 lea (%rM), %rN -> movzx %rM, %rN
4832 and in 64-bit mode zap 32-bit addressing in favor of using a
4833 32-bit (or less) destination.
4835 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4837 if (!i.op[1].regs->reg_type.bitfield.word)
4838 i.tm.opcode_modifier.size = SIZE32;
4839 i.prefix[ADDR_PREFIX] = 0;
4842 if (!i.index_reg && !i.base_reg)
4844 /* Handle:
4845 lea symbol, %rN -> mov $symbol, %rN
4847 if (flag_code == CODE_64BIT)
4849 /* Don't transform a relocation to a 16-bit one. */
4850 if (i.op[0].disps
4851 && i.op[0].disps->X_op != O_constant
4852 && i.op[1].regs->reg_type.bitfield.word)
4853 return;
4855 if (!i.op[1].regs->reg_type.bitfield.qword
4856 || i.tm.opcode_modifier.size == SIZE32)
4858 i.tm.base_opcode = 0xb8;
4859 i.tm.opcode_modifier.modrm = 0;
4860 if (!i.op[1].regs->reg_type.bitfield.word)
4861 i.types[0].bitfield.imm32 = 1;
4862 else
4864 i.tm.opcode_modifier.size = SIZE16;
4865 i.types[0].bitfield.imm16 = 1;
4868 else
4870 /* Subject to further optimization below. */
4871 i.tm.base_opcode = 0xc7;
4872 i.tm.extension_opcode = 0;
4873 i.types[0].bitfield.imm32s = 1;
4874 i.types[0].bitfield.baseindex = 0;
4877 /* Outside of 64-bit mode, address and operand sizes have to match if
4878 a relocation is involved, as otherwise we couldn't (or at least
4879 currently wouldn't) express the relocation correctly. */
4880 else if (i.op[0].disps
4881 && i.op[0].disps->X_op != O_constant
4882 && ((!i.prefix[ADDR_PREFIX])
4883 != (flag_code == CODE_32BIT
4884 ? i.op[1].regs->reg_type.bitfield.dword
4885 : i.op[1].regs->reg_type.bitfield.word)))
4886 return;
4887 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4888 destination is going to grow encoding size. */
4889 else if (flag_code == CODE_16BIT
4890 && (optimize <= 1 || optimize_for_space)
4891 && !i.prefix[ADDR_PREFIX]
4892 && i.op[1].regs->reg_type.bitfield.dword)
4893 return;
4894 else
4896 i.tm.base_opcode = 0xb8;
4897 i.tm.opcode_modifier.modrm = 0;
4898 if (i.op[1].regs->reg_type.bitfield.dword)
4899 i.types[0].bitfield.imm32 = 1;
4900 else
4901 i.types[0].bitfield.imm16 = 1;
4903 if (i.op[0].disps
4904 && i.op[0].disps->X_op == O_constant
4905 && i.op[1].regs->reg_type.bitfield.dword
4906 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4907 GCC 5. */
4908 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4909 i.op[0].disps->X_add_number &= 0xffff;
4912 i.tm.operand_types[0] = i.types[0];
4913 i.imm_operands = 1;
4914 if (!i.op[0].imms)
4916 i.op[0].imms = &im_expressions[0];
4917 i.op[0].imms->X_op = O_absent;
4920 else if (i.op[0].disps
4921 && (i.op[0].disps->X_op != O_constant
4922 || i.op[0].disps->X_add_number))
4923 return;
4924 else
4926 /* Handle:
4927 lea (%rM), %rN -> mov %rM, %rN
4928 lea (,%rM,1), %rN -> mov %rM, %rN
4929 lea (%rM), %rN -> movzx %rM, %rN
4931 const reg_entry *addr_reg;
4933 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4934 addr_reg = i.base_reg;
4935 else if (!i.base_reg
4936 && i.index_reg->reg_num != RegIZ
4937 && !i.log2_scale_factor)
4938 addr_reg = i.index_reg;
4939 else
4940 return;
4942 if (addr_reg->reg_type.bitfield.word
4943 && i.op[1].regs->reg_type.bitfield.dword)
4945 if (flag_code != CODE_32BIT)
4946 return;
4947 i.tm.opcode_space = SPACE_0F;
4948 i.tm.base_opcode = 0xb7;
4950 else
4951 i.tm.base_opcode = 0x8b;
4953 if (addr_reg->reg_type.bitfield.dword
4954 && i.op[1].regs->reg_type.bitfield.qword)
4955 i.tm.opcode_modifier.size = SIZE32;
4957 i.op[0].regs = addr_reg;
4958 i.reg_operands = 2;
4961 i.mem_operands = 0;
4962 i.disp_operands = 0;
4963 i.prefix[ADDR_PREFIX] = 0;
4964 i.prefix[SEG_PREFIX] = 0;
4965 i.seg[0] = NULL;
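/* Example (editorial sketch): `lea (%rdx), %rax' becomes `mov %rdx, %rax'
   (48 8d 02 vs 48 89 d0, same size, but plain register moves are
   typically cheaper to execute), while `lea sym(%rip), %rax' is left
   alone since RegIP is excluded above.  */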
4968 if (optimize_for_space
4969 && (i.tm.mnem_off == MN_test
4970 || (i.tm.base_opcode == 0xf6
4971 && i.tm.opcode_space == SPACE_MAP4))
4972 && i.reg_operands == 1
4973 && i.imm_operands == 1
4974 && !i.types[1].bitfield.byte
4975 && is_plausible_suffix (1)
4976 && i.op[0].imms->X_op == O_constant
4977 && fits_in_imm7 (i.op[0].imms->X_add_number))
4979 /* Optimize: -Os:
4980 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4981 ctest<cc> $imm7, %r64/%r32/%r16 -> ctest<cc> $imm7, %r8
4983 unsigned int base_regnum = i.op[1].regs->reg_num;
4985 gas_assert (!i.tm.opcode_modifier.modrm || i.tm.extension_opcode == 0);
4987 if (flag_code == CODE_64BIT || base_regnum < 4)
4989 i.types[1].bitfield.byte = 1;
4990 /* Squash the suffix. */
4991 i.suffix = 0;
4992 /* Convert to byte registers. 8-bit registers are special,
4993 RegRex64 and non-RegRex* each have 8 registers. */
4994 if (i.types[1].bitfield.word)
4995 /* 32 (or 40) 8-bit registers. */
4996 j = 32;
4997 else if (i.types[1].bitfield.dword)
4998 /* 32 (or 40) 8-bit registers + 32 16-bit registers. */
4999 j = 64;
5000 else
5001 /* 32 (or 40) 8-bit registers + 32 16-bit registers
5002 + 32 32-bit registers. */
5003 j = 96;
5005 /* In 64-bit mode, the following byte registers cannot be accessed
5006 with a REX or REX2 prefix: AH, BH, CH, DH. */
5007 if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
5008 j += 8;
5009 i.op[1].regs -= j;
5012 else if (flag_code == CODE_64BIT
5013 && i.tm.opcode_space == SPACE_BASE
5014 && i.types[i.operands - 1].bitfield.qword
5015 && ((i.reg_operands == 1
5016 && i.imm_operands == 1
5017 && i.op[0].imms->X_op == O_constant
5018 && ((i.tm.base_opcode == 0xb8
5019 && i.tm.extension_opcode == None
5020 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
5021 || (fits_in_imm31 (i.op[0].imms->X_add_number)
5022 && (i.tm.base_opcode == 0x24
5023 || (((i.tm.base_opcode == 0x80
5024 && i.tm.extension_opcode == 0x4)
5025 || i.tm.mnem_off == MN_test)
5026 && !(i.op[1].regs->reg_flags
5027 & (RegRex | RegRex2)))
5028 || ((i.tm.base_opcode | 1) == 0xc7
5029 && i.tm.extension_opcode == 0x0)))
5030 || (fits_in_imm7 (i.op[0].imms->X_add_number)
5031 && i.tm.base_opcode == 0x83
5032 && i.tm.extension_opcode == 0x4
5033 && !(i.op[1].regs->reg_flags & (RegRex | RegRex2)))))
5034 || ((i.reg_operands == 2
5035 && i.op[0].regs == i.op[1].regs
5036 && (i.tm.mnem_off == MN_xor
5037 || i.tm.mnem_off == MN_sub))
5038 || i.tm.mnem_off == MN_clr)))
5040 /* Optimize: -O:
5041 andq $imm31, %r64 -> andl $imm31, %r32
5042 andq $imm7, %r64 -> andl $imm7, %r32
5043 testq $imm31, %r64 -> testl $imm31, %r32
5044 xorq %r64, %r64 -> xorl %r32, %r32
5045 clrq %r64 -> clrl %r32
5046 subq %r64, %r64 -> subl %r32, %r32
5047 movq $imm31, %r64 -> movl $imm31, %r32
5048 movq $imm32, %r64 -> movl $imm32, %r32
5050 i.tm.opcode_modifier.size = SIZE32;
5051 if (i.imm_operands)
5053 i.types[0].bitfield.imm32 = 1;
5054 i.types[0].bitfield.imm32s = 0;
5055 i.types[0].bitfield.imm64 = 0;
5057 else
5059 i.types[0].bitfield.dword = 1;
5060 i.types[0].bitfield.qword = 0;
5062 i.types[1].bitfield.dword = 1;
5063 i.types[1].bitfield.qword = 0;
5064 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
5066 /* Handle
5067 movq $imm31, %r64 -> movl $imm31, %r32
5068 movq $imm32, %r64 -> movl $imm32, %r32
5070 i.tm.operand_types[0].bitfield.imm32 = 1;
5071 i.tm.operand_types[0].bitfield.imm32s = 0;
5072 i.tm.operand_types[0].bitfield.imm64 = 0;
5073 if ((i.tm.base_opcode | 1) == 0xc7)
5075 /* Handle
5076 movq $imm31, %r64 -> movl $imm31, %r32
5078 i.tm.base_opcode = 0xb8;
5079 i.tm.extension_opcode = None;
5080 i.tm.opcode_modifier.w = 0;
5081 i.tm.opcode_modifier.modrm = 0;
5085 else if (i.reg_operands == 3
5086 && i.op[0].regs == i.op[1].regs
5087 && pp.encoding != encoding_evex
5088 && (i.tm.mnem_off == MN_xor
5089 || i.tm.mnem_off == MN_sub))
5091 /* Optimize: -O:
5092 xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd
5093 xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd
5094 xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd
5095 xorq %rN, %rN, %rM -> xorl %rMd, %rMd
5096 subb %rNb, %rNb, %rMb -> subl %rMd, %rMd
5097 subw %rNw, %rNw, %rMw -> subl %rMd, %rMd
5098 subl %rNd, %rNd, %rMd -> subl %rMd, %rMd
5099 subq %rN, %rN, %rM -> subl %rMd, %rMd
5101 i.tm.opcode_space = SPACE_BASE;
5102 i.tm.opcode_modifier.evex = 0;
5103 i.tm.opcode_modifier.size = SIZE32;
5104 i.types[0].bitfield.byte = 0;
5105 i.types[0].bitfield.word = 0;
5106 i.types[0].bitfield.dword = 1;
5107 i.types[0].bitfield.qword = 0;
5108 i.op[0].regs = i.op[2].regs;
5109 i.types[1] = i.types[0];
5110 i.op[1].regs = i.op[2].regs;
5111 i.reg_operands = 2;
5113 else if (optimize > 1
5114 && !optimize_for_space
5115 && i.reg_operands == 2
5116 && i.op[0].regs == i.op[1].regs
5117 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
5118 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
5120 /* Optimize: -O2:
5121 andb %rN, %rN -> testb %rN, %rN
5122 andw %rN, %rN -> testw %rN, %rN
5123 andq %rN, %rN -> testq %rN, %rN
5124 orb %rN, %rN -> testb %rN, %rN
5125 orw %rN, %rN -> testw %rN, %rN
5126 orq %rN, %rN -> testq %rN, %rN
5128 and outside of 64-bit mode
5130 andl %rN, %rN -> testl %rN, %rN
5131 orl %rN, %rN -> testl %rN, %rN
5133 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
5135 else if (!optimize_for_space
5136 && i.tm.base_opcode == 0xd0
5137 && i.tm.extension_opcode == 4
5138 && (i.tm.opcode_space == SPACE_BASE
5139 || i.tm.opcode_space == SPACE_MAP4)
5140 && !i.mem_operands)
5142 /* Optimize: -O:
5143 shlb $1, %rN -> addb %rN, %rN
5144 shlw $1, %rN -> addw %rN, %rN
5145 shll $1, %rN -> addl %rN, %rN
5146 shlq $1, %rN -> addq %rN, %rN
5148 shlb $1, %rN, %rM -> addb %rN, %rN, %rM
5149 shlw $1, %rN, %rM -> addw %rN, %rN, %rM
5150 shll $1, %rN, %rM -> addl %rN, %rN, %rM
5151 shlq $1, %rN, %rM -> addq %rN, %rN, %rM
5153 i.tm.base_opcode = 0x00;
5154 i.tm.extension_opcode = None;
5155 if (i.operands >= 2)
5156 copy_operand (0, 1);
5157 else
5159 /* Legacy form with omitted shift count operand. */
5160 copy_operand (1, 0);
5161 i.operands = 2;
5163 i.reg_operands++;
5164 i.imm_operands = 0;
5166 else if (i.tm.base_opcode == 0xba
5167 && i.tm.opcode_space == SPACE_0F
5168 && i.reg_operands == 1
5169 && i.op[0].imms->X_op == O_constant
5170 && i.op[0].imms->X_add_number >= 0)
5172 /* Optimize: -O:
5173 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
5174 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
5175 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
5177 With <BT> one of bts, btr, and btc also:
5178 <BT>w $n, %rN -> btl $n, %rN (in 32-bit mode, n < 16)
5179 <BT>l $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
5181 switch (flag_code)
5183 case CODE_64BIT:
5184 if (i.tm.extension_opcode != 4)
5185 break;
5186 if (i.types[1].bitfield.qword
5187 && i.op[0].imms->X_add_number < 32
5188 && !(i.op[1].regs->reg_flags & RegRex))
5189 i.tm.opcode_modifier.size = SIZE32;
5190 /* Fall through. */
5191 case CODE_32BIT:
5192 if (i.types[1].bitfield.word
5193 && i.op[0].imms->X_add_number < 16)
5194 i.tm.opcode_modifier.size = SIZE32;
5195 break;
5196 case CODE_16BIT:
5197 if (i.op[0].imms->X_add_number < 16)
5198 i.tm.opcode_modifier.size = SIZE16;
5199 break;
5202 else if (optimize > 1
5203 && (i.tm.base_opcode | 0xf) == 0x4f
5204 && i.tm.opcode_space == SPACE_MAP4
5205 && i.reg_operands == 3
5206 && i.tm.opcode_modifier.operandconstraint == EVEX_NF
5207 && !i.types[0].bitfield.word)
5209 /* Optimize: -O2:
5210 cfcmov<cc> %rM, %rN, %rN -> cmov<cc> %rM, %rN
5211 cfcmov<cc> %rM, %rN, %rM -> cmov<!cc> %rN, %rM
5212 cfcmov<cc> %rN, %rN, %rN -> nop %rN
5214 if (i.op[0].regs == i.op[2].regs)
5216 i.tm.base_opcode ^= 1;
5217 i.op[0].regs = i.op[1].regs;
5218 i.op[1].regs = i.op[2].regs;
5220 else if (i.op[1].regs != i.op[2].regs)
5221 return;
5223 i.tm.opcode_space = SPACE_0F;
5224 i.tm.opcode_modifier.evex = 0;
5225 i.tm.opcode_modifier.vexvvvv = 0;
5226 i.tm.opcode_modifier.operandconstraint = 0;
5227 i.reg_operands = 2;
5229 /* While at it, convert to NOP if all three regs match. */
5230 if (i.op[0].regs == i.op[1].regs)
5232 i.tm.base_opcode = 0x1f;
5233 i.tm.extension_opcode = 0;
5234 i.reg_operands = 1;
5237 else if (i.reg_operands == 3
5238 && i.op[0].regs == i.op[1].regs
5239 && !i.types[2].bitfield.xmmword
5240 && (i.tm.opcode_modifier.vex
5241 || ((!i.mask.reg || i.mask.zeroing)
5242 && i.tm.opcode_modifier.evex
5243 && (pp.encoding != encoding_evex
5244 || cpu_arch_isa_flags.bitfield.cpuavx512vl
5245 || is_cpu (&i.tm, CpuAVX512VL)
5246 || (i.tm.operand_types[2].bitfield.zmmword
5247 && i.types[2].bitfield.ymmword))))
5248 && i.tm.opcode_space == SPACE_0F
5249 && ((i.tm.base_opcode | 2) == 0x57
5250 || i.tm.base_opcode == 0xdf
5251 || i.tm.base_opcode == 0xef
5252 || (i.tm.base_opcode | 3) == 0xfb
5253 || i.tm.base_opcode == 0x42
5254 || i.tm.base_opcode == 0x47))
5256 /* Optimize: -O1:
5257 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
5258 vpsubq and vpsubw:
5259 EVEX VOP %zmmM, %zmmM, %zmmN
5260 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
5261 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5262 EVEX VOP %ymmM, %ymmM, %ymmN
5263 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
5264 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5265 VEX VOP %ymmM, %ymmM, %ymmN
5266 -> VEX VOP %xmmM, %xmmM, %xmmN
5267 VOP, one of vpandn and vpxor:
5268 VEX VOP %ymmM, %ymmM, %ymmN
5269 -> VEX VOP %xmmM, %xmmM, %xmmN
5270 VOP, one of vpandnd and vpandnq:
5271 EVEX VOP %zmmM, %zmmM, %zmmN
5272 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
5273 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5274 EVEX VOP %ymmM, %ymmM, %ymmN
5275 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
5276 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5277 VOP, one of vpxord and vpxorq:
5278 EVEX VOP %zmmM, %zmmM, %zmmN
5279 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
5280 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5281 EVEX VOP %ymmM, %ymmM, %ymmN
5282 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
5283 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
5284 VOP, one of kxord and kxorq:
5285 VEX VOP %kM, %kM, %kN
5286 -> VEX kxorw %kM, %kM, %kN
5287 VOP, one of kandnd and kandnq:
5288 VEX VOP %kM, %kM, %kN
5289 -> VEX kandnw %kM, %kM, %kN
5291 if (i.tm.opcode_modifier.evex)
5293 if (pp.encoding != encoding_evex)
5295 i.tm.opcode_modifier.vex = VEX128;
5296 i.tm.opcode_modifier.vexw = VEXW0;
5297 i.tm.opcode_modifier.evex = 0;
5298 pp.encoding = encoding_vex;
5299 i.mask.reg = NULL;
5301 else if (optimize > 1)
5302 i.tm.opcode_modifier.evex = EVEX128;
5303 else
5304 return;
5306 else if (i.tm.operand_types[0].bitfield.class == RegMask)
5308 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
5309 i.tm.opcode_modifier.vexw = VEXW0;
5311 else
5312 i.tm.opcode_modifier.vex = VEX128;
5314 if (i.tm.opcode_modifier.vex)
5315 for (j = 0; j < 3; j++)
5317 i.types[j].bitfield.xmmword = 1;
5318 i.types[j].bitfield.ymmword = 0;
5321 else if (pp.encoding != encoding_evex
5322 && pp.encoding != encoding_egpr
5323 && !i.types[0].bitfield.zmmword
5324 && !i.types[1].bitfield.zmmword
5325 && !i.mask.reg
5326 && !i.broadcast.type
5327 && !i.broadcast.bytes
5328 && i.tm.opcode_modifier.evex
5329 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
5330 || (i.tm.base_opcode & ~4) == 0xdb
5331 || (i.tm.base_opcode & ~4) == 0xeb)
5332 && i.tm.extension_opcode == None)
5334 /* Optimize: -O1:
5335 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
5336 vmovdqu32 and vmovdqu64:
5337 EVEX VOP %xmmM, %xmmN
5338 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
5339 EVEX VOP %ymmM, %ymmN
5340 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
5341 EVEX VOP %xmmM, mem
5342 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
5343 EVEX VOP %ymmM, mem
5344 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
5345 EVEX VOP mem, %xmmN
5346 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
5347 EVEX VOP mem, %ymmN
5348 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
5349 VOP, one of vpand, vpandn, vpor, vpxor:
5350 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
5351 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
5352 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
5353 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
5354 EVEX VOP{d,q} mem, %xmmM, %xmmN
5355 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
5356 EVEX VOP{d,q} mem, %ymmM, %ymmN
5357 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
5359 for (j = 0; j < i.operands; j++)
5360 if (operand_type_check (i.types[j], disp)
5361 && i.op[j].disps->X_op == O_constant)
5363 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
5364 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
5365 bytes, we choose EVEX Disp8 over VEX Disp32. */
5366 int evex_disp8, vex_disp8;
5367 unsigned int memshift = i.memshift;
5368 offsetT n = i.op[j].disps->X_add_number;
5370 evex_disp8 = fits_in_disp8 (n);
5371 i.memshift = 0;
5372 vex_disp8 = fits_in_disp8 (n);
5373 if (evex_disp8 != vex_disp8)
5375 i.memshift = memshift;
5376 return;
5379 i.types[j].bitfield.disp8 = vex_disp8;
5380 break;
5382 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
5383 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
5384 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
5385 i.tm.opcode_modifier.vex
5386 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
5387 i.tm.opcode_modifier.vexw = VEXW0;
5388 /* VPAND, VPOR, and VPXOR are commutative. */
5389 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
5390 i.tm.opcode_modifier.commutative = 1;
5391 i.tm.opcode_modifier.evex = 0;
5392 i.tm.opcode_modifier.masking = 0;
5393 i.tm.opcode_modifier.broadcast = 0;
5394 i.tm.opcode_modifier.disp8memshift = 0;
5395 i.memshift = 0;
5396 if (j < i.operands)
5397 i.types[j].bitfield.disp8
5398 = fits_in_disp8 (i.op[j].disps->X_add_number);
5400 else if (optimize_for_space
5401 && i.tm.base_opcode == 0x29
5402 && i.tm.opcode_space == SPACE_0F38
5403 && i.operands == i.reg_operands
5404 && i.op[0].regs == i.op[1].regs
5405 && (!i.tm.opcode_modifier.vex
5406 || !(i.op[0].regs->reg_flags & RegRex))
5407 && !i.tm.opcode_modifier.evex)
5409 /* Optimize: -Os:
5410 pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
5411 vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
5412 vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
5414 i.tm.opcode_space = SPACE_0F;
5415 i.tm.base_opcode = 0x76;
5417 else if (((i.tm.base_opcode >= 0x64
5418 && i.tm.base_opcode <= 0x66
5419 && i.tm.opcode_space == SPACE_0F)
5420 || (i.tm.base_opcode == 0x37
5421 && i.tm.opcode_space == SPACE_0F38))
5422 && i.operands == i.reg_operands
5423 && i.op[0].regs == i.op[1].regs
5424 && !i.tm.opcode_modifier.evex)
5426 /* Optimize: -O:
5427 pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
5428 pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
5429 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
5430 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
5431 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
5432 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
5434 i.tm.opcode_space = SPACE_0F;
5435 i.tm.base_opcode = 0xef;
5436 if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
5438 if (i.operands == 2)
5440 gas_assert (i.tm.opcode_modifier.sse2avx);
5442 i.operands = 3;
5443 i.reg_operands = 3;
5444 i.tm.operands = 3;
5446 copy_operand (2, 0);
5448 i.tm.opcode_modifier.sse2avx = 0;
5450 i.op[0].regs -= i.op[0].regs->reg_num + 8;
5451 i.op[1].regs = i.op[0].regs;
5454 else if (i.tm.extension_opcode == 6
5455 && i.tm.base_opcode >= 0x71
5456 && i.tm.base_opcode <= 0x73
5457 && i.tm.opcode_space == SPACE_0F
5458 && i.op[0].imms->X_op == O_constant
5459 && i.op[0].imms->X_add_number == 1
5460 && !i.mem_operands)
5462 /* Optimize: -O:
5463 psllw $1, %mmxN -> paddw %mmxN, %mmxN
5464 psllw $1, %xmmN -> paddw %xmmN, %xmmN
5465 vpsllw $1, %xmmN, %xmmM -> vpaddw %xmmN, %xmmN, %xmmM
5466 vpsllw $1, %ymmN, %ymmM -> vpaddw %ymmN, %ymmN, %ymmM
5467 vpsllw $1, %zmmN, %zmmM -> vpaddw %zmmN, %zmmN, %zmmM
5469 pslld $1, %mmxN -> paddd %mmxN, %mmxN
5470 pslld $1, %xmmN -> paddd %xmmN, %xmmN
5471 vpslld $1, %xmmN, %xmmM -> vpaddd %xmmN, %xmmN, %xmmM
5472 vpslld $1, %ymmN, %ymmM -> vpaddd %ymmN, %ymmN, %ymmM
5473 vpslld $1, %zmmN, %zmmM -> vpaddd %zmmN, %zmmN, %zmmM
5475 psllq $1, %xmmN -> paddq %xmmN, %xmmN
5476 vpsllq $1, %xmmN, %xmmM -> vpaddq %xmmN, %xmmN, %xmmM
5477 vpsllq $1, %ymmN, %ymmM -> vpaddq %ymmN, %ymmN, %ymmM
5478 vpsllq $1, %zmmN, %zmmM -> vpaddq %zmmN, %zmmN, %zmmM
5480 if (i.tm.base_opcode != 0x73)
5481 i.tm.base_opcode |= 0xfc; /* {,v}padd{w,d} */
5482 else
5484 gas_assert (i.tm.operand_types[1].bitfield.class != RegMMX);
5485 i.tm.base_opcode = 0xd4; /* {,v}paddq */
5487 i.tm.extension_opcode = None;
5488 if (i.tm.opcode_modifier.vexvvvv)
5489 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
5490 copy_operand (0, 1);
5491 i.reg_operands++;
5492 i.imm_operands = 0;
5494 else if (optimize_for_space
5495 && i.tm.base_opcode == 0x59
5496 && i.tm.opcode_space == SPACE_0F38
5497 && i.operands == i.reg_operands
5498 && i.tm.opcode_modifier.vex
5499 && !(i.op[0].regs->reg_flags & RegRex)
5500 && i.op[0].regs->reg_type.bitfield.xmmword
5501 && pp.encoding != encoding_vex3)
5503 /* Optimize: -Os:
5504 vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
5506 i.tm.opcode_space = SPACE_0F;
5507 i.tm.base_opcode = 0x6c;
5508 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
5510 ++i.operands;
5511 ++i.reg_operands;
5512 ++i.tm.operands;
5514 copy_operand (2, 0);
5515 swap_2_operands (1, 2);
5517 else if (i.tm.base_opcode == 0x16
5518 && i.tm.opcode_space == SPACE_0F3A
5519 && i.op[0].imms->X_op == O_constant
5520 && i.op[0].imms->X_add_number == 0)
5522 /* Optimize: -O:
5523 pextrd $0, %xmmN, ... -> movd %xmmN, ...
5524 pextrq $0, %xmmN, ... -> movq %xmmN, ...
5525 vpextrd $0, %xmmN, ... -> vmovd %xmmN, ...
5526 vpextrq $0, %xmmN, ... -> vmovq %xmmN, ...
5528 i.tm.opcode_space = SPACE_0F;
5529 if (!i.mem_operands
5530 || i.tm.opcode_modifier.evex
5531 || (i.tm.opcode_modifier.vexw != VEXW1
5532 && i.tm.opcode_modifier.size != SIZE64))
5533 i.tm.base_opcode = 0x7e;
5534 else
5536 i.tm.base_opcode = 0xd6;
5537 i.tm.opcode_modifier.size = 0;
5538 i.tm.opcode_modifier.vexw
5539 = i.tm.opcode_modifier.sse2avx ? VEXW0 : VEXWIG;
5542 copy_operand (0, 1);
5543 copy_operand (1, 2);
5545 i.operands = 2;
5546 i.imm_operands = 0;
5548 else if (i.tm.base_opcode == 0x17
5549 && i.tm.opcode_space == SPACE_0F3A
5550 && i.op[0].imms->X_op == O_constant
5551 && i.op[0].imms->X_add_number == 0)
5553 /* Optimize: -O:
5554 extractps $0, %xmmN, %rM -> movd %xmmN, %rM
5555 extractps $0, %xmmN, mem -> movss %xmmN, mem
5556 vextractps $0, %xmmN, %rM -> vmovd %xmmN, %rM
5557 vextractps $0, %xmmN, mem -> vmovss %xmmN, mem
5559 i.tm.opcode_space = SPACE_0F;
5560 i.tm.opcode_modifier.vexw = VEXW0;
5562 if (!i.mem_operands)
5563 i.tm.base_opcode = 0x7e;
5564 else
5566 i.tm.base_opcode = 0x11;
5567 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
5570 copy_operand (0, 1);
5571 copy_operand (1, 2);
5573 i.operands = 2;
5574 i.imm_operands = 0;
5576 else if ((i.tm.base_opcode | 0x22) == 0x3b
5577 && i.tm.opcode_space == SPACE_0F3A
5578 && i.op[0].imms->X_op == O_constant
5579 && i.op[0].imms->X_add_number == 0)
5581 /* Optimize: -O:
5582 vextractf128 $0, %ymmN, %xmmM -> vmovaps %xmmN, %xmmM
5583 vextractf128 $0, %ymmN, mem -> vmovups %xmmN, mem
5584 vextractf32x4 $0, %[yz]mmN, %xmmM -> vmovaps %xmmN, %xmmM
5585 vextractf32x4 $0, %[yz]mmN, mem -> vmovups %xmmN, mem
5586 vextractf64x2 $0, %[yz]mmN, %xmmM -> vmovapd %xmmN, %xmmM
5587 vextractf64x2 $0, %[yz]mmN, mem -> vmovupd %xmmN, mem
5588 vextractf32x8 $0, %zmmN, %ymmM -> vmovaps %ymmN, %ymmM
5589 vextractf32x8 $0, %zmmN, mem -> vmovups %ymmN, mem
5590 vextractf64x4 $0, %zmmN, %ymmM -> vmovapd %ymmN, %ymmM
5591 vextractf64x4 $0, %zmmN, mem -> vmovupd %ymmN, mem
5592 vextracti128 $0, %ymmN, %xmmM -> vmovdqa %xmmN, %xmmM
5593 vextracti128 $0, %ymmN, mem -> vmovdqu %xmmN, mem
5594 vextracti32x4 $0, %[yz]mmN, %xmmM -> vmovdqa{,32} %xmmN, %xmmM
5595 vextracti32x4 $0, %[yz]mmN, mem -> vmovdqu{,32} %xmmN, mem
5596 vextracti64x2 $0, %[yz]mmN, %xmmM -> vmovdqa{,64} %xmmN, %xmmM
5597 vextracti64x2 $0, %[yz]mmN, mem -> vmovdqu{,64} %xmmN, mem
5598 vextracti32x8 $0, %zmmN, %ymmM -> vmovdqa{,32} %ymmN, %ymmM
5599 vextracti32x8 $0, %zmmN, mem -> vmovdqu{,32} %ymmN, mem
5600 vextracti64x4 $0, %zmmN, %ymmM -> vmovdqa{,64} %ymmN, %ymmM
5601 vextracti64x4 $0, %zmmN, mem -> vmovdqu{,64} %ymmN, mem
5603 i.tm.opcode_space = SPACE_0F;
5605 if (!i.mask.reg
5606 && (pp.encoding <= encoding_vex3
5607 || (pp.encoding == encoding_evex512
5608 && (!i.base_reg || !(i.base_reg->reg_flags & RegRex2))
5609 && (!i.index_reg || !(i.index_reg->reg_flags & RegRex2)))))
5611 i.tm.opcode_modifier.vex = i.tm.base_opcode & 2 ? VEX256 : VEX128;
5612 i.tm.opcode_modifier.evex = 0;
5614 else
5615 i.tm.opcode_modifier.evex = i.tm.base_opcode & 2 ? EVEX256 : EVEX128;
5617 if (i.tm.base_opcode & 0x20)
5619 i.tm.base_opcode = 0x7f;
5620 if (i.reg_operands != 2)
5621 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
5623 else
5625 if (i.reg_operands == 2)
5626 i.tm.base_opcode = 0x29;
5627 else
5628 i.tm.base_opcode = 0x11;
5629 if (i.tm.opcode_modifier.vexw != VEXW1)
5630 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
5633 if (i.tm.opcode_modifier.vex)
5634 i.tm.opcode_modifier.vexw = VEXWIG;
5636 copy_operand (0, 1);
5637 copy_operand (1, 2);
5639 i.operands = 2;
5640 i.imm_operands = 0;
5642 else if (i.tm.base_opcode == 0x21
5643 && i.tm.opcode_space == SPACE_0F3A
5644 && i.op[0].imms->X_op == O_constant
5645 && (i.operands == i.reg_operands + 1
5646 ? i.op[0].imms->X_add_number == 0
5647 || (i.op[0].imms->X_add_number & 0xf) == 0xf
5648 : (i.op[0].imms->X_add_number & 0x3f) == 0x0e
5649 && (i.reg_operands == 1 || i.op[2].regs == i.op[3].regs)))
5651 /* Optimize: -O:
5652 insertps $0b....1111, %xmmN, %xmmM -> xorps %xmmM, %xmmM
5653 insertps $0b00000000, %xmmN, %xmmM -> movss %xmmN, %xmmM
5654 insertps $0b..001110, mem, %xmmN -> movss mem, %xmmN
5655 vinsertps $0b....1111, %xmmN, %xmmM, %xmmK -> vxorps %xmm?, %xmm?, %xmmK
5656 vinsertps $0b00000000, %xmmN, %xmmM, %xmmK -> vmovss %xmmN, %xmmM, %xmmK
5657 vinsertps $0b..001110, mem, %xmmN, %xmmN -> vmovss mem, %xmmN
5659 i.tm.opcode_space = SPACE_0F;
5660 if ((i.op[0].imms->X_add_number & 0xf) == 0xf)
5662 i.tm.base_opcode = 0x57;
5663 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
5665 --i.operands;
5667 copy_operand (i.operands - 1, i.operands);
5668 copy_operand (1, i.operands - 1);
5669 copy_operand (0, 1);
5671 /* Switch from EVEX to VEX encoding if possible. Sadly we can't
5672 (always) tell use of the {evex} pseudo-prefix (which otherwise
5673 we'd like to respect) from use of %xmm16-%xmm31. */
5674 if (pp.encoding == encoding_evex)
5675 pp.encoding = encoding_default;
5676 if (i.tm.opcode_modifier.evex
5677 && pp.encoding <= encoding_vex3
5678 && !(i.op[0].regs->reg_flags & RegVRex))
5680 i.tm.opcode_modifier.evex = 0;
5681 i.tm.opcode_modifier.vex = VEX128;
5684 /* Switch from VEX3 to VEX2 encoding if possible. */
5685 if (i.tm.opcode_modifier.vex
5686 && pp.encoding <= encoding_vex
5687 && (i.op[0].regs->reg_flags & RegRex))
5689 i.op[0].regs -= 8;
5690 i.op[1].regs = i.op[0].regs;
5693 else
5695 i.tm.base_opcode = 0x10;
5696 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
5698 if (i.op[0].imms->X_add_number == 0)
5699 --i.operands;
5700 else
5702 i.operands = 2;
5703 i.tm.opcode_modifier.vexvvvv = 0;
5705 copy_operand (0, 1);
5706 copy_operand (1, 2);
5707 copy_operand (2, 3);
5710 i.imm_operands = 0;
5714 /* Check whether the promoted (to address size) register is usable as index
5715 register in ModR/M SIB addressing. */
5717 static bool is_index (const reg_entry *r)
5719 gas_assert (flag_code == CODE_64BIT);
5721 if (r->reg_type.bitfield.byte)
5723 if (!(r->reg_flags & (RegRex | RegRex2 | RegRex64)))
5725 if (r->reg_num >= 4)
5726 return false;
5727 r += 8;
5729 r += 32;
5731 if (r->reg_type.bitfield.word)
5732 r += 32;
5733 /* No need to further check .dword here. */
5735 return r->reg_type.bitfield.baseindex;
5738 /* Try to shorten {nf} encodings, by shortening operand size or switching to
5739 functionally identical encodings. */
5741 static void
5742 optimize_nf_encoding (void)
5744 if (i.tm.base_opcode == 0x80
5745 && (i.tm.extension_opcode == 0 || i.tm.extension_opcode == 5)
5746 && i.suffix != BYTE_MNEM_SUFFIX
5747 && !i.types[1].bitfield.byte
5748 && !i.types[2].bitfield.byte
5749 && i.op[0].imms->X_op == O_constant
5750 && i.op[0].imms->X_add_number == 0x80)
5752 /* Optimize: -O:
5753 {nf} addw $0x80, ... -> {nf} subw $-0x80, ...
5754 {nf} addl $0x80, ... -> {nf} subl $-0x80, ...
5755 {nf} addq $0x80, ... -> {nf} subq $-0x80, ...
5757 {nf} subw $0x80, ... -> {nf} addw $-0x80, ...
5758 {nf} subl $0x80, ... -> {nf} addl $-0x80, ...
5759 {nf} subq $0x80, ... -> {nf} addq $-0x80, ...
5761 i.tm.base_opcode |= 3;
5762 i.tm.extension_opcode ^= 5;
5763 i.tm.opcode_modifier.w = 0;
5764 i.op[0].imms->X_add_number = -i.op[0].imms->X_add_number;
5766 i.tm.operand_types[0].bitfield.imm8 = 0;
5767 i.tm.operand_types[0].bitfield.imm8s = 1;
5768 i.tm.operand_types[0].bitfield.imm16 = 0;
5769 i.tm.operand_types[0].bitfield.imm32 = 0;
5770 i.tm.operand_types[0].bitfield.imm32s = 0;
5772 i.types[0] = i.tm.operand_types[0];
5774 else if ((i.tm.base_opcode | 3) == 0x83
5775 && (i.tm.extension_opcode == 0 || i.tm.extension_opcode == 5)
5776 && i.op[0].imms->X_op == O_constant
5777 && (i.op[0].imms->X_add_number == 1
5778 || i.op[0].imms->X_add_number == -1
5779 /* While immediates for wider-than-byte operations were suitably
5780 adjusted earlier on, 0xff needs covering explicitly in the
5781 byte case. */
5782 || (i.op[0].imms->X_add_number == 0xff
5783 && (i.suffix == BYTE_MNEM_SUFFIX
5784 || i.types[i.operands - 1].bitfield.byte))))
5786 /* Optimize: -O:
5787 {nf} add $1, ... -> {nf} inc ...
5788 {nf} add $-1, ... -> {nf} dec ...
5789 {nf} add $0xf...f, ... -> {nf} dec ...
5791 {nf} sub $1, ... -> {nf} dec ...
5792 {nf} sub $-1, ... -> {nf} inc ...
5793 {nf} sub $0xf...f, ... -> {nf} inc ...
5795 i.tm.base_opcode = 0xfe;
5796 i.tm.extension_opcode
5797 = (i.op[0].imms->X_add_number == 1) != (i.tm.extension_opcode == 0);
5798 i.tm.opcode_modifier.w = 1;
5800 copy_operand (0, 1);
5801 copy_operand (1, 2);
5803 i.imm_operands = 0;
5804 --i.operands;
5806 else if (i.tm.base_opcode == 0xc0
5807 && i.op[0].imms->X_op == O_constant
5808 && i.op[0].imms->X_add_number
5809 == (i.types[i.operands - 1].bitfield.byte
5810 || i.suffix == BYTE_MNEM_SUFFIX
5811 ? 7 : i.types[i.operands - 1].bitfield.word
5812 || i.suffix == WORD_MNEM_SUFFIX
5813 ? 15 : 63 >> (i.types[i.operands - 1].bitfield.dword
5814 || i.suffix == LONG_MNEM_SUFFIX)))
5816 /* Optimize: -O:
5817 {nf} rol $osz-1, ... -> {nf} ror $1, ...
5818 {nf} ror $osz-1, ... -> {nf} rol $1, ...
5820 gas_assert (i.tm.extension_opcode <= 1);
5821 i.tm.extension_opcode ^= 1;
5822 i.tm.base_opcode = 0xd0;
5823 i.tm.operand_types[0].bitfield.imm1 = 1;
5824 i.imm_operands = 0;
5826 else if ((i.tm.base_opcode | 2) == 0x6b
5827 && i.op[0].imms->X_op == O_constant
5828 && (i.op[0].imms->X_add_number > 0
5829 ? !(i.op[0].imms->X_add_number & (i.op[0].imms->X_add_number - 1))
5830 /* optimize_imm() converts to sign-extended representation where
5831 possible (and input can also come with these specific numbers). */
5832 : (i.types[i.operands - 1].bitfield.word
5833 && i.op[0].imms->X_add_number == -0x8000)
5834 || (i.types[i.operands - 1].bitfield.dword
5835 && i.op[0].imms->X_add_number + 1 == -0x7fffffff))
5836 /* 16-bit 3-operand non-ZU forms need leaving alone, to prevent
5837 zero-extension of the result. Unless, of course, both non-
5838 immediate operands match (which can be converted to the non-NDD
5839 form). */
5840 && (i.operands < 3
5841 || !i.types[2].bitfield.word
5842 || i.tm.mnem_off == MN_imulzu
5843 || i.op[2].regs == i.op[1].regs)
5844 /* When merely optimizing for size, exclude cases where we'd convert
5845 from Imm8S to Imm8 encoding, thus not actually reducing size. */
5846 && (!optimize_for_space
5847 || i.tm.base_opcode == 0x69
5848 || !(i.op[0].imms->X_add_number & 0x7d)))
5850 /* Optimize: -O:
5851 {nf} imul $1<<N, ... -> {nf} shl $N, ...
5852 {nf} imulzu $1<<N, ... -> {nf} shl $N, ...
5854 if (i.op[0].imms->X_add_number != 2)
5856 i.tm.base_opcode = 0xc0;
5857 i.op[0].imms->X_add_number = ffs (i.op[0].imms->X_add_number) - 1;
5858 i.tm.operand_types[0].bitfield.imm8 = 1;
5859 i.tm.operand_types[0].bitfield.imm16 = 0;
5860 i.tm.operand_types[0].bitfield.imm32 = 0;
5861 i.tm.operand_types[0].bitfield.imm32s = 0;
5863 else
5865 i.tm.base_opcode = 0xd0;
5866 i.tm.operand_types[0].bitfield.imm1 = 1;
5868 i.types[0] = i.tm.operand_types[0];
5869 i.tm.extension_opcode = 4;
5870 i.tm.opcode_modifier.w = 1;
5871 i.tm.opcode_modifier.operandconstraint = 0;
5872 if (i.operands == 3)
5874 if (i.op[2].regs == i.op[1].regs && i.tm.mnem_off != MN_imulzu)
5876 /* Convert to non-NDD form. This is required for 16-bit insns
5877 (to prevent zero-extension) and benign for others. */
5878 i.operands = 2;
5879 i.reg_operands = 1;
5881 else
5882 i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
5884 else if (i.tm.mnem_off == MN_imulzu)
5886 /* Convert to NDD form, to effect zero-extension of the result. */
5887 i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
5888 i.operands = 3;
5889 i.reg_operands = 2;
5890 copy_operand (2, 1);
5894 if (optimize_for_space
5895 && pp.encoding != encoding_evex
5896 && (i.tm.base_opcode == 0x00
5897 || (i.tm.base_opcode == 0xd0 && i.tm.extension_opcode == 4))
5898 && !i.mem_operands
5899 && !i.types[1].bitfield.byte
5900 /* 16-bit operand size has extra restrictions: If REX2 was needed,
5901 no size reduction would be possible. Plus 3-operand forms zero-
5902 extend the result, which can't be expressed with LEA. */
5903 && (!i.types[1].bitfield.word
5904 || (i.operands == 2 && pp.encoding != encoding_egpr))
5905 && is_plausible_suffix (1)
5906 /* %rsp can't be the index. */
5907 && (is_index (i.op[1].regs)
5908 || (i.imm_operands == 0 && is_index (i.op[0].regs)))
5909 /* While %rbp, %r13, %r21, and %r29 can be made the index in order to
5910 avoid the otherwise necessary Disp8, if the other operand is also
5911 from that set and REX2 would be required to encode the insn, the
5912 resulting encoding would be no smaller than the EVEX one. */
5913 && (i.op[1].regs->reg_num != 5
5914 || pp.encoding != encoding_egpr
5915 || i.imm_operands > 0
5916 || i.op[0].regs->reg_num != 5))
5918 /* Optimize: -Os:
5919 {nf} addw %N, %M -> leaw (%rM,%rN), %M
5920 {nf} addl %eN, %eM -> leal (%rM,%rN), %eM
5921 {nf} addq %rN, %rM -> leaq (%rM,%rN), %rM
5923 {nf} shlw $1, %N -> leaw (%rN,%rN), %N
5924 {nf} shll $1, %eN -> leal (%rN,%rN), %eN
5925 {nf} shlq $1, %rN -> leaq (%rN,%rN), %rN
5927 {nf} addl %eK, %eN, %eM -> leal (%rN,%rK), %eM
5928 {nf} addq %rK, %rN, %rM -> leaq (%rN,%rK), %rM
5930 {nf} shll $1, %eN, %eM -> leal (%rN,%rN), %eM
5931 {nf} shlq $1, %rN, %rM -> leaq (%rN,%rN), %rM
5933 i.tm.opcode_space = SPACE_BASE;
5934 i.tm.base_opcode = 0x8d;
5935 i.tm.extension_opcode = None;
5936 i.tm.opcode_modifier.evex = 0;
5937 i.tm.opcode_modifier.vexvvvv = 0;
5938 if (i.imm_operands != 0)
5939 i.index_reg = i.base_reg = i.op[1].regs;
5940 else if (!is_index (i.op[0].regs)
5941 || (i.op[1].regs->reg_num == 5
5942 && i.op[0].regs->reg_num != 5))
5944 i.base_reg = i.op[0].regs;
5945 i.index_reg = i.op[1].regs;
5947 else
5949 i.base_reg = i.op[1].regs;
5950 i.index_reg = i.op[0].regs;
5952 if (i.types[1].bitfield.word)
5954 /* NB: No similar adjustment is needed when operand size is 32-bit. */
5955 i.base_reg += 64;
5956 i.index_reg += 64;
5958 i.op[1].regs = i.op[i.operands - 1].regs;
5960 operand_type_set (&i.types[0], 0);
5961 i.types[0].bitfield.baseindex = 1;
5962 i.tm.operand_types[0] = i.types[0];
5963 i.op[0].disps = NULL;
5964 i.flags[0] = Operand_Mem;
5966 i.operands = 2;
5967 i.mem_operands = i.reg_operands = 1;
5968 i.imm_operands = 0;
5969 pp.has_nf = false;
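/* Size comparison for the rewrite above (illustrative, assuming the
   usual encodings): "{nf} addl %edx, %eax" needs a 4-byte EVEX prefix
   plus opcode and ModRM (6 bytes in total), whereas the replacement
   "leal (%rax,%rdx), %eax" encodes as just 8d 04 10 (3 bytes).  */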
5971 else if (optimize_for_space
5972 && pp.encoding != encoding_evex
5973 && (i.tm.base_opcode == 0x80 || i.tm.base_opcode == 0x83)
5974 && (i.tm.extension_opcode == 0
5975 || (i.tm.extension_opcode == 5
5976 && i.op[0].imms->X_op == O_constant
5977 /* Subtraction of -0x80 will end up smaller only if neither
5978 operand size nor REX/REX2 prefixes are needed. */
5979 && (i.op[0].imms->X_add_number != -0x80
5980 || (i.types[1].bitfield.dword
5981 && !(i.op[1].regs->reg_flags & RegRex)
5982 && !(i.op[i.operands - 1].regs->reg_flags & RegRex)
5983 && pp.encoding != encoding_egpr))))
5984 && !i.mem_operands
5985 && !i.types[1].bitfield.byte
5986 /* 16-bit operand size has extra restrictions: If REX2 was needed,
5987 no size reduction would be possible. Plus 3-operand forms zero-
5988 extend the result, which can't be expressed with LEA. */
5989 && (!i.types[1].bitfield.word
5990 || (i.operands == 2 && pp.encoding != encoding_egpr))
5991 && is_plausible_suffix (1))
5993 /* Optimize: -Os:
5994 {nf} addw $N, %M -> leaw N(%rM), %M
5995 {nf} addl $N, %eM -> leal N(%rM), %eM
5996 {nf} addq $N, %rM -> leaq N(%rM), %rM
5998 {nf} subw $N, %M -> leaw -N(%rM), %M
5999 {nf} subl $N, %eM -> leal -N(%rM), %eM
6000 {nf} subq $N, %rM -> leaq -N(%rM), %rM
6002 {nf} addl $N, %eK, %eM -> leal N(%rK), %eM
6003 {nf} addq $N, %rK, %rM -> leaq N(%rK), %rM
6005 {nf} subl $N, %eK, %eM -> leal -N(%rK), %eM
6006 {nf} subq $N, %rK, %rM -> leaq -N(%rK), %rM
6008 i.tm.opcode_space = SPACE_BASE;
6009 i.tm.base_opcode = 0x8d;
6010 if (i.tm.extension_opcode == 5)
6011 i.op[0].imms->X_add_number = -i.op[0].imms->X_add_number;
6012 i.tm.extension_opcode = None;
6013 i.tm.opcode_modifier.evex = 0;
6014 i.tm.opcode_modifier.vexvvvv = 0;
6015 i.base_reg = i.op[1].regs;
6016 if (i.types[1].bitfield.word)
6018 /* NB: No similar adjustment is needed when operand size is 32-bit. */
6019 i.base_reg += 64;
6021 i.op[1].regs = i.op[i.operands - 1].regs;
6023 operand_type_set (&i.types[0], 0);
6024 i.types[0].bitfield.baseindex = 1;
6025 i.types[0].bitfield.disp32 = 1;
6026 i.op[0].disps = i.op[0].imms;
6027 i.flags[0] = Operand_Mem;
6028 optimize_disp (&i.tm);
6029 i.tm.operand_types[0] = i.types[0];
6031 i.operands = 2;
6032 i.disp_operands = i.mem_operands = i.reg_operands = 1;
6033 i.imm_operands = 0;
6034 pp.has_nf = false;
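/* E.g. (illustrative): "{nf} subl $8, %ecx" is rewritten to
   "leal -8(%rcx), %ecx", i.e. 8d 49 f8 (3 bytes) instead of a 7-byte
   EVEX-encoded sub with an 8-bit immediate.  */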
6036 else if (i.tm.base_opcode == 0x6b
6037 && !i.mem_operands
6038 && pp.encoding != encoding_evex
6039 && i.tm.mnem_off != MN_imulzu
6040 && is_plausible_suffix (1)
6041 /* %rsp can't be the index. */
6042 && is_index (i.op[1].regs)
6043 /* There's no reduction in size for 16-bit forms requiring Disp8 and
6044 REX2. */
6045 && (!optimize_for_space
6046 || !i.types[1].bitfield.word
6047 || i.op[1].regs->reg_num != 5
6048 || pp.encoding != encoding_egpr)
6049 && i.op[0].imms->X_op == O_constant
6050 && (i.op[0].imms->X_add_number == 3
6051 || i.op[0].imms->X_add_number == 5
6052 || i.op[0].imms->X_add_number == 9))
6054 /* Optimize: -O:
6055 For n one of 3, 5, or 9
6056 {nf} imulw $n, %N, %M -> leaw (%rN,%rN,n-1), %M
6057 {nf} imull $n, %eN, %eM -> leal (%rN,%rN,n-1), %eM
6058 {nf} imulq $n, %rN, %rM -> leaq (%rN,%rN,n-1), %rM
6060 {nf} imulw $n, %N -> leaw (%rN,%rN,n-1), %N
6061 {nf} imull $n, %eN -> leal (%rN,%rN,n-1), %eN
6062 {nf} imulq $n, %rN -> leaq (%rN,%rN,n-1), %rN
6064 i.tm.opcode_space = SPACE_BASE;
6065 i.tm.base_opcode = 0x8d;
6066 i.tm.extension_opcode = None;
6067 i.tm.opcode_modifier.evex = 0;
6068 i.base_reg = i.op[1].regs;
6069 /* NB: No similar adjustment is needed when operand size is 32 bits. */
6070 if (i.types[1].bitfield.word)
6071 i.base_reg += 64;
6072 i.index_reg = i.base_reg;
6073 i.log2_scale_factor = i.op[0].imms->X_add_number == 9
6074 ? 3 : i.op[0].imms->X_add_number >> 1;
6076 operand_type_set (&i.types[0], 0);
6077 i.types[0].bitfield.baseindex = 1;
6078 i.tm.operand_types[0] = i.types[0];
6079 i.op[0].disps = NULL;
6080 i.flags[0] = Operand_Mem;
6082 copy_operand (1, i.operands - 1);
6084 i.operands = 2;
6085 i.mem_operands = i.reg_operands = 1;
6086 i.imm_operands = 0;
6087 pp.has_nf = false;
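/* E.g. (illustrative): "{nf} imull $5, %edx, %eax" becomes
   "leal (%rdx,%rdx,4), %eax"; multipliers 3, 5, and 9 map to scale
   factors 2, 4, and 8 respectively.  */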
6089 else if (cpu_arch_isa_flags.bitfield.cpubmi2
6090 && pp.encoding == encoding_default
6091 && (i.operands > 2 || !i.mem_operands)
6092 && (i.types[i.operands - 1].bitfield.dword
6093 || i.types[i.operands - 1].bitfield.qword))
6095 if (i.tm.base_opcode == 0xd2)
6097 /* Optimize: -O:
6098 <OP> one of sal, sar, shl, shr:
6099 {nf} <OP> %cl, %rN -> <OP>x %{e,r}cx, %rN, %rN (N < 16)
6100 {nf} <OP> %cl, ..., %rN -> <OP>x %{e,r}cx, ..., %rN (no eGPR used)
6102 gas_assert (i.tm.extension_opcode & 4);
6103 i.tm.operand_types[0] = i.tm.operand_types[i.operands - 1];
6104 /* NB: i.op[0].regs specifying %cl is good enough. */
6105 i.types[0] = i.types[i.operands - 1];
6106 if (i.operands == 2)
6108 i.tm.operand_types[0].bitfield.baseindex = 0;
6109 i.tm.operand_types[2] = i.tm.operand_types[0];
6110 i.op[2].regs = i.op[1].regs;
6111 i.types[2] = i.types[1];
6112 i.reg_operands = i.operands = 3;
6114 pp.has_nf = false;
6115 i.tm.opcode_modifier.w = 0;
6116 i.tm.opcode_modifier.evex = 0;
6117 i.tm.opcode_modifier.vex = VEX128;
6118 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC2;
6119 i.tm.opcode_space = SPACE_0F38;
6120 i.tm.base_opcode = 0xf7;
6121 i.tm.opcode_modifier.opcodeprefix
6122 = !(i.tm.extension_opcode & 1)
6123 ? PREFIX_0X66 /* shlx */
6124 : i.tm.extension_opcode & 2
6125 ? PREFIX_0XF3 /* sarx */
6126 : PREFIX_0XF2 /* shrx */;
6127 i.tm.extension_opcode = None;
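/* E.g. (illustrative): "{nf} shll %cl, %eax" is re-encoded as the BMI2
   form "shlx %ecx, %eax, %eax" (VEX.66.0F38 0xf7), which likewise
   leaves the flags untouched while avoiding the larger EVEX prefix.  */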
6129 else if (i.tm.base_opcode == 0xc0
6130 && i.tm.extension_opcode <= 1
6131 && i.op[0].imms->X_op == O_constant)
6133 /* Optimize: -O:
6134 {nf} rol $I, %rN -> rorx $osz-I, %rN, %rN (I != osz-1, N < 16)
6135 {nf} rol $I, ..., %rN -> rorx $osz-I, ..., %rN (I != osz-1, no eGPR used)
6136 {nf} ror $I, %rN -> rorx $I, %rN, %rN (I != 1, N < 16)
6137 {nf} ror $I, ..., %rN -> rorx $I, ..., %rN (I != 1, no eGPR used)
6138 NB: rol -> ror transformation for I == osz-1 was already handled above.
6139 NB2: ror with an immediate of 1 uses a different base opcode.
6141 if (i.operands == 2)
6143 copy_operand (2, 1);
6144 i.tm.operand_types[2].bitfield.baseindex = 0;
6145 i.reg_operands = 2;
6146 i.operands = 3;
6148 pp.has_nf = false;
6149 i.tm.opcode_modifier.w = 0;
6150 i.tm.opcode_modifier.evex = 0;
6151 i.tm.opcode_modifier.vex = VEX128;
6152 i.tm.opcode_modifier.vexvvvv = 0;
6153 i.tm.opcode_space = SPACE_0F3A;
6154 i.tm.base_opcode = 0xf0;
6155 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
6156 if (!i.tm.extension_opcode)
6157 i.op[0].imms->X_add_number =
6158 (i.types[i.operands - 1].bitfield.byte
6159 ? 8 : i.types[i.operands - 1].bitfield.word
6160 ? 16 : 64 >> i.types[i.operands - 1].bitfield.dword)
6161 - i.op[0].imms->X_add_number;
6162 i.tm.extension_opcode = None;
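/* E.g. (illustrative): "{nf} roll $5, %eax" becomes
   "rorx $27, %eax, %eax" (32 - 5 == 27), while "{nf} rorl $5, %eax"
   keeps its immediate unchanged.  */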
6164 else if (i.tm.base_opcode == 0xf6
6165 && i.tm.extension_opcode == 4
6166 && !i.mem_operands
6167 && i.op[0].regs->reg_num == 2
6168 && !(i.op[0].regs->reg_flags & RegRex))
6170 /* Optimize: -O:
6171 {nf} mul %edx -> mulx %eax, %eax, %edx
6172 {nf} mul %rdx -> mulx %rax, %rax, %rdx
6174 i.tm.operand_types[1] = i.tm.operand_types[0];
6175 i.tm.operand_types[1].bitfield.baseindex = 0;
6176 i.tm.operand_types[2] = i.tm.operand_types[1];
6177 i.op[2].regs = i.op[0].regs;
6178 /* NB: %eax is good enough also for 64-bit operand size. */
6179 i.op[1].regs = i.op[0].regs = reg_eax;
6180 i.types[2] = i.types[1] = i.types[0];
6181 i.reg_operands = i.operands = 3;
6183 pp.has_nf = false;
6184 i.tm.opcode_modifier.w = 0;
6185 i.tm.opcode_modifier.evex = 0;
6186 i.tm.opcode_modifier.vex = VEX128;
6187 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
6188 i.tm.opcode_space = SPACE_0F38;
6189 i.tm.base_opcode = 0xf6;
6190 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
6191 i.tm.extension_opcode = None;
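/* Note (illustrative): in AT&T syntax "mulx %eax, %eax, %edx"
   multiplies %edx by %eax, placing the low half in %eax and the high
   half in %edx, matching the result of "mul %edx" without modifying
   the flags.  */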
6196 static void
6197 s_noopt (int dummy ATTRIBUTE_UNUSED)
6199 if (!is_it_end_of_statement ())
6200 as_warn (_("`.noopt' arguments ignored"));
6202 optimize = 0;
6203 optimize_for_space = 0;
6205 ignore_rest_of_line ();
6208 /* Return non-zero for load instruction. */
6210 static int
6211 load_insn_p (void)
6213 unsigned int dest;
6214 int any_vex_p = is_any_vex_encoding (&i.tm);
6215 unsigned int base_opcode = i.tm.base_opcode | 1;
6217 if (!any_vex_p)
6219 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
6220 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
6221 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
6222 return 0;
6224 /* pop. */
6225 if (i.tm.mnem_off == MN_pop)
6226 return 1;
6229 if (i.tm.opcode_space == SPACE_BASE)
6231 /* popf, popa. */
6232 if (i.tm.base_opcode == 0x9d
6233 || i.tm.base_opcode == 0x61)
6234 return 1;
6236 /* movs, cmps, lods, scas. */
6237 if ((i.tm.base_opcode | 0xb) == 0xaf)
6238 return 1;
6240 /* outs, xlatb. */
6241 if (base_opcode == 0x6f
6242 || i.tm.base_opcode == 0xd7)
6243 return 1;
6244 /* NB: AMD-specific insns with implicit memory operands are
6245 intentionally not covered. */
6248 /* No memory operand. */
6249 if (!i.mem_operands)
6250 return 0;
6252 if (any_vex_p)
6254 if (i.tm.mnem_off == MN_vldmxcsr)
6255 return 1;
6257 else if (i.tm.opcode_space == SPACE_BASE)
6259 /* test, not, neg, mul, imul, div, idiv. */
6260 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
6261 return 1;
6263 /* inc, dec. */
6264 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
6265 return 1;
6267 /* add, or, adc, sbb, and, sub, xor, cmp. */
6268 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
6269 return 1;
6271 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
6272 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
6273 && i.tm.extension_opcode != 6)
6274 return 1;
6276 /* Check for x87 instructions. */
6277 if ((base_opcode | 6) == 0xdf)
6279 /* Skip fst, fstp, fstenv, fstcw. */
6280 if (i.tm.base_opcode == 0xd9
6281 && (i.tm.extension_opcode == 2
6282 || i.tm.extension_opcode == 3
6283 || i.tm.extension_opcode == 6
6284 || i.tm.extension_opcode == 7))
6285 return 0;
6287 /* Skip fisttp, fist, fistp, fstp. */
6288 if (i.tm.base_opcode == 0xdb
6289 && (i.tm.extension_opcode == 1
6290 || i.tm.extension_opcode == 2
6291 || i.tm.extension_opcode == 3
6292 || i.tm.extension_opcode == 7))
6293 return 0;
6295 /* Skip fisttp, fst, fstp, fsave, fstsw. */
6296 if (i.tm.base_opcode == 0xdd
6297 && (i.tm.extension_opcode == 1
6298 || i.tm.extension_opcode == 2
6299 || i.tm.extension_opcode == 3
6300 || i.tm.extension_opcode == 6
6301 || i.tm.extension_opcode == 7))
6302 return 0;
6304 /* Skip fisttp, fist, fistp, fbstp, fistp. */
6305 if (i.tm.base_opcode == 0xdf
6306 && (i.tm.extension_opcode == 1
6307 || i.tm.extension_opcode == 2
6308 || i.tm.extension_opcode == 3
6309 || i.tm.extension_opcode == 6
6310 || i.tm.extension_opcode == 7))
6311 return 0;
6313 return 1;
6316 else if (i.tm.opcode_space == SPACE_0F)
6318 /* bt, bts, btr, btc. */
6319 if (i.tm.base_opcode == 0xba
6320 && (i.tm.extension_opcode | 3) == 7)
6321 return 1;
6323 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
6324 if (i.tm.base_opcode == 0xc7
6325 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
6326 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
6327 || i.tm.extension_opcode == 6))
6328 return 1;
6330 /* fxrstor, ldmxcsr, xrstor. */
6331 if (i.tm.base_opcode == 0xae
6332 && (i.tm.extension_opcode == 1
6333 || i.tm.extension_opcode == 2
6334 || i.tm.extension_opcode == 5))
6335 return 1;
6337 /* lgdt, lidt, lmsw. */
6338 if (i.tm.base_opcode == 0x01
6339 && (i.tm.extension_opcode == 2
6340 || i.tm.extension_opcode == 3
6341 || i.tm.extension_opcode == 6))
6342 return 1;
6345 dest = i.operands - 1;
6347 /* Check fake imm8 operand and 3 source operands. */
6348 if ((i.tm.opcode_modifier.immext
6349 || i.reg_operands + i.mem_operands == 4)
6350 && i.types[dest].bitfield.imm8)
6351 dest--;
6353 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
6354 if (i.tm.opcode_space == SPACE_BASE
6355 && ((base_opcode | 0x38) == 0x39
6356 || (base_opcode | 2) == 0x87))
6357 return 1;
6359 if (i.tm.mnem_off == MN_xadd)
6360 return 1;
6362 /* Check for load instruction. */
6363 return (i.types[dest].bitfield.class != ClassNone
6364 || i.types[dest].bitfield.instance == Accum);
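/* E.g. (illustrative): "mov (%rax), %ebx" is classified as a load
   here, while "mov %ebx, (%rax)" is not, since its destination operand
   is memory rather than a register.  */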
6367 /* Output lfence, 0x0faee8, after instruction. */
6369 static void
6370 insert_lfence_after (void)
6372 if (lfence_after_load && load_insn_p ())
6374 /* There are also two REP string instructions that require
6375 special treatment. Specifically, the compare string (CMPS)
6376 and scan string (SCAS) instructions set EFLAGS in a manner
6377 that depends on the data being compared/scanned. When used
6378 with a REP prefix, the number of iterations may therefore
6379 vary depending on this data. If the data is a program secret
6380 chosen by the adversary using an LVI method,
6381 then this data-dependent behavior may leak some aspect
6382 of the secret. */
6383 if (((i.tm.base_opcode | 0x9) == 0xaf)
6384 && i.prefix[REP_PREFIX])
6386 as_warn (_("`%s` changes flags which would affect control flow behavior"),
6387 insn_name (&i.tm));
6389 char *p = frag_more (3);
6390 *p++ = 0xf;
6391 *p++ = 0xae;
6392 *p = 0xe8;
6396 /* Output lfence, 0x0faee8, before instruction. */
6398 static void
6399 insert_lfence_before (const struct last_insn *last_insn)
6401 char *p;
6403 if (i.tm.opcode_space != SPACE_BASE)
6404 return;
6406 if (i.tm.base_opcode == 0xff
6407 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
6409 /* Insert lfence before indirect branch if needed. */
6411 if (lfence_before_indirect_branch == lfence_branch_none)
6412 return;
6414 if (i.operands != 1)
6415 abort ();
6417 if (i.reg_operands == 1)
6419 /* Indirect branch via register. Don't insert lfence with
6420 -mlfence-after-load=yes. */
6421 if (lfence_after_load
6422 || lfence_before_indirect_branch == lfence_branch_memory)
6423 return;
6425 else if (i.mem_operands == 1
6426 && lfence_before_indirect_branch != lfence_branch_register)
6428 as_warn (_("indirect `%s` with memory operand should be avoided"),
6429 insn_name (&i.tm));
6430 return;
6432 else
6433 return;
6435 if (last_insn->kind != last_insn_other)
6437 as_warn_where (last_insn->file, last_insn->line,
6438 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
6439 last_insn->name, insn_name (&i.tm));
6440 return;
6443 p = frag_more (3);
6444 *p++ = 0xf;
6445 *p++ = 0xae;
6446 *p = 0xe8;
6447 return;
6450 /* Output or/not/shl and lfence before near ret. */
6451 if (lfence_before_ret != lfence_before_ret_none
6452 && (i.tm.base_opcode | 1) == 0xc3)
6454 if (last_insn->kind != last_insn_other)
6456 as_warn_where (last_insn->file, last_insn->line,
6457 _("`%s` skips -mlfence-before-ret on `%s`"),
6458 last_insn->name, insn_name (&i.tm));
6459 return;
6462 /* Near ret ignores the operand size override under CPU64. */
6463 char prefix = flag_code == CODE_64BIT
6464 ? 0x48
6465 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
6467 if (lfence_before_ret == lfence_before_ret_not)
6469 /* not: 0xf71424, may add prefix
6470 for operand size override or 64-bit code. */
6471 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
6472 if (prefix)
6473 *p++ = prefix;
6474 *p++ = 0xf7;
6475 *p++ = 0x14;
6476 *p++ = 0x24;
6477 if (prefix)
6478 *p++ = prefix;
6479 *p++ = 0xf7;
6480 *p++ = 0x14;
6481 *p++ = 0x24;
6483 else
6485 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
6486 if (prefix)
6487 *p++ = prefix;
6488 if (lfence_before_ret == lfence_before_ret_or)
6490 /* or: 0x830c2400, may add prefix
6491 for operand size override or 64-bit code. */
6492 *p++ = 0x83;
6493 *p++ = 0x0c;
6495 else
6497 /* shl: 0xc1242400, may add prefix
6498 for operand size override or 64-bit code. */
6499 *p++ = 0xc1;
6500 *p++ = 0x24;
6503 *p++ = 0x24;
6504 *p++ = 0x0;
6507 *p++ = 0xf;
6508 *p++ = 0xae;
6509 *p = 0xe8;
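/* E.g. (illustrative): with -mlfence-before-ret=or in 64-bit code a
   near "ret" is preceded by "orq $0, (%rsp); lfence", i.e. the byte
   sequence 48 83 0c 24 00 0f ae e8.  */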
6513 /* Shared helper for md_assemble() and s_insn(). */
6514 static void init_globals (void)
6516 unsigned int j;
6518 memset (&i, '\0', sizeof (i));
6519 i.rounding.type = rc_none;
6520 for (j = 0; j < MAX_OPERANDS; j++)
6521 i.reloc[j] = NO_RELOC;
6522 memset (disp_expressions, '\0', sizeof (disp_expressions));
6523 memset (im_expressions, '\0', sizeof (im_expressions));
6524 save_stack_p = save_stack;
6527 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
6528 parsing pass. Instead of introducing a rarely used new insn attribute this
6529 utilizes a common pattern between affected templates. It is deemed
6530 acceptable that this will lead to unnecessary pass 2 preparations in a
6531 limited set of cases. */
6532 static INLINE bool may_need_pass2 (const insn_template *t)
6534 return t->opcode_modifier.sse2avx
6535 /* Note that all SSE2AVX templates have at least one operand. */
6536 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
6537 : (t->opcode_space == SPACE_0F
6538 && (t->base_opcode | 1) == 0xbf)
6539 || (t->opcode_space == SPACE_BASE
6540 && t->base_opcode == 0x63)
6541 || (intel_syntax /* shld / shrd may mean suffixed shl / shr. */
6542 && t->opcode_space == SPACE_MAP4
6543 && (t->base_opcode | 8) == 0x2c);
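/* E.g. (illustrative): movsx (0f be/bf) and the 0x63 templates
   (arpl / movslq) match the pattern above and may need re-parsing once
   the suffix is known.  */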
6546 #ifdef OBJ_ELF
6547 static enum x86_tls_error_type
6548 x86_check_tls_relocation (enum bfd_reloc_code_real r_type)
6550 switch (r_type)
6552 case BFD_RELOC_386_TLS_GOTDESC:
6553 /* Check GDesc access model:
6555 leal x@tlsdesc(%ebx), %reg32 --> Memory reg must be %ebx and
6556 SIB is not supported.
6558 if (i.tm.mnem_off != MN_lea)
6559 return x86_tls_error_insn;
6560 if (i.index_reg)
6561 return x86_tls_error_sib;
6562 if (!i.base_reg)
6563 return x86_tls_error_no_base_reg;
6564 if (i.base_reg->reg_type.bitfield.instance != RegB)
6565 return x86_tls_error_ebx;
6566 if (!i.op[1].regs->reg_type.bitfield.dword)
6567 return x86_tls_error_dest_32bit_reg_size;
6568 break;
6570 case BFD_RELOC_386_TLS_GD:
6571 /* Check GD access model:
6573 leal foo@tlsgd(,%ebx,1), %eax --> Only this fixed format is supported.
6574 leal foo@tlsgd(%reg32), %eax --> Dest reg must be '%eax'
6575 Memory reg can't be %eax.
6577 if (i.tm.mnem_off != MN_lea)
6578 return x86_tls_error_insn;
6579 if (i.op[1].regs->reg_type.bitfield.instance != Accum)
6580 return x86_tls_error_dest_eax;
6581 if (!i.op[1].regs->reg_type.bitfield.dword)
6582 return x86_tls_error_dest_32bit_reg_size;
6583 if (i.index_reg)
6585 if (i.base_reg)
6586 return x86_tls_error_base_reg;
6587 if (i.index_reg->reg_type.bitfield.instance != RegB)
6588 return x86_tls_error_index_ebx;
6589 if (i.log2_scale_factor)
6590 return x86_tls_error_scale_factor;
6592 else
6594 if (!i.base_reg)
6595 return x86_tls_error_no_base_reg;
6596 if (i.base_reg->reg_type.bitfield.instance == Accum)
6597 return x86_tls_error_eax;
6599 break;
6601 case BFD_RELOC_386_TLS_LDM:
6602 /* Check LDM access model:
6604 leal foo@tlsldm(%reg32), %eax --> Dest reg must be '%eax'
6605 Memory reg can't be %eax and SIB
6606 is not supported.
6608 if (i.tm.mnem_off != MN_lea)
6609 return x86_tls_error_insn;
6610 if (i.index_reg)
6611 return x86_tls_error_sib;
6612 if (!i.base_reg)
6613 return x86_tls_error_no_base_reg;
6614 if (i.base_reg->reg_type.bitfield.instance == Accum)
6615 return x86_tls_error_eax;
6616 if (i.op[1].regs->reg_type.bitfield.instance != Accum)
6617 return x86_tls_error_dest_eax;
6618 if (!i.op[1].regs->reg_type.bitfield.dword)
6619 return x86_tls_error_dest_32bit_reg_size;
6620 break;
6622 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
6623 /* Check GOTPC32 TLSDESC access model:
6625 --- LP64 mode ---
6626 leaq x@tlsdesc(%rip), %reg64 --> Memory reg must be %rip.
6628 --- X32 mode ---
6629 rex/rex2 leal x@tlsdesc(%rip), %reg32 --> Memory reg must be %rip.
6631 In X32 mode, gas will add rex/rex2 for it later; no need to check
6632 here.
6634 if (i.tm.mnem_off != MN_lea)
6635 return x86_tls_error_insn;
6636 if (!i.base_reg)
6637 return x86_tls_error_no_base_reg;
6638 if (i.base_reg->reg_num != RegIP
6639 || !i.base_reg->reg_type.bitfield.qword)
6640 return x86_tls_error_rip;
6641 if (x86_elf_abi == X86_64_ABI)
6643 if (!i.op[1].regs->reg_type.bitfield.qword)
6644 return x86_tls_error_dest_64bit_reg_size;
6646 else if (!i.op[1].regs->reg_type.bitfield.dword
6647 && !i.op[1].regs->reg_type.bitfield.qword)
6648 return x86_tls_error_dest_32bit_or_64bit_reg_size;
6649 break;
6651 case BFD_RELOC_X86_64_TLSGD:
6652 /* Check GD access model:
6654 leaq foo@tlsgd(%rip), %rdi --> Only this fixed format is supported.
6656 case BFD_RELOC_X86_64_TLSLD:
6657 /* Check LD access model:
6659 leaq foo@tlsld(%rip), %rdi --> Only this fixed format is supported.
6661 if (i.tm.mnem_off != MN_lea)
6662 return x86_tls_error_insn;
6663 if (!i.base_reg)
6664 return x86_tls_error_no_base_reg;
6665 if (i.base_reg->reg_num != RegIP
6666 || !i.base_reg->reg_type.bitfield.qword)
6667 return x86_tls_error_rip;
6668 if (!i.op[1].regs->reg_type.bitfield.qword
6669 || i.op[1].regs->reg_num != EDI_REG_NUM
6670 || i.op[1].regs->reg_flags)
6671 return x86_tls_error_dest_rdi;
6672 break;
6674 case BFD_RELOC_386_TLS_GOTIE:
6675 /* Check GOTIE access model:
6677 subl foo@gotntpoff(%reg1), %reg2
6678 movl foo@gotntpoff(%reg1), %reg2
6679 addl foo@gotntpoff(%reg1), %reg2
6681 Memory operand: SIB is not supported.
6683 case BFD_RELOC_386_TLS_IE_32:
6684 /* Check IE_32 access model:
6686 subl foo@gottpoff(%reg1), %reg2
6687 movl foo@gottpoff(%reg1), %reg2
6688 addl foo@gottpoff(%reg1), %reg2
6690 Memory operand: SIB is not supported.
6692 if (i.tm.mnem_off != MN_sub
6693 && i.tm.mnem_off != MN_add
6694 && i.tm.mnem_off != MN_mov)
6695 return x86_tls_error_insn;
6696 if (i.imm_operands
6697 || i.disp_operands != 1
6698 || i.reg_operands != 1
6699 || i.types[1].bitfield.class != Reg)
6700 return x86_tls_error_opcode;
6701 if (!i.base_reg)
6702 return x86_tls_error_no_base_reg;
6703 if (i.index_reg)
6704 return x86_tls_error_sib;
6705 if (!i.base_reg->reg_type.bitfield.dword)
6706 return x86_tls_error_base_reg_size;
6707 if (!i.op[1].regs->reg_type.bitfield.dword)
6708 return x86_tls_error_dest_32bit_reg_size;
6709 break;
6711 case BFD_RELOC_386_TLS_IE:
6712 /* Check IE access model:
6714 movl foo@indntpoff, %reg32 --> Mod == 00 && r/m == 5
6715 addl foo@indntpoff, %reg32 --> Mod == 00 && r/m == 5
6717 if (i.tm.mnem_off != MN_add && i.tm.mnem_off != MN_mov)
6718 return x86_tls_error_insn;
6719 if (i.imm_operands
6720 || i.disp_operands != 1
6721 || i.reg_operands != 1
6722 || i.types[1].bitfield.class != Reg)
6723 return x86_tls_error_opcode;
6724 if (i.base_reg || i.index_reg)
6725 return x86_tls_error_require_no_base_index_reg;
6726 if (!i.op[1].regs->reg_type.bitfield.dword)
6727 return x86_tls_error_dest_32bit_reg_size;
6728 break;
6730 case BFD_RELOC_X86_64_GOTTPOFF:
6731 /* Check GOTTPOFF access model:
6733 mov foo@gottpoff(%rip), %reg --> Memory Reg must be %rip.
6734 add foo@gottpoff(%rip), %reg --> Memory Reg must be %rip.
6735 add %reg1, foo@gottpoff(%rip), %reg2 --> Memory Reg must be %rip.
6736 add foo@gottpoff(%rip), %reg1, %reg2 --> Memory Reg must be %rip.
6738 if (i.tm.mnem_off != MN_add && i.tm.mnem_off != MN_mov)
6739 return x86_tls_error_insn;
6740 if (i.imm_operands
6741 || i.disp_operands != 1
6742 || i.types[i.operands - 1].bitfield.class != Reg)
6743 return x86_tls_error_opcode;
6744 if (!i.base_reg)
6745 return x86_tls_error_no_base_reg;
6746 if (i.base_reg->reg_num != RegIP
6747 || !i.base_reg->reg_type.bitfield.qword)
6748 return x86_tls_error_rip;
6749 if (x86_elf_abi == X86_64_ABI)
6751 if (!i.op[i.operands - 1].regs->reg_type.bitfield.qword)
6752 return x86_tls_error_dest_64bit_reg_size;
6754 else if (!i.op[i.operands - 1].regs->reg_type.bitfield.dword
6755 && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)
6756 return x86_tls_error_dest_32bit_or_64bit_reg_size;
6757 break;
6759 case BFD_RELOC_386_TLS_DESC_CALL:
6760 /* Check GDesc access model:
6762 call *x@tlscall(%eax) --> Memory reg must be %eax and
6763 SIB is not supported.
6765 case BFD_RELOC_X86_64_TLSDESC_CALL:
6766 /* Check GDesc access model:
6768 call *x@tlscall(%rax) <--- LP64 mode.
6769 call *x@tlscall(%eax) <--- X32 mode.
6771 Only these fixed formats are supported.
6773 if (i.tm.mnem_off != MN_call)
6774 return x86_tls_error_insn;
6775 if (i.index_reg)
6776 return x86_tls_error_sib;
6777 if (!i.base_reg)
6778 return x86_tls_error_no_base_reg;
6779 if (i.base_reg->reg_type.bitfield.instance != Accum)
6780 return x86_tls_error_RegA;
6781 break;
6783 case BFD_RELOC_NONE:
6784 /* This isn't a relocation. */
6785 return x86_tls_error_continue;
6787 default:
6788 break;
6791 /* This relocation is OK. */
6792 return x86_tls_error_none;
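/* E.g. (illustrative): "leal foo@tlsgd(,%ebx,1), %eax" passes the
   BFD_RELOC_386_TLS_GD checks above, while any destination other than
   %eax would be diagnosed as x86_tls_error_dest_eax.  */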
6795 static void
6796 x86_report_tls_error (enum x86_tls_error_type tls_error,
6797 enum bfd_reloc_code_real r_type)
6799 unsigned int k;
6800 for (k = 0; k < ARRAY_SIZE (gotrel); k++)
6801 if (gotrel[k].rel[object_64bit] == r_type)
6802 break;
6804 switch (tls_error)
6806 case x86_tls_error_insn:
6807 as_bad (_("@%s operator cannot be used with `%s'"),
6808 gotrel[k].str, insn_name (&i.tm));
6809 return;
6811 case x86_tls_error_opcode:
6812 as_bad (_("@%s operator can be used with `%s', but format is wrong"),
6813 gotrel[k].str, insn_name (&i.tm));
6814 return;
6816 case x86_tls_error_sib:
6817 as_bad (_("@%s operator requires no SIB"), gotrel[k].str);
6818 return;
6820 case x86_tls_error_no_base_reg:
6821 as_bad (_("@%s operator requires base register"), gotrel[k].str);
6822 return;
6824 case x86_tls_error_require_no_base_index_reg:
6825 as_bad (_("@%s operator requires no base/index register"),
6826 gotrel[k].str);
6827 return;
6829 case x86_tls_error_base_reg:
6830 as_bad (_("@%s operator requires no base register"), gotrel[k].str);
6831 return;
6833 case x86_tls_error_index_ebx:
6834 as_bad (_("@%s operator requires `%sebx' as index register"),
6835 gotrel[k].str, register_prefix);
6836 return;
6838 case x86_tls_error_eax:
6839 as_bad (_("@%s operator requires `%seax' as base register"),
6840 gotrel[k].str, register_prefix);
6841 return;
6843 case x86_tls_error_RegA:
6844 as_bad (_("@%s operator requires `%seax/%srax' as base register"),
6845 gotrel[k].str, register_prefix, register_prefix);
6846 return;
6848 case x86_tls_error_ebx:
6849 as_bad (_("@%s operator requires `%sebx' as base register"),
6850 gotrel[k].str, register_prefix);
6851 return;
6853 case x86_tls_error_rip:
6854 as_bad (_("@%s operator requires `%srip' as base register"),
6855 gotrel[k].str, register_prefix);
6856 return;
6858 case x86_tls_error_dest_eax:
6859 as_bad (_("@%s operator requires `%seax' as dest register"),
6860 gotrel[k].str, register_prefix);
6861 return;
6863 case x86_tls_error_dest_rdi:
6864 as_bad (_("@%s operator requires `%srdi' as dest register"),
6865 gotrel[k].str, register_prefix);
6866 return;
6868 case x86_tls_error_scale_factor:
6869 as_bad (_("@%s operator requires scale factor of 1"),
6870 gotrel[k].str);
6871 return;
6873 case x86_tls_error_base_reg_size:
6874 as_bad (_("@%s operator requires 32-bit base register"),
6875 gotrel[k].str);
6876 return;
6878 case x86_tls_error_dest_32bit_reg_size:
6879 as_bad (_("@%s operator requires 32-bit dest register"),
6880 gotrel[k].str);
6881 return;
6883 case x86_tls_error_dest_64bit_reg_size:
6884 as_bad (_("@%s operator requires 64-bit dest register"),
6885 gotrel[k].str);
6886 return;
6888 case x86_tls_error_dest_32bit_or_64bit_reg_size:
6889 as_bad (_("@%s operator requires 32-bit or 64-bit dest register"),
6890 gotrel[k].str);
6891 return;
6893 default:
6894 abort ();
6897 #endif
6899 /* This is the guts of the machine-dependent assembler. LINE points to a
6900 machine dependent instruction. This function is supposed to emit
6901 the frags/bytes it assembles to. */
6903 static void
6904 i386_assemble (char *line)
6906 unsigned int j;
6907 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
6908 char *xstrdup_copy = NULL;
6909 const char *end, *pass1_mnem = NULL;
6910 enum i386_error pass1_err = 0;
6911 struct pseudo_prefixes orig_pp = pp;
6912 const insn_template *t;
6913 struct last_insn *last_insn
6914 = &seg_info(now_seg)->tc_segment_info_data.last_insn;
6916 /* Initialize globals. */
6917 current_templates.end = current_templates.start = NULL;
6918 retry:
6919 init_globals ();
6921 /* Suppress optimization when the last thing we saw may not have been
6922 a proper instruction (e.g. a stand-alone prefix or .byte). */
6923 if (last_insn->kind != last_insn_other)
6924 pp.no_optimize = true;
6926 /* First parse an instruction mnemonic & call i386_operand for the operands.
6927 We assume that the scrubber has arranged it so that line[0] is the valid
6928 start of a (possibly prefixed) mnemonic. */
6930 end = parse_insn (line, mnemonic, parse_all);
6931 if (end == NULL)
6933 if (pass1_mnem != NULL)
6934 goto match_error;
6935 if (i.error != no_error)
6937 gas_assert (current_templates.start != NULL);
6938 if (may_need_pass2 (current_templates.start) && !i.suffix)
6939 goto no_match;
6940 /* No point in trying a 2nd pass - it'll only find the same suffix
6941 again. */
6942 mnem_suffix = i.suffix;
6943 goto match_error;
6945 return;
6947 t = current_templates.start;
6948 /* NB: LINE may be changed to be the same as XSTRDUP_COPY. */
6949 if (xstrdup_copy != line && may_need_pass2 (t))
6951 /* Make a copy of the full line in case we need to retry. */
6952 xstrdup_copy = xstrdup (line);
6953 copy = xstrdup_copy;
6955 line += end - line;
6956 mnem_suffix = i.suffix;
6958 line = parse_operands (line, mnemonic);
6959 this_operand = -1;
6960 if (line == NULL)
6962 free (xstrdup_copy);
6963 return;
6966 /* Now we've parsed the mnemonic into a set of templates, and have the
6967 operands at hand. */
6969 /* All Intel opcodes have reversed operands except for "bound", "enter",
6970 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
6971 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
6972 intersegment "jmp" and "call" instructions with 2 immediate operands so
6973 that the immediate segment precedes the offset consistently in Intel and
6974 AT&T modes. */
6975 if (intel_syntax
6976 && i.operands > 1
6977 && (t->mnem_off != MN_bound)
6978 && !startswith (mnemonic, "invlpg")
6979 && !startswith (mnemonic, "monitor")
6980 && !startswith (mnemonic, "mwait")
6981 && (t->mnem_off != MN_pvalidate)
6982 && !startswith (mnemonic, "rmp")
6983 && (t->mnem_off != MN_tpause)
6984 && (t->mnem_off != MN_umwait)
6985 && !(i.operands == 2
6986 && operand_type_check (i.types[0], imm)
6987 && operand_type_check (i.types[1], imm)))
6988 swap_operands ();
6990 /* The order of the immediates should be reversed for 2-immediates EXTRQ
6991 and INSERTQ instructions. Also UWRMSR wants its immediate to be in the
6992 "canonical" place (first), despite it appearing last (in AT&T syntax, or
6993 because of the swapping above) in the incoming set of operands. */
6994 if ((i.imm_operands == 2
6995 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
6996 || ((t->mnem_off == MN_uwrmsr || t->mnem_off == MN_wrmsrns)
6997 && i.imm_operands && i.operands > i.imm_operands))
6998 swap_2_operands (0, 1);
7000 if (i.imm_operands)
7002 /* For USER_MSR and MSR_IMM instructions, imm32 stands for the name of a
7003 model specific register (MSR). That's an unsigned quantity, whereas all
7004 other insns with 32-bit immediate and 64-bit operand size use
7005 sign-extended immediates (imm32s). Therefore these insns are
7006 special-cased, bypassing the normal handling of immediates here. */
7007 if (is_cpu(current_templates.start, CpuUSER_MSR)
7008 || t->mnem_off == MN_rdmsr
7009 || t->mnem_off == MN_wrmsrns)
7011 for (j = 0; j < i.operands; j++)
7013 if (operand_type_check(i.types[j], imm))
7014 i.types[j] = smallest_imm_type (i.op[j].imms->X_add_number);
7017 else
7018 optimize_imm ();
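/* E.g. (illustrative): an MSR index such as 0xc0000080 (IA32_EFER) has
   bit 31 set; routing it through smallest_imm_type () above keeps it a
   plain unsigned imm32 instead of a sign-extended imm32s.  */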
7021 if (i.disp_operands && !optimize_disp (t))
7022 return;
7024 /* Next, we find a template that matches the given insn,
7025 making sure the overlap of the given operands types is consistent
7026 with the template operand types. */
7028 if (!(t = match_template (mnem_suffix)))
7030 const char *err_msg;
7032 if (copy && !mnem_suffix)
7034 line = copy;
7035 copy = NULL;
7036 no_match:
7037 pass1_err = i.error;
7038 pass1_mnem = insn_name (current_templates.start);
7039 pp = orig_pp;
7040 goto retry;
7043 /* If a non-/only-64bit template (group) was found in pass 1, and if
7044 _some_ template (group) was found in pass 2, squash pass 1's
7045 error. */
7046 if (pass1_err == unsupported_64bit)
7047 pass1_mnem = NULL;
7049 match_error:
7050 free (xstrdup_copy);
7052 switch (pass1_mnem ? pass1_err : i.error)
7054 default:
7055 abort ();
7056 case operand_size_mismatch:
7057 err_msg = _("operand size mismatch");
7058 break;
7059 case operand_type_mismatch:
7060 err_msg = _("operand type mismatch");
7061 break;
7062 case register_type_mismatch:
7063 err_msg = _("register type mismatch");
7064 break;
7065 case number_of_operands_mismatch:
7066 err_msg = _("number of operands mismatch");
7067 break;
7068 case invalid_instruction_suffix:
7069 err_msg = _("invalid instruction suffix");
7070 break;
7071 case bad_imm4:
7072 err_msg = _("constant doesn't fit in 4 bits");
7073 break;
7074 case unsupported_with_intel_mnemonic:
7075 err_msg = _("unsupported with Intel mnemonic");
7076 break;
7077 case unsupported_syntax:
7078 err_msg = _("unsupported syntax");
7079 break;
7080 case unsupported_EGPR_for_addressing:
7081 err_msg = _("extended GPR cannot be used as base/index");
7082 break;
7083 case unsupported_nf:
7084 err_msg = _("{nf} unsupported");
7085 break;
7086 case unsupported:
7087 as_bad (_("unsupported instruction `%s'"),
7088 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
7089 return;
7090 case unsupported_on_arch:
7091 as_bad (_("`%s' is not supported on `%s%s'"),
7092 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
7093 cpu_arch_name ? cpu_arch_name : default_arch,
7094 cpu_sub_arch_name ? cpu_sub_arch_name : "");
7095 return;
7096 case unsupported_64bit:
7097 if (ISLOWER (mnem_suffix))
7099 if (flag_code == CODE_64BIT)
7100 as_bad (_("`%s%c' is not supported in 64-bit mode"),
7101 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
7102 mnem_suffix);
7103 else
7104 as_bad (_("`%s%c' is only supported in 64-bit mode"),
7105 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
7106 mnem_suffix);
7108 else
7110 if (flag_code == CODE_64BIT)
7111 as_bad (_("`%s' is not supported in 64-bit mode"),
7112 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
7113 else
7114 as_bad (_("`%s' is only supported in 64-bit mode"),
7115 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
7117 return;
7118 case no_vex_encoding:
7119 err_msg = _("no VEX/XOP encoding");
7120 break;
7121 case no_evex_encoding:
7122 err_msg = _("no EVEX encoding");
7123 break;
7124 case invalid_sib_address:
7125 err_msg = _("invalid SIB address");
7126 break;
7127 case invalid_vsib_address:
7128 err_msg = _("invalid VSIB address");
7129 break;
7130 case invalid_vector_register_set:
7131 err_msg = _("mask, index, and destination registers must be distinct");
7132 break;
7133 case invalid_tmm_register_set:
7134 err_msg = _("all tmm registers must be distinct");
7135 break;
7136 case invalid_dest_and_src_register_set:
7137 err_msg = _("destination and source registers must be distinct");
7138 break;
7139 case invalid_dest_register_set:
7140 err_msg = _("two dest registers must be distinct");
7141 break;
7142 case invalid_pseudo_prefix:
7143 err_msg = _("rex2 pseudo prefix cannot be used");
7144 break;
7145 case unsupported_vector_index_register:
7146 err_msg = _("unsupported vector index register");
7147 break;
7148 case unsupported_broadcast:
7149 err_msg = _("unsupported broadcast");
7150 break;
7151 case broadcast_needed:
7152 err_msg = _("broadcast is needed for operand of such type");
7153 break;
7154 case unsupported_masking:
7155 err_msg = _("unsupported masking");
7156 break;
7157 case mask_not_on_destination:
7158 err_msg = _("mask not on destination operand");
7159 break;
7160 case no_default_mask:
7161 err_msg = _("default mask isn't allowed");
7162 break;
7163 case unsupported_rc_sae:
7164 err_msg = _("unsupported static rounding/sae");
7165 break;
7166 case unsupported_vector_size:
7167 as_bad (_("vector size above %u required for `%s'"), 128u << vector_size,
7168 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
7169 return;
7170 case unsupported_rsp_register:
7171 err_msg = _("'rsp' register cannot be used");
7172 break;
7173 case internal_error:
7174 err_msg = _("internal error");
7175 break;
7177 as_bad (_("%s for `%s'"), err_msg,
7178 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
7179 return;
7182 free (xstrdup_copy);
7184 if (sse_check != check_none
7185 /* The opcode space check isn't strictly needed; it's there only to
7186 bypass the logic below when easily possible. */
7187 && t->opcode_space >= SPACE_0F
7188 && t->opcode_space <= SPACE_0F3A
7189 && !is_cpu (&i.tm, CpuSSE4a)
7190 && !is_any_vex_encoding (t))
7192 /* Some KL and all WideKL insns have only implicit %xmm operands. */
7193 bool simd = is_cpu (t, CpuKL) || is_cpu (t, CpuWideKL);
7195 for (j = 0; j < t->operands; ++j)
7197 if (t->operand_types[j].bitfield.class == RegMMX)
7198 break;
7199 if (t->operand_types[j].bitfield.class == RegSIMD)
7200 simd = true;
7203 if (j >= t->operands && simd)
7204 (sse_check == check_warning
7205 ? as_warn
7206 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
7209 if (i.tm.opcode_modifier.fwait)
7210 if (!add_prefix (FWAIT_OPCODE))
7211 return;
7213 /* Check if REP prefix is OK. */
7214 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
7216 as_bad (_("invalid instruction `%s' after `%s'"),
7217 insn_name (&i.tm), i.rep_prefix);
7218 return;
7221 /* Check for lock without a lockable instruction. Destination operand
7222 must be memory unless it is xchg (0x86). */
7223 if (i.prefix[LOCK_PREFIX])
7225 if (i.tm.opcode_modifier.prefixok < PrefixLock
7226 || i.mem_operands == 0
7227 || (i.tm.base_opcode != 0x86
7228 && !(i.flags[i.operands - 1] & Operand_Mem)))
7230 as_bad (_("expecting lockable instruction after `lock'"));
7231 return;
7234 /* Zap the redundant prefix from XCHG when optimizing. */
7235 if (i.tm.base_opcode == 0x86 && optimize && !pp.no_optimize)
7236 i.prefix[LOCK_PREFIX] = 0;
7239 #ifdef OBJ_ELF
7240 if (i.has_gotrel && tls_check)
7242 enum x86_tls_error_type tls_error;
7243 for (j = 0; j < i.operands; ++j)
7245 tls_error = x86_check_tls_relocation (i.reloc[j]);
7246 if (tls_error == x86_tls_error_continue)
7247 continue;
7249 if (tls_error != x86_tls_error_none)
7250 x86_report_tls_error (tls_error, i.reloc[j]);
7251 break;
7254 #endif
7256 if ((is_any_vex_encoding (&i.tm) && i.tm.opcode_space != SPACE_MAP4)
7257 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
7258 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
7260 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
7261 if (i.prefix[DATA_PREFIX])
7263 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
7264 return;
7268 /* Check if HLE prefix is OK. */
7269 if (i.hle_prefix && !check_hle ())
7270 return;
7272 /* Check BND prefix. */
7273 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
7274 as_bad (_("expecting valid branch instruction after `bnd'"));
7276 /* Check NOTRACK prefix. */
7277 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
7278 as_bad (_("expecting indirect branch instruction after `notrack'"));
7280 if (is_cpu (&i.tm, CpuMPX))
7282 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
7283 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
7284 else if (flag_code != CODE_16BIT
7285 ? i.prefix[ADDR_PREFIX]
7286 : i.mem_operands && !i.prefix[ADDR_PREFIX])
7287 as_bad (_("16-bit address isn't allowed in MPX instructions"));
7290 /* Insert BND prefix. */
7291 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
7293 if (!i.prefix[BND_PREFIX])
7294 add_prefix (BND_PREFIX_OPCODE);
7295 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
7297 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
7298 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
7302 /* Check string instruction segment overrides. */
7303 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
7305 gas_assert (i.mem_operands);
7306 if (!check_string ())
7307 return;
7308 i.disp_operands = 0;
7311 /* The memory operand of (%dx) should be only used with input/output
7312 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
7313 if (i.input_output_operand
7314 && ((i.tm.base_opcode | 0x82) != 0xee
7315 || i.tm.opcode_space != SPACE_BASE))
7317 as_bad (_("input/output port address isn't allowed with `%s'"),
7318 insn_name (&i.tm));
7319 return;
7322 if (optimize && !pp.no_optimize && i.tm.opcode_modifier.optimize)
7324 if (pp.has_nf)
7325 optimize_nf_encoding ();
7326 optimize_encoding ();
7329 /* Past optimization there's no need to distinguish encoding_evex,
7330 encoding_evex512, and encoding_egpr anymore. */
7331 if (pp.encoding == encoding_evex512)
7332 pp.encoding = encoding_evex;
7333 else if (pp.encoding == encoding_egpr)
7334 pp.encoding = is_any_vex_encoding (&i.tm) ? encoding_evex
7335 : encoding_default;
7337 /* Similarly {nf} can now be taken to imply {evex}. */
7338 if (pp.has_nf && pp.encoding == encoding_default)
7339 pp.encoding = encoding_evex;
7341 if (use_unaligned_vector_move)
7342 encode_with_unaligned_vector_move ();
7344 if (!process_suffix (t))
7345 return;
7347 /* Check if IP-relative addressing requirements can be satisfied. */
7348 if (is_cpu (&i.tm, CpuPREFETCHI)
7349 && !(i.base_reg && i.base_reg->reg_num == RegIP))
7350 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
7352 /* Update operand types and check extended states. */
7353 for (j = 0; j < i.operands; j++)
7355 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
7356 switch (i.tm.operand_types[j].bitfield.class)
7358 default:
7359 break;
7360 case RegMMX:
7361 i.xstate |= xstate_mmx;
7362 break;
7363 case RegMask:
7364 i.xstate |= xstate_mask;
7365 break;
7366 case RegSIMD:
7367 if (i.tm.operand_types[j].bitfield.tmmword)
7368 i.xstate |= xstate_tmm;
7369 else if (i.tm.operand_types[j].bitfield.zmmword
7370 && !i.tm.opcode_modifier.vex
7371 && vector_size >= VSZ512)
7372 i.xstate |= xstate_zmm;
7373 else if (i.tm.operand_types[j].bitfield.ymmword
7374 && vector_size >= VSZ256)
7375 i.xstate |= xstate_ymm;
7376 else if (i.tm.operand_types[j].bitfield.xmmword)
7377 i.xstate |= xstate_xmm;
7378 break;
7382 /* Make still unresolved immediate matches conform to size of immediate
7383 given in i.suffix. */
7384 if (!finalize_imm ())
7385 return;
7387 if (i.types[0].bitfield.imm1)
7388 i.imm_operands = 0; /* kludge for shift insns. */
7390 /* For insns with operands there are more diddles to do to the opcode. */
7391 if (i.operands)
7393 if (!process_operands ())
7394 return;
7396 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
7398 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
7399 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
7402 if (is_any_vex_encoding (&i.tm))
7404 if (!cpu_arch_flags.bitfield.cpui286)
7406 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
7407 insn_name (&i.tm));
7408 return;
7411 /* Check for explicit REX prefix. */
7412 if ((i.prefix[REX_PREFIX]
7413 && (i.tm.opcode_space != SPACE_MAP4
7414 /* To mimic behavior for legacy insns, permit use of REX64 for promoted
7415 legacy instructions. */
7416 || i.prefix[REX_PREFIX] != (REX_OPCODE | REX_W)))
7417 || pp.rex_encoding)
7419 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
7420 return;
7423 /* Check for explicit REX2 prefix. */
7424 if (pp.rex2_encoding)
7426 as_bad (_("{rex2} prefix invalid with `%s'"), insn_name (&i.tm));
7427 return;
7430 if (is_apx_evex_encoding ())
7432 if (!build_apx_evex_prefix ())
7433 return;
7435 else if (i.tm.opcode_modifier.vex)
7436 build_vex_prefix (t);
7437 else
7438 build_evex_prefix ();
7440 /* The individual REX.RXBW bits got consumed. */
7441 i.rex &= REX_OPCODE;
7443 /* The rex2 bits got consumed. */
7444 i.rex2 = 0;
7447 /* Handle conversion of 'int $3' --> special int3 insn. */
7448 if (i.tm.mnem_off == MN_int
7449 && i.op[0].imms->X_add_number == 3)
7451 i.tm.base_opcode = INT3_OPCODE;
7452 i.imm_operands = 0;
7455 if ((i.tm.opcode_modifier.jump == JUMP
7456 || i.tm.opcode_modifier.jump == JUMP_BYTE
7457 || i.tm.opcode_modifier.jump == JUMP_DWORD)
7458 && i.op[0].disps->X_op == O_constant)
7460 /* Convert "jmp constant" (and "call constant") to a jump (call) to
7461 the absolute address given by the constant. Since ix86 jumps and
7462 calls are pc relative, we need to generate a reloc. */
7463 i.op[0].disps->X_add_symbol = &abs_symbol;
7464 i.op[0].disps->X_op = O_symbol;
7467 establish_rex ();
7469 insert_lfence_before (last_insn);
7471 /* We are ready to output the insn. */
7472 output_insn (last_insn);
7474 #ifdef OBJ_ELF
7475 /* PS: SCFI is enabled only for System V AMD64 ABI. The ABI check has been
7476 performed in i386_target_format. */
7477 if (flag_synth_cfi)
7479 ginsnS *ginsn;
7480 ginsn = x86_ginsn_new (symbol_temp_new_now (), frch_ginsn_gen_mode ());
7481 frch_ginsn_data_append (ginsn);
7483 #endif
7485 insert_lfence_after ();
7487 if (i.tm.opcode_modifier.isprefix)
7489 last_insn->kind = last_insn_prefix;
7490 last_insn->name = insn_name (&i.tm);
7491 last_insn->file = as_where (&last_insn->line);
7493 else
7494 last_insn->kind = last_insn_other;
7497 void
7498 md_assemble (char *line)
7500 i386_assemble (line);
7501 current_templates.start = NULL;
7502 memset (&pp, 0, sizeof (pp));
7505 /* The Q suffix is generally valid only in 64-bit mode, with very few
7506 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
7507 and fisttp only one of their two templates is matched below: That's
7508 sufficient since other relevant attributes are the same between both
7509 respective templates. */
7510 static INLINE bool q_suffix_allowed(const insn_template *t)
7512 return flag_code == CODE_64BIT
7513 || (t->opcode_space == SPACE_BASE
7514 && t->base_opcode == 0xdf
7515 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
7516 || t->mnem_off == MN_cmpxchg8b;
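/* E.g. (illustrative): "fildq (%esp)" assembles in 32-bit mode, and
   cmpxchg8b likewise operates on an 8-byte quantity outside 64-bit
   mode.  */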
7519 static const char *
7520 parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
7522 const char *l = line, *token_start = l;
7523 char *mnem_p;
7524 bool pass1 = !current_templates.start;
7525 int supported;
7526 const insn_template *t;
7527 char *dot_p = NULL;
7529 while (1)
7531 const char *split;
7533 mnem_p = mnemonic;
7534 /* Pseudo-prefixes start with an opening figure brace. */
7535 if ((*mnem_p = *l) == '{')
7537 ++mnem_p;
7538 ++l;
7539 if (is_space_char (*l))
7540 ++l;
7542 else if (mode == parse_pseudo_prefix)
7543 break;
7544 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
7546 if (*mnem_p == '.')
7547 dot_p = mnem_p;
7548 mnem_p++;
7549 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
7551 too_long:
7552 as_bad (_("no such instruction: `%s'"), token_start);
7553 return NULL;
7555 l++;
7557 split = l;
7558 if (is_space_char (*l))
7559 ++l;
7560 /* Pseudo-prefixes end with a closing figure brace. */
7561 if (*mnemonic == '{' && *l == '}')
7563 *mnem_p++ = *l++;
7564 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
7565 goto too_long;
7566 *mnem_p = '\0';
7568 if (is_space_char (*l))
7569 ++l;
7571 else if (l == split
7572 && *l != END_OF_INSN
7573 && (intel_syntax
7574 || (*l != PREFIX_SEPARATOR && *l != ',')))
7576 if (mode != parse_all)
7577 break;
7578 as_bad (_("invalid character %s in mnemonic"),
7579 output_invalid (*split));
7580 return NULL;
7582 if (token_start == l)
7584 if (!intel_syntax && *l == PREFIX_SEPARATOR)
7585 as_bad (_("expecting prefix; got nothing"));
7586 else
7587 as_bad (_("expecting mnemonic; got nothing"));
7588 return NULL;
7591 /* Look up instruction (or prefix) via hash table. */
7592 op_lookup (mnemonic);
7594 if (*l != END_OF_INSN
7595 && current_templates.start
7596 && current_templates.start->opcode_modifier.isprefix)
7598 supported = cpu_flags_match (current_templates.start);
7599 if (!(supported & CPU_FLAGS_64BIT_MATCH))
7601 as_bad ((flag_code != CODE_64BIT
7602 ? _("`%s' is only supported in 64-bit mode")
7603 : _("`%s' is not supported in 64-bit mode")),
7604 insn_name (current_templates.start));
7605 return NULL;
7607 if (supported != CPU_FLAGS_PERFECT_MATCH)
7609 as_bad (_("`%s' is not supported on `%s%s'"),
7610 insn_name (current_templates.start),
7611 cpu_arch_name ? cpu_arch_name : default_arch,
7612 cpu_sub_arch_name ? cpu_sub_arch_name : "");
7613 return NULL;
7615 /* If we are in 16-bit mode, do not allow addr16 or data16.
7616 Similarly, in 32-bit mode, do not allow addr32 or data32. */
7617 if ((current_templates.start->opcode_modifier.size == SIZE16
7618 || current_templates.start->opcode_modifier.size == SIZE32)
7619 && flag_code != CODE_64BIT
7620 && ((current_templates.start->opcode_modifier.size == SIZE32)
7621 ^ (flag_code == CODE_16BIT)))
7623 as_bad (_("redundant %s prefix"),
7624 insn_name (current_templates.start));
7625 return NULL;
7628 if (current_templates.start->base_opcode == PSEUDO_PREFIX)
7630 /* Handle pseudo prefixes. */
7631 switch (current_templates.start->extension_opcode)
7633 case Prefix_Disp8:
7634 /* {disp8} */
7635 pp.disp_encoding = disp_encoding_8bit;
7636 break;
7637 case Prefix_Disp16:
7638 /* {disp16} */
7639 pp.disp_encoding = disp_encoding_16bit;
7640 break;
7641 case Prefix_Disp32:
7642 /* {disp32} */
7643 pp.disp_encoding = disp_encoding_32bit;
7644 break;
7645 case Prefix_Load:
7646 /* {load} */
7647 pp.dir_encoding = dir_encoding_load;
7648 break;
7649 case Prefix_Store:
7650 /* {store} */
7651 pp.dir_encoding = dir_encoding_store;
7652 break;
7653 case Prefix_VEX:
7654 /* {vex} */
7655 pp.encoding = encoding_vex;
7656 break;
7657 case Prefix_VEX3:
7658 /* {vex3} */
7659 pp.encoding = encoding_vex3;
7660 break;
7661 case Prefix_EVEX:
7662 /* {evex} */
7663 pp.encoding = encoding_evex;
7664 break;
7665 case Prefix_REX:
7666 /* {rex} */
7667 pp.rex_encoding = true;
7668 break;
7669 case Prefix_REX2:
7670 /* {rex2} */
7671 pp.rex2_encoding = true;
7672 break;
7673 case Prefix_NF:
7674 /* {nf} */
7675 pp.has_nf = true;
7676 break;
7677 case Prefix_NoOptimize:
7678 /* {nooptimize} */
7679 pp.no_optimize = true;
7680 break;
7681 default:
7682 abort ();
7684 if (pp.has_nf
7685 && pp.encoding != encoding_default
7686 && pp.encoding != encoding_evex)
7688 as_bad (_("{nf} cannot be combined with {vex}/{vex3}"));
7689 return NULL;
7692 else
7694 /* Add prefix, checking for repeated prefixes. */
7695 switch (add_prefix (current_templates.start->base_opcode))
7697 case PREFIX_EXIST:
7698 return NULL;
7699 case PREFIX_DS:
7700 if (is_cpu (current_templates.start, CpuIBT))
7701 i.notrack_prefix = insn_name (current_templates.start);
7702 break;
7703 case PREFIX_REP:
7704 if (is_cpu (current_templates.start, CpuHLE))
7705 i.hle_prefix = insn_name (current_templates.start);
7706 else if (is_cpu (current_templates.start, CpuMPX))
7707 i.bnd_prefix = insn_name (current_templates.start);
7708 else
7709 i.rep_prefix = insn_name (current_templates.start);
7710 break;
7711 default:
7712 break;
7715 /* Skip past PREFIX_SEPARATOR and reset token_start. */
7716 l += (!intel_syntax && *l == PREFIX_SEPARATOR);
7717 if (is_space_char (*l))
7718 ++l;
7719 token_start = l;
7721 else
7722 break;
7725 if (mode != parse_all)
7726 return token_start;
7728 if (!current_templates.start)
7730 /* Deprecated functionality (new code should use pseudo-prefixes instead):
7731 Check if we should swap operands or force a 32bit displacement in
7732 the encoding. */
7733 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
7735 if (pp.dir_encoding == dir_encoding_default)
7736 pp.dir_encoding = dir_encoding_swap;
7737 else
7738 as_warn (_("ignoring `.s' suffix due to earlier `{%s}'"),
7739 pp.dir_encoding == dir_encoding_load ? "load" : "store");
7741 else if (mnem_p - 3 == dot_p
7742 && dot_p[1] == 'd'
7743 && dot_p[2] == '8')
7745 if (pp.disp_encoding == disp_encoding_default)
7746 pp.disp_encoding = disp_encoding_8bit;
7747 else if (pp.disp_encoding != disp_encoding_8bit)
7748 as_warn (_("ignoring `.d8' suffix due to earlier `{disp<N>}'"));
7750 else if (mnem_p - 4 == dot_p
7751 && dot_p[1] == 'd'
7752 && dot_p[2] == '3'
7753 && dot_p[3] == '2')
7755 if (pp.disp_encoding == disp_encoding_default)
7756 pp.disp_encoding = disp_encoding_32bit;
7757 else if (pp.disp_encoding != disp_encoding_32bit)
7758 as_warn (_("ignoring `.d32' suffix due to earlier `{disp<N>}'"));
7760 else
7761 goto check_suffix;
7762 mnem_p = dot_p;
7763 *dot_p = '\0';
7764 op_lookup (mnemonic);
7767 if (!current_templates.start || !pass1)
7769 current_templates.start = NULL;
7771 check_suffix:
7772 if (mnem_p > mnemonic)
7774 /* See if we can get a match by trimming off a suffix. */
7775 switch (mnem_p[-1])
7777 case WORD_MNEM_SUFFIX:
7778 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
7779 i.suffix = SHORT_MNEM_SUFFIX;
7780 else
7781 /* Fall through. */
7782 case BYTE_MNEM_SUFFIX:
7783 case QWORD_MNEM_SUFFIX:
7784 i.suffix = mnem_p[-1];
7785 mnem_p[-1] = '\0';
7786 op_lookup (mnemonic);
7787 break;
7788 case SHORT_MNEM_SUFFIX:
7789 case LONG_MNEM_SUFFIX:
7790 if (!intel_syntax)
7792 i.suffix = mnem_p[-1];
7793 mnem_p[-1] = '\0';
7794 op_lookup (mnemonic);
7796 break;
7798 /* Intel Syntax. */
7799 case 'd':
7800 if (intel_syntax)
7802 if (intel_float_operand (mnemonic) == 1)
7803 i.suffix = SHORT_MNEM_SUFFIX;
7804 else
7805 i.suffix = LONG_MNEM_SUFFIX;
7806 mnem_p[-1] = '\0';
7807 op_lookup (mnemonic);
7809 /* For compatibility reasons accept MOVSD and CMPSD without
7810 operands even in AT&T mode. */
7811 else if (*l == END_OF_INSN)
7813 mnem_p[-1] = '\0';
7814 op_lookup (mnemonic);
7815 if (current_templates.start != NULL
7816 /* MOVS or CMPS */
7817 && (current_templates.start->base_opcode | 2) == 0xa6
7818 && current_templates.start->opcode_space
7819 == SPACE_BASE
7820 && mnem_p[-2] == 's')
7822 as_warn (_("found `%sd'; assuming `%sl' was meant"),
7823 mnemonic, mnemonic);
7824 i.suffix = LONG_MNEM_SUFFIX;
7826 else
7828 current_templates.start = NULL;
7829 mnem_p[-1] = 'd';
7832 break;
7836 if (!current_templates.start)
7838 if (pass1)
7839 as_bad (_("no such instruction: `%s'"), token_start);
7840 return NULL;
7844 /* Handle SCC OSZC flags. */
7845 if (current_templates.start->opcode_modifier.operandconstraint == SCC)
7847 int length = check_Scc_OszcOperations (l);
7848 if (length < 0)
7849 return NULL;
7850 l += length;
7853 if ((current_templates.start->opcode_modifier.jump == JUMP
7854 || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
7855 && *l == ',')
7857 /* Check for a branch hint. We allow ",pt" and ",pn" for
7858 predict taken and predict not taken respectively.
7859 I'm not sure that branch hints actually do anything on loop
7860 and jcxz insns (JumpByte) for current Pentium4 chips. They
7861 may work in the future and it doesn't hurt to accept them
7862 now. */
7863 token_start = l++;
7864 if (is_space_char (*l))
7865 ++l;
7866 if (TOLOWER (*l) == 'p' && ISALPHA (l[1])
7867 && (l[2] == END_OF_INSN || is_space_char (l[2])))
7869 if (TOLOWER (l[1]) == 't')
7871 if (!add_prefix (DS_PREFIX_OPCODE))
7872 return NULL;
7873 l += 2;
7875 else if (TOLOWER (l[1]) == 'n')
7877 if (!add_prefix (CS_PREFIX_OPCODE))
7878 return NULL;
7879 l += 2;
7881 else
7882 l = token_start;
7884 else
7885 l = token_start;
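/* E.g. (illustrative): "jz,pt label" adds a 0x3e (%ds) segment prefix
   and "jz,pn label" a 0x2e (%cs) prefix, the conventional branch hint
   encodings.  */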
7887 /* Any other comma loses. */
7888 if (*l == ',')
7890 as_bad (_("invalid character %s in mnemonic"),
7891 output_invalid (*l));
7892 return NULL;
7895 /* Check if instruction is supported on specified architecture. */
7896 supported = 0;
7897 for (t = current_templates.start; t < current_templates.end; ++t)
7899 supported |= cpu_flags_match (t);
7901 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
7902 supported &= ~CPU_FLAGS_64BIT_MATCH;
7904 if (supported == CPU_FLAGS_PERFECT_MATCH)
7905 return l;
7908 if (pass1)
7910 if (supported & CPU_FLAGS_64BIT_MATCH)
7911 i.error = unsupported_on_arch;
7912 else
7913 i.error = unsupported_64bit;
7916 return NULL;
7919 static char *
7920 parse_operands (char *l, const char *mnemonic)
7922 char *token_start;
7924 /* 1 if operand is pending after ','. */
7925 unsigned int expecting_operand = 0;
7927 while (*l != END_OF_INSN)
7929 /* Non-zero if operand parens not balanced. */
7930 unsigned int paren_not_balanced = 0;
7931 /* True if inside double quotes. */
7932 bool in_quotes = false;
7934 /* Skip optional white space before operand. */
7935 if (is_space_char (*l))
7936 ++l;
7937 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
7939 as_bad (_("invalid character %s before operand %d"),
7940 output_invalid (*l),
7941 i.operands + 1);
7942 return NULL;
7944 token_start = l; /* After white space. */
7945 while (in_quotes || paren_not_balanced || *l != ',')
7947 if (*l == END_OF_INSN)
7949 if (in_quotes)
7951 as_bad (_("unbalanced double quotes in operand %d."),
7952 i.operands + 1);
7953 return NULL;
7955 if (paren_not_balanced)
7957 know (!intel_syntax);
7958 as_bad (_("unbalanced parenthesis in operand %d."),
7959 i.operands + 1);
7960 return NULL;
7962 else
7963 break; /* we are done */
7965 else if (*l == '\\' && l[1] == '"')
7966 ++l;
7967 else if (*l == '"')
7968 in_quotes = !in_quotes;
7969 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
7971 as_bad (_("invalid character %s in operand %d"),
7972 output_invalid (*l),
7973 i.operands + 1);
7974 return NULL;
7976 if (!intel_syntax && !in_quotes)
7978 if (*l == '(')
7979 ++paren_not_balanced;
7980 if (*l == ')')
7981 --paren_not_balanced;
7983 l++;
7985 if (l != token_start)
7986 { /* Yes, we've read in another operand. */
7987 unsigned int operand_ok;
7988 this_operand = i.operands++;
7989 if (i.operands > MAX_OPERANDS)
7991 as_bad (_("spurious operands; (%d operands/instruction max)"),
7992 MAX_OPERANDS);
7993 return NULL;
7995 i.types[this_operand].bitfield.unspecified = 1;
7996 /* Now parse operand adding info to 'i' as we go along. */
7997 END_STRING_AND_SAVE (l);
7999 if (i.mem_operands > 1)
8001 as_bad (_("too many memory references for `%s'"),
8002 mnemonic);
8003 return 0;
8006 if (intel_syntax)
8007 operand_ok =
8008 i386_intel_operand (token_start,
8009 intel_float_operand (mnemonic));
8010 else
8011 operand_ok = i386_att_operand (token_start);
8013 RESTORE_END_STRING (l);
8014 if (!operand_ok)
8015 return NULL;
8017 else
8019 if (expecting_operand)
8021 expecting_operand_after_comma:
8022 as_bad (_("expecting operand after ','; got nothing"));
8023 return NULL;
8025 if (*l == ',')
8027 as_bad (_("expecting operand before ','; got nothing"));
8028 return NULL;
8032 /* Now *l must be either ',' or END_OF_INSN. */
8033 if (*l == ',')
8035 if (*++l == END_OF_INSN)
8037 /* Just skip it; if it's \n, complain. */
8038 goto expecting_operand_after_comma;
8040 expecting_operand = 1;
8043 return l;
8046 static void
8047 copy_operand (unsigned int to, unsigned int from)
8049 i.types[to] = i.types[from];
8050 i.tm.operand_types[to] = i.tm.operand_types[from];
8051 i.flags[to] = i.flags[from];
8052 i.op[to] = i.op[from];
8053 i.reloc[to] = i.reloc[from];
8054 i.imm_bits[to] = i.imm_bits[from];
8055 /* Note: i.mask and i.broadcast aren't handled here, as what (if
8056 anything) to do there depends on context. */
8059 static void
8060 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
8062 union i386_op temp_op;
8063 i386_operand_type temp_type;
8064 unsigned int temp_flags;
8065 enum bfd_reloc_code_real temp_reloc;
8067 temp_type = i.types[xchg2];
8068 i.types[xchg2] = i.types[xchg1];
8069 i.types[xchg1] = temp_type;
8071 temp_flags = i.flags[xchg2];
8072 i.flags[xchg2] = i.flags[xchg1];
8073 i.flags[xchg1] = temp_flags;
8075 temp_op = i.op[xchg2];
8076 i.op[xchg2] = i.op[xchg1];
8077 i.op[xchg1] = temp_op;
8079 temp_reloc = i.reloc[xchg2];
8080 i.reloc[xchg2] = i.reloc[xchg1];
8081 i.reloc[xchg1] = temp_reloc;
8083 temp_flags = i.imm_bits[xchg2];
8084 i.imm_bits[xchg2] = i.imm_bits[xchg1];
8085 i.imm_bits[xchg1] = temp_flags;
8087 if (i.mask.reg)
8089 if (i.mask.operand == xchg1)
8090 i.mask.operand = xchg2;
8091 else if (i.mask.operand == xchg2)
8092 i.mask.operand = xchg1;
8094 if (i.broadcast.type || i.broadcast.bytes)
8096 if (i.broadcast.operand == xchg1)
8097 i.broadcast.operand = xchg2;
8098 else if (i.broadcast.operand == xchg2)
8099 i.broadcast.operand = xchg1;
8103 static void
8104 swap_operands (void)
8106 switch (i.operands)
8108 case 5:
8109 case 4:
8110 swap_2_operands (1, i.operands - 2);
8111 /* Fall through. */
8112 case 3:
8113 case 2:
8114 swap_2_operands (0, i.operands - 1);
8115 break;
8116 default:
8117 abort ();
8120 if (i.mem_operands == 2)
8122 const reg_entry *temp_seg;
8123 temp_seg = i.seg[0];
8124 i.seg[0] = i.seg[1];
8125 i.seg[1] = temp_seg;
8129 /* Try to ensure constant immediates are represented in the smallest
8130 opcode possible. */
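/* An illustrative example (assumed operands): in

	add	$0xffe0, %ax

   the immediate is canonicalized to -0x20 below, so it also qualifies
   as Imm8S, and the sign-extended-imm8 form of ADD (opcode 0x83) can
   be used instead of the full imm16 form (opcode 0x81). */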
8131 static void
8132 optimize_imm (void)
8134 char guess_suffix = 0;
8135 int op;
8137 if (i.suffix)
8138 guess_suffix = i.suffix;
8139 else if (i.reg_operands)
8141 /* Figure out a suffix from the last register operand specified.
8142 We can't do this properly yet, i.e. excluding special register
8143 instances, but the following works for instructions with
8144 immediates. In any case, we can't set i.suffix yet. */
8145 for (op = i.operands; --op >= 0;)
8146 if (i.types[op].bitfield.class != Reg)
8147 continue;
8148 else if (i.types[op].bitfield.byte)
8150 guess_suffix = BYTE_MNEM_SUFFIX;
8151 break;
8153 else if (i.types[op].bitfield.word)
8155 guess_suffix = WORD_MNEM_SUFFIX;
8156 break;
8158 else if (i.types[op].bitfield.dword)
8160 guess_suffix = LONG_MNEM_SUFFIX;
8161 break;
8163 else if (i.types[op].bitfield.qword)
8165 guess_suffix = QWORD_MNEM_SUFFIX;
8166 break;
8169 else if ((flag_code == CODE_16BIT)
8170 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
8171 guess_suffix = WORD_MNEM_SUFFIX;
8172 else if (flag_code != CODE_64BIT
8173 || (!(i.prefix[REX_PREFIX] & REX_W)
8174 /* A more generic (but also more involved) way of dealing
8175 with the special case(s) would be to go look for
8176 DefaultSize attributes on any of the templates. */
8177 && current_templates.start->mnem_off != MN_push
8178 && current_templates.start->mnem_off != MN_jmpabs))
8179 guess_suffix = LONG_MNEM_SUFFIX;
8181 for (op = i.operands; --op >= 0;)
8182 if (operand_type_check (i.types[op], imm))
8184 switch (i.op[op].imms->X_op)
8186 case O_constant:
8187 /* If a suffix is given, this operand may be shortened. */
8188 switch (guess_suffix)
8190 case LONG_MNEM_SUFFIX:
8191 i.types[op].bitfield.imm32 = 1;
8192 i.types[op].bitfield.imm64 = 1;
8193 break;
8194 case WORD_MNEM_SUFFIX:
8195 i.types[op].bitfield.imm16 = 1;
8196 i.types[op].bitfield.imm32 = 1;
8197 i.types[op].bitfield.imm32s = 1;
8198 i.types[op].bitfield.imm64 = 1;
8199 break;
8200 case BYTE_MNEM_SUFFIX:
8201 i.types[op].bitfield.imm8 = 1;
8202 i.types[op].bitfield.imm8s = 1;
8203 i.types[op].bitfield.imm16 = 1;
8204 i.types[op].bitfield.imm32 = 1;
8205 i.types[op].bitfield.imm32s = 1;
8206 i.types[op].bitfield.imm64 = 1;
8207 break;
8210 /* If this operand is at most 16 bits, convert it
8211 to a signed 16 bit number before trying to see
8212 whether it will fit in an even smaller size.
8213 This allows a 16-bit operand such as $0xffe0 to
8214 be recognised as within Imm8S range. */
8215 if ((i.types[op].bitfield.imm16)
8216 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
8218 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
8219 ^ 0x8000) - 0x8000);
8221 #ifdef BFD64
8222 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
8223 if ((i.types[op].bitfield.imm32)
8224 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
8226 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
8227 ^ ((offsetT) 1 << 31))
8228 - ((offsetT) 1 << 31));
8230 #endif
8231 i.types[op]
8232 = operand_type_or (i.types[op],
8233 smallest_imm_type (i.op[op].imms->X_add_number));
8235 /* We must avoid matching Imm32 templates when the immediate
8236 fits only in 64 bits. */
8237 if (guess_suffix == QWORD_MNEM_SUFFIX)
8238 i.types[op].bitfield.imm32 = 0;
8239 break;
8241 case O_absent:
8242 case O_register:
8243 abort ();
8245 /* Symbols and expressions. */
8246 default:
8247 /* Convert symbolic operand to proper sizes for matching, but don't
8248 prevent matching a set of insns that only supports sizes other
8249 than those matching the insn suffix. */
8251 i386_operand_type mask, allowed;
8252 const insn_template *t = current_templates.start;
8254 operand_type_set (&mask, 0);
8255 switch (guess_suffix)
8257 case QWORD_MNEM_SUFFIX:
8258 mask.bitfield.imm64 = 1;
8259 mask.bitfield.imm32s = 1;
8260 break;
8261 case LONG_MNEM_SUFFIX:
8262 mask.bitfield.imm32 = 1;
8263 break;
8264 case WORD_MNEM_SUFFIX:
8265 mask.bitfield.imm16 = 1;
8266 break;
8267 case BYTE_MNEM_SUFFIX:
8268 mask.bitfield.imm8 = 1;
8269 break;
8270 default:
8271 break;
8274 allowed = operand_type_and (t->operand_types[op], mask);
8275 while (++t < current_templates.end)
8277 allowed = operand_type_or (allowed, t->operand_types[op]);
8278 allowed = operand_type_and (allowed, mask);
8281 if (!operand_type_all_zero (&allowed))
8282 i.types[op] = operand_type_and (i.types[op], mask);
8284 break;
8289 /* Try to use the smallest displacement type too. */
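/* For instance (illustrative only):

	movl	%eax, 0x40(%rbp)

   has a constant displacement fitting in a signed byte, so the disp8
   ModR/M form can be used instead of a 4-byte disp32. */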
8290 static bool
8291 optimize_disp (const insn_template *t)
8293 unsigned int op;
8295 if (!want_disp32 (t)
8296 && (!t->opcode_modifier.jump
8297 || i.jumpabsolute || i.types[0].bitfield.baseindex))
8299 for (op = 0; op < i.operands; ++op)
8301 const expressionS *exp = i.op[op].disps;
8303 if (!operand_type_check (i.types[op], disp))
8304 continue;
8306 if (exp->X_op != O_constant)
8307 continue;
8309 /* Since the displacement is sign-extended to 64 bits, don't allow
8310 disp32 if it is out of range. */
8311 if (fits_in_signed_long (exp->X_add_number))
8312 continue;
8314 i.types[op].bitfield.disp32 = 0;
8315 if (i.types[op].bitfield.baseindex)
8317 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
8318 (uint64_t) exp->X_add_number);
8319 return false;
8324 /* Don't optimize displacement for movabs / jmpabs since they only take
8325 64-bit displacement. */
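/* E.g. (illustrative):  movabs 0x8070605040302010, %al  must keep the
   full 64-bit address; shrinking it would require a different insn
   form altogether. */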
8326 if (pp.disp_encoding > disp_encoding_8bit
8327 || t->mnem_off == MN_movabs || t->mnem_off == MN_jmpabs)
8328 return true;
8330 for (op = i.operands; op-- > 0;)
8331 if (operand_type_check (i.types[op], disp))
8333 if (i.op[op].disps->X_op == O_constant)
8335 offsetT op_disp = i.op[op].disps->X_add_number;
8337 if (!op_disp && i.types[op].bitfield.baseindex)
8339 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8340 i.op[op].disps = NULL;
8341 i.disp_operands--;
8342 continue;
8345 if (i.types[op].bitfield.disp16
8346 && fits_in_unsigned_word (op_disp))
8348 /* If this operand is at most 16 bits, convert
8349 to a signed 16 bit number and don't use 64bit
8350 displacement. */
8351 op_disp = ((op_disp ^ 0x8000) - 0x8000);
8352 i.types[op].bitfield.disp64 = 0;
8355 #ifdef BFD64
8356 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
8357 if ((flag_code != CODE_64BIT
8358 ? i.types[op].bitfield.disp32
8359 : want_disp32 (t)
8360 && (!t->opcode_modifier.jump
8361 || i.jumpabsolute || i.types[op].bitfield.baseindex))
8362 && fits_in_unsigned_long (op_disp))
8364 /* If this operand is at most 32 bits, convert
8365 to a signed 32 bit number and don't use 64bit
8366 displacement. */
8367 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
8368 i.types[op].bitfield.disp64 = 0;
8369 i.types[op].bitfield.disp32 = 1;
8372 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
8374 i.types[op].bitfield.disp64 = 0;
8375 i.types[op].bitfield.disp32 = 1;
8377 #endif
8378 if ((i.types[op].bitfield.disp32
8379 || i.types[op].bitfield.disp16)
8380 && fits_in_disp8 (op_disp))
8381 i.types[op].bitfield.disp8 = 1;
8383 i.op[op].disps->X_add_number = op_disp;
8385 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8386 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
8388 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
8389 i.op[op].disps, 0, i.reloc[op]);
8390 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8392 else
8393 /* We only support 64-bit displacements on constants. */
8394 i.types[op].bitfield.disp64 = 0;
8397 return true;
8400 /* Return 1 if there is a match in broadcast bytes between operand
8401 GIVEN and instruction template T. */
8403 static INLINE int
8404 match_broadcast_size (const insn_template *t, unsigned int given)
8406 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
8407 && i.types[given].bitfield.byte)
8408 || (t->opcode_modifier.broadcast == WORD_BROADCAST
8409 && i.types[given].bitfield.word)
8410 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
8411 && i.types[given].bitfield.dword)
8412 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
8413 && i.types[given].bitfield.qword));
8416 /* Check if operands are valid for the instruction. */
8418 static int
8419 check_VecOperands (const insn_template *t)
8421 unsigned int op;
8422 i386_cpu_flags cpu;
8424 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
8425 any one operand implicitly require AVX512VL support if the actual
8426 operand size is YMMword or XMMword. Since this function runs after
8427 template matching, there's no need to check for YMMword/XMMword in
8428 the template. */
8429 cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
8430 if (!cpu_flags_all_zero (&cpu)
8431 && !is_cpu (t, CpuAVX512VL)
8432 && !cpu_arch_flags.bitfield.cpuavx512vl
8433 && (!t->opcode_modifier.vex || need_evex_encoding (t)))
8435 for (op = 0; op < t->operands; ++op)
8437 if (t->operand_types[op].bitfield.zmmword
8438 && (i.types[op].bitfield.ymmword
8439 || i.types[op].bitfield.xmmword))
8441 i.error = operand_size_mismatch;
8442 return 1;
8447 /* Somewhat similarly, templates specifying both AVX and AVX2
8448 require AVX2 support if the actual operand size is YMMword. */
8449 if (maybe_cpu (t, CpuAVX) && maybe_cpu (t, CpuAVX2)
8450 && !cpu_arch_flags.bitfield.cpuavx2)
8452 for (op = 0; op < t->operands; ++op)
8454 if (t->operand_types[op].bitfield.xmmword
8455 && i.types[op].bitfield.ymmword)
8457 i.error = operand_size_mismatch;
8458 return 1;
8463 /* Without VSIB byte, we can't have a vector register for index. */
8464 if (!t->opcode_modifier.sib
8465 && i.index_reg
8466 && (i.index_reg->reg_type.bitfield.xmmword
8467 || i.index_reg->reg_type.bitfield.ymmword
8468 || i.index_reg->reg_type.bitfield.zmmword))
8470 i.error = unsupported_vector_index_register;
8471 return 1;
8474 /* Check if default mask is allowed. */
8475 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
8476 && (!i.mask.reg || i.mask.reg->reg_num == 0))
8478 i.error = no_default_mask;
8479 return 1;
8482 /* For VSIB byte, we need a vector register for index, and all vector
8483 registers must be distinct. */
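/* For example (illustrative, not from the sources): the AVX2 gather

	vgatherdps %xmm2, (%rax,%xmm1,4), %xmm0

   needs mask %xmm2, index %xmm1, and destination %xmm0 to be pairwise
   distinct; otherwise the insn would #UD. */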
8484 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
8486 if (!i.index_reg
8487 || !((t->opcode_modifier.sib == VECSIB128
8488 && i.index_reg->reg_type.bitfield.xmmword)
8489 || (t->opcode_modifier.sib == VECSIB256
8490 && i.index_reg->reg_type.bitfield.ymmword)
8491 || (t->opcode_modifier.sib == VECSIB512
8492 && i.index_reg->reg_type.bitfield.zmmword)))
8494 i.error = invalid_vsib_address;
8495 return 1;
8498 gas_assert (i.reg_operands == 2 || i.mask.reg);
8499 if (i.reg_operands == 2 && !i.mask.reg)
8501 gas_assert (i.types[0].bitfield.class == RegSIMD);
8502 gas_assert (i.types[0].bitfield.xmmword
8503 || i.types[0].bitfield.ymmword);
8504 gas_assert (i.types[2].bitfield.class == RegSIMD);
8505 gas_assert (i.types[2].bitfield.xmmword
8506 || i.types[2].bitfield.ymmword);
8507 if (operand_check == check_none)
8508 return 0;
8509 if (register_number (i.op[0].regs)
8510 != register_number (i.index_reg)
8511 && register_number (i.op[2].regs)
8512 != register_number (i.index_reg)
8513 && register_number (i.op[0].regs)
8514 != register_number (i.op[2].regs))
8515 return 0;
8516 if (operand_check == check_error)
8518 i.error = invalid_vector_register_set;
8519 return 1;
8521 as_warn (_("mask, index, and destination registers should be distinct"));
8523 else if (i.reg_operands == 1 && i.mask.reg)
8525 if (i.types[1].bitfield.class == RegSIMD
8526 && (i.types[1].bitfield.xmmword
8527 || i.types[1].bitfield.ymmword
8528 || i.types[1].bitfield.zmmword)
8529 && (register_number (i.op[1].regs)
8530 == register_number (i.index_reg)))
8532 if (operand_check == check_error)
8534 i.error = invalid_vector_register_set;
8535 return 1;
8537 if (operand_check != check_none)
8538 as_warn (_("index and destination registers should be distinct"));
8543 /* For AMX instructions with 3 TMM register operands, all operands
8544 must be distinct. */
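/* E.g. (illustrative):  tdpbssd %tmm1, %tmm2, %tmm3  is accepted,
   while  tdpbssd %tmm1, %tmm1, %tmm3  would #UD and is rejected. */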
8545 if (i.reg_operands == 3
8546 && t->operand_types[0].bitfield.tmmword
8547 && (i.op[0].regs == i.op[1].regs
8548 || i.op[0].regs == i.op[2].regs
8549 || i.op[1].regs == i.op[2].regs))
8551 i.error = invalid_tmm_register_set;
8552 return 1;
8555 /* Some special instructions require that the destination be distinct
8556 from the source registers. */
8557 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
8559 unsigned int dest_reg = i.operands - 1;
8561 know (i.operands >= 3);
8563 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
8564 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
8565 || (i.reg_operands > 2
8566 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
8568 i.error = invalid_dest_and_src_register_set;
8569 return 1;
8573 /* Check if broadcast is supported by the instruction and is applied
8574 to the memory operand. */
8575 if (i.broadcast.type || i.broadcast.bytes)
8577 i386_operand_type type, overlap;
8579 /* Check if specified broadcast is supported in this instruction,
8580 and its broadcast bytes match the memory operand. */
8581 op = i.broadcast.operand;
8582 if (!t->opcode_modifier.broadcast
8583 || !(i.flags[op] & Operand_Mem)
8584 || (!i.types[op].bitfield.unspecified
8585 && !match_broadcast_size (t, op)))
8587 bad_broadcast:
8588 i.error = unsupported_broadcast;
8589 return 1;
8592 operand_type_set (&type, 0);
8593 switch (get_broadcast_bytes (t, false))
8595 case 2:
8596 type.bitfield.word = 1;
8597 break;
8598 case 4:
8599 type.bitfield.dword = 1;
8600 break;
8601 case 8:
8602 type.bitfield.qword = 1;
8603 break;
8604 case 16:
8605 type.bitfield.xmmword = 1;
8606 break;
8607 case 32:
8608 if (vector_size < VSZ256)
8609 goto bad_broadcast;
8610 type.bitfield.ymmword = 1;
8611 break;
8612 case 64:
8613 if (vector_size < VSZ512)
8614 goto bad_broadcast;
8615 type.bitfield.zmmword = 1;
8616 break;
8617 default:
8618 goto bad_broadcast;
8621 overlap = operand_type_and (type, t->operand_types[op]);
8622 if (t->operand_types[op].bitfield.class == RegSIMD
8623 && t->operand_types[op].bitfield.byte
8624 + t->operand_types[op].bitfield.word
8625 + t->operand_types[op].bitfield.dword
8626 + t->operand_types[op].bitfield.qword > 1)
8628 overlap.bitfield.xmmword = 0;
8629 overlap.bitfield.ymmword = 0;
8630 overlap.bitfield.zmmword = 0;
8632 if (operand_type_all_zero (&overlap))
8633 goto bad_broadcast;
8635 if (t->opcode_modifier.checkoperandsize)
8637 unsigned int j;
8639 type.bitfield.baseindex = 1;
8640 for (j = 0; j < i.operands; ++j)
8642 if (j != op
8643 && !operand_type_register_match(i.types[j],
8644 t->operand_types[j],
8645 type,
8646 t->operand_types[op]))
8647 goto bad_broadcast;
8651 /* If broadcast is supported in this instruction, we need to check that
8652 an operand of one-element size isn't specified without broadcast. */
8653 else if (t->opcode_modifier.broadcast && i.mem_operands)
8655 /* Find memory operand. */
8656 for (op = 0; op < i.operands; op++)
8657 if (i.flags[op] & Operand_Mem)
8658 break;
8659 gas_assert (op < i.operands);
8660 /* Check size of the memory operand. */
8661 if (match_broadcast_size (t, op))
8663 i.error = broadcast_needed;
8664 return 1;
8667 else
8668 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
8670 /* Check if requested masking is supported. */
8671 if (i.mask.reg)
8673 if (!t->opcode_modifier.masking)
8675 i.error = unsupported_masking;
8676 return 1;
8679 /* Common rules for masking:
8680 - mask register destinations permit only zeroing-masking, without
8681 that actually being expressed by a {z} operand suffix or EVEX.z,
8682 - memory destinations allow only merging-masking,
8683 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
8684 masking. */
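/* A hedged illustration of the memory-destination rule (operands
   assumed):

	vmovaps %zmm0, (%rax){%k1}	# merging-masking: accepted
	vmovaps %zmm0, (%rax){%k1}{z}	# zeroing to memory: rejected  */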
8685 if (i.mask.zeroing
8686 && (t->operand_types[t->operands - 1].bitfield.class == RegMask
8687 || (i.flags[t->operands - 1] & Operand_Mem)
8688 || t->opcode_modifier.sib))
8690 i.error = unsupported_masking;
8691 return 1;
8695 /* Check if masking is applied to dest operand. */
8696 if (i.mask.reg && (i.mask.operand != i.operands - 1))
8698 i.error = mask_not_on_destination;
8699 return 1;
8702 /* Check RC/SAE. */
8703 if (i.rounding.type != rc_none)
8705 if (!t->opcode_modifier.sae
8706 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
8707 || i.mem_operands)
8709 i.error = unsupported_rc_sae;
8710 return 1;
8713 /* Non-EVEX.{LIG,512,256} forms need to have a ZMM or YMM register as at
8714 least one operand. For YMM register or EVEX256, we will need AVX10.2
8715 enabled. There's no need to check all operands, though: Either of the
8716 last two operands will be of the right size in all relevant templates. */
8717 if (t->opcode_modifier.evex != EVEXLIG
8718 && t->opcode_modifier.evex != EVEX512
8719 && (t->opcode_modifier.evex != EVEX256
8720 || !cpu_arch_flags.bitfield.cpuavx10_2)
8721 && !i.types[t->operands - 1].bitfield.zmmword
8722 && !i.types[t->operands - 2].bitfield.zmmword
8723 && ((!i.types[t->operands - 1].bitfield.ymmword
8724 && !i.types[t->operands - 2].bitfield.ymmword)
8725 || !cpu_arch_flags.bitfield.cpuavx10_2))
8727 i.error = operand_size_mismatch;
8728 return 1;
8732 /* Check the special Imm4 cases; must be the first operand. */
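/* E.g. (illustrative):  vpermil2ps $2, %xmm4, %xmm3, %xmm2, %xmm1
   encodes its immediate in the low nibble of the /is4 byte, alongside
   a register number in the high nibble, so only values 0-15 fit. */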
8733 if ((is_cpu (t, CpuXOP) && t->operands == 5)
8734 || (t->opcode_space == SPACE_0F3A
8735 && (t->base_opcode | 3) == 0x0b
8736 && (is_cpu (t, CpuAPX_F)
8737 || (t->opcode_modifier.sse2avx && t->opcode_modifier.evex
8738 && (!t->opcode_modifier.vex
8739 || (pp.encoding != encoding_default
8740 && pp.encoding != encoding_vex
8741 && pp.encoding != encoding_vex3))))))
8743 if (i.op[0].imms->X_op != O_constant
8744 || !fits_in_imm4 (i.op[0].imms->X_add_number))
8746 i.error = bad_imm4;
8747 return 1;
8750 /* Turn off Imm<N> so that update_imm won't complain. */
8751 if (t->operands == 5)
8752 operand_type_set (&i.types[0], 0);
8755 /* Check vector Disp8 operand. */
8756 if (t->opcode_modifier.disp8memshift
8757 && (!t->opcode_modifier.vex
8758 || need_evex_encoding (t))
8759 && pp.disp_encoding <= disp_encoding_8bit)
8761 if (i.broadcast.type || i.broadcast.bytes)
8762 i.memshift = t->opcode_modifier.broadcast - 1;
8763 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
8764 i.memshift = t->opcode_modifier.disp8memshift;
8765 else
8767 const i386_operand_type *type = NULL, *fallback = NULL;
8769 i.memshift = 0;
8770 for (op = 0; op < i.operands; op++)
8771 if (i.flags[op] & Operand_Mem)
8773 if (t->opcode_modifier.evex == EVEXLIG)
8774 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
8775 else if (t->operand_types[op].bitfield.xmmword
8776 + t->operand_types[op].bitfield.ymmword
8777 + t->operand_types[op].bitfield.zmmword <= 1)
8778 type = &t->operand_types[op];
8779 else if (!i.types[op].bitfield.unspecified)
8780 type = &i.types[op];
8781 else /* Ambiguities get resolved elsewhere. */
8782 fallback = &t->operand_types[op];
8784 else if (i.types[op].bitfield.class == RegSIMD
8785 && t->opcode_modifier.evex != EVEXLIG)
8787 if (i.types[op].bitfield.zmmword)
8788 i.memshift = 6;
8789 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
8790 i.memshift = 5;
8791 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
8792 i.memshift = 4;
8795 if (!type && !i.memshift)
8796 type = fallback;
8797 if (type)
8799 if (type->bitfield.zmmword)
8800 i.memshift = 6;
8801 else if (type->bitfield.ymmword)
8802 i.memshift = 5;
8803 else if (type->bitfield.xmmword)
8804 i.memshift = 4;
8807 /* For the check in fits_in_disp8(). */
8808 if (i.memshift == 0)
8809 i.memshift = -1;
8812 for (op = 0; op < i.operands; op++)
8813 if (operand_type_check (i.types[op], disp)
8814 && i.op[op].disps->X_op == O_constant)
8816 /* Make sure to leave i.types[op].bitfield.disp8 alone upon
8817 secondary invocations of match_template(). */
8818 if (fits_in_disp8 (i.op[op].disps->X_add_number))
8820 if (!i.tm.mnem_off)
8821 i.types[op].bitfield.disp8 = 1;
8822 return 0;
8824 if (!i.tm.mnem_off)
8825 i.types[op].bitfield.disp8 = 0;
8829 i.memshift = 0;
8831 return 0;
8834 /* Check if encoding requirements are met by the instruction. */
8836 static int
8837 VEX_check_encoding (const insn_template *t)
8839 if (pp.encoding == encoding_error)
8841 i.error = unsupported;
8842 return 1;
8845 /* Vector size restrictions. */
8846 if ((vector_size < VSZ512
8847 && t->opcode_modifier.evex == EVEX512)
8848 || (vector_size < VSZ256
8849 && (t->opcode_modifier.evex == EVEX256
8850 || t->opcode_modifier.vex == VEX256)))
8852 i.error = unsupported_vector_size;
8853 return 1;
8856 switch (pp.encoding)
8858 case encoding_vex:
8859 case encoding_vex3:
8860 /* This instruction must be encoded with VEX prefix. */
8861 if (!t->opcode_modifier.vex)
8863 i.error = no_vex_encoding;
8864 return 1;
8866 break;
8868 case encoding_default:
8869 if (!pp.has_nf)
8870 break;
8871 /* Fall through. */
8872 case encoding_evex:
8873 case encoding_evex512:
8874 /* This instruction must be encoded with EVEX prefix. */
8875 if (!t->opcode_modifier.evex)
8877 i.error = no_evex_encoding;
8878 return 1;
8880 break;
8882 case encoding_egpr:
8883 /* This instruction must be encoded with REX2 or EVEX prefix. */
8884 if (t->opcode_modifier.vex && !t->opcode_modifier.evex)
8886 i.error = no_evex_encoding;
8887 return 1;
8889 break;
8891 default:
8892 abort ();
8895 return 0;
8898 /* Check if EGPR operands are valid for the instruction. */
8900 static bool
8901 check_EgprOperands (const insn_template *t)
8903 if (!t->opcode_modifier.noegpr)
8904 return false;
8906 for (unsigned int op = 0; op < i.operands; op++)
8908 if (i.types[op].bitfield.class != Reg)
8909 continue;
8911 if (i.op[op].regs->reg_flags & RegRex2)
8913 i.error = register_type_mismatch;
8914 return true;
8918 if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
8919 || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
8921 i.error = unsupported_EGPR_for_addressing;
8922 return true;
8925 /* Check if pseudo prefix {rex2} is valid. */
8926 if (pp.rex2_encoding && !t->opcode_modifier.sse2avx)
8928 i.error = invalid_pseudo_prefix;
8929 return true;
8932 return false;
8935 /* Check if APX operands are valid for the instruction. */
8936 static bool
8937 check_APX_operands (const insn_template *t)
8939 /* Push2* and Pop2* cannot use RSP, and Pop2* cannot pop the same register twice. */
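/* E.g. (illustrative):  pop2 %rax, %rax  and  push2 %rsp, %rbx  are
   both rejected here. */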
8941 switch (t->mnem_off)
8943 case MN_pop2:
8944 case MN_pop2p:
8945 if (register_number (i.op[0].regs) == register_number (i.op[1].regs))
8947 i.error = invalid_dest_register_set;
8948 return 1;
8950 /* fall through */
8951 case MN_push2:
8952 case MN_push2p:
8953 if (register_number (i.op[0].regs) == 4
8954 || register_number (i.op[1].regs) == 4)
8956 i.error = unsupported_rsp_register;
8957 return 1;
8959 break;
8961 return 0;
8964 /* Check if the instruction uses the REX registers or a REX prefix. */
8965 static bool
8966 check_Rex_required (void)
8968 for (unsigned int op = 0; op < i.operands; op++)
8970 if (i.types[op].bitfield.class != Reg)
8971 continue;
8973 if (i.op[op].regs->reg_flags & (RegRex | RegRex64))
8974 return true;
8977 if ((i.index_reg && (i.index_reg->reg_flags & RegRex))
8978 || (i.base_reg && (i.base_reg->reg_flags & RegRex)))
8979 return true;
8981 /* Check if the pseudo prefix {rex} is valid. */
8982 return pp.rex_encoding;
8985 /* Optimize APX NDD insns to legacy insns. */
8986 static unsigned int
8987 can_convert_NDD_to_legacy (const insn_template *t)
8989 unsigned int match_dest_op = ~0;
8991 if (!pp.has_nf && i.reg_operands >= 2)
8993 unsigned int dest = i.operands - 1;
8994 unsigned int src1 = i.operands - 2;
8995 unsigned int src2 = (i.operands > 3) ? i.operands - 3 : 0;
8997 if (i.types[src1].bitfield.class == Reg
8998 && i.op[src1].regs == i.op[dest].regs)
8999 match_dest_op = src1;
9000 /* If the first operand is the same as the third operand, the
9001 insn must allow its first two operands to be commuted, without
9002 changing the semantics, in order to be optimized. */
9004 else if (optimize > 1
9005 && t->opcode_modifier.commutative
9006 && i.types[src2].bitfield.class == Reg
9007 && i.op[src2].regs == i.op[dest].regs)
9008 match_dest_op = src2;
9010 return match_dest_op;
9013 /* Helper function for the progress() macro in match_template(). */
9014 static INLINE enum i386_error progress (enum i386_error new,
9015 enum i386_error last,
9016 unsigned int line, unsigned int *line_p)
9018 if (line <= *line_p)
9019 return last;
9020 *line_p = line;
9021 return new;
9024 static const insn_template *
9025 match_template (char mnem_suffix)
9027 /* Points to template once we've found it. */
9028 const insn_template *t;
9029 i386_operand_type overlap0, overlap1, overlap2, overlap3;
9030 i386_operand_type overlap4;
9031 unsigned int found_reverse_match;
9032 i386_operand_type operand_types [MAX_OPERANDS];
9033 int addr_prefix_disp;
9034 unsigned int j, size_match, check_register, errline = __LINE__;
9035 enum i386_error specific_error = number_of_operands_mismatch;
9036 #define progress(err) progress (err, specific_error, __LINE__, &errline)
9038 #if MAX_OPERANDS != 5
9039 # error "MAX_OPERANDS must be 5."
9040 #endif
9042 found_reverse_match = 0;
9043 addr_prefix_disp = -1;
9045 for (t = current_templates.start; t < current_templates.end; t++)
9047 addr_prefix_disp = -1;
9048 found_reverse_match = 0;
9050 /* Must have right number of operands. */
9051 if (i.operands != t->operands)
9052 continue;
9054 /* Skip SSE2AVX templates when inapplicable. */
9055 if (t->opcode_modifier.sse2avx
9056 && (!sse2avx || i.prefix[DATA_PREFIX]))
9058 /* Another non-SSE2AVX template has to follow. */
9059 gas_assert (t + 1 < current_templates.end);
9060 continue;
9063 /* Check processor support. */
9064 specific_error = progress (unsupported);
9065 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
9066 continue;
9068 /* Check AT&T mnemonic. */
9069 specific_error = progress (unsupported_with_intel_mnemonic);
9070 if (!intel_syntax && intel_mnemonic
9071 && t->opcode_modifier.dialect == ATT_MNEMONIC)
9072 continue;
9074 /* Check AT&T/Intel syntax. */
9075 specific_error = progress (unsupported_syntax);
9076 if (intel_syntax
9077 ? t->opcode_modifier.dialect >= ATT_SYNTAX
9078 : t->opcode_modifier.dialect == INTEL_SYNTAX)
9079 continue;
9081 /* Check NF support. */
9082 specific_error = progress (unsupported_nf);
9083 if (pp.has_nf && !t->opcode_modifier.nf)
9084 continue;
9086 /* Check Intel64/AMD64 ISA. */
9087 switch (isa64)
9089 default:
9090 /* Default: Don't accept Intel64. */
9091 if (t->opcode_modifier.isa64 == INTEL64)
9092 continue;
9093 break;
9094 case amd64:
9095 /* -mamd64: Don't accept Intel64 and Intel64 only. */
9096 if (t->opcode_modifier.isa64 >= INTEL64)
9097 continue;
9098 break;
9099 case intel64:
9100 /* -mintel64: Don't accept AMD64. */
9101 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
9102 continue;
9103 break;
9106 /* Check the suffix. */
9107 specific_error = progress (invalid_instruction_suffix);
9108 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
9109 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
9110 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
9111 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
9112 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
9113 continue;
9115 specific_error = progress (operand_size_mismatch);
9116 size_match = operand_size_match (t);
9117 if (!size_match)
9118 continue;
9120 /* This is intentionally not
9122 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
9124 as the case of a missing * on the operand is accepted (perhaps with
9125 a warning, issued further down). */
9126 specific_error = progress (operand_type_mismatch);
9127 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
9128 continue;
9130 /* In Intel syntax, normally we can check for memory operand size when
9131 there is no mnemonic suffix. But jmp and call have 2 different
9132 encodings with Dword memory operand size. Skip the "near" one
9133 (permitting a register operand) when "far" was requested. */
9134 if (i.far_branch
9135 && t->opcode_modifier.jump == JUMP_ABSOLUTE
9136 && t->operand_types[0].bitfield.class == Reg)
9137 continue;
9139 for (j = 0; j < MAX_OPERANDS; j++)
9140 operand_types[j] = t->operand_types[j];
9142 /* In general, don't allow 32-bit operands on pre-386. */
9143 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
9144 : operand_size_mismatch);
9145 j = i.imm_operands + (t->operands > i.imm_operands + 1);
9146 if (i.suffix == LONG_MNEM_SUFFIX
9147 && !cpu_arch_flags.bitfield.cpui386
9148 && (intel_syntax
9149 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
9150 && !intel_float_operand (insn_name (t)))
9151 : intel_float_operand (insn_name (t)) != 2)
9152 && (t->operands == i.imm_operands
9153 || (operand_types[i.imm_operands].bitfield.class != RegMMX
9154 && operand_types[i.imm_operands].bitfield.class != RegSIMD
9155 && operand_types[i.imm_operands].bitfield.class != RegMask)
9156 || (operand_types[j].bitfield.class != RegMMX
9157 && operand_types[j].bitfield.class != RegSIMD
9158 && operand_types[j].bitfield.class != RegMask))
9159 && !t->opcode_modifier.sib)
9160 continue;
9162 /* Do not verify operands when there are none. */
9163 if (!t->operands)
9165 if (VEX_check_encoding (t))
9167 specific_error = progress (i.error);
9168 continue;
9171 /* Check if pseudo prefix {rex2} is valid. */
9172 if (t->opcode_modifier.noegpr && pp.rex2_encoding)
9174 specific_error = progress (invalid_pseudo_prefix);
9175 continue;
9178 /* We've found a match; break out of loop. */
9179 break;
9182 if (!t->opcode_modifier.jump
9183 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
9185 /* There should be only one Disp operand. */
9186 for (j = 0; j < MAX_OPERANDS; j++)
9187 if (operand_type_check (operand_types[j], disp))
9188 break;
9189 if (j < MAX_OPERANDS)
9191 bool override = (i.prefix[ADDR_PREFIX] != 0);
9193 addr_prefix_disp = j;
9195 /* Address size prefix will turn Disp64 operand into Disp32 and
9196 Disp32/Disp16 one into Disp16/Disp32 respectively. */
9197 switch (flag_code)
9199 case CODE_16BIT:
9200 override = !override;
9201 /* Fall through. */
9202 case CODE_32BIT:
9203 if (operand_types[j].bitfield.disp32
9204 && operand_types[j].bitfield.disp16)
9206 operand_types[j].bitfield.disp16 = override;
9207 operand_types[j].bitfield.disp32 = !override;
9209 gas_assert (!operand_types[j].bitfield.disp64);
9210 break;
9212 case CODE_64BIT:
9213 if (operand_types[j].bitfield.disp64)
9215 gas_assert (!operand_types[j].bitfield.disp32);
9216 operand_types[j].bitfield.disp32 = override;
9217 operand_types[j].bitfield.disp64 = !override;
9219 operand_types[j].bitfield.disp16 = 0;
9220 break;
9225 /* We check register size if needed. */
9226 if (t->opcode_modifier.checkoperandsize)
9228 check_register = (1 << t->operands) - 1;
9229 if (i.broadcast.type || i.broadcast.bytes)
9230 check_register &= ~(1 << i.broadcast.operand);
9232 else
9233 check_register = 0;
9235 overlap0 = operand_type_and (i.types[0], operand_types[0]);
9236 switch (t->operands)
9238 case 1:
9239 if (!operand_type_match (overlap0, i.types[0]))
9240 continue;
9242 /* Allow the ModR/M encoding to be requested by using the {load} or
9243 {store} pseudo prefix on an applicable insn. */
9244 if (!t->opcode_modifier.modrm
9245 && i.reg_operands == 1
9246 && ((pp.dir_encoding == dir_encoding_load
9247 && t->mnem_off != MN_pop)
9248 || (pp.dir_encoding == dir_encoding_store
9249 && t->mnem_off != MN_push))
9250 /* Avoid BSWAP. */
9251 && t->mnem_off != MN_bswap)
9252 continue;
9253 break;
9255 case 2:
9256 /* xchg %eax, %eax is a special case. It is an alias for nop
9257 only in 32bit mode and we can use opcode 0x90. In 64bit
9258 mode, we can't use 0x90 for xchg %eax, %eax since it should
9259 zero-extend %eax to %rax. */
9260 if (t->base_opcode == 0x90
9261 && t->opcode_space == SPACE_BASE)
9263 if (flag_code == CODE_64BIT
9264 && i.types[0].bitfield.instance == Accum
9265 && i.types[0].bitfield.dword
9266 && i.types[1].bitfield.instance == Accum)
9267 continue;
9269 /* Allow the ModR/M encoding to be requested by using the
9270 {load} or {store} pseudo prefix. */
9271 if (pp.dir_encoding == dir_encoding_load
9272 || pp.dir_encoding == dir_encoding_store)
9273 continue;
9276 if (t->base_opcode == MOV_AX_DISP32
9277 && t->opcode_space == SPACE_BASE
9278 && t->mnem_off != MN_movabs)
9280 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
9281 if (i.reloc[0] == BFD_RELOC_386_GOT32)
9282 continue;
9284 /* xrelease mov %eax, <disp> is another special case. It must not
9285 match the accumulator-only encoding of mov. */
9286 if (i.hle_prefix)
9287 continue;
9289 /* Allow the ModR/M encoding to be requested by using a suitable
9290 {load} or {store} pseudo prefix. */
9291 if (pp.dir_encoding == (i.types[0].bitfield.instance == Accum
9292 ? dir_encoding_store
9293 : dir_encoding_load)
9294 && !i.types[0].bitfield.disp64
9295 && !i.types[1].bitfield.disp64)
9296 continue;
9299 /* Allow the ModR/M encoding to be requested by using the {load} or
9300 {store} pseudo prefix on an applicable insn. */
9301 if (!t->opcode_modifier.modrm
9302 && i.reg_operands == 1
9303 && i.imm_operands == 1
9304 && (pp.dir_encoding == dir_encoding_load
9305 || pp.dir_encoding == dir_encoding_store)
9306 && t->opcode_space == SPACE_BASE)
9308 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
9309 && pp.dir_encoding == dir_encoding_store)
9310 continue;
9312 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
9313 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
9314 || pp.dir_encoding == dir_encoding_load))
9315 continue;
9317 if (t->base_opcode == 0xa8 /* test $imm, %acc */
9318 && pp.dir_encoding == dir_encoding_load)
9319 continue;
9321 /* Fall through. */
9323 case 3:
9324 if (!(size_match & MATCH_STRAIGHT))
9325 goto check_reverse;
9326 /* Reverse direction of operands if swapping is possible in the first
9327 place (operands need to be symmetric) and
9328 - the load form is requested, and the template is a store form,
9329 - the store form is requested, and the template is a load form,
9330 - the non-default (swapped) form is requested. */
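/* A hedged illustration (operands assumed):

	{load}	mov %ebx, %eax	# forces the reg <- r/m form, opcode 0x8b
	{store}	mov %ebx, %eax	# forces the r/m <- reg form, opcode 0x89

   both encode the same architectural move. */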
9331 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
9333 j = i.operands - 1 - (t->opcode_space == SPACE_MAP4
9334 && t->opcode_modifier.vexvvvv);
9336 if (t->opcode_modifier.d && i.reg_operands == i.operands
9337 && !operand_type_all_zero (&overlap1))
9338 switch (pp.dir_encoding)
9340 case dir_encoding_load:
9341 if (operand_type_check (operand_types[j], anymem)
9342 || t->opcode_modifier.regmem)
9343 goto check_reverse;
9344 break;
9346 case dir_encoding_store:
9347 if (!operand_type_check (operand_types[j], anymem)
9348 && !t->opcode_modifier.regmem)
9349 goto check_reverse;
9350 break;
9352 case dir_encoding_swap:
9353 goto check_reverse;
9355 case dir_encoding_default:
9356 break;
9359 /* If we want the store form, we skip the current load template. */
9360 if ((pp.dir_encoding == dir_encoding_store
9361 || pp.dir_encoding == dir_encoding_swap)
9362 && i.mem_operands == 0
9363 && t->opcode_modifier.load)
9364 continue;
9365 /* Fall through. */
9366 case 4:
9367 case 5:
9368 overlap1 = operand_type_and (i.types[1], operand_types[1]);
9369 if (!operand_type_match (overlap0, i.types[0])
9370 || !operand_type_match (overlap1, i.types[1])
9371 || ((check_register & 3) == 3
9372 && !operand_type_register_match (i.types[0],
9373 operand_types[0],
9374 i.types[1],
9375 operand_types[1])))
9377 specific_error = progress (i.error);
9379 /* Check if other direction is valid ... */
9380 if (!t->opcode_modifier.d)
9381 continue;
9383 check_reverse:
9384 if (!(size_match & MATCH_REVERSE))
9385 continue;
9386 /* Try reversing direction of operands. */
9387 j = is_cpu (t, CpuFMA4)
9388 || is_cpu (t, CpuXOP)
9389 || is_cpu (t, CpuAPX_F) ? 1 : i.operands - 1;
9390 overlap0 = operand_type_and (i.types[0], operand_types[j]);
9391 overlap1 = operand_type_and (i.types[j], operand_types[0]);
9392 overlap2 = operand_type_and (i.types[1], operand_types[1]);
9393 gas_assert (t->operands != 3 || !check_register
9394 || is_cpu (t, CpuAPX_F));
9395 if (!operand_type_match (overlap0, i.types[0])
9396 || !operand_type_match (overlap1, i.types[j])
9397 || (t->operands == 3
9398 && !operand_type_match (overlap2, i.types[1]))
9399 || (check_register
9400 && !operand_type_register_match (i.types[0],
9401 operand_types[j],
9402 i.types[j],
9403 operand_types[0])))
9405 /* Does not match either direction. */
9406 specific_error = progress (i.error);
9407 continue;
9409 /* found_reverse_match holds which variant of D
9410 we've found. */
9411 if (!t->opcode_modifier.d)
9412 found_reverse_match = 0;
9413 else if (operand_types[0].bitfield.tbyte)
9415 if (t->opcode_modifier.operandconstraint != UGH)
9416 found_reverse_match = Opcode_FloatD;
9417 else
9418 found_reverse_match = ~0;
9419 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
9420 if ((t->extension_opcode & 4)
9421 && (intel_syntax || intel_mnemonic))
9422 found_reverse_match |= Opcode_FloatR;
9424 else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
9426 found_reverse_match = Opcode_VexW;
9427 goto check_operands_345;
9429 else if (t->opcode_space == SPACE_MAP4
9430 && t->operands >= 3)
9432 found_reverse_match = Opcode_D;
9433 goto check_operands_345;
9435 else if (t->opcode_modifier.commutative
9436 /* CFCMOVcc also wants its major opcode unaltered. */
9437 || (t->opcode_space == SPACE_MAP4
9438 && (t->base_opcode | 0xf) == 0x4f))
9439 found_reverse_match = ~0;
9440 else if (t->opcode_space != SPACE_BASE
9441 && (t->opcode_space != SPACE_MAP4
9442 /* MOVBE, originating from SPACE_0F38, also
9443 belongs here. */
9444 || t->mnem_off == MN_movbe)
9445 && (t->opcode_space != SPACE_0F
9446 /* MOV to/from CR/DR/TR, as an exception, follow
9447 the base opcode space encoding model. */
9448 || (t->base_opcode | 7) != 0x27))
9449 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
9450 ? Opcode_ExtD : Opcode_SIMD_IntD;
9451 else
9452 found_reverse_match = Opcode_D;
9454 else
9456 /* Found a forward 2 operand match here. */
9457 check_operands_345:
9458 switch (t->operands)
9460 case 5:
9461 overlap4 = operand_type_and (i.types[4], operand_types[4]);
9462 if (!operand_type_match (overlap4, i.types[4])
9463 || !operand_type_register_match (i.types[3],
9464 operand_types[3],
9465 i.types[4],
9466 operand_types[4]))
9468 specific_error = progress (i.error);
9469 continue;
9471 /* Fall through. */
9472 case 4:
9473 overlap3 = operand_type_and (i.types[3], operand_types[3]);
9474 if (!operand_type_match (overlap3, i.types[3])
9475 || ((check_register & 0xa) == 0xa
9476 && !operand_type_register_match (i.types[1],
9477 operand_types[1],
9478 i.types[3],
9479 operand_types[3]))
9480 || ((check_register & 0xc) == 0xc
9481 && !operand_type_register_match (i.types[2],
9482 operand_types[2],
9483 i.types[3],
9484 operand_types[3])))
9486 specific_error = progress (i.error);
9487 continue;
9489 /* Fall through. */
9490 case 3:
9491 overlap2 = operand_type_and (i.types[2], operand_types[2]);
9492 if (!operand_type_match (overlap2, i.types[2])
9493 || ((check_register & 5) == 5
9494 && !operand_type_register_match (i.types[0],
9495 operand_types[0],
9496 i.types[2],
9497 operand_types[2]))
9498 || ((check_register & 6) == 6
9499 && !operand_type_register_match (i.types[1],
9500 operand_types[1],
9501 i.types[2],
9502 operand_types[2])))
9504 specific_error = progress (i.error);
9505 continue;
9507 break;
9510 /* Found either forward/reverse 2, 3 or 4 operand match here:
9511 slip through to break. */
9514 /* Check if VEX/EVEX encoding requirements can be satisfied. */
9515 if (VEX_check_encoding (t))
9517 specific_error = progress (i.error);
9518 continue;
9521 /* Check if EGPR operands (r16-r31) are valid. */
9522 if (check_EgprOperands (t))
9524 specific_error = progress (i.error);
9525 continue;
9528 /* Check if vector operands are valid. */
9529 if (check_VecOperands (t))
9531 specific_error = progress (i.error);
9532 continue;
9535 /* Check if APX operands are valid. */
9536 if (check_APX_operands (t))
9538 specific_error = progress (i.error);
9539 continue;
9542 /* Check whether to use the shorter VEX encoding for certain insns where
9543 the EVEX encoding comes first in the table. This requires the respective
9544 AVX-* feature to be explicitly enabled.
9546 Most of the respective insns have just a single EVEX and a single VEX
9547 template. The one that's presently different is generated using the
9548 Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
9549 two of which may fall back to their two corresponding VEX forms. */
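/* A hedged example (details assumed): with AVX-NE-CONVERT explicitly
   enabled, plain

	vcvtneps2bf16 %xmm1, %xmm0

   can use that extension's shorter VEX encoding in place of the
   AVX512-BF16 EVEX one, as long as nothing EVEX-only is in use. */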
9550 j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
9551 if ((t == current_templates.start || j > 1)
9552 && t->opcode_modifier.disp8memshift
9553 && !t->opcode_modifier.vex
9554 && !need_evex_encoding (t)
9555 && t + j < current_templates.end
9556 && t[j].opcode_modifier.vex)
9558 i386_cpu_flags cpu;
9559 unsigned int memshift = i.memshift;
9561 i.memshift = 0;
9562 cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
9563 cpu_arch_isa_flags);
9564 if (!cpu_flags_all_zero (&cpu)
9565 && (!i.types[0].bitfield.disp8
9566 || !operand_type_check (i.types[0], disp)
9567 || i.op[0].disps->X_op != O_constant
9568 || fits_in_disp8 (i.op[0].disps->X_add_number)))
9570 specific_error = progress (internal_error);
9571 t += j - 1;
9572 continue;
9574 i.memshift = memshift;
9577 /* If we can optimize a NDD insn to legacy insn, like
9578 add %r16, %r8, %r8 -> add %r16, %r8,
9579 add %r8, %r16, %r8 -> add %r16, %r8, then rematch template.
9580 Note that the semantics have not been changed. */
9581 if (optimize
9582 && !pp.no_optimize
9583 && pp.encoding != encoding_evex
9584 && ((t + 1 < current_templates.end
9585 && !t[1].opcode_modifier.evex
9586 && t[1].opcode_space <= SPACE_0F38
9587 && t->opcode_modifier.vexvvvv == VexVVVV_DST)
9588 || t->mnem_off == MN_movbe)
9589 && (i.types[i.operands - 1].bitfield.dword
9590 || i.types[i.operands - 1].bitfield.qword))
9592 unsigned int match_dest_op = can_convert_NDD_to_legacy (t);
9594 if (match_dest_op != (unsigned int) ~0)
9596 size_match = true;
9597 /* We ensure that the next template has the same input
9598 operands as the original matching template, by checking the
9599 first operand (AT&T order). This guards against new NDD
9600 templates being added in the wrong position. */
9601 overlap0 = operand_type_and (i.types[0],
9602 t[1].operand_types[0]);
9603 if (t->opcode_modifier.d)
9604 overlap1 = operand_type_and (i.types[0],
9605 t[1].operand_types[1]);
9606 if (!operand_type_match (overlap0, i.types[0])
9607 && (!t->opcode_modifier.d
9608 || !operand_type_match (overlap1, i.types[0])))
9609 size_match = false;
9611 if (size_match
9612 && (t[1].opcode_space <= SPACE_0F
9613 /* Some non-legacy-map0/1 insns can be shorter when
9614 legacy-encoded and when no REX prefix is required. */
9615 || (!check_EgprOperands (t + 1)
9616 && !check_Rex_required ()
9617 && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)))
9619 if (i.operands > 2 && match_dest_op == i.operands - 3)
9621 swap_2_operands (match_dest_op, i.operands - 2);
9623 /* CMOVcc is marked commutative, but then also needs its
9624 encoded condition inverted. */
9625 if ((t->base_opcode | 0xf) == 0x4f)
9626 i.invert_cond = true;
9629 --i.operands;
9630 --i.reg_operands;
9632 if (t->mnem_off == MN_movbe)
9634 gas_assert (t[1].mnem_off == MN_bswap);
9635 ++current_templates.end;
9638 specific_error = progress (internal_error);
9639 continue;
9645 /* We've found a match; break out of loop. */
9646 break;
9649 #undef progress
9651 if (t == current_templates.end)
9653 /* We found no match. */
9654 i.error = specific_error;
9655 return NULL;
9658 /* Don't emit diagnostics or install the template when one was already
9659 installed, i.e. when called from process_suffix(). */
9660 if (i.tm.mnem_off)
9661 return t;
9663 if (!quiet_warnings)
9665 if (!intel_syntax
9666 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
9667 as_warn (_("indirect %s without `*'"), insn_name (t));
9669 if (t->opcode_modifier.isprefix
9670 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
9672 /* Warn them that a data or address size prefix doesn't
9673 affect assembly of the next line of code. */
9674 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
9677 if (intel_syntax && mnem_suffix && !t->opcode_modifier.intelsuffix)
9679 static bool noticed;
9681 as_warn (_("mnemonic suffix used with `%s'"), insn_name (t));
9682 if (!noticed)
9684 noticed = true;
9685 as_warn (_(
9686 "NOTE: Such forms are deprecated and will be rejected by a future version of the assembler"));
9691 /* Copy the template we found. */
9692 install_template (t);
9694 if (addr_prefix_disp != -1)
9695 i.tm.operand_types[addr_prefix_disp]
9696 = operand_types[addr_prefix_disp];
9698 /* APX insns acting on byte operands are WIG, yet that can't be expressed
9699 in the templates (they're also covering word/dword/qword operands). */
9700 if (t->opcode_space == SPACE_MAP4 && !t->opcode_modifier.vexw &&
9701 i.types[i.operands - 1].bitfield.byte)
9703 gas_assert (t->opcode_modifier.w);
9704 i.tm.opcode_modifier.vexw = VEXWIG;
9707 switch (found_reverse_match)
9709 case 0:
9710 break;
9712 case Opcode_FloatR:
9713 case Opcode_FloatR | Opcode_FloatD:
9714 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
9715 found_reverse_match &= Opcode_FloatD;
9717 /* Fall through. */
9718 default:
9719 /* If we found a reverse match we must alter the opcode direction
9720 bit and clear/flip the regmem modifier one. found_reverse_match
9721 holds bits to change (different for int & float insns). */
9723 i.tm.base_opcode ^= found_reverse_match;
9725 if (i.tm.opcode_space == SPACE_MAP4)
9726 goto swap_first_2;
9728 /* Certain SIMD insns have their load forms specified in the opcode
9729 table, and hence we need to _set_ RegMem instead of clearing it.
9730 We need to avoid setting the bit though on insns like KMOVW. */
9731 i.tm.opcode_modifier.regmem
9732 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
9733 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
9734 && !i.tm.opcode_modifier.regmem;
9736 /* Fall through. */
9737 case ~0:
9738 if (i.tm.opcode_space == SPACE_MAP4
9739 && !t->opcode_modifier.commutative)
9740 i.tm.opcode_modifier.operandconstraint = EVEX_NF;
9741 i.tm.operand_types[0] = operand_types[i.operands - 1];
9742 i.tm.operand_types[i.operands - 1] = operand_types[0];
9743 break;
9745 case Opcode_VexW:
9746 /* Only the first two register operands need reversing, alongside
9747 flipping VEX.W. */
9748 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
9750 /* In 3-operand insns XOP.W changes which operand goes into XOP.vvvv. */
9751 i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
9753 swap_first_2:
9754 j = i.tm.operand_types[0].bitfield.imm8;
9755 i.tm.operand_types[j] = operand_types[j + 1];
9756 i.tm.operand_types[j + 1] = operand_types[j];
9757 break;
9760 return t;
9763 static int
9764 check_string (void)
9766 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
9767 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
9769 if (i.seg[op] != NULL && i.seg[op] != reg_es)
9771 as_bad (_("`%s' operand %u must use `%ses' segment"),
9772 insn_name (&i.tm),
9773 intel_syntax ? i.tm.operands - es_op : es_op + 1,
9774 register_prefix);
9775 return 0;
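/* E.g. (illustrative):  stos %eax, %fs:(%edi)  is rejected just above,
   as the destination of a string store must live in %es. */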
9778 /* There's only ever one segment override allowed per instruction.
9779 This instruction possibly has a legal segment override on the
9780 second operand, so copy the segment to where non-string
9781 instructions store it, allowing common code. */
9782 i.seg[op] = i.seg[1];
9784 return 1;
9787 static int
9788 process_suffix (const insn_template *t)
9790 bool is_movx = false;
9792 /* If matched instruction specifies an explicit instruction mnemonic
9793 suffix, use it. */
9794 if (i.tm.opcode_modifier.size == SIZE16)
9795 i.suffix = WORD_MNEM_SUFFIX;
9796 else if (i.tm.opcode_modifier.size == SIZE32)
9797 i.suffix = LONG_MNEM_SUFFIX;
9798 else if (i.tm.opcode_modifier.size == SIZE64)
9799 i.suffix = QWORD_MNEM_SUFFIX;
9800 else if (i.reg_operands
9801 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
9802 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
9804 unsigned int numop = i.operands;
9806 /* MOVSX/MOVZX */
9807 is_movx = (i.tm.opcode_space == SPACE_0F
9808 && (i.tm.base_opcode | 8) == 0xbe)
9809 || (i.tm.opcode_space == SPACE_BASE
9810 && i.tm.base_opcode == 0x63
9811 && is_cpu (&i.tm, Cpu64));
9813 /* movsx/movzx want only their source operand considered here, for the
9814 ambiguity checking below. The suffix will be replaced afterwards
9815 to represent the destination (register). */
9816 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
9817 --i.operands;
9819 /* crc32 needs REX.W set regardless of suffix / source operand size. */
9820 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
9821 i.rex |= REX_W;
9823 /* If there's no instruction mnemonic suffix we try to invent one
9824 based on GPR operands. */
9825 if (!i.suffix)
9827 /* We take i.suffix from the last register operand specified.
9828 Destination register type is more significant than source
9829 register type. crc32 in SSE4.2 prefers source register
9830 type. */
9831 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
9833 while (op--)
9834 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
9835 || i.tm.operand_types[op].bitfield.instance == Accum)
9837 if (i.types[op].bitfield.class != Reg)
9838 continue;
9839 if (i.types[op].bitfield.byte)
9840 i.suffix = BYTE_MNEM_SUFFIX;
9841 else if (i.types[op].bitfield.word)
9842 i.suffix = WORD_MNEM_SUFFIX;
9843 else if (i.types[op].bitfield.dword)
9844 i.suffix = LONG_MNEM_SUFFIX;
9845 else if (i.types[op].bitfield.qword)
9846 i.suffix = QWORD_MNEM_SUFFIX;
9847 else
9848 continue;
9849 break;
9852 /* As an exception, movsx/movzx silently default to a byte source
9853 in AT&T mode. */
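/* E.g. (illustrative):  movzx (%rax), %eax  is assembled as if
   movzbl (%rax), %eax  had been written. */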
9854 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
9855 i.suffix = BYTE_MNEM_SUFFIX;
9857 else if (i.suffix == BYTE_MNEM_SUFFIX)
9859 if (!check_byte_reg ())
9860 return 0;
9862 else if (i.suffix == LONG_MNEM_SUFFIX)
9864 if (!check_long_reg ())
9865 return 0;
9867 else if (i.suffix == QWORD_MNEM_SUFFIX)
9869 if (!check_qword_reg ())
9870 return 0;
9872 else if (i.suffix == WORD_MNEM_SUFFIX)
9874 if (!check_word_reg ())
9875 return 0;
9877 else if (intel_syntax
9878 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
9879 /* Do nothing if the instruction is going to ignore the prefix. */
9881 else
9882 abort ();
9884 /* Undo the movsx/movzx change done above. */
9885 i.operands = numop;
9887 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
9888 && !i.suffix)
9890 i.suffix = stackop_size;
9891 if (stackop_size == LONG_MNEM_SUFFIX)
9893 /* stackop_size is set to LONG_MNEM_SUFFIX for the
9894 .code16gcc directive to support 16-bit mode with
9895 32-bit address. For IRET without a suffix, generate
9896 16-bit IRET (opcode 0xcf) to return from an interrupt
9897 handler. */
9898 if (i.tm.base_opcode == 0xcf)
9900 i.suffix = WORD_MNEM_SUFFIX;
9901 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
9903 /* Warn about changed behavior for segment register push/pop. */
9904 else if ((i.tm.base_opcode | 1) == 0x07)
9905 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
9906 insn_name (&i.tm));
9909 else if (!i.suffix
9910 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
9911 || i.tm.opcode_modifier.jump == JUMP_BYTE
9912 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
9913 || (i.tm.opcode_space == SPACE_0F
9914 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
9915 && i.tm.extension_opcode <= 3)))
9917 switch (flag_code)
9919 case CODE_64BIT:
9920 if (!i.tm.opcode_modifier.no_qsuf)
9922 if (i.tm.opcode_modifier.jump == JUMP_BYTE
9923 || i.tm.opcode_modifier.no_lsuf)
9924 i.suffix = QWORD_MNEM_SUFFIX;
9925 break;
9927 /* Fall through. */
9928 case CODE_32BIT:
9929 if (!i.tm.opcode_modifier.no_lsuf)
9930 i.suffix = LONG_MNEM_SUFFIX;
9931 break;
9932 case CODE_16BIT:
9933 if (!i.tm.opcode_modifier.no_wsuf)
9934 i.suffix = WORD_MNEM_SUFFIX;
9935 break;
9939 if (!i.suffix
9940 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9941 /* Also cover lret/retf/iret in 64-bit mode. */
9942 || (flag_code == CODE_64BIT
9943 && !i.tm.opcode_modifier.no_lsuf
9944 && !i.tm.opcode_modifier.no_qsuf))
9945 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9946 /* Explicit sizing prefixes are assumed to disambiguate insns. */
9947 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
9948 /* Accept FLDENV et al without suffix. */
9949 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
9951 unsigned int suffixes, evex = 0;
9953 suffixes = !i.tm.opcode_modifier.no_bsuf;
9954 if (!i.tm.opcode_modifier.no_wsuf)
9955 suffixes |= 1 << 1;
9956 if (!i.tm.opcode_modifier.no_lsuf)
9957 suffixes |= 1 << 2;
9958 if (!i.tm.opcode_modifier.no_ssuf)
9959 suffixes |= 1 << 4;
9960 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
9961 suffixes |= 1 << 5;
9963 /* Operand size may be ambiguous only across multiple templates. Avoid
9964 the extra effort though if we already know that multiple suffixes /
9965 operand sizes are allowed. Also limit this to non-SIMD operand sizes
9966 (i.e. ones expressible via suffixes) for now.
9967 There's one special case though that needs excluding: Insns taking
9968 Disp<N> operands also match templates permitting BaseIndex. JMP in
9969 particular would thus wrongly trigger the check further down. Cover
9970 JUMP_DWORD insns here as well, just in case. */
9971 if (i.tm.opcode_modifier.jump != JUMP
9972 && i.tm.opcode_modifier.jump != JUMP_DWORD)
9973 while (!(suffixes & (suffixes - 1)))
9975 /* Sadly check_VecOperands(), running ahead of install_template(),
9976 may update i.memshift. Save and restore the value here. */
9977 unsigned int memshift = i.memshift;
9979 current_templates.start = t + 1;
9980 t = match_template (0);
9981 i.memshift = memshift;
9982 if (t == NULL)
9983 break;
9984 if (!t->opcode_modifier.no_bsuf)
9985 suffixes |= 1 << 0;
9986 if (!t->opcode_modifier.no_wsuf)
9987 suffixes |= 1 << 1;
9988 if (!t->opcode_modifier.no_lsuf)
9989 suffixes |= 1 << 2;
9990 if (!t->opcode_modifier.no_ssuf)
9991 suffixes |= 1 << 4;
9992 if (flag_code == CODE_64BIT && !t->opcode_modifier.no_qsuf)
9993 suffixes |= 1 << 5;
9996 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
9997 also suitable for AT&T syntax mode, it was requested that this be
9998 restricted to just Intel syntax. */
9999 if (intel_syntax && is_any_vex_encoding (&i.tm)
10000 && !i.broadcast.type && !i.broadcast.bytes)
10002 unsigned int op;
10004 for (op = 0; op < i.tm.operands; ++op)
10006 if (vector_size < VSZ512)
10008 i.tm.operand_types[op].bitfield.zmmword = 0;
10009 if (vector_size < VSZ256)
10011 i.tm.operand_types[op].bitfield.ymmword = 0;
10012 if (i.tm.operand_types[op].bitfield.xmmword
10013 && i.tm.opcode_modifier.evex == EVEXDYN)
10014 i.tm.opcode_modifier.evex = EVEX128;
10016 else if (i.tm.operand_types[op].bitfield.ymmword
10017 && !i.tm.operand_types[op].bitfield.xmmword
10018 && i.tm.opcode_modifier.evex == EVEXDYN)
10019 i.tm.opcode_modifier.evex = EVEX256;
10021 else if (i.tm.opcode_modifier.evex
10022 && !cpu_arch_flags.bitfield.cpuavx512vl)
10024 if (i.tm.operand_types[op].bitfield.ymmword)
10025 i.tm.operand_types[op].bitfield.xmmword = 0;
10026 if (i.tm.operand_types[op].bitfield.zmmword)
10027 i.tm.operand_types[op].bitfield.ymmword = 0;
10028 if (i.tm.opcode_modifier.evex == EVEXDYN)
10029 i.tm.opcode_modifier.evex = EVEX512;
10032 if (i.tm.operand_types[op].bitfield.xmmword
10033 + i.tm.operand_types[op].bitfield.ymmword
10034 + i.tm.operand_types[op].bitfield.zmmword < 2)
10035 continue;
10037 /* Any properly sized operand disambiguates the insn. */
10038 if (i.types[op].bitfield.xmmword
10039 || i.types[op].bitfield.ymmword
10040 || i.types[op].bitfield.zmmword)
10042 suffixes &= ~(7 << 6);
10043 evex = 0;
10044 break;
10047 if ((i.flags[op] & Operand_Mem)
10048 && i.tm.operand_types[op].bitfield.unspecified)
10050 if (i.tm.operand_types[op].bitfield.xmmword)
10051 suffixes |= 1 << 6;
10052 if (i.tm.operand_types[op].bitfield.ymmword)
10053 suffixes |= 1 << 7;
10054 if (i.tm.operand_types[op].bitfield.zmmword)
10055 suffixes |= 1 << 8;
10056 if (i.tm.opcode_modifier.evex)
10057 evex = EVEX512;
10062 /* Are multiple suffixes / operand sizes allowed? */
10063 if (suffixes & (suffixes - 1))
10065 if (intel_syntax
10066 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
10067 || operand_check == check_error))
10069 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
10070 return 0;
10072 if (operand_check == check_error)
10074 as_bad (_("no instruction mnemonic suffix given and "
10075 "no register operands; can't size `%s'"), insn_name (&i.tm));
10076 return 0;
10078 if (operand_check == check_warning)
10079 as_warn (_("%s; using default for `%s'"),
10080 intel_syntax
10081 ? _("ambiguous operand size")
10082 : _("no instruction mnemonic suffix given and "
10083 "no register operands"),
10084 insn_name (&i.tm));
10086 if (i.tm.opcode_modifier.floatmf)
10087 i.suffix = SHORT_MNEM_SUFFIX;
10088 else if (is_movx)
10089 /* handled below */;
10090 else if (evex)
10091 i.tm.opcode_modifier.evex = evex;
10092 else if (flag_code == CODE_16BIT)
10093 i.suffix = WORD_MNEM_SUFFIX;
10094 else if (!i.tm.opcode_modifier.no_lsuf)
10095 i.suffix = LONG_MNEM_SUFFIX;
10096 else
10097 i.suffix = QWORD_MNEM_SUFFIX;
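/* Example (illustrative): AT&T `add $1, (%eax)' in 32-bit code could
   be byte, word or dword sized.  With the default operand_check ==
   check_warning this warns and falls back to the 'l' suffix chosen
   just above; in `.code16' the fallback would be 'w' instead.  */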
10101 if (is_movx)
10103 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
10104 In AT&T syntax, if there is no suffix (warned about above), the default
10105 will be byte extension. */
10106 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
10107 i.tm.base_opcode |= 1;
10109 /* For further processing, the suffix should represent the destination
10110 (register). This is already the case when one was used with
10111 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
10112 no suffix to begin with. */
10113 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
10115 if (i.types[1].bitfield.word)
10116 i.suffix = WORD_MNEM_SUFFIX;
10117 else if (i.types[1].bitfield.qword)
10118 i.suffix = QWORD_MNEM_SUFFIX;
10119 else
10120 i.suffix = LONG_MNEM_SUFFIX;
10122 i.tm.opcode_modifier.w = 0;
10126 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
10127 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
10128 != (i.tm.operand_types[1].bitfield.class == Reg);
10130 /* Change the opcode based on the operand size given by i.suffix. */
10131 switch (i.suffix)
10133 /* Size floating point instruction. */
10134 case LONG_MNEM_SUFFIX:
10135 if (i.tm.opcode_modifier.floatmf)
10137 i.tm.base_opcode ^= 4;
10138 break;
10140 /* fall through */
10141 case WORD_MNEM_SUFFIX:
10142 case QWORD_MNEM_SUFFIX:
10143 /* It's not a byte, select word/dword operation. */
10144 if (i.tm.opcode_modifier.w)
10146 if (i.short_form)
10147 i.tm.base_opcode |= 8;
10148 else
10149 i.tm.base_opcode |= 1;
10152 /* Set mode64 for an operand. */
10153 if (i.suffix == QWORD_MNEM_SUFFIX)
10155 if (flag_code == CODE_64BIT
10156 && !i.tm.opcode_modifier.norex64
10157 && !i.tm.opcode_modifier.vexw
10158 /* Special case for xchg %rax,%rax. It is a NOP and doesn't
10159 need rex64. */
10160 && ! (i.operands == 2
10161 && i.tm.base_opcode == 0x90
10162 && i.tm.opcode_space == SPACE_BASE
10163 && i.types[0].bitfield.instance == Accum
10164 && i.types[1].bitfield.instance == Accum))
10165 i.rex |= REX_W;
10167 break;
10170 /* fall through */
10171 case SHORT_MNEM_SUFFIX:
10172 /* Now select between word & dword operations via the operand
10173 size prefix, except for instructions that will ignore this
10174 prefix anyway. */
10175 if (i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
10176 && !i.tm.opcode_modifier.floatmf
10177 && (!is_any_vex_encoding (&i.tm)
10178 || i.tm.opcode_space == SPACE_MAP4)
10179 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
10180 || (flag_code == CODE_64BIT
10181 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
10183 unsigned int prefix = DATA_PREFIX_OPCODE;
10185 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
10186 prefix = ADDR_PREFIX_OPCODE;
10188 /* For legacy insns promoted to EVEX (APX), the data prefix is
10189 instead represented within the EVEX prefix itself. */
10190 if (i.tm.opcode_space == SPACE_MAP4)
10192 gas_assert (!i.tm.opcode_modifier.opcodeprefix);
10193 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
10195 else if (!add_prefix (prefix))
10196 return 0;
10199 break;
10201 case 0:
10202 /* Select word/dword/qword operation with explicit data sizing prefix
10203 when there are no suitable register operands. */
10204 if (i.tm.opcode_modifier.w
10205 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
10206 && (!i.reg_operands
10207 || (i.reg_operands == 1
10208 /* ShiftCount */
10209 && (i.tm.operand_types[0].bitfield.instance == RegC
10210 /* InOutPortReg */
10211 || i.tm.operand_types[0].bitfield.instance == RegD
10212 || i.tm.operand_types[1].bitfield.instance == RegD
10213 || i.tm.mnem_off == MN_crc32))))
10214 i.tm.base_opcode |= 1;
10215 break;
10218 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
10220 gas_assert (!i.suffix);
10221 gas_assert (i.reg_operands);
10223 if (i.tm.operand_types[0].bitfield.instance == Accum
10224 || i.operands == 1)
10226 /* The address size override prefix changes the size of the
10227 first operand. */
10228 if (flag_code == CODE_64BIT
10229 && i.op[0].regs->reg_type.bitfield.word)
10231 as_bad (_("16-bit addressing unavailable for `%s'"),
10232 insn_name (&i.tm));
10233 return 0;
10236 if ((flag_code == CODE_32BIT
10237 ? i.op[0].regs->reg_type.bitfield.word
10238 : i.op[0].regs->reg_type.bitfield.dword)
10239 && !add_prefix (ADDR_PREFIX_OPCODE))
10240 return 0;
10242 else
10244 /* Check invalid register operand when the address size override
10245 prefix changes the size of register operands. */
10246 unsigned int op;
10247 enum { need_word, need_dword, need_qword } need;
10249 /* Check the register operand for the address size prefix if
10250 the memory operand has no real registers, like symbol, DISP
10251 or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
10252 if (i.mem_operands == 1
10253 && i.reg_operands == 1
10254 && i.operands == 2
10255 && i.types[1].bitfield.class == Reg
10256 && (flag_code == CODE_32BIT
10257 ? i.op[1].regs->reg_type.bitfield.word
10258 : i.op[1].regs->reg_type.bitfield.dword)
10259 && ((i.base_reg == NULL && i.index_reg == NULL)
10260 #ifdef OBJ_ELF
10261 || (x86_elf_abi == X86_64_X32_ABI
10262 && i.base_reg
10263 && i.base_reg->reg_num == RegIP
10264 && i.base_reg->reg_type.bitfield.qword))
10265 #else
10266 || 0)
10267 #endif
10268 && !add_prefix (ADDR_PREFIX_OPCODE))
10269 return 0;
10271 if (flag_code == CODE_32BIT)
10272 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
10273 else if (i.prefix[ADDR_PREFIX])
10274 need = need_dword;
10275 else
10276 need = flag_code == CODE_64BIT ? need_qword : need_word;
10278 for (op = 0; op < i.operands; op++)
10280 if (i.types[op].bitfield.class != Reg)
10281 continue;
10283 switch (need)
10285 case need_word:
10286 if (i.op[op].regs->reg_type.bitfield.word)
10287 continue;
10288 break;
10289 case need_dword:
10290 if (i.op[op].regs->reg_type.bitfield.dword)
10291 continue;
10292 break;
10293 case need_qword:
10294 if (i.op[op].regs->reg_type.bitfield.qword)
10295 continue;
10296 break;
10299 as_bad (_("invalid register operand size for `%s'"),
10300 insn_name (&i.tm));
10301 return 0;
10306 return 1;
10309 static int
10310 check_byte_reg (void)
10312 int op;
10314 for (op = i.operands; --op >= 0;)
10316 /* Skip non-register operands. */
10317 if (i.types[op].bitfield.class != Reg)
10318 continue;
10320 /* If this is an eight bit register, it's OK. */
10321 if (i.types[op].bitfield.byte)
10323 if (i.tm.opcode_modifier.checkoperandsize)
10324 break;
10325 continue;
10328 /* I/O port address operands are OK too. */
10329 if (i.tm.operand_types[op].bitfield.instance == RegD
10330 && i.tm.operand_types[op].bitfield.word)
10331 continue;
10333 /* crc32 only wants its source operand checked here. */
10334 if (i.tm.mnem_off == MN_crc32 && op != 0)
10335 continue;
10337 /* Any other register is bad. */
10338 as_bad (_("`%s%s' not allowed with `%s%c'"),
10339 register_prefix, i.op[op].regs->reg_name,
10340 insn_name (&i.tm), i.suffix);
10341 return 0;
10343 return 1;
10346 static int
10347 check_long_reg (void)
10349 int op;
10351 for (op = i.operands; --op >= 0;)
10352 /* Skip non-register operands. */
10353 if (i.types[op].bitfield.class != Reg)
10354 continue;
10355 /* Reject eight bit registers, except where the template requires
10356 them (e.g. movzb). */
10357 else if (i.types[op].bitfield.byte
10358 && (i.tm.operand_types[op].bitfield.word
10359 || i.tm.operand_types[op].bitfield.dword
10360 || i.tm.operand_types[op].bitfield.qword))
10362 as_bad (_("`%s%s' not allowed with `%s%c'"),
10363 register_prefix,
10364 i.op[op].regs->reg_name,
10365 insn_name (&i.tm),
10366 i.suffix);
10367 return 0;
10369 /* Error if the e prefix on a general reg is missing, or if the r
10370 prefix on a general reg is present. */
10371 else if ((i.types[op].bitfield.word
10372 || i.types[op].bitfield.qword)
10373 && i.tm.operand_types[op].bitfield.dword)
10375 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
10376 register_prefix, i.op[op].regs->reg_name,
10377 i.suffix);
10378 return 0;
10380 else if (i.tm.opcode_modifier.checkoperandsize)
10381 break;
10383 return 1;
10386 static int
10387 check_qword_reg (void)
10389 int op;
10391 for (op = i.operands; --op >= 0; )
10392 /* Skip non-register operands. */
10393 if (i.types[op].bitfield.class != Reg)
10394 continue;
10395 /* Reject eight bit registers, except where the template requires
10396 them (e.g. movzb). */
10397 else if (i.types[op].bitfield.byte
10398 && (i.tm.operand_types[op].bitfield.word
10399 || i.tm.operand_types[op].bitfield.dword
10400 || i.tm.operand_types[op].bitfield.qword))
10402 as_bad (_("`%s%s' not allowed with `%s%c'"),
10403 register_prefix,
10404 i.op[op].regs->reg_name,
10405 insn_name (&i.tm),
10406 i.suffix);
10407 return 0;
10409 /* Error if the r prefix on a general reg is missing. */
10410 else if ((i.types[op].bitfield.word
10411 || i.types[op].bitfield.dword)
10412 && i.tm.operand_types[op].bitfield.qword)
10414 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
10415 register_prefix, i.op[op].regs->reg_name, i.suffix);
10416 return 0;
10418 else if (i.tm.opcode_modifier.checkoperandsize)
10419 break;
10421 return 1;
10424 static int
10425 check_word_reg (void)
10427 int op;
10428 for (op = i.operands; --op >= 0;)
10429 /* Skip non-register operands. */
10430 if (i.types[op].bitfield.class != Reg)
10431 continue;
10432 /* Reject eight bit registers, except where the template requires
10433 them (e.g. movzb). */
10434 else if (i.types[op].bitfield.byte
10435 && (i.tm.operand_types[op].bitfield.word
10436 || i.tm.operand_types[op].bitfield.dword
10437 || i.tm.operand_types[op].bitfield.qword))
10439 as_bad (_("`%s%s' not allowed with `%s%c'"),
10440 register_prefix,
10441 i.op[op].regs->reg_name,
10442 insn_name (&i.tm),
10443 i.suffix);
10444 return 0;
10446 /* Error if the e or r prefix on a general reg is present. */
10447 else if ((i.types[op].bitfield.dword
10448 || i.types[op].bitfield.qword)
10449 && i.tm.operand_types[op].bitfield.word)
10451 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
10452 register_prefix, i.op[op].regs->reg_name,
10453 i.suffix);
10454 return 0;
10456 else if (i.tm.opcode_modifier.checkoperandsize)
10457 break;
10459 return 1;
10462 static int
10463 update_imm (unsigned int j)
10465 i386_operand_type overlap = i.types[j];
10467 if (i.tm.operand_types[j].bitfield.imm8
10468 && i.tm.operand_types[j].bitfield.imm8s
10469 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
10471 /* This combination is used on 8-bit immediates where e.g. $~0 is
10472 desirable to permit. We're past operand type matching, so simply
10473 put things back in the shape they were before introducing the
10474 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
10475 overlap.bitfield.imm8s = 0;
10478 if (overlap.bitfield.imm8
10479 + overlap.bitfield.imm8s
10480 + overlap.bitfield.imm16
10481 + overlap.bitfield.imm32
10482 + overlap.bitfield.imm32s
10483 + overlap.bitfield.imm64 > 1)
10485 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
10486 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
10487 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
10488 static const i386_operand_type imm16_32 = { .bitfield =
10489 { .imm16 = 1, .imm32 = 1 }
10491 static const i386_operand_type imm16_32s = { .bitfield =
10492 { .imm16 = 1, .imm32s = 1 }
10494 static const i386_operand_type imm16_32_32s = { .bitfield =
10495 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
10498 if (i.suffix)
10500 i386_operand_type temp;
10502 operand_type_set (&temp, 0);
10503 if (i.suffix == BYTE_MNEM_SUFFIX)
10505 temp.bitfield.imm8 = overlap.bitfield.imm8;
10506 temp.bitfield.imm8s = overlap.bitfield.imm8s;
10508 else if (i.suffix == WORD_MNEM_SUFFIX)
10509 temp.bitfield.imm16 = overlap.bitfield.imm16;
10510 else if (i.suffix == QWORD_MNEM_SUFFIX)
10512 temp.bitfield.imm64 = overlap.bitfield.imm64;
10513 temp.bitfield.imm32s = overlap.bitfield.imm32s;
10515 else
10516 temp.bitfield.imm32 = overlap.bitfield.imm32;
10517 overlap = temp;
10519 else if (operand_type_equal (&overlap, &imm16_32_32s)
10520 || operand_type_equal (&overlap, &imm16_32)
10521 || operand_type_equal (&overlap, &imm16_32s))
10523 if ((flag_code == CODE_16BIT)
10524 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
10525 overlap = imm16;
10526 else
10527 overlap = imm32s;
10529 else if (i.prefix[REX_PREFIX] & REX_W)
10530 overlap = operand_type_and (overlap, imm32s);
10531 else if (i.prefix[DATA_PREFIX])
10532 overlap = operand_type_and (overlap,
10533 flag_code != CODE_16BIT ? imm16 : imm32);
10534 if (overlap.bitfield.imm8
10535 + overlap.bitfield.imm8s
10536 + overlap.bitfield.imm16
10537 + overlap.bitfield.imm32
10538 + overlap.bitfield.imm32s
10539 + overlap.bitfield.imm64 != 1)
10541 as_bad (_("no instruction mnemonic suffix given; "
10542 "can't determine immediate size"));
10543 return 0;
10546 i.types[j] = overlap;
10548 return 1;
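/* Example (illustrative): assuming `pushl $1' matched a template
   permitting Imm16|Imm32|Imm32S, the 'l' suffix narrows the overlap
   above to just Imm32 and a 4-byte immediate is emitted.  Without a
   suffix, the flag_code / prefix logic picks Imm16 in 16-bit code
   and Imm32S otherwise.  */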
10551 static int
10552 finalize_imm (void)
10554 unsigned int j, n;
10556 /* Update the first 2 immediate operands. */
10557 n = i.operands > 2 ? 2 : i.operands;
10558 if (n)
10560 for (j = 0; j < n; j++)
10561 if (update_imm (j) == 0)
10562 return 0;
10564 /* The 3rd operand can't be an immediate operand. */
10565 gas_assert (operand_type_check (i.types[2], imm) == 0);
10568 return 1;
10571 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
10572 bool do_sse2avx)
10574 if (r->reg_flags & RegRex)
10576 if (i.rex & rex_bit)
10577 as_bad (_("same type of prefix used twice"));
10578 i.rex |= rex_bit;
10580 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
10582 gas_assert (i.vex.register_specifier == r);
10583 i.vex.register_specifier += 8;
10586 if (r->reg_flags & RegVRex)
10587 i.vrex |= rex_bit;
10589 if (r->reg_flags & RegRex2)
10590 i.rex2 |= rex_bit;
10593 static INLINE void
10594 set_rex_rex2 (const reg_entry *r, unsigned int rex_bit)
10596 if ((r->reg_flags & RegRex) != 0)
10597 i.rex |= rex_bit;
10598 if ((r->reg_flags & RegRex2) != 0)
10599 i.rex2 |= rex_bit;
10602 static int
10603 process_operands (void)
10605 /* Default segment register this instruction will use for memory
10606 accesses. 0 means unknown. This is only for optimizing out
10607 unnecessary segment overrides. */
10608 const reg_entry *default_seg = NULL;
10610 for (unsigned int j = 0; j < i.operands; j++)
10611 if (i.types[j].bitfield.instance != InstanceNone)
10612 i.reg_operands--;
10614 if (i.tm.opcode_modifier.sse2avx)
10616 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
10617 need converting. */
10618 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
10619 i.prefix[REX_PREFIX] = 0;
10620 pp.rex_encoding = 0;
10621 pp.rex2_encoding = 0;
10623 /* ImmExt should be processed after SSE2AVX. */
10624 else if (i.tm.opcode_modifier.immext)
10625 process_immext ();
10627 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
10628 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
10629 new destination operand here, while converting the source one to register
10630 number 0. */
10631 if (i.tm.mnem_off == MN_tilezero)
10633 copy_operand (1, 0);
10634 i.op[0].regs -= i.op[0].regs->reg_num;
10635 i.operands++;
10636 i.reg_operands++;
10637 i.tm.operands++;
10640 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
10642 static const i386_operand_type regxmm = {
10643 .bitfield = { .class = RegSIMD, .xmmword = 1 }
10645 unsigned int dupl = i.operands;
10646 unsigned int dest = dupl - 1;
10647 unsigned int j;
10649 /* The destination must be an xmm register. */
10650 gas_assert (i.reg_operands
10651 && MAX_OPERANDS > dupl
10652 && operand_type_equal (&i.types[dest], &regxmm));
10654 if (i.tm.operand_types[0].bitfield.instance == Accum
10655 && i.tm.operand_types[0].bitfield.xmmword)
10657 /* Keep xmm0 for instructions with VEX prefix and 3
10658 sources. */
10659 i.tm.operand_types[0].bitfield.instance = InstanceNone;
10660 i.tm.operand_types[0].bitfield.class = RegSIMD;
10661 i.reg_operands++;
10662 goto duplicate;
10665 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
10667 gas_assert ((MAX_OPERANDS - 1) > dupl);
10669 /* Add the implicit xmm0 for instructions with VEX prefix
10670 and 3 sources. */
10671 for (j = i.operands; j > 0; j--)
10672 copy_operand (j, j - 1);
10673 i.op[0].regs
10674 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
10675 i.types[0] = regxmm;
10676 i.tm.operand_types[0] = regxmm;
10678 i.operands += 2;
10679 i.reg_operands += 2;
10680 i.tm.operands += 2;
10682 dupl++;
10683 dest++;
10685 else
10687 duplicate:
10688 i.operands++;
10689 i.reg_operands++;
10690 i.tm.operands++;
10693 copy_operand (dupl, dest);
10695 if (i.tm.opcode_modifier.immext)
10696 process_immext ();
10698 else if (i.tm.operand_types[0].bitfield.instance == Accum
10699 && i.tm.opcode_modifier.modrm)
10701 unsigned int j;
10703 for (j = 1; j < i.operands; j++)
10704 copy_operand (j - 1, j);
10706 /* No adjustment to i.reg_operands: This was already done at the top
10707 of the function. */
10708 i.operands--;
10709 i.tm.operands--;
10711 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_GROUP)
10713 unsigned int op, extra;
10714 const reg_entry *first;
10716 /* The second operand must be {x,y,z}mmN. */
10717 gas_assert (i.operands == 3 && i.types[1].bitfield.class == RegSIMD);
10719 switch (i.types[2].bitfield.class)
10721 case RegSIMD:
10722 /* AVX512-{4FMAPS,4VNNIW} operand 2: N must be a multiple of 4. */
10723 op = 1;
10724 extra = 3;
10725 break;
10727 case RegMask:
10728 /* AVX512-VP2INTERSECT operand 3: N must be a multiple of 2. */
10729 op = 2;
10730 extra = 1;
10731 break;
10733 default:
10734 abort ();
10737 first = i.op[op].regs - (register_number (i.op[op].regs) & extra);
10738 if (i.op[op].regs != first)
10739 as_warn (_("operand %u `%s%s' implicitly denotes"
10740 " `%s%s' to `%s%s' group in `%s'"),
10741 intel_syntax ? i.operands - op : op + 1,
10742 register_prefix, i.op[op].regs->reg_name,
10743 register_prefix, first[0].reg_name,
10744 register_prefix, first[extra].reg_name,
10745 insn_name (&i.tm));
10747 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
10749 /* The imul $imm, %reg instruction is converted into
10750 imul $imm, %reg, %reg, and the clr %reg instruction
10751 is converted into xor %reg, %reg. */
10753 unsigned int first_reg_op;
10755 if (operand_type_check (i.types[0], reg))
10756 first_reg_op = 0;
10757 else
10758 first_reg_op = 1;
10759 /* Pretend we saw the extra register operand. */
10760 gas_assert (i.reg_operands == 1
10761 && i.op[first_reg_op + 1].regs == 0);
10762 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
10763 i.types[first_reg_op + 1] = i.types[first_reg_op];
10764 i.operands++;
10765 i.reg_operands++;
10767 /* For IMULZU switch around the constraint. */
10768 if (i.tm.mnem_off == MN_imulzu)
10769 i.tm.opcode_modifier.operandconstraint = ZERO_UPPER;
10772 if (i.tm.opcode_modifier.modrm)
10774 /* The opcode is completed (modulo i.tm.extension_opcode which
10775 must be put into the modrm byte). Now, we make the modrm and
10776 index base bytes based on all the info we've collected. */
10778 default_seg = build_modrm_byte ();
10780 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
10782 /* Warn about some common errors, but press on regardless. */
10783 if (i.operands == 2)
10785 /* Reversed arguments on faddp or fmulp. */
10786 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
10787 register_prefix, i.op[!intel_syntax].regs->reg_name,
10788 register_prefix, i.op[intel_syntax].regs->reg_name);
10790 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
10792 /* Extraneous `l' suffix on fp insn. */
10793 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
10794 register_prefix, i.op[0].regs->reg_name);
10798 else if (i.types[0].bitfield.class == SReg && !dot_insn ())
10800 if (flag_code != CODE_64BIT
10801 ? i.tm.base_opcode == POP_SEG_SHORT
10802 && i.op[0].regs->reg_num == 1
10803 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
10804 && i.op[0].regs->reg_num < 4)
10806 as_bad (_("you can't `%s %s%s'"),
10807 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
10808 return 0;
10810 if (i.op[0].regs->reg_num > 3
10811 && i.tm.opcode_space == SPACE_BASE )
10813 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
10814 i.tm.opcode_space = SPACE_0F;
10816 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
10818 else if (i.tm.opcode_space == SPACE_BASE
10819 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
10821 default_seg = reg_ds;
10823 else if (i.tm.opcode_modifier.isstring)
10825 /* For the string instructions that allow a segment override
10826 on one of their operands, the default segment is ds. */
10827 default_seg = reg_ds;
10829 else if (i.short_form)
10831 /* The register operand is the 1st or 2nd non-immediate operand. */
10832 const reg_entry *r = i.op[i.imm_operands].regs;
10834 if (!dot_insn ()
10835 && r->reg_type.bitfield.instance == Accum
10836 && i.op[i.imm_operands + 1].regs)
10837 r = i.op[i.imm_operands + 1].regs;
10838 /* Register goes in low 3 bits of opcode. */
10839 i.tm.base_opcode |= r->reg_num;
10840 set_rex_vrex (r, REX_B, false);
10842 if (dot_insn () && i.reg_operands == 2)
10844 gas_assert (is_any_vex_encoding (&i.tm)
10845 || pp.encoding != encoding_default);
10846 i.vex.register_specifier = i.op[i.operands - 1].regs;
10849 else if (i.reg_operands == 1
10850 && !i.flags[i.operands - 1]
10851 && i.tm.operand_types[i.operands - 1].bitfield.instance
10852 == InstanceNone)
10854 gas_assert (is_any_vex_encoding (&i.tm)
10855 || pp.encoding != encoding_default);
10856 i.vex.register_specifier = i.op[i.operands - 1].regs;
10859 if ((i.seg[0] || i.prefix[SEG_PREFIX])
10860 && i.tm.mnem_off == MN_lea)
10862 if (!quiet_warnings)
10863 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
10864 if (optimize && !pp.no_optimize)
10866 i.seg[0] = NULL;
10867 i.prefix[SEG_PREFIX] = 0;
10871 /* If a segment was explicitly specified, and the specified segment
10872 is neither the default nor the one already recorded from a prefix,
10873 use an opcode prefix to select it. If we never figured out what
10874 the default segment is, then default_seg will be zero at this
10875 point, and the specified segment prefix will always be used. */
10876 if (i.seg[0]
10877 && i.seg[0] != default_seg
10878 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
10880 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
10881 return 0;
10883 return 1;
10886 static const reg_entry *
10887 build_modrm_byte (void)
10889 const reg_entry *default_seg = NULL;
10890 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
10891 /* Compensate for kludge in md_assemble(). */
10892 + i.tm.operand_types[0].bitfield.imm1;
10893 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
10894 unsigned int v, op, reg_slot;
10896 /* Accumulator (in particular %st), shift count (%cl), and the like
10897 need to be skipped just like immediate operands are. */
10898 if (i.tm.operand_types[source].bitfield.instance)
10899 ++source;
10900 while (i.tm.operand_types[dest].bitfield.instance)
10901 --dest;
10903 for (op = source; op < i.operands; ++op)
10904 if (i.tm.operand_types[op].bitfield.baseindex)
10905 break;
10907 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
10909 expressionS *exp;
10911 /* There are 3 kinds of instructions:
10912 1. 5 operands: 4 register operands or 3 register operands
10913 plus 1 memory operand plus one Imm4 operand, VexXDS, and
10914 VexW0 or VexW1. The destination must be either XMM, YMM or
10915 ZMM register.
10916 2. 4 operands: 4 register operands or 3 register operands
10917 plus 1 memory operand, with VexXDS.
10918 3. Other equivalent combinations when coming from s_insn(). */
10919 gas_assert (i.tm.opcode_modifier.vexvvvv
10920 && i.tm.opcode_modifier.vexw);
10921 gas_assert (dot_insn ()
10922 || i.tm.operand_types[dest].bitfield.class == RegSIMD);
10924 /* Of the first two non-immediate operands, the one whose template
10925 does not allow a memory operand is encoded in the immediate operand. */
10926 if (source == op)
10927 reg_slot = source + 1;
10928 else
10929 reg_slot = source++;
10931 if (!dot_insn ())
10933 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
10934 gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
10936 else
10937 gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
10939 if (i.imm_operands == 0)
10941 /* When there is no immediate operand, generate an 8bit
10942 immediate operand to encode the first operand. */
10943 exp = &im_expressions[i.imm_operands++];
10944 i.op[i.operands].imms = exp;
10945 i.types[i.operands].bitfield.imm8 = 1;
10946 i.operands++;
10948 exp->X_op = O_constant;
10950 else
10952 gas_assert (i.imm_operands == 1);
10953 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
10954 gas_assert (!i.tm.opcode_modifier.immext);
10956 /* Turn on Imm8 again so that output_imm will generate it. */
10957 i.types[0].bitfield.imm8 = 1;
10959 exp = i.op[0].imms;
10961 exp->X_add_number |= register_number (i.op[reg_slot].regs)
10962 << (3 + !(i.tm.opcode_modifier.evex
10963 || pp.encoding == encoding_evex));
10966 switch (i.tm.opcode_modifier.vexvvvv)
10968 /* VEX.vvvv encodes the last source register operand. */
10969 case VexVVVV_SRC2:
10970 v = source++;
10971 break;
10972 /* VEX.vvvv encodes the first source register operand. */
10973 case VexVVVV_SRC1:
10974 v = dest - 1;
10975 break;
10976 /* VEX.vvvv encodes the destination register operand. */
10977 case VexVVVV_DST:
10978 v = dest--;
10979 break;
10980 default:
10981 v = ~0;
10982 break;
10985 if (dest == source)
10986 dest = ~0;
10988 gas_assert (source < dest);
10990 if (v < MAX_OPERANDS)
10992 gas_assert (i.tm.opcode_modifier.vexvvvv);
10993 i.vex.register_specifier = i.op[v].regs;
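/* Example (illustrative): for `vaddps %xmm1, %xmm2, %xmm3', whose
   template is assumed to use VexVVVV_SRC1 as standard 3-operand AVX
   insns do, v == dest - 1 selects operand 1, so VEX.vvvv encodes
   %xmm2 while %xmm1 lands in ModRM.rm and %xmm3 in ModRM.reg.  */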
10996 if (op < i.operands)
10998 if (i.mem_operands)
11000 unsigned int fake_zero_displacement = 0;
11002 gas_assert (i.flags[op] & Operand_Mem);
11004 if (i.tm.opcode_modifier.sib)
11006 /* The index register of VSIB shouldn't be RegIZ. */
11007 if (i.tm.opcode_modifier.sib != SIBMEM
11008 && i.index_reg->reg_num == RegIZ)
11009 abort ();
11011 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
11012 if (!i.base_reg)
11014 i.sib.base = NO_BASE_REGISTER;
11015 i.sib.scale = i.log2_scale_factor;
11016 i.types[op] = operand_type_and_not (i.types[op], anydisp);
11017 i.types[op].bitfield.disp32 = 1;
11020 /* Since the mandatory SIB always has an index register,
11021 the code logic remains unchanged. The non-mandatory SIB
11022 without index register is allowed and will be handled
11023 later. */
11024 if (i.index_reg)
11026 if (i.index_reg->reg_num == RegIZ)
11027 i.sib.index = NO_INDEX_REGISTER;
11028 else
11029 i.sib.index = i.index_reg->reg_num;
11030 set_rex_vrex (i.index_reg, REX_X, false);
11034 default_seg = reg_ds;
11036 if (i.base_reg == 0)
11038 i.rm.mode = 0;
11039 if (!i.disp_operands)
11040 fake_zero_displacement = 1;
11041 if (i.index_reg == 0)
11043 /* Both check for VSIB and mandatory non-vector SIB. */
11044 gas_assert (!i.tm.opcode_modifier.sib
11045 || i.tm.opcode_modifier.sib == SIBMEM);
11046 /* Operand is just <disp> */
11047 i.types[op] = operand_type_and_not (i.types[op], anydisp);
11048 if (flag_code == CODE_64BIT)
11050 /* In 64-bit mode plain 32-bit absolute addressing is
11051 replaced by RIP-relative addressing; absolute
11052 addressing has to be encoded via one of the
11053 redundant SIB forms. */
11054 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
11055 i.sib.base = NO_BASE_REGISTER;
11056 i.sib.index = NO_INDEX_REGISTER;
11057 i.types[op].bitfield.disp32 = 1;
11059 else if ((flag_code == CODE_16BIT)
11060 ^ (i.prefix[ADDR_PREFIX] != 0))
11062 i.rm.regmem = NO_BASE_REGISTER_16;
11063 i.types[op].bitfield.disp16 = 1;
11065 else
11067 i.rm.regmem = NO_BASE_REGISTER;
11068 i.types[op].bitfield.disp32 = 1;
11071 else if (!i.tm.opcode_modifier.sib)
11073 /* !i.base_reg && i.index_reg */
11074 if (i.index_reg->reg_num == RegIZ)
11075 i.sib.index = NO_INDEX_REGISTER;
11076 else
11077 i.sib.index = i.index_reg->reg_num;
11078 i.sib.base = NO_BASE_REGISTER;
11079 i.sib.scale = i.log2_scale_factor;
11080 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
11081 i.types[op] = operand_type_and_not (i.types[op], anydisp);
11082 i.types[op].bitfield.disp32 = 1;
11083 set_rex_rex2 (i.index_reg, REX_X);
11086 /* RIP addressing for 64bit mode. */
11087 else if (i.base_reg->reg_num == RegIP)
11089 gas_assert (!i.tm.opcode_modifier.sib);
11090 i.rm.regmem = NO_BASE_REGISTER;
11091 i.types[op].bitfield.disp8 = 0;
11092 i.types[op].bitfield.disp16 = 0;
11093 i.types[op].bitfield.disp32 = 1;
11094 i.types[op].bitfield.disp64 = 0;
11095 i.flags[op] |= Operand_PCrel;
11096 if (! i.disp_operands)
11097 fake_zero_displacement = 1;
11099 else if (i.base_reg->reg_type.bitfield.word)
11101 gas_assert (!i.tm.opcode_modifier.sib);
11102 switch (i.base_reg->reg_num)
11104 case 3: /* (%bx) */
11105 if (i.index_reg == 0)
11106 i.rm.regmem = 7;
11107 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
11108 i.rm.regmem = i.index_reg->reg_num - 6;
11109 break;
11110 case 5: /* (%bp) */
11111 default_seg = reg_ss;
11112 if (i.index_reg == 0)
11114 i.rm.regmem = 6;
11115 if (operand_type_check (i.types[op], disp) == 0)
11117 /* fake (%bp) into 0(%bp) */
11118 if (pp.disp_encoding == disp_encoding_16bit)
11119 i.types[op].bitfield.disp16 = 1;
11120 else
11121 i.types[op].bitfield.disp8 = 1;
11122 fake_zero_displacement = 1;
11125 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
11126 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
11127 break;
11128 default: /* (%si) -> 4 or (%di) -> 5 */
11129 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
11131 if (!fake_zero_displacement
11132 && !i.disp_operands
11133 && pp.disp_encoding)
11135 fake_zero_displacement = 1;
11136 if (pp.disp_encoding == disp_encoding_8bit)
11137 i.types[op].bitfield.disp8 = 1;
11138 else
11139 i.types[op].bitfield.disp16 = 1;
11141 i.rm.mode = mode_from_disp_size (i.types[op]);
11143 else /* i.base_reg and 32/64 bit mode */
11145 if (operand_type_check (i.types[op], disp))
11147 i.types[op].bitfield.disp16 = 0;
11148 i.types[op].bitfield.disp64 = 0;
11149 i.types[op].bitfield.disp32 = 1;
11152 if (!i.tm.opcode_modifier.sib)
11153 i.rm.regmem = i.base_reg->reg_num;
11154 set_rex_rex2 (i.base_reg, REX_B);
11155 i.sib.base = i.base_reg->reg_num;
11156 /* x86-64 ignores REX prefix bit here to avoid decoder
11157 complications. */
11158 if (!(i.base_reg->reg_flags & RegRex)
11159 && (i.base_reg->reg_num == EBP_REG_NUM
11160 || i.base_reg->reg_num == ESP_REG_NUM))
11161 default_seg = reg_ss;
11162 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
11164 fake_zero_displacement = 1;
11165 if (pp.disp_encoding == disp_encoding_32bit)
11166 i.types[op].bitfield.disp32 = 1;
11167 else
11168 i.types[op].bitfield.disp8 = 1;
11170 i.sib.scale = i.log2_scale_factor;
11171 if (i.index_reg == 0)
11173 /* Only check for VSIB. */
11174 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
11175 && i.tm.opcode_modifier.sib != VECSIB256
11176 && i.tm.opcode_modifier.sib != VECSIB512);
11178 /* <disp>(%esp) becomes a two-byte modrm with no index
11179 register. We've already stored the code for esp
11180 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
11181 Any base register besides %esp will not use the
11182 extra modrm byte. */
11183 i.sib.index = NO_INDEX_REGISTER;
11185 else if (!i.tm.opcode_modifier.sib)
11187 if (i.index_reg->reg_num == RegIZ)
11188 i.sib.index = NO_INDEX_REGISTER;
11189 else
11190 i.sib.index = i.index_reg->reg_num;
11191 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
11192 set_rex_rex2 (i.index_reg, REX_X);
11195 if (i.disp_operands
11196 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
11197 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
11198 i.rm.mode = 0;
11199 else
11201 if (!fake_zero_displacement
11202 && !i.disp_operands
11203 && pp.disp_encoding)
11205 fake_zero_displacement = 1;
11206 if (pp.disp_encoding == disp_encoding_8bit)
11207 i.types[op].bitfield.disp8 = 1;
11208 else
11209 i.types[op].bitfield.disp32 = 1;
11211 i.rm.mode = mode_from_disp_size (i.types[op]);
11215 if (fake_zero_displacement)
11217 /* Fakes a zero displacement assuming that i.types[op]
11218 holds the correct displacement size. */
11219 expressionS *exp;
11221 gas_assert (i.op[op].disps == 0);
11222 exp = &disp_expressions[i.disp_operands++];
11223 i.op[op].disps = exp;
11224 exp->X_op = O_constant;
11225 exp->X_add_number = 0;
11226 exp->X_add_symbol = (symbolS *) 0;
11227 exp->X_op_symbol = (symbolS *) 0;
11230 else
11232 i.rm.mode = 3;
11233 i.rm.regmem = i.op[op].regs->reg_num;
11234 set_rex_vrex (i.op[op].regs, REX_B, false);
11237 if (op == dest)
11238 dest = ~0;
11239 if (op == source)
11240 source = ~0;
11242 else
11244 i.rm.mode = 3;
11245 if (!i.tm.opcode_modifier.regmem)
11247 gas_assert (source < MAX_OPERANDS);
11248 i.rm.regmem = i.op[source].regs->reg_num;
11249 set_rex_vrex (i.op[source].regs, REX_B,
11250 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
11251 source = ~0;
11253 else
11255 gas_assert (dest < MAX_OPERANDS);
11256 i.rm.regmem = i.op[dest].regs->reg_num;
11257 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
11258 dest = ~0;
11262 /* Fill in i.rm.reg field with extension opcode (if any) or the
11263 appropriate register. */
11264 if (i.tm.extension_opcode != None)
11265 i.rm.reg = i.tm.extension_opcode;
11266 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
11268 i.rm.reg = i.op[dest].regs->reg_num;
11269 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
11271 else
11273 gas_assert (source < MAX_OPERANDS);
11274 i.rm.reg = i.op[source].regs->reg_num;
11275 set_rex_vrex (i.op[source].regs, REX_R, false);
11278 if (flag_code != CODE_64BIT && (i.rex & REX_R))
11280 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
11281 i.rex &= ~REX_R;
11282 add_prefix (LOCK_PREFIX_OPCODE);
11285 return default_seg;
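/* Worked examples (illustrative): `add %eax, (%ebx)' assembles to
   01 03 -- opcode 0x01, then ModRM with mod=00, reg=000 (%eax),
   rm=011 (%ebx).  In 16-bit code `mov (%bp,%di), %ax' assembles to
   8B 03, rm=011 selecting the (%bp,%di) form per the switch above,
   with %ss as the default segment.  */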
11288 static INLINE void
11289 frag_opcode_byte (unsigned char byte)
11291 if (now_seg != absolute_section)
11292 FRAG_APPEND_1_CHAR (byte);
11293 else
11294 ++abs_section_offset;
11297 static unsigned int
11298 flip_code16 (unsigned int code16)
11300 gas_assert (i.tm.operands == 1);
11302 return !(i.prefix[REX_PREFIX] & REX_W)
11303 && (code16 ? i.tm.operand_types[0].bitfield.disp32
11304 : i.tm.operand_types[0].bitfield.disp16)
11305 ? CODE16 : 0;
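/* Example (illustrative): in 32-bit code a `data16' branch whose
   template carries Disp16 makes flip_code16 () return CODE16, so
   relaxation sizes the displacement at 2 bytes; conversely, a Disp32
   template in 16-bit code flips CODE16 back off.  */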
11308 static void
11309 output_branch (void)
11311 char *p;
11312 int size;
11313 int code16;
11314 int prefix;
11315 relax_substateT subtype;
11316 symbolS *sym;
11317 offsetT off;
11319 if (now_seg == absolute_section)
11321 as_bad (_("relaxable branches not supported in absolute section"));
11322 return;
11325 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
11326 size = pp.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
11328 prefix = 0;
11329 if (i.prefix[DATA_PREFIX] != 0)
11331 prefix = 1;
11332 i.prefixes -= 1;
11333 code16 ^= flip_code16(code16);
11335 /* Pentium4 branch hints. */
11336 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
11337 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
11339 prefix++;
11340 i.prefixes--;
11342 if (i.prefix[REX_PREFIX] != 0)
11344 prefix++;
11345 i.prefixes--;
11348 /* BND prefixed jump. */
11349 if (i.prefix[BND_PREFIX] != 0)
11351 prefix++;
11352 i.prefixes--;
11355 if (i.prefixes != 0)
11356 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
11358 /* It's always a symbol; end frag & set up for relax.
11359 Make sure there is enough room in this frag for the largest
11360 instruction we may generate in md_convert_frag. This is 2
11361 bytes for the opcode and room for the prefix and largest
11362 displacement. */
11363 frag_grow (prefix + 2 + 4);
11364 /* Prefix and 1 opcode byte go in fr_fix. */
11365 p = frag_more (prefix + 1);
11366 if (i.prefix[DATA_PREFIX] != 0)
11367 *p++ = DATA_PREFIX_OPCODE;
11368 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
11369 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
11370 *p++ = i.prefix[SEG_PREFIX];
11371 if (i.prefix[BND_PREFIX] != 0)
11372 *p++ = BND_PREFIX_OPCODE;
11373 if (i.prefix[REX_PREFIX] != 0)
11374 *p++ = i.prefix[REX_PREFIX];
11375 *p = i.tm.base_opcode;
11377 if ((unsigned char) *p == JUMP_PC_RELATIVE)
11378 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
11379 else if (cpu_arch_flags.bitfield.cpui386)
11380 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
11381 else
11382 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
11383 subtype |= code16;
11385 sym = i.op[0].disps->X_add_symbol;
11386 off = i.op[0].disps->X_add_number;
11388 if (i.op[0].disps->X_op != O_constant
11389 && i.op[0].disps->X_op != O_symbol)
11391 /* Handle complex expressions. */
11392 sym = make_expr_symbol (i.op[0].disps);
11393 off = 0;
11396 /* 1 possible extra opcode + 4 byte displacement go in var part.
11397 Pass reloc in fr_var. */
11398 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
11401 /* PLT32 relocation is ELF only. */
11402 #ifdef OBJ_ELF
11403 /* Return TRUE iff PLT32 relocation should be used for branching to
11404 symbol S. */
11406 static bool
11407 need_plt32_p (symbolS *s)
11409 #ifdef TE_SOLARIS
11410 /* Don't emit PLT32 relocations on Solaris: neither the native linker
11411 nor krtld supports it. */
11412 return false;
11413 #endif
11415 /* Since there is no need to prepare for PLT branch on x86-64, we
11416 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
11417 be used as a marker for 32-bit PC-relative branches. */
11418 if (!object_64bit)
11419 return false;
11421 if (s == NULL)
11422 return false;
11425 /* Weak or undefined symbols need a PLT32 relocation. */
11425 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
11426 return true;
11429 /* A non-global symbol doesn't need a PLT32 relocation. */
11429 if (! S_IS_EXTERNAL (s))
11430 return false;
11432 /* Other global symbols need a PLT32 relocation. NB: Symbols with
11433 non-default visibility are treated as normal global symbols
11434 so that the PLT32 relocation can be used as a marker for 32-bit
11435 PC-relative branches. This is useful for linker relaxation. */
11436 return true;
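/* Example (illustrative): `call foo' in 64-bit code, with foo
   undefined at assembly time, is emitted with an R_X86_64_PLT32
   relocation rather than R_X86_64_PC32; the linker can still resolve
   it to a direct PC-relative call when foo turns out to be local.  */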
11438 #endif
11440 static void
11441 output_jump (void)
11443 char *p;
11444 int size;
11445 fixS *fixP;
11446 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
11448 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
11450 /* This is a loop or jecxz type instruction. */
11451 size = 1;
11452 if (i.prefix[ADDR_PREFIX] != 0)
11454 frag_opcode_byte (ADDR_PREFIX_OPCODE);
11455 i.prefixes -= 1;
11457 /* Pentium4 branch hints. */
11458 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
11459 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
11461 frag_opcode_byte (i.prefix[SEG_PREFIX]);
11462 i.prefixes--;
11465 else
11467 int code16;
11469 code16 = 0;
11470 if (flag_code == CODE_16BIT)
11471 code16 = CODE16;
11473 if (i.prefix[DATA_PREFIX] != 0)
11475 frag_opcode_byte (DATA_PREFIX_OPCODE);
11476 i.prefixes -= 1;
11477 code16 ^= flip_code16(code16);
11480 size = 4;
11481 if (code16)
11482 size = 2;
11485 /* BND prefixed jump. */
11486 if (i.prefix[BND_PREFIX] != 0)
11488 frag_opcode_byte (i.prefix[BND_PREFIX]);
11489 i.prefixes -= 1;
11492 if (i.prefix[REX_PREFIX] != 0)
11494 frag_opcode_byte (i.prefix[REX_PREFIX]);
11495 i.prefixes -= 1;
11498 if (i.prefixes != 0)
11499 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
11501 if (now_seg == absolute_section)
11503 abs_section_offset += i.opcode_length + size;
11504 return;
11507 p = frag_more (i.opcode_length + size);
11508 switch (i.opcode_length)
11510 case 2:
11511 *p++ = i.tm.base_opcode >> 8;
11512 /* Fall through. */
11513 case 1:
11514 *p++ = i.tm.base_opcode;
11515 break;
11516 default:
11517 abort ();
11520 #ifdef OBJ_ELF
11521 if (flag_code == CODE_64BIT && size == 4
11522 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
11523 && need_plt32_p (i.op[0].disps->X_add_symbol))
11524 jump_reloc = BFD_RELOC_X86_64_PLT32;
11525 #endif
11527 jump_reloc = reloc (size, 1, 1, jump_reloc);
11529 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
11530 i.op[0].disps, 1, jump_reloc);
11532 /* All jumps handled here are signed, but don't unconditionally use a
11533 signed limit check for 32 and 16 bit jumps as we want to allow wrap
11534 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
11535 respectively. */
11536 switch (size)
11538 case 1:
11539 fixP->fx_signed = 1;
11540 break;
11542 case 2:
11543 if (i.tm.mnem_off == MN_xbegin)
11544 fixP->fx_signed = 1;
11545 break;
11547 case 4:
11548 if (flag_code == CODE_64BIT)
11549 fixP->fx_signed = 1;
11550 break;
11554 static void
11555 output_interseg_jump (void)
11557 char *p;
11558 int size;
11559 int prefix;
11560 int code16;
11562 code16 = 0;
11563 if (flag_code == CODE_16BIT)
11564 code16 = CODE16;
11566 prefix = 0;
11567 if (i.prefix[DATA_PREFIX] != 0)
11569 prefix = 1;
11570 i.prefixes -= 1;
11571 code16 ^= CODE16;
11574 gas_assert (!i.prefix[REX_PREFIX]);
11576 size = 4;
11577 if (code16)
11578 size = 2;
11580 if (i.prefixes != 0)
11581 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
11583 if (now_seg == absolute_section)
11585 abs_section_offset += prefix + 1 + 2 + size;
11586 return;
11589 /* 1 opcode; 2 segment; offset */
11590 p = frag_more (prefix + 1 + 2 + size);
11592 if (i.prefix[DATA_PREFIX] != 0)
11593 *p++ = DATA_PREFIX_OPCODE;
11595 if (i.prefix[REX_PREFIX] != 0)
11596 *p++ = i.prefix[REX_PREFIX];
11598 *p++ = i.tm.base_opcode;
11599 if (i.op[1].imms->X_op == O_constant)
11601 offsetT n = i.op[1].imms->X_add_number;
11603 if (size == 2
11604 && !fits_in_unsigned_word (n)
11605 && !fits_in_signed_word (n))
11607 as_bad (_("16-bit jump out of range"));
11608 return;
11610 md_number_to_chars (p, n, size);
11612 else
11613 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
11614 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
11616 p += size;
11617 if (i.op[0].imms->X_op == O_constant)
11618 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
11619 else
11620 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
11621 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
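/* Worked example (illustrative): in 32-bit code
   `ljmp $0x10, $0x1000' reaches this point with size == 4 and is
   emitted as EA 00 10 00 00 10 00 -- the opcode, the 4-byte offset
   0x1000, then the 2-byte selector 0x10.  */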
11624 /* Hook used to reject pseudo-prefixes misplaced at the start of a line. */
11626 void i386_start_line (void)
11628 struct pseudo_prefixes last_pp;
11630 memcpy (&last_pp, &pp, sizeof (pp));
11631 memset (&pp, 0, sizeof (pp));
11632 if (memcmp (&pp, &last_pp, sizeof (pp)))
11633 as_bad_where (frag_now->fr_file, frag_now->fr_line,
11634 _("pseudo prefix without instruction"));
11637 /* Hook used to warn about pseudo-prefixes ahead of a label. */
11639 bool i386_check_label (void)
11641 struct pseudo_prefixes last_pp;
11643 memcpy (&last_pp, &pp, sizeof (pp));
11644 memset (&pp, 0, sizeof (pp));
11645 if (memcmp (&pp, &last_pp, sizeof (pp)))
11646 as_warn (_("pseudo prefix ahead of label; ignoring"));
11647 return true;
11650 /* Hook used to parse pseudo-prefixes off of the start of a line. */
11652 int
11653 i386_unrecognized_line (int ch)
11655 char mnemonic[MAX_MNEM_SIZE];
11656 const char *end;
11658 if (ch != '{')
11659 return 0;
11661 --input_line_pointer;
11662 know (*input_line_pointer == ch);
11664 end = parse_insn (input_line_pointer, mnemonic, parse_pseudo_prefix);
11665 if (end == NULL)
11667 /* Diagnostic was already issued. */
11668 ignore_rest_of_line ();
11669 memset (&pp, 0, sizeof (pp));
11670 return 1;
11673 if (end == input_line_pointer)
11675 ++input_line_pointer;
11676 return 0;
11679 input_line_pointer += end - input_line_pointer;
11680 return 1;
11683 #ifdef OBJ_ELF
11684 void
11685 x86_cleanup (void)
11687 char *p;
11688 asection *seg = now_seg;
11689 subsegT subseg = now_subseg;
11690 asection *sec;
11691 unsigned int alignment, align_size_1;
11692 unsigned int isa_1_descsz, feature_2_descsz, descsz;
11693 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
11694 unsigned int padding;
11696 if (!x86_used_note)
11697 return;
11699 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
11701 /* The .note.gnu.property section layout:
11703 Field Length Contents
11704 ---- ---- ----
11705 n_namsz 4 4
11706 n_descsz 4 The note descriptor size
11707 n_type 4 NT_GNU_PROPERTY_TYPE_0
11708 n_name 4 "GNU"
11709 n_desc n_descsz The program property array
11710 .... .... ....
11713 /* Create the .note.gnu.property section. */
11714 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
11715 bfd_set_section_flags (sec,
11716 (SEC_ALLOC
11717 | SEC_LOAD
11718 | SEC_DATA
11719 | SEC_HAS_CONTENTS
11720 | SEC_READONLY));
11722 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
11724 align_size_1 = 7;
11725 alignment = 3;
11727 else
11729 align_size_1 = 3;
11730 alignment = 2;
11733 bfd_set_section_alignment (sec, alignment);
11734 elf_section_type (sec) = SHT_NOTE;
11736 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
11737 + 4-byte data */
11738 isa_1_descsz_raw = 4 + 4 + 4;
11739 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
11740 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
11742 feature_2_descsz_raw = isa_1_descsz;
11743 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
11744 + 4-byte data */
11745 feature_2_descsz_raw += 4 + 4 + 4;
11746 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
11747 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
11748 & ~align_size_1);
11750 descsz = feature_2_descsz;
11751 /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc. */
11752 p = frag_more (4 + 4 + 4 + 4 + descsz);
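/* Worked sizes (derived from the expressions above): in a 64-bit
   object each property is 4 + 4 + 4 = 12 bytes raw, padded to 16 for
   8-byte alignment, so descsz == 32 and frag_more () reserves
   4 + 4 + 4 + 4 + 32 == 48 bytes; a 32-bit object pads to 4 bytes,
   giving descsz == 24 and a 40-byte section.  */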
11754 /* Write n_namsz. */
11755 md_number_to_chars (p, (valueT) 4, 4);
11757 /* Write n_descsz. */
11758 md_number_to_chars (p + 4, (valueT) descsz, 4);
11760 /* Write n_type. */
11761 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
11763 /* Write n_name. */
11764 memcpy (p + 4 * 3, "GNU", 4);
11766 /* Write 4-byte type. */
11767 md_number_to_chars (p + 4 * 4,
11768 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
11770 /* Write 4-byte data size. */
11771 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
11773 /* Write 4-byte data. */
11774 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
11776 /* Zero out padding. */
11777 padding = isa_1_descsz - isa_1_descsz_raw;
11778 if (padding)
11779 memset (p + 4 * 7, 0, padding);
11781 /* Write 4-byte type. */
11782 md_number_to_chars (p + isa_1_descsz + 4 * 4,
11783 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
11785 /* Write 4-byte data size. */
11786 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
11788 /* Write 4-byte data. */
11789 md_number_to_chars (p + isa_1_descsz + 4 * 6,
11790 (valueT) x86_feature_2_used, 4);
11792 /* Zero out padding. */
11793 padding = feature_2_descsz - feature_2_descsz_raw;
11794 if (padding)
11795 memset (p + isa_1_descsz + 4 * 7, 0, padding);
11797 /* We probably can't restore the current segment, for there likely
11798 isn't one yet... */
11799 if (seg && subseg)
11800 subseg_set (seg, subseg);
11803 #include "tc-i386-ginsn.c"
11805 /* Whether SFrame stack trace info is supported. */
11806 bool
11807 x86_support_sframe_p (void)
11809 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
11810 return (x86_elf_abi == X86_64_ABI);
11813 /* Whether SFrame return address tracking is needed. */
11814 bool
11815 x86_sframe_ra_tracking_p (void)
11817 /* In AMD64, the return address is always stored on the stack at a fixed offset
11818 from the CFA (provided via x86_sframe_cfa_ra_offset ()).
11819 Do not track explicitly via an SFrame Frame Row Entry. */
11820 return false;
11823 /* The fixed offset from CFA for SFrame to recover the return address.
11824 (Useful only when SFrame RA tracking is not needed.) */
11825 offsetT
11826 x86_sframe_cfa_ra_offset (void)
11828 gas_assert (x86_elf_abi == X86_64_ABI);
11829 return (offsetT) -8;
11832 /* The ABI/arch identifier for SFrame. */
11833 unsigned char
11834 x86_sframe_get_abi_arch (void)
11836 unsigned char sframe_abi_arch = 0;
11838 if (x86_support_sframe_p ())
11840 gas_assert (!target_big_endian);
11841 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
11844 return sframe_abi_arch;
11847 #endif
11849 static unsigned int
11850 encoding_length (const fragS *start_frag, offsetT start_off,
11851 const char *frag_now_ptr)
11853 unsigned int len = 0;
11855 if (start_frag != frag_now)
11857 const fragS *fr = start_frag;
11859 do {
11860 len += fr->fr_fix;
11861 fr = fr->fr_next;
11862 } while (fr && fr != frag_now);
11865 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
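/* Example (illustrative): if an insn began 3 bytes into a frag whose
   final fr_fix is 6 and continues 2 bytes into frag_now, the loop
   accumulates 6, and 6 - 3 + 2 == 5 bytes belong to the insn.  */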
11868 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
11869 be macro-fused with conditional jumps.
11870 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
11871 or has one of the following formats:
11873 cmp m, imm
11874 add m, imm
11875 sub m, imm
11876 test m, imm
11877 and m, imm
11878 inc m
11879 dec m
11881 it is not fusible. */
11883 static int
11884 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
11886 /* No RIP address. */
11887 if (i.base_reg && i.base_reg->reg_num == RegIP)
11888 return 0;
11890 /* No opcodes outside of base encoding space. */
11891 if (i.tm.opcode_space != SPACE_BASE)
11892 return 0;
11894 /* add, sub without add/sub m, imm. */
11895 if (i.tm.base_opcode <= 5
11896 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
11897 || ((i.tm.base_opcode | 3) == 0x83
11898 && (i.tm.extension_opcode == 0x5
11899 || i.tm.extension_opcode == 0x0)))
11901 *mf_cmp_p = mf_cmp_alu_cmp;
11902 return !(i.mem_operands && i.imm_operands);
11905 /* and without and m, imm. */
11906 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
11907 || ((i.tm.base_opcode | 3) == 0x83
11908 && i.tm.extension_opcode == 0x4))
11910 *mf_cmp_p = mf_cmp_test_and;
11911 return !(i.mem_operands && i.imm_operands);
11914 /* test without test m, imm. */
11915 if ((i.tm.base_opcode | 1) == 0x85
11916 || (i.tm.base_opcode | 1) == 0xa9
11917 || ((i.tm.base_opcode | 1) == 0xf7
11918 && i.tm.extension_opcode == 0))
11920 *mf_cmp_p = mf_cmp_test_and;
11921 return !(i.mem_operands && i.imm_operands);
11924 /* cmp without cmp m, imm. */
11925 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
11926 || ((i.tm.base_opcode | 3) == 0x83
11927 && (i.tm.extension_opcode == 0x7)))
11929 *mf_cmp_p = mf_cmp_alu_cmp;
11930 return !(i.mem_operands && i.imm_operands);
11933 /* inc, dec without inc/dec m. */
11934 if ((is_cpu (&i.tm, CpuNo64)
11935 && (i.tm.base_opcode | 0xf) == 0x4f)
11936 || ((i.tm.base_opcode | 1) == 0xff
11937 && i.tm.extension_opcode <= 0x1))
11939 *mf_cmp_p = mf_cmp_incdec;
11940 return !i.mem_operands;
11943 return 0;
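/* Example (illustrative): `cmp %eax, %ebx' followed by `je' is a
   fusion candidate (mf_cmp_alu_cmp), whereas `cmp $1, (%ebx)' is
   not, since an ALU insn with both a memory and an immediate
   operand cannot be macro-fused.  */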
11946 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
11948 static int
11949 add_fused_jcc_padding_frag_p (enum mf_cmp_kind *mf_cmp_p,
11950 const struct last_insn *last_insn)
11952 /* NB: Don't work with COND_JUMP86 without i386. */
11953 if (!align_branch_power
11954 || now_seg == absolute_section
11955 || !cpu_arch_flags.bitfield.cpui386
11956 || !(align_branch & align_branch_fused_bit))
11957 return 0;
11959 if (maybe_fused_with_jcc_p (mf_cmp_p))
11961 if (last_insn->kind == last_insn_other)
11962 return 1;
11963 if (flag_debug)
11964 as_warn_where (last_insn->file, last_insn->line,
11965 _("`%s` skips -malign-branch-boundary on `%s`"),
11966 last_insn->name, insn_name (&i.tm));
11969 return 0;
11972 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
11974 static int
11975 add_branch_prefix_frag_p (const struct last_insn *last_insn)
11977   /* NB: This doesn't work with COND_JUMP86 without i386.  Don't add a prefix
11978      to PadLock instructions since they include prefixes in the opcode.  */
11979 if (!align_branch_power
11980 || !align_branch_prefix_size
11981 || now_seg == absolute_section
11982 || is_cpu (&i.tm, CpuPadLock)
11983 || !cpu_arch_flags.bitfield.cpui386)
11984 return 0;
11986   /* Don't add a prefix if the insn itself is a prefix, or if there is no
11987      operand, in which case a segment prefix may not act as mere padding.  */
11988 if (!i.operands || i.tm.opcode_modifier.isprefix)
11989 return 0;
11991 if (last_insn->kind == last_insn_other)
11992 return 1;
11994 if (flag_debug)
11995 as_warn_where (last_insn->file, last_insn->line,
11996 _("`%s` skips -malign-branch-boundary on `%s`"),
11997 last_insn->name, insn_name (&i.tm));
11999 return 0;
12002 /* Return 1 if a BRANCH_PADDING frag should be generated. */
12004 static int
12005 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
12006 enum mf_jcc_kind *mf_jcc_p,
12007 const struct last_insn *last_insn)
12009 int add_padding;
12011   /* NB: This doesn't work with COND_JUMP86 without i386.  */
12012 if (!align_branch_power
12013 || now_seg == absolute_section
12014 || !cpu_arch_flags.bitfield.cpui386
12015 || i.tm.opcode_space != SPACE_BASE)
12016 return 0;
12018 add_padding = 0;
12020 /* Check for jcc and direct jmp. */
12021 if (i.tm.opcode_modifier.jump == JUMP)
12023 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
12025 *branch_p = align_branch_jmp;
12026 add_padding = align_branch & align_branch_jmp_bit;
12028 else
12030 	  /* Because J<cc> and JN<cc> share the same group in the macro-fusible
12031 	     table, ignore the lowest bit.  */
12032 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
12033 *branch_p = align_branch_jcc;
12034 if ((align_branch & align_branch_jcc_bit))
12035 add_padding = 1;
12038 else if ((i.tm.base_opcode | 1) == 0xc3)
12040 /* Near ret. */
12041 *branch_p = align_branch_ret;
12042 if ((align_branch & align_branch_ret_bit))
12043 add_padding = 1;
12045 else
12047 /* Check for indirect jmp, direct and indirect calls. */
12048 if (i.tm.base_opcode == 0xe8)
12050 /* Direct call. */
12051 *branch_p = align_branch_call;
12052 if ((align_branch & align_branch_call_bit))
12053 add_padding = 1;
12055 else if (i.tm.base_opcode == 0xff
12056 && (i.tm.extension_opcode == 2
12057 || i.tm.extension_opcode == 4))
12059 /* Indirect call and jmp. */
12060 *branch_p = align_branch_indirect;
12061 if ((align_branch & align_branch_indirect_bit))
12062 add_padding = 1;
12065 if (add_padding
12066 && i.disp_operands
12067 && tls_get_addr
12068 && (i.op[0].disps->X_op == O_symbol
12069 || (i.op[0].disps->X_op == O_subtract
12070 && i.op[0].disps->X_op_symbol == GOT_symbol)))
12072 symbolS *s = i.op[0].disps->X_add_symbol;
12073 /* No padding to call to global or undefined tls_get_addr. */
12074 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
12075 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
12076 return 0;
12080 if (add_padding
12081 && last_insn->kind != last_insn_other)
12083 if (flag_debug)
12084 as_warn_where (last_insn->file, last_insn->line,
12085 _("`%s` skips -malign-branch-boundary on `%s`"),
12086 last_insn->name, insn_name (&i.tm));
12087 return 0;
12090 return add_padding;
12093 static void
12094 output_insn (const struct last_insn *last_insn)
12096 fragS *insn_start_frag;
12097 offsetT insn_start_off;
12098 fragS *fragP = NULL;
12099 enum align_branch_kind branch = align_branch_none;
12100   /* The initializer is arbitrary, just to avoid an uninitialized-variable
12101      diagnostic.  It's actually either assigned in add_branch_padding_frag_p
12102      or never used.  */
12103 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
12105 #ifdef OBJ_ELF
12106 if (x86_used_note && now_seg != absolute_section)
12108 if ((i.xstate & xstate_tmm) == xstate_tmm
12109 || is_cpu (&i.tm, CpuAMX_TILE))
12110 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
12112 if (is_cpu (&i.tm, Cpu8087)
12113 || is_cpu (&i.tm, Cpu287)
12114 || is_cpu (&i.tm, Cpu387)
12115 || is_cpu (&i.tm, Cpu687)
12116 || is_cpu (&i.tm, CpuFISTTP))
12117 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
12119 if ((i.xstate & xstate_mmx)
12120 || i.tm.mnem_off == MN_emms
12121 || i.tm.mnem_off == MN_femms)
12122 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
12124 if (i.index_reg)
12126 if (i.index_reg->reg_type.bitfield.zmmword)
12127 i.xstate |= xstate_zmm;
12128 else if (i.index_reg->reg_type.bitfield.ymmword)
12129 i.xstate |= xstate_ymm;
12130 else if (i.index_reg->reg_type.bitfield.xmmword)
12131 i.xstate |= xstate_xmm;
12134 /* vzeroall / vzeroupper */
12135 if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
12136 i.xstate |= xstate_ymm;
12138 if ((i.xstate & xstate_xmm)
12139 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
12140 || (i.tm.base_opcode == 0xae
12141 && (is_cpu (&i.tm, CpuSSE)
12142 || is_cpu (&i.tm, CpuAVX)))
12143 || is_cpu (&i.tm, CpuWideKL)
12144 || is_cpu (&i.tm, CpuKL))
12145 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
12147 if ((i.xstate & xstate_ymm) == xstate_ymm)
12148 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
12149 if ((i.xstate & xstate_zmm) == xstate_zmm)
12150 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
12151 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
12152 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
12153 if (is_cpu (&i.tm, CpuFXSR))
12154 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
12155 if (is_cpu (&i.tm, CpuXsave))
12156 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
12157 if (is_cpu (&i.tm, CpuXsaveopt))
12158 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
12159 if (is_cpu (&i.tm, CpuXSAVEC))
12160 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
12162 if (x86_feature_2_used
12163 || is_cpu (&i.tm, CpuCMOV)
12164 || is_cpu (&i.tm, CpuSYSCALL)
12165 || i.tm.mnem_off == MN_cmpxchg8b)
12166 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
12167 if (is_cpu (&i.tm, CpuSSE3)
12168 || is_cpu (&i.tm, CpuSSSE3)
12169 || is_cpu (&i.tm, CpuSSE4_1)
12170 || is_cpu (&i.tm, CpuSSE4_2)
12171 || is_cpu (&i.tm, CpuCX16)
12172 || is_cpu (&i.tm, CpuPOPCNT)
12173 /* LAHF-SAHF insns in 64-bit mode. */
12174 || (flag_code == CODE_64BIT
12175 && (i.tm.base_opcode | 1) == 0x9f
12176 && i.tm.opcode_space == SPACE_BASE))
12177 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
12178 if (is_cpu (&i.tm, CpuAVX)
12179 || is_cpu (&i.tm, CpuAVX2)
12180       /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
12181 	 XOP, FMA4, LWP, TBM, and AMX.  */
12182 || (i.tm.opcode_modifier.vex
12183 && !is_cpu (&i.tm, CpuAVX512F)
12184 && !is_cpu (&i.tm, CpuAVX512BW)
12185 && !is_cpu (&i.tm, CpuAVX512DQ)
12186 && !is_cpu (&i.tm, CpuXOP)
12187 && !is_cpu (&i.tm, CpuFMA4)
12188 && !is_cpu (&i.tm, CpuLWP)
12189 && !is_cpu (&i.tm, CpuTBM)
12190 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
12191 || is_cpu (&i.tm, CpuF16C)
12192 || is_cpu (&i.tm, CpuFMA)
12193 || is_cpu (&i.tm, CpuLZCNT)
12194 || is_cpu (&i.tm, CpuMovbe)
12195 || is_cpu (&i.tm, CpuXSAVES)
12196 || (x86_feature_2_used
12197 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
12198 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
12199 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
12200 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
12201 if (is_cpu (&i.tm, CpuAVX512F)
12202 || is_cpu (&i.tm, CpuAVX512BW)
12203 || is_cpu (&i.tm, CpuAVX512DQ)
12204 || is_cpu (&i.tm, CpuAVX512VL)
12205 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
12206 AVX512-4FMAPS, and AVX512-4VNNIW. */
12207 || (i.tm.opcode_modifier.evex
12208 && !is_cpu (&i.tm, CpuAVX512ER)
12209 && !is_cpu (&i.tm, CpuAVX512PF)
12210 && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
12211 && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
12212 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
12214 #endif
12216 /* Tie dwarf2 debug info to the address at the start of the insn.
12217 We can't do this after the insn has been output as the current
12218      frag may have been closed off, e.g. by frag_var.  */
12219 dwarf2_emit_insn (0);
12221 insn_start_frag = frag_now;
12222 insn_start_off = frag_now_fix ();
12224 if (add_branch_padding_frag_p (&branch, &mf_jcc, last_insn))
12226 char *p;
12227 /* Branch can be 8 bytes. Leave some room for prefixes. */
12228 unsigned int max_branch_padding_size = 14;
12230 /* Align section to boundary. */
12231 record_alignment (now_seg, align_branch_power);
12233 /* Make room for padding. */
12234 frag_grow (max_branch_padding_size);
12236 /* Start of the padding. */
12237 p = frag_more (0);
12239 fragP = frag_now;
12241 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
12242 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
12243 NULL, 0, p);
12245 fragP->tc_frag_data.mf_type = mf_jcc;
12246 fragP->tc_frag_data.branch_type = branch;
12247 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
12250 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
12251 && !pre_386_16bit_warned)
12253 as_warn (_("use .code16 to ensure correct addressing mode"));
12254 pre_386_16bit_warned = true;
12257 /* Output jumps. */
12258 if (i.tm.opcode_modifier.jump == JUMP)
12259 output_branch ();
12260 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
12261 || i.tm.opcode_modifier.jump == JUMP_DWORD)
12262 output_jump ();
12263 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
12264 output_interseg_jump ();
12265 else
12267 /* Output normal instructions here. */
12268 char *p;
12269 unsigned char *q;
12270 unsigned int j;
12271 enum mf_cmp_kind mf_cmp;
12273 if (avoid_fence
12274 && (i.tm.base_opcode == 0xaee8
12275 || i.tm.base_opcode == 0xaef0
12276 || i.tm.base_opcode == 0xaef8))
12278 /* Encode lfence, mfence, and sfence as
12279 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
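	  /* An illustrative sketch of the 64-bit replacement:

		lock addl $0x0, (%rsp)

	     md_number_to_chars() below stores VAL little-endian, so the
	     constant 0x240483f0 yields the bytes f0 83 04 24, and the
	     fifth byte (0x00) is the zero immediate.  */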
12280 if (flag_code == CODE_16BIT)
12281 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
12282 else if (omit_lock_prefix)
12283 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
12284 insn_name (&i.tm));
12285 else if (now_seg != absolute_section)
12287 offsetT val = 0x240483f0ULL;
12289 p = frag_more (5);
12290 md_number_to_chars (p, val, 5);
12292 else
12293 abs_section_offset += 5;
12294 return;
12297       /* Some processors fail on the LOCK prefix.  This option makes the
12298 	 assembler ignore the LOCK prefix and serves as a workaround.  */
12299 if (omit_lock_prefix)
12301 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
12302 && i.tm.opcode_modifier.isprefix)
12303 return;
12304 i.prefix[LOCK_PREFIX] = 0;
12307 if (branch)
12308 	/* Skip if this is a branch.  */
12309 	;
12310 else if (add_fused_jcc_padding_frag_p (&mf_cmp, last_insn))
12312 /* Make room for padding. */
12313 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
12314 p = frag_more (0);
12316 fragP = frag_now;
12318 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
12319 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
12320 NULL, 0, p);
12322 fragP->tc_frag_data.mf_type = mf_cmp;
12323 fragP->tc_frag_data.branch_type = align_branch_fused;
12324 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
12326 else if (add_branch_prefix_frag_p (last_insn))
12328 unsigned int max_prefix_size = align_branch_prefix_size;
12330 /* Make room for padding. */
12331 frag_grow (max_prefix_size);
12332 p = frag_more (0);
12334 fragP = frag_now;
12336 frag_var (rs_machine_dependent, max_prefix_size, 0,
12337 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
12338 NULL, 0, p);
12340 fragP->tc_frag_data.max_bytes = max_prefix_size;
12343 /* Since the VEX/EVEX prefix contains the implicit prefix, we
12344 don't need the explicit prefix. */
12345 if (!is_any_vex_encoding (&i.tm))
12347 switch (i.tm.opcode_modifier.opcodeprefix)
12349 case PREFIX_0X66:
12350 add_prefix (0x66);
12351 break;
12352 case PREFIX_0XF2:
12353 add_prefix (0xf2);
12354 break;
12355 case PREFIX_0XF3:
12356 if (!is_cpu (&i.tm, CpuPadLock)
12357 || (i.prefix[REP_PREFIX] != 0xf3))
12358 add_prefix (0xf3);
12359 break;
12360 case PREFIX_NONE:
12361 switch (i.opcode_length)
12363 case 2:
12364 break;
12365 case 1:
12366 /* Check for pseudo prefixes. */
12367 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
12368 break;
12369 as_bad_where (insn_start_frag->fr_file,
12370 insn_start_frag->fr_line,
12371 _("pseudo prefix without instruction"));
12372 return;
12373 default:
12374 abort ();
12376 break;
12377 default:
12378 abort ();
12381 #ifdef OBJ_ELF
12382 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
12383 R_X86_64_GOTTPOFF relocation so that linker can safely
12384 perform IE->LE optimization. A dummy REX_OPCODE prefix
12385 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
12386 relocation for GDesc -> IE/LE optimization. */
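  /* E.g. (an illustrative sketch): for the x32 insn

	movl	foo@gottpoff(%rip), %eax

     a REX byte is emitted even though none is otherwise needed, so the
     linker sees the encoding layout it expects when rewriting the insn
     to its IE/LE form.  */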
12387 if (x86_elf_abi == X86_64_X32_ABI
12388 && !is_apx_rex2_encoding ()
12389 && i.operands == 2
12390 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
12391 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
12392 && i.prefix[REX_PREFIX] == 0)
12393 add_prefix (REX_OPCODE);
12394 #endif
12396 /* The prefix bytes. */
12397 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
12398 if (*q)
12399 frag_opcode_byte (*q);
12401 if (is_apx_rex2_encoding ())
12403 frag_opcode_byte (i.vex.bytes[0]);
12404 frag_opcode_byte (i.vex.bytes[1]);
12407 else
12409 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
12410 if (*q)
12411 switch (j)
12413 case SEG_PREFIX:
12414 case ADDR_PREFIX:
12415 frag_opcode_byte (*q);
12416 break;
12417 default:
12418 /* There should be no other prefixes for instructions
12419 with VEX prefix. */
12420 abort ();
12423 /* For EVEX instructions i.vrex should become 0 after
12424 build_evex_prefix. For VEX instructions upper 16 registers
12425 aren't available, so VREX should be 0. */
12426 if (i.vrex)
12427 abort ();
12428 /* Now the VEX prefix. */
12429 if (now_seg != absolute_section)
12431 p = frag_more (i.vex.length);
12432 for (j = 0; j < i.vex.length; j++)
12433 p[j] = i.vex.bytes[j];
12435 else
12436 abs_section_offset += i.vex.length;
12439 /* Now the opcode; be careful about word order here! */
12440 j = i.opcode_length;
12441 if (!i.vex.length)
12442 switch (i.tm.opcode_space)
12444 case SPACE_BASE:
12445 break;
12446 case SPACE_0F:
12447 ++j;
12448 break;
12449 case SPACE_0F38:
12450 case SPACE_0F3A:
12451 j += 2;
12452 break;
12453 default:
12454 abort ();
12457 if (now_seg == absolute_section)
12458 abs_section_offset += j;
12459 else if (j == 1)
12461 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
12463 else
12465 p = frag_more (j);
12466 if (!i.vex.length
12467 && i.tm.opcode_space != SPACE_BASE)
12469 *p++ = 0x0f;
12470 if (i.tm.opcode_space != SPACE_0F)
12471 *p++ = i.tm.opcode_space == SPACE_0F38
12472 ? 0x38 : 0x3a;
12475 switch (i.opcode_length)
12477 case 2:
12478 /* Put out high byte first: can't use md_number_to_chars! */
12479 *p++ = (i.tm.base_opcode >> 8) & 0xff;
12480 /* Fall through. */
12481 case 1:
12482 *p = i.tm.base_opcode & 0xff;
12483 break;
12484 default:
12485 abort ();
12486 break;
12491 /* Now the modrm byte and sib byte (if present). */
12492 if (i.tm.opcode_modifier.modrm)
12494 frag_opcode_byte ((i.rm.regmem << 0)
12495 | (i.rm.reg << 3)
12496 | (i.rm.mode << 6));
12497       /* If i.rm.regmem == ESP (4)
12498 	 && i.rm.mode != (Register mode)
12499 	 && not 16 bit
12500 	 ==> need a SIB byte after the ModR/M byte.  */
12501 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
12502 && i.rm.mode != 3
12503 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
12504 frag_opcode_byte ((i.sib.base << 0)
12505 | (i.sib.index << 3)
12506 | (i.sib.scale << 6));
12509 if (i.disp_operands)
12510 output_disp (insn_start_frag, insn_start_off);
12512 if (i.imm_operands)
12513 output_imm (insn_start_frag, insn_start_off);
12516   /* frag_now_fix () returning plain abs_section_offset when we're in the
12517      absolute section, and abs_section_offset not getting updated as data
12518      gets added to the frag breaks the logic below.  */
12520 if (now_seg != absolute_section)
12522 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
12523 if (j > 15)
12525 if (dot_insn ())
12526 	    as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
12527 		     j);
12528 else
12529 	    as_bad (_("instruction length of %u bytes exceeds the limit of 15"),
12530 		    j);
12532 else if (fragP)
12534 /* NB: Don't add prefix with GOTPC relocation since
12535 output_disp() above depends on the fixed encoding
12536 length. Can't add prefix with TLS relocation since
12537 it breaks TLS linker optimization. */
12538 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
12539 /* Prefix count on the current instruction. */
12540 unsigned int count = i.vex.length;
12541 unsigned int k;
12542 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
12543 /* REX byte is encoded in VEX/EVEX prefix. */
12544 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
12545 count++;
12547 /* Count prefixes for extended opcode maps. */
12548 if (!i.vex.length)
12549 switch (i.tm.opcode_space)
12551 case SPACE_BASE:
12552 break;
12553 case SPACE_0F:
12554 count++;
12555 break;
12556 case SPACE_0F38:
12557 case SPACE_0F3A:
12558 count += 2;
12559 break;
12560 default:
12561 abort ();
12564 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12565 == BRANCH_PREFIX)
12567 /* Set the maximum prefix size in BRANCH_PREFIX
12568 frag. */
12569 if (fragP->tc_frag_data.max_bytes > max)
12570 fragP->tc_frag_data.max_bytes = max;
12571 if (fragP->tc_frag_data.max_bytes > count)
12572 fragP->tc_frag_data.max_bytes -= count;
12573 else
12574 fragP->tc_frag_data.max_bytes = 0;
12576 else
12578 /* Remember the maximum prefix size in FUSED_JCC_PADDING
12579 frag. */
12580 unsigned int max_prefix_size;
12581 if (align_branch_prefix_size > max)
12582 max_prefix_size = max;
12583 else
12584 max_prefix_size = align_branch_prefix_size;
12585 if (max_prefix_size > count)
12586 fragP->tc_frag_data.max_prefix_length
12587 = max_prefix_size - count;
12590 /* Use existing segment prefix if possible. Use CS
12591 segment prefix in 64-bit mode. In 32-bit mode, use SS
12592 segment prefix with ESP/EBP base register and use DS
12593 segment prefix without ESP/EBP base register. */
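		  /* A sketch of the rationale: each default matches the
		     segment the CPU would use anyway (CS overrides are
		     ignored in 64-bit mode, SS is the default with
		     %esp/%ebp bases, DS otherwise), so the padding
		     prefix is a no-op.  */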
12594 if (i.prefix[SEG_PREFIX])
12595 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
12596 else if (flag_code == CODE_64BIT)
12597 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
12598 else if (i.base_reg
12599 && (i.base_reg->reg_num == 4
12600 || i.base_reg->reg_num == 5))
12601 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
12602 else
12603 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
12608   /* NB: This doesn't work with COND_JUMP86 without i386.  */
12609 if (align_branch_power
12610 && now_seg != absolute_section
12611 && cpu_arch_flags.bitfield.cpui386)
12613 /* Terminate each frag so that we can add prefix and check for
12614 fused jcc. */
12615 frag_wane (frag_now);
12616 frag_new (0);
12619 #ifdef DEBUG386
12620 if (flag_debug)
12622 pi ("" /*line*/, &i);
12624 #endif /* DEBUG386 */
12627 /* Return the size of the displacement operand N. */
12629 static int
12630 disp_size (unsigned int n)
12632 int size = 4;
12634 if (i.types[n].bitfield.disp64)
12635 size = 8;
12636 else if (i.types[n].bitfield.disp8)
12637 size = 1;
12638 else if (i.types[n].bitfield.disp16)
12639 size = 2;
12640 return size;
12643 /* Return the size of the immediate operand N. */
12645 static int
12646 imm_size (unsigned int n)
12648 int size = 4;
12649 if (i.types[n].bitfield.imm64)
12650 size = 8;
12651 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
12652 size = 1;
12653 else if (i.types[n].bitfield.imm16)
12654 size = 2;
12655 return size;
12658 static void
12659 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
12661 char *p;
12662 unsigned int n;
12664 for (n = 0; n < i.operands; n++)
12666 if (operand_type_check (i.types[n], disp))
12668 int size = disp_size (n);
12670 if (now_seg == absolute_section)
12671 abs_section_offset += size;
12672 else if (i.op[n].disps->X_op == O_constant)
12674 offsetT val = i.op[n].disps->X_add_number;
12676 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
12677 size);
12678 p = frag_more (size);
12679 md_number_to_chars (p, val, size);
12681 else
12683 enum bfd_reloc_code_real reloc_type;
12684 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
12685 bool sign = (flag_code == CODE_64BIT && size == 4
12686 && (!want_disp32 (&i.tm)
12687 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
12688 && !i.types[n].bitfield.baseindex)))
12689 || pcrel;
12690 fixS *fixP;
12692 /* We can't have 8 bit displacement here. */
12693 gas_assert (!i.types[n].bitfield.disp8);
12695 /* The PC relative address is computed relative
12696 		 to the instruction boundary, so when immediate
12697 		 fields follow, we need to adjust the value.  */
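	      /* E.g. (an illustrative sketch): in

			cmpl	$1234, foo(%rip)

		 the 4-byte immediate follows the displacement field, so
		 the addend below is biased by the immediate's size to
		 keep the value relative to the instruction's end.  */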
12698 if (pcrel && i.imm_operands)
12700 unsigned int n1;
12701 int sz = 0;
12703 for (n1 = 0; n1 < i.operands; n1++)
12704 if (operand_type_check (i.types[n1], imm))
12706 /* Only one immediate is allowed for PC
12707 relative address, except with .insn. */
12708 gas_assert (sz == 0 || dot_insn ());
12709 sz += imm_size (n1);
12711 /* We should find at least one immediate. */
12712 gas_assert (sz != 0);
12713 i.op[n].disps->X_add_number -= sz;
12716 p = frag_more (size);
12717 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
12718 if (GOT_symbol
12719 && GOT_symbol == i.op[n].disps->X_add_symbol
12720 && (((reloc_type == BFD_RELOC_32
12721 || reloc_type == BFD_RELOC_X86_64_32S
12722 || (reloc_type == BFD_RELOC_64
12723 && object_64bit))
12724 && (i.op[n].disps->X_op == O_symbol
12725 || (i.op[n].disps->X_op == O_add
12726 && ((symbol_get_value_expression
12727 (i.op[n].disps->X_op_symbol)->X_op)
12728 == O_subtract))))
12729 || reloc_type == BFD_RELOC_32_PCREL))
12731 if (!object_64bit)
12733 reloc_type = BFD_RELOC_386_GOTPC;
12734 i.has_gotpc_tls_reloc = true;
12735 i.op[n].disps->X_add_number +=
12736 encoding_length (insn_start_frag, insn_start_off, p);
12738 else if (reloc_type == BFD_RELOC_64)
12739 reloc_type = BFD_RELOC_X86_64_GOTPC64;
12740 else
12741 /* Don't do the adjustment for x86-64, as there
12742 the pcrel addressing is relative to the _next_
12743 insn, and that is taken care of in other code. */
12744 reloc_type = BFD_RELOC_X86_64_GOTPC32;
12746 else if (align_branch_power)
12748 switch (reloc_type)
12750 case BFD_RELOC_386_TLS_GD:
12751 case BFD_RELOC_386_TLS_LDM:
12752 case BFD_RELOC_386_TLS_IE:
12753 case BFD_RELOC_386_TLS_IE_32:
12754 case BFD_RELOC_386_TLS_GOTIE:
12755 case BFD_RELOC_386_TLS_GOTDESC:
12756 case BFD_RELOC_386_TLS_DESC_CALL:
12757 case BFD_RELOC_X86_64_TLSGD:
12758 case BFD_RELOC_X86_64_TLSLD:
12759 case BFD_RELOC_X86_64_GOTTPOFF:
12760 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
12761 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
12762 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12763 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
12764 case BFD_RELOC_X86_64_TLSDESC_CALL:
12765 i.has_gotpc_tls_reloc = true;
12766 default:
12767 break;
12770 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
12771 size, i.op[n].disps, pcrel,
12772 reloc_type);
12774 if (flag_code == CODE_64BIT && size == 4 && pcrel
12775 && !i.prefix[ADDR_PREFIX])
12776 fixP->fx_signed = 1;
12778 if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF
12779 && i.tm.opcode_space == SPACE_MAP4)
12781 /* Only "add %reg1, foo@gottpoff(%rip), %reg2" is
12782 allowed in md_assemble. Set fx_tcbit2 for EVEX
12783 prefix. */
12784 fixP->fx_tcbit2 = 1;
12785 continue;
12788 if (i.base_reg && i.base_reg->reg_num == RegIP)
12790 if (reloc_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
12792 /* Set fx_tcbit for REX2 prefix. */
12793 if (is_apx_rex2_encoding ())
12794 fixP->fx_tcbit = 1;
12795 continue;
12798 /* In 64-bit, i386_validate_fix updates only (%rip)
12799 relocations. */
12800 else if (object_64bit)
12801 continue;
12803 /* Check for "call/jmp *mem", "mov mem, %reg",
12804 "test %reg, mem" and "binop mem, %reg" where binop
12805 is one of adc, add, and, cmp, or, sbb, sub, xor
12806 instructions without data prefix. Always generate
12807 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
12808 if (i.prefix[DATA_PREFIX] == 0
12809 && (i.rm.mode == 2
12810 || (i.rm.mode == 0 && i.rm.regmem == 5))
12811 && i.tm.opcode_space == SPACE_BASE
12812 && ((i.operands == 1
12813 && i.tm.base_opcode == 0xff
12814 && (i.rm.reg == 2 || i.rm.reg == 4))
12815 || (i.operands == 2
12816 && (i.tm.base_opcode == 0x8b
12817 || i.tm.base_opcode == 0x85
12818 || (i.tm.base_opcode & ~0x38) == 0x03))))
12820 if (object_64bit)
12822 if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF)
12824 /* Set fx_tcbit for REX2 prefix. */
12825 if (is_apx_rex2_encoding ())
12826 fixP->fx_tcbit = 1;
12828 else if (generate_relax_relocations)
12830 /* Set fx_tcbit3 for REX2 prefix. */
12831 if (is_apx_rex2_encoding ())
12832 fixP->fx_tcbit3 = 1;
12833 else if (i.rex)
12834 fixP->fx_tcbit2 = 1;
12835 else
12836 fixP->fx_tcbit = 1;
12839 else if (generate_relax_relocations
12840 || (i.rm.mode == 0 && i.rm.regmem == 5))
12841 fixP->fx_tcbit2 = 1;
12848 static void
12849 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
12851 char *p;
12852 unsigned int n;
12854 for (n = 0; n < i.operands; n++)
12856 if (operand_type_check (i.types[n], imm))
12858 int size = imm_size (n);
12860 if (now_seg == absolute_section)
12861 abs_section_offset += size;
12862 else if (i.op[n].imms->X_op == O_constant)
12864 offsetT val;
12866 val = offset_in_range (i.op[n].imms->X_add_number,
12867 size);
12868 p = frag_more (size);
12869 md_number_to_chars (p, val, size);
12871 else
12873 /* Not absolute_section.
12874 Need a 32-bit fixup (don't support 8bit
12875 non-absolute imms). Try to support other
12876 sizes ... */
12877 enum bfd_reloc_code_real reloc_type;
12878 int sign;
12880 if (i.types[n].bitfield.imm32s
12881 && (i.suffix == QWORD_MNEM_SUFFIX
12882 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
12883 || (i.prefix[REX_PREFIX] & REX_W)
12884 || dot_insn ()))
12885 sign = 1;
12886 else
12887 sign = 0;
12889 p = frag_more (size);
12890 reloc_type = reloc (size, 0, sign, i.reloc[n]);
12892 /* This is tough to explain. We end up with this one if we
12893 * have operands that look like
12894 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
12895 * obtain the absolute address of the GOT, and it is strongly
12896 * preferable from a performance point of view to avoid using
12897 * a runtime relocation for this. The actual sequence of
12898 	       * instructions often looks something like:
12900 * call .L66
12901 * .L66:
12902 * popl %ebx
12903 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
12905 * The call and pop essentially return the absolute address
12906 * of the label .L66 and store it in %ebx. The linker itself
12907 * will ultimately change the first operand of the addl so
12908 * that %ebx points to the GOT, but to keep things simple, the
12909 * .o file must have this operand set so that it generates not
12910 * the absolute address of .L66, but the absolute address of
12911 	       * itself.  This allows the linker to simply treat a GOTPC
12912 * relocation as asking for a pcrel offset to the GOT to be
12913 * added in, and the addend of the relocation is stored in the
12914 * operand field for the instruction itself.
12916 * Our job here is to fix the operand so that it would add
12917 * the correct offset so that %ebx would point to itself. The
12918 * thing that is tricky is that .-.L66 will point to the
12919 * beginning of the instruction, so we need to further modify
12920 * the operand so that it will point to itself. There are
12921 * other cases where you have something like:
12923 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
12925 * and here no correction would be required. Internally in
12926 * the assembler we treat operands of this form as not being
12927 * pcrel since the '.' is explicitly mentioned, and I wonder
12928 * whether it would simplify matters to do it this way. Who
12929 * knows. In earlier versions of the PIC patches, the
12930 * pcrel_adjust field was used to store the correction, but
12931 * since the expression is not pcrel, I felt it would be
12932 * confusing to do it this way. */
12934 if ((reloc_type == BFD_RELOC_32
12935 || reloc_type == BFD_RELOC_X86_64_32S
12936 || reloc_type == BFD_RELOC_64)
12937 && GOT_symbol
12938 && GOT_symbol == i.op[n].imms->X_add_symbol
12939 && (i.op[n].imms->X_op == O_symbol
12940 || (i.op[n].imms->X_op == O_add
12941 && ((symbol_get_value_expression
12942 (i.op[n].imms->X_op_symbol)->X_op)
12943 == O_subtract))))
12945 if (!object_64bit)
12946 reloc_type = BFD_RELOC_386_GOTPC;
12947 else if (size == 4)
12948 reloc_type = BFD_RELOC_X86_64_GOTPC32;
12949 else if (size == 8)
12950 reloc_type = BFD_RELOC_X86_64_GOTPC64;
12951 i.has_gotpc_tls_reloc = true;
12952 i.op[n].imms->X_add_number +=
12953 encoding_length (insn_start_frag, insn_start_off, p);
12955 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
12956 i.op[n].imms, 0, reloc_type);
12962 /* x86_cons_fix_new is called via the expression parsing code when a
12963 reloc is needed. We use this hook to get the correct .got reloc. */
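/* E.g. (illustrative) ".long foo@GOTOFF" ends up here, the reloc kind
   having been pre-parsed by lex_got() on behalf of x86_cons().  */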
12964 static int cons_sign = -1;
12966 void
12967 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
12968 expressionS *exp, bfd_reloc_code_real_type r)
12970 r = reloc (len, 0, cons_sign, r);
12972 #ifdef TE_PE
12973 if (exp->X_op == O_secrel)
12975 exp->X_op = O_symbol;
12976 r = BFD_RELOC_32_SECREL;
12978 else if (exp->X_op == O_secidx)
12979 r = BFD_RELOC_16_SECIDX;
12980 #endif
12982 fix_new_exp (frag, off, len, exp, 0, r);
12985 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
12986 purpose of the `.dc.a' internal pseudo-op. */
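/* E.g. ".dc.a foo" thus emits a 4-byte address for 32-bit and x32 output
   and an 8-byte one for 64-bit output (a sketch of the intended
   behavior).  */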
12988 int
12989 x86_address_bytes (void)
12991 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
12992 return 4;
12993 return stdoutput->arch_info->bits_per_address / 8;
12996 #if (defined (OBJ_ELF) || defined (OBJ_MACH_O) || defined (TE_PE))
12997 /* Parse operands of the form
12998 <symbol>@GOTOFF+<nnn>
12999 and similar .plt or .got references.
13001 If we find one, set up the correct relocation in RELOC and copy the
13002 input string, minus the `@GOTOFF' into a malloc'd buffer for
13003 parsing by the calling routine. Return this buffer, and if ADJUST
13004 is non-null set it to the length of the string we removed from the
13005 input line. Otherwise return NULL. */
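/* E.g. (an illustrative sketch): given

	movl	foo@GOTOFF(%ebx), %eax

   *REL becomes BFD_RELOC_386_GOTOFF and the returned buffer holds the
   operand text with the "@GOTOFF" token blanked out.  */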
13006 static char *
13007 lex_got (enum bfd_reloc_code_real *rel,
13008 int *adjust,
13009 i386_operand_type *types)
13011   /* Some of the relocations depend on the size of the field that is to
13012      be relocated.  But in our callers i386_immediate and i386_displacement
13013 we don't yet know the operand size (this will be set by insn
13014 matching). Hence we record the word32 relocation here,
13015 and adjust the reloc according to the real size in reloc(). */
13016 char *cp;
13017 unsigned int j;
13019 for (cp = input_line_pointer; *cp != '@'; cp++)
13020 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
13021 return NULL;
13023 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
13025 int len = gotrel[j].len;
13026 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
13028 if (gotrel[j].rel[object_64bit] != 0)
13030 int first, second;
13031 char *tmpbuf, *past_reloc;
13033 i.has_gotrel = true;
13034 *rel = gotrel[j].rel[object_64bit];
13036 if (types)
13038 if (flag_code != CODE_64BIT)
13040 types->bitfield.imm32 = 1;
13041 types->bitfield.disp32 = 1;
13043 else
13044 *types = gotrel[j].types64;
13047 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
13048 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
13050 /* The length of the first part of our input line. */
13051 first = cp - input_line_pointer;
13053 /* The second part goes from after the reloc token until
13054 (and including) an end_of_line char or comma. */
13055 past_reloc = cp + 1 + len;
13056 cp = past_reloc;
13057 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
13058 ++cp;
13059 second = cp + 1 - past_reloc;
13061 /* Allocate and copy string. The trailing NUL shouldn't
13062 be necessary, but be safe. */
13063 tmpbuf = XNEWVEC (char, first + second + 2);
13064 memcpy (tmpbuf, input_line_pointer, first);
13065 if (second != 0 && *past_reloc != ' ')
13066 /* Replace the relocation token with ' ', so that
13067 errors like foo@GOTOFF1 will be detected. */
13068 tmpbuf[first++] = ' ';
13069 else
13070 /* Increment length by 1 if the relocation token is
13071 removed. */
13072 len++;
13073 if (adjust)
13074 *adjust = len;
13075 memcpy (tmpbuf + first, past_reloc, second);
13076 tmpbuf[first + second] = '\0';
13077 return tmpbuf;
13080 as_bad (_("@%s reloc is not supported with %d-bit output format"),
13081 gotrel[j].str, 1 << (5 + object_64bit));
13082 return NULL;
13086 /* Might be a symbol version string. Don't as_bad here. */
13087 return NULL;
13089 #else
13090 # define lex_got(reloc, adjust, types) NULL
13091 #endif
13093 bfd_reloc_code_real_type
13094 x86_cons (expressionS *exp, int size)
13096 bfd_reloc_code_real_type got_reloc = NO_RELOC;
13098 intel_syntax = -intel_syntax;
13099 exp->X_md = 0;
13100 expr_mode = expr_operator_none;
13102 #if defined (OBJ_ELF) || defined (TE_PE)
13103 if (size == 4 || (object_64bit && size == 8))
13105 /* Handle @GOTOFF and the like in an expression. */
13106 char *save;
13107 char *gotfree_input_line;
13108 int adjust = 0;
13110 save = input_line_pointer;
13111 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
13112 if (gotfree_input_line)
13113 input_line_pointer = gotfree_input_line;
13115 expression (exp);
13117 if (gotfree_input_line)
13119 /* expression () has merrily parsed up to the end of line,
13120 or a comma - in the wrong buffer. Transfer how far
13121 input_line_pointer has moved to the right buffer. */
13122 input_line_pointer = (save
13123 + (input_line_pointer - gotfree_input_line)
13124 + adjust);
13125 free (gotfree_input_line);
13126 if (exp->X_op == O_constant
13127 || exp->X_op == O_absent
13128 || exp->X_op == O_illegal
13129 || exp->X_op == O_register
13130 || exp->X_op == O_big)
13132 char c = *input_line_pointer;
13133 *input_line_pointer = 0;
13134 as_bad (_("missing or invalid expression `%s'"), save);
13135 *input_line_pointer = c;
13137 else if ((got_reloc == BFD_RELOC_386_PLT32
13138 || got_reloc == BFD_RELOC_X86_64_PLT32)
13139 && exp->X_op != O_symbol)
13141 char c = *input_line_pointer;
13142 *input_line_pointer = 0;
13143 as_bad (_("invalid PLT expression `%s'"), save);
13144 *input_line_pointer = c;
13148 else
13149 #endif
13150 expression (exp);
13152 intel_syntax = -intel_syntax;
13154 if (intel_syntax)
13155 i386_intel_simplify (exp);
13157 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
13158 if (size <= 4 && expr_mode == expr_operator_present
13159 && exp->X_op == O_constant && !object_64bit)
13160 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
13162 return got_reloc;
13165 static void
13166 signed_cons (int size)
13168 if (object_64bit)
13169 cons_sign = 1;
13170 cons (size);
13171 cons_sign = -1;
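/* Handler for the .insn directive.  A few illustrative sketches (see the
   gas documentation for the authoritative syntax):

	.insn 0x90			# one-byte nop
	.insn 0x0fa2			# two-byte cpuid
	.insn VEX.66.0F38.W0 0x98, %xmm2, %xmm1, %xmm0

   Prefixes, opcode, trailing "+r" / "/<digit>" specifiers, and operands
   are parsed below largely independent of the opcode table.  */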
13174 static void
13175 s_insn (int dummy ATTRIBUTE_UNUSED)
13177 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
13178 char *saved_ilp = find_end_of_line (line, false), saved_char;
13179 const char *end;
13180 unsigned int j;
13181 valueT val;
13182 bool vex = false, xop = false, evex = false;
13183 struct last_insn *last_insn;
13185 init_globals ();
13187 saved_char = *saved_ilp;
13188 *saved_ilp = 0;
13190 end = parse_insn (line, mnemonic, parse_prefix);
13191 if (end == NULL)
13193 bad:
13194 *saved_ilp = saved_char;
13195 ignore_rest_of_line ();
13196 i.tm.mnem_off = 0;
13197 memset (&pp, 0, sizeof (pp));
13198 return;
13200 line += end - line;
13202 current_templates.start = &i.tm;
13203 current_templates.end = &i.tm + 1;
13204 i.tm.mnem_off = MN__insn;
13205 i.tm.extension_opcode = None;
13207 if (startswith (line, "VEX")
13208 && (line[3] == '.' || is_space_char (line[3])))
13210 vex = true;
13211 line += 3;
13213 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
13215 char *e;
13216 unsigned long n = strtoul (line + 3, &e, 16);
13218 if (e == line + 5 && n >= 0x08 && n <= 0x1f
13219 && (*e == '.' || is_space_char (*e)))
13221 xop = true;
13222 /* Arrange for build_vex_prefix() to emit 0x8f. */
13223 i.tm.opcode_space = SPACE_XOP08;
13224 i.insn_opcode_space = n;
13225 line = e;
13228 else if (startswith (line, "EVEX")
13229 && (line[4] == '.' || is_space_char (line[4])))
13231 evex = true;
13232 line += 4;
13235 if (vex || xop
13236 ? pp.encoding == encoding_evex
13237 : evex
13238 ? pp.encoding == encoding_vex
13239 || pp.encoding == encoding_vex3
13240 : pp.encoding != encoding_default)
13242 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
13243 goto bad;
13246 if (line > end && pp.encoding == encoding_default)
13247 pp.encoding = evex ? encoding_evex : encoding_vex;
13249 if (pp.encoding != encoding_default)
13251 /* Only address size and segment override prefixes are permitted with
13252 VEX/XOP/EVEX encodings. */
13253 const unsigned char *p = i.prefix;
13255 for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
13257 if (!*p)
13258 continue;
13260 switch (j)
13262 case SEG_PREFIX:
13263 case ADDR_PREFIX:
13264 break;
13265 default:
13266 as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
13267 goto bad;
13272 if (line > end && *line == '.')
13274 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
13275 switch (line[1])
13277 case 'L':
13278 switch (line[2])
13280 case '0':
13281 if (evex)
13282 i.tm.opcode_modifier.evex = EVEX128;
13283 else
13284 i.tm.opcode_modifier.vex = VEX128;
13285 break;
13287 case '1':
13288 if (evex)
13289 i.tm.opcode_modifier.evex = EVEX256;
13290 else
13291 i.tm.opcode_modifier.vex = VEX256;
13292 break;
13294 case '2':
13295 if (evex)
13296 i.tm.opcode_modifier.evex = EVEX512;
13297 break;
13299 case '3':
13300 if (evex)
13301 i.tm.opcode_modifier.evex = EVEX_L3;
13302 break;
13304 case 'I':
13305 if (line[3] == 'G')
13307 if (evex)
13308 i.tm.opcode_modifier.evex = EVEXLIG;
13309 else
13310 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
13311 ++line;
13313 break;
13316 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
13317 line += 3;
13318 break;
13320 case '1':
13321 if (line[2] == '2' && line[3] == '8')
13323 if (evex)
13324 i.tm.opcode_modifier.evex = EVEX128;
13325 else
13326 i.tm.opcode_modifier.vex = VEX128;
13327 line += 4;
13329 break;
13331 case '2':
13332 if (line[2] == '5' && line[3] == '6')
13334 if (evex)
13335 i.tm.opcode_modifier.evex = EVEX256;
13336 else
13337 i.tm.opcode_modifier.vex = VEX256;
13338 line += 4;
13340 break;
13342 case '5':
13343 if (evex && line[2] == '1' && line[3] == '2')
13345 i.tm.opcode_modifier.evex = EVEX512;
13346 line += 4;
13348 break;
13352 if (line > end && *line == '.')
13354 /* embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
13355 switch (line[1])
13357 case 'N':
13358 if (line[2] == 'P')
13359 line += 3;
13360 break;
13362 case '6':
13363 if (line[2] == '6')
13365 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
13366 line += 3;
13368 break;
13370 case 'F': case 'f':
13371 if (line[2] == '3')
13373 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
13374 line += 3;
13376 else if (line[2] == '2')
13378 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
13379 line += 3;
13381 break;
13385 if (line > end && !xop && *line == '.')
13387 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
13388 switch (line[1])
13390 case '0':
13391 if (TOUPPER (line[2]) != 'F')
13392 break;
13393 if (line[3] == '.' || is_space_char (line[3]))
13395 i.insn_opcode_space = SPACE_0F;
13396 line += 3;
13398 else if (line[3] == '3'
13399 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
13400 && (line[5] == '.' || is_space_char (line[5])))
13402 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
13403 line += 5;
13405 break;
13407 case 'M':
13408 if (ISDIGIT (line[2]) && line[2] != '0')
13410 char *e;
13411 unsigned long n = strtoul (line + 2, &e, 10);
13413 if (n <= (evex ? 15 : 31)
13414 && (*e == '.' || is_space_char (*e)))
13416 i.insn_opcode_space = n;
13417 line = e;
13420 break;
13424 if (line > end && *line == '.' && line[1] == 'W')
13426 /* VEX.W, XOP.W, EVEX.W */
13427 switch (line[2])
13429 case '0':
13430 i.tm.opcode_modifier.vexw = VEXW0;
13431 break;
13433 case '1':
13434 i.tm.opcode_modifier.vexw = VEXW1;
13435 break;
13437 case 'I':
13438 if (line[3] == 'G')
13440 i.tm.opcode_modifier.vexw = VEXWIG;
13441 ++line;
13443 break;
13446 if (i.tm.opcode_modifier.vexw)
13447 line += 3;
13450 if (line > end && *line && !is_space_char (*line))
13452 /* Improve diagnostic a little. */
13453 if (*line == '.' && line[1] && !is_space_char (line[1]))
13454 ++line;
13455 goto done;
13458 /* Before processing the opcode expression, find trailing "+r" or
13459 "/<digit>" specifiers. */
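  /* E.g. (illustrative sketches): ".insn 0x50+r, %eax" folds the register
     into the low opcode bits (push %eax, byte 0x50), while
     ".insn 0xfe/1, (%eax)" places 1 in the ModR/M reg field, encoding
     decb (%eax) as fe 08.  */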
13460 for (ptr = line; ; ++ptr)
13462 unsigned long n;
13463 char *e;
13465 ptr = strpbrk (ptr, "+/,");
13466 if (ptr == NULL || *ptr == ',')
13467 break;
13469 if (*ptr == '+' && ptr[1] == 'r'
13470 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
13472 *ptr = ' ';
13473 ptr[1] = ' ';
13474 i.short_form = true;
13475 break;
13478 if (*ptr == '/' && ISDIGIT (ptr[1])
13479 && (n = strtoul (ptr + 1, &e, 8)) < 8
13480 && e == ptr + 2
13481 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
13483 *ptr = ' ';
13484 ptr[1] = ' ';
13485 i.tm.extension_opcode = n;
13486 i.tm.opcode_modifier.modrm = 1;
13487 break;
13491 input_line_pointer = line;
13492 val = get_absolute_expression ();
13493 line = input_line_pointer;
13495 if (i.short_form && (val & 7))
13496 as_warn ("`+r' assumes low three opcode bits to be clear");
13498 for (j = 1; j < sizeof(val); ++j)
13499 if (!(val >> (j * 8)))
13500 break;
13502 /* Trim off a prefix if present. */
13503 if (j > 1 && !vex && !xop && !evex)
13505 uint8_t byte = val >> ((j - 1) * 8);
13507 switch (byte)
13509 case DATA_PREFIX_OPCODE:
13510 case REPE_PREFIX_OPCODE:
13511 case REPNE_PREFIX_OPCODE:
13512 if (!add_prefix (byte))
13513 goto bad;
13514 val &= ((uint64_t)1 << (--j * 8)) - 1;
13515 break;
13519 /* Parse operands, if any, before evaluating encoding space. */
13520 if (*line == ',')
13522 i.memshift = -1;
13524 ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
13525 this_operand = -1;
13526 if (!ptr)
13527 goto bad;
13528 line = ptr;
13530 if (!i.operands)
13532 as_bad (_("expecting operand after ','; got nothing"));
13533 goto done;
13536 if (i.mem_operands > 1)
13538 as_bad (_("too many memory references for `%s'"),
13539 &i386_mnemonics[MN__insn]);
13540 goto done;
13543 /* No need to distinguish encoding_evex and encoding_evex512. */
13544 if (pp.encoding == encoding_evex512)
13545 pp.encoding = encoding_evex;
13548 /* Trim off encoding space. */
13549 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
13551 uint8_t byte = val >> ((--j - 1) * 8);
13553 i.insn_opcode_space = SPACE_0F;
13554 switch (byte & -(j > 1 && !pp.rex2_encoding
13555 && (pp.encoding != encoding_egpr || evex)))
13557 case 0x38:
13558 i.insn_opcode_space = SPACE_0F38;
13559 --j;
13560 break;
13561 case 0x3a:
13562 i.insn_opcode_space = SPACE_0F3A;
13563 --j;
13564 break;
13566 i.tm.opcode_space = i.insn_opcode_space;
13567 val &= ((uint64_t)1 << (j * 8)) - 1;
13569 if (!i.tm.opcode_space && (vex || evex))
13570 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
13571 Also avoid hitting abort() there or in build_evex_prefix(). */
13572 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
13573 : SPACE_0F38;
13575 if (j > 2)
13577 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
13578 goto done;
13580 i.opcode_length = j;
13582 /* Handle operands, if any. */
13583 if (i.operands)
13585 i386_operand_type combined;
13586 expressionS *disp_exp = NULL;
13587 bool changed;
13589 if (pp.encoding == encoding_egpr)
13591 if (vex || xop)
13593 as_bad (_("eGPR use conflicts with encoding specifier"));
13594 goto done;
13596 if (evex)
13597 pp.encoding = encoding_evex;
13598 else
13599 pp.encoding = encoding_default;
13602 /* Are we to emit ModR/M encoding? */
13603 if (!i.short_form
13604 && (i.mem_operands
13605 || i.reg_operands > (pp.encoding != encoding_default)
13606 || i.tm.extension_opcode != None))
13607 i.tm.opcode_modifier.modrm = 1;
13609 if (!i.tm.opcode_modifier.modrm
13610 && (i.reg_operands
13611 > i.short_form + 0U + (pp.encoding != encoding_default)
13612 || i.mem_operands))
13614 as_bad (_("too many register/memory operands"));
13615 goto done;
13618 /* Enforce certain constraints on operands. */
13619 switch (i.reg_operands + i.mem_operands
13620 + (i.tm.extension_opcode != None))
13622 case 0:
13623 if (i.short_form)
13625 as_bad (_("too few register/memory operands"));
13626 goto done;
13628 /* Fall through. */
13629 case 1:
13630 if (i.tm.opcode_modifier.modrm)
13632 as_bad (_("too few register/memory operands"));
13633 goto done;
13635 break;
13637 case 2:
13638 break;
13640 case 4:
13641 if (i.imm_operands
13642 && (i.op[0].imms->X_op != O_constant
13643 || !fits_in_imm4 (i.op[0].imms->X_add_number)))
13645 as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
13646 goto done;
13648 /* Fall through. */
13649 case 3:
13650 if (pp.encoding != encoding_default)
13652 i.tm.opcode_modifier.vexvvvv = i.tm.extension_opcode == None
13653 ? VexVVVV_SRC1 : VexVVVV_DST;
13654 break;
13656 /* Fall through. */
13657 default:
13658 as_bad (_("too many register/memory operands"));
13659 goto done;
13662 /* Bring operands into canonical order (imm, mem, reg). */
13665 changed = false;
13667 for (j = 1; j < i.operands; ++j)
13669 if ((!operand_type_check (i.types[j - 1], imm)
13670 && operand_type_check (i.types[j], imm))
13671 || (i.types[j - 1].bitfield.class != ClassNone
13672 && i.types[j].bitfield.class == ClassNone))
13674 swap_2_operands (j - 1, j);
13675 changed = true;
13679 while (changed);
13681 /* For Intel syntax swap the order of register operands. */
13682 if (intel_syntax)
13683 switch (i.reg_operands)
13685 case 0:
13686 case 1:
13687 break;
13689 case 4:
13690 swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
13691 /* Fall through. */
13692 case 3:
13693 case 2:
13694 swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
13695 break;
13697 default:
13698 abort ();
13701 /* Enforce constraints when using VSIB. */
13702 if (i.index_reg
13703 && (i.index_reg->reg_type.bitfield.xmmword
13704 || i.index_reg->reg_type.bitfield.ymmword
13705 || i.index_reg->reg_type.bitfield.zmmword))
13707 if (pp.encoding == encoding_default)
13709 as_bad (_("VSIB unavailable with legacy encoding"));
13710 goto done;
13713 if (pp.encoding == encoding_evex
13714 && i.reg_operands > 1)
13716 /* We could allow two register operands, encoding the 2nd one in
13717 an 8-bit immediate like for 4-register-operand insns, but that
13718 would require ugly fiddling with process_operands() and/or
13719 build_modrm_byte(). */
13720 as_bad (_("too many register operands with VSIB"));
13721 goto done;
13724 i.tm.opcode_modifier.sib = 1;
13727 /* Establish operand size encoding. */
13728 operand_type_set (&combined, 0);
13730 for (j = i.imm_operands; j < i.operands; ++j)
13732 /* Look for 8-bit operands that use old registers. */
13733 if (pp.encoding != encoding_default
13734 && flag_code == CODE_64BIT
13735 && i.types[j].bitfield.class == Reg
13736 && i.types[j].bitfield.byte
13737 && !(i.op[j].regs->reg_flags & (RegRex | RegRex2 | RegRex64))
13738 && i.op[j].regs->reg_num > 3)
13739 as_bad (_("can't encode register '%s%s' with VEX/XOP/EVEX"),
13740 register_prefix, i.op[j].regs->reg_name);
13742 i.types[j].bitfield.instance = InstanceNone;
13744 if (operand_type_check (i.types[j], disp))
13746 i.types[j].bitfield.baseindex = 1;
13747 disp_exp = i.op[j].disps;
13750 if (evex && i.types[j].bitfield.baseindex)
13752 unsigned int n = i.memshift;
13754 if (i.types[j].bitfield.byte)
13755 n = 0;
13756 else if (i.types[j].bitfield.word)
13757 n = 1;
13758 else if (i.types[j].bitfield.dword)
13759 n = 2;
13760 else if (i.types[j].bitfield.qword)
13761 n = 3;
13762 else if (i.types[j].bitfield.xmmword)
13763 n = 4;
13764 else if (i.types[j].bitfield.ymmword)
13765 n = 5;
13766 else if (i.types[j].bitfield.zmmword)
13767 n = 6;
13769 if (i.memshift < 32 && n != i.memshift)
13770 as_warn ("conflicting memory operand size specifiers");
13771 i.memshift = n;
13774 if ((i.broadcast.type || i.broadcast.bytes)
13775 && j == i.broadcast.operand)
13776 continue;
13778 combined = operand_type_or (combined, i.types[j]);
13779 combined.bitfield.class = ClassNone;
13782 switch ((i.broadcast.type ? i.broadcast.type : 1)
13783 << (i.memshift < 32 ? i.memshift : 0))
13785 case 64: combined.bitfield.zmmword = 1; break;
13786 case 32: combined.bitfield.ymmword = 1; break;
13787 case 16: combined.bitfield.xmmword = 1; break;
13788 case 8: combined.bitfield.qword = 1; break;
13789 case 4: combined.bitfield.dword = 1; break;
13792 if (pp.encoding == encoding_default)
13794 if (flag_code == CODE_64BIT && combined.bitfield.qword)
13795 i.rex |= REX_W;
13796 else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
13797 : combined.bitfield.word)
13798 && !add_prefix (DATA_PREFIX_OPCODE))
13799 goto done;
13801 else if (!i.tm.opcode_modifier.vexw)
13803 if (flag_code == CODE_64BIT)
13805 if (combined.bitfield.qword)
13806 i.tm.opcode_modifier.vexw = VEXW1;
13807 else if (combined.bitfield.dword)
13808 i.tm.opcode_modifier.vexw = VEXW0;
13811 if (!i.tm.opcode_modifier.vexw)
13812 i.tm.opcode_modifier.vexw = VEXWIG;
13815 if (vex || xop)
13817 if (!i.tm.opcode_modifier.vex)
13819 if (combined.bitfield.ymmword)
13820 i.tm.opcode_modifier.vex = VEX256;
13821 else if (combined.bitfield.xmmword)
13822 i.tm.opcode_modifier.vex = VEX128;
13825 else if (evex)
13827 if (!i.tm.opcode_modifier.evex)
13829 /* Do _not_ consider AVX512VL here. */
13830 if (i.rounding.type != rc_none || combined.bitfield.zmmword)
13831 i.tm.opcode_modifier.evex = EVEX512;
13832 else if (combined.bitfield.ymmword)
13833 i.tm.opcode_modifier.evex = EVEX256;
13834 else if (combined.bitfield.xmmword)
13835 i.tm.opcode_modifier.evex = EVEX128;
13838 if (i.memshift >= 32)
13840 unsigned int n = 0;
13842 switch (i.tm.opcode_modifier.evex)
13844 case EVEX512: n = 64; break;
13845 case EVEX256: n = 32; break;
13846 case EVEX128: n = 16; break;
13849 if (i.broadcast.type)
13850 n /= i.broadcast.type;
13852 if (n > 0)
13853 for (i.memshift = 0; !(n & 1); n >>= 1)
13854 ++i.memshift;
13855 else if (disp_exp != NULL && disp_exp->X_op == O_constant
13856 && disp_exp->X_add_number != 0
13857 && pp.disp_encoding != disp_encoding_32bit)
13859 if (!quiet_warnings)
13860 as_warn ("cannot determine memory operand size");
13861 pp.disp_encoding = disp_encoding_32bit;
13866 if (i.memshift >= 32)
13867 i.memshift = 0;
13868 else if (!evex)
13869 pp.encoding = encoding_error;
13871 if (i.disp_operands && !optimize_disp (&i.tm))
13872 goto done;
13874 /* Establish size for immediate operands. */
13875 for (j = 0; j < i.imm_operands; ++j)
13877 expressionS *expP = i.op[j].imms;
13879 gas_assert (operand_type_check (i.types[j], imm));
13880 operand_type_set (&i.types[j], 0);
13882 if (i.imm_bits[j] > 32)
13883 i.types[j].bitfield.imm64 = 1;
13884 else if (i.imm_bits[j] > 16)
13886 if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
13887 i.types[j].bitfield.imm32s = 1;
13888 else
13889 i.types[j].bitfield.imm32 = 1;
13891 else if (i.imm_bits[j] > 8)
13892 i.types[j].bitfield.imm16 = 1;
13893 else if (i.imm_bits[j] > 0)
13895 if (i.flags[j] & Operand_Signed)
13896 i.types[j].bitfield.imm8s = 1;
13897 else
13898 i.types[j].bitfield.imm8 = 1;
13900 else if (expP->X_op == O_constant)
13902 i.types[j] = smallest_imm_type (expP->X_add_number);
13903 i.types[j].bitfield.imm1 = 0;
13904 /* Oddly enough imm_size() checks imm64 first, so the bit needs
13905 zapping since smallest_imm_type() sets it unconditionally. */
13906 if (flag_code != CODE_64BIT)
13908 i.types[j].bitfield.imm64 = 0;
13909 i.types[j].bitfield.imm32s = 0;
13910 i.types[j].bitfield.imm32 = 1;
13912 else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
13913 i.types[j].bitfield.imm64 = 0;
13915 else
13916 /* Non-constant expressions are sized heuristically. */
13917 switch (flag_code)
13919 case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
13920 case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
13921 case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
13925 for (j = 0; j < i.operands; ++j)
13926 i.tm.operand_types[j] = i.types[j];
13928 process_operands ();
13931 /* Don't set opcode until after processing operands, to avoid any
13932 potential special casing there. */
13933 i.tm.base_opcode |= val;
13935 if (pp.encoding == encoding_error
13936 || (pp.encoding != encoding_evex
13937 ? i.broadcast.type || i.broadcast.bytes
13938 || i.rounding.type != rc_none
13939 || i.mask.reg
13940 : (i.mem_operands && i.rounding.type != rc_none)
13941 || ((i.broadcast.type || i.broadcast.bytes)
13942 && !(i.flags[i.broadcast.operand] & Operand_Mem))))
13944 as_bad (_("conflicting .insn operands"));
13945 goto done;
13948 if (vex || xop)
13950 if (!i.tm.opcode_modifier.vex)
13951 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
13953 build_vex_prefix (NULL);
13954 i.rex &= REX_OPCODE;
13956 else if (evex)
13958 if (!i.tm.opcode_modifier.evex)
13959 i.tm.opcode_modifier.evex = EVEXLIG;
13961 build_evex_prefix ();
13962 i.rex &= REX_OPCODE;
13964 else
13965 establish_rex ();
13967 last_insn = &seg_info(now_seg)->tc_segment_info_data.last_insn;
13968 output_insn (last_insn);
13969 last_insn->kind = last_insn_directive;
13970 last_insn->name = ".insn directive";
13971 last_insn->file = as_where (&last_insn->line);
13973 #ifdef OBJ_ELF
13974 /* PS: SCFI is enabled only for System V AMD64 ABI. The ABI check has been
13975 performed in i386_target_format. */
13976 if (flag_synth_cfi)
13977 as_bad (_("SCFI: hand-crafting instructions not supported"));
13978 #endif
13980 done:
13981 *saved_ilp = saved_char;
13982 input_line_pointer = line;
13984 demand_empty_rest_of_line ();
13986 /* Make sure dot_insn() won't yield "true" anymore. */
13987 i.tm.mnem_off = 0;
13989 current_templates.start = NULL;
13990 memset (&pp, 0, sizeof (pp));
13993 #ifdef TE_PE
13994 static void
13995 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
13997 expressionS exp;
14001 expression (&exp);
14002 if (exp.X_op == O_symbol)
14003 exp.X_op = O_secrel;
14005 emit_expr (&exp, 4);
14007 while (*input_line_pointer++ == ',');
14009 input_line_pointer--;
14010 demand_empty_rest_of_line ();
14013 static void
14014 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
14016 expressionS exp;
14020 expression (&exp);
14021 if (exp.X_op == O_symbol)
14022 exp.X_op = O_secidx;
14024 emit_expr (&exp, 2);
14026 while (*input_line_pointer++ == ',');
14028 input_line_pointer--;
14029 demand_empty_rest_of_line ();
14031 #endif
14033 /* Handle Rounding Control / SAE specifiers. */
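/* E.g. (an illustrative sketch, AT&T syntax):

	vaddps	{rn-sae}, %zmm1, %zmm2, %zmm3

   selects round-to-nearest with suppressed exceptions; a plain "{sae}"
   specifier is handled here as well.  */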
14035 static char *
14036 RC_SAE_specifier (const char *pstr)
14038 unsigned int j;
14040 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
14042 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
14044 if (i.rounding.type != rc_none)
14046 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
14047 return NULL;
14050 switch (pp.encoding)
14052 case encoding_default:
14053 case encoding_egpr:
14054 pp.encoding = encoding_evex512;
14055 break;
14056 case encoding_evex:
14057 case encoding_evex512:
14058 break;
14059 default:
14060 return NULL;
14063 i.rounding.type = RC_NamesTable[j].type;
14065 return (char *)(pstr + RC_NamesTable[j].len);
14069 return NULL;
14072 /* Handle Vector operations. */
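/* E.g. (illustrative sketches) of the {...} decorations parsed here:

	vaddps	(%rax){1to16}, %zmm1, %zmm2	# broadcast
	vmovaps	%zmm1, %zmm2{%k1}{z}		# write mask + zeroing  */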
14074 static char *
14075 check_VecOperations (char *op_string)
14077 const reg_entry *mask;
14078 const char *saved;
14079 char *end_op;
14081 while (*op_string)
14083 saved = op_string;
14084 if (*op_string == '{')
14086 op_string++;
14087 if (is_space_char (*op_string))
14088 op_string++;
14090 /* Check broadcasts. */
14091 if (startswith (op_string, "1to"))
14093 unsigned int bcst_type;
14095 if (i.broadcast.type)
14096 goto duplicated_vec_op;
14098 op_string += 3;
14099 if (*op_string == '8')
14100 bcst_type = 8;
14101 else if (*op_string == '4')
14102 bcst_type = 4;
14103 else if (*op_string == '2')
14104 bcst_type = 2;
14105 else if (*op_string == '1'
14106 && *(op_string+1) == '6')
14108 bcst_type = 16;
14109 op_string++;
14111 else if (*op_string == '3'
14112 && *(op_string+1) == '2')
14114 bcst_type = 32;
14115 op_string++;
14117 else
14119 as_bad (_("Unsupported broadcast: `%s'"), saved);
14120 return NULL;
14122 op_string++;
14124 switch (pp.encoding)
14126 case encoding_default:
14127 case encoding_egpr:
14128 pp.encoding = encoding_evex;
14129 break;
14130 case encoding_evex:
14131 case encoding_evex512:
14132 break;
14133 default:
14134 goto unknown_vec_op;
14137 i.broadcast.type = bcst_type;
14138 i.broadcast.operand = this_operand;
14140 /* For .insn a data size specifier may be appended. */
14141 if (dot_insn () && *op_string == ':')
14142 goto dot_insn_modifier;
14144 /* Check .insn special cases. */
14145 else if (dot_insn () && *op_string == ':')
14147 dot_insn_modifier:
14148 switch (op_string[1])
14150 unsigned long n;
14152 case 'd':
14153 if (i.memshift < 32)
14154 goto duplicated_vec_op;
14156 n = strtoul (op_string + 2, &end_op, 0);
14157 if (n)
14158 for (i.memshift = 0; !(n & 1); n >>= 1)
14159 ++i.memshift;
14160 if (i.memshift < 32 && n == 1)
14161 op_string = end_op;
14162 break;
14164 case 's': case 'u':
14165 /* This isn't really a "vector" operation, but a sign/size
14166 specifier for immediate operands of .insn. Note that AT&T
14167 syntax handles the same in i386_immediate(). */
14168 if (!intel_syntax)
14169 break;
14171 if (i.imm_bits[this_operand])
14172 goto duplicated_vec_op;
14174 n = strtoul (op_string + 2, &end_op, 0);
14175 if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
14177 i.imm_bits[this_operand] = n;
14178 if (op_string[1] == 's')
14179 i.flags[this_operand] |= Operand_Signed;
14180 op_string = end_op;
14182 break;
14185 /* Check masking operation. */
14186 else if ((mask = parse_register (op_string, &end_op)) != NULL)
14188 if (mask == &bad_reg)
14189 return NULL;
14191 /* k0 can't be used for write mask. */
14192 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
14194 as_bad (_("`%s%s' can't be used for write mask"),
14195 register_prefix, mask->reg_name);
14196 return NULL;
14199 if (!i.mask.reg)
14201 i.mask.reg = mask;
14202 i.mask.operand = this_operand;
14204 else if (i.mask.reg->reg_num)
14205 goto duplicated_vec_op;
14206 else
14208 i.mask.reg = mask;
14210 /* Only "{z}" is allowed here. No need to check
14211 zeroing mask explicitly. */
14212 if (i.mask.operand != (unsigned int) this_operand)
14214 as_bad (_("invalid write mask `%s'"), saved);
14215 return NULL;
14219 op_string = end_op;
14221 /* Check zeroing-flag for masking operation. */
14222 else if (*op_string == 'z')
14224 if (!i.mask.reg)
14226 i.mask.reg = reg_k0;
14227 i.mask.zeroing = 1;
14228 i.mask.operand = this_operand;
14230 else
14232 if (i.mask.zeroing)
14234 duplicated_vec_op:
14235 as_bad (_("duplicated `%s'"), saved);
14236 return NULL;
14239 i.mask.zeroing = 1;
14241 /* Only "{%k}" is allowed here. No need to check mask
14242 register explicitly. */
14243 if (i.mask.operand != (unsigned int) this_operand)
14245 as_bad (_("invalid zeroing-masking `%s'"),
14246 saved);
14247 return NULL;
14251 op_string++;
14253 else if (intel_syntax
14254 && (op_string = RC_SAE_specifier (op_string)) != NULL)
14255 i.rounding.modifier = true;
14256 else
14257 goto unknown_vec_op;
14259 if (is_space_char (*op_string))
14260 op_string++;
14261 if (*op_string != '}')
14263 as_bad (_("missing `}' in `%s'"), saved);
14264 return NULL;
14266 op_string++;
14268 if (is_space_char (*op_string))
14269 ++op_string;
14271 continue;
14273 unknown_vec_op:
14274 /* We don't know this one. */
14275 as_bad (_("unknown vector operation: `%s'"), saved);
14276 return NULL;
14279 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
14281 as_bad (_("zeroing-masking only allowed with write mask"));
14282 return NULL;
14285 return op_string;
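/* Illustrative AT&T-syntax forms accepted by check_VecOperations():
   `{1toN}' requests EVEX embedded broadcast, `{%kN}' with N != 0
   selects a write mask, and `{z}' zeroing- instead of merging-masking.
   All three can appear together:

       vaddps (%rax){1to16}, %zmm1, %zmm2{%k1}{z}
       vpaddd %xmm0, %xmm1, %xmm2{%k2}  */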
14288 static int
14289 i386_immediate (char *imm_start)
14291 char *save_input_line_pointer;
14292 char *gotfree_input_line;
14293 segT exp_seg = 0;
14294 expressionS *exp;
14295 i386_operand_type types;
14297 operand_type_set (&types, ~0);
14299 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
14301 as_bad (_("at most %d immediate operands are allowed"),
14302 MAX_IMMEDIATE_OPERANDS);
14303 return 0;
14306 exp = &im_expressions[i.imm_operands++];
14307 i.op[this_operand].imms = exp;
14309 if (is_space_char (*imm_start))
14310 ++imm_start;
14312 save_input_line_pointer = input_line_pointer;
14313 input_line_pointer = imm_start;
14315 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
14316 if (gotfree_input_line)
14317 input_line_pointer = gotfree_input_line;
14319 expr_mode = expr_operator_none;
14320 exp_seg = expression (exp);
14322 /* For .insn immediates there may be a size specifier. */
14323 if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
14324 && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
14326 char *e;
14327 unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
14329 if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
14331 i.imm_bits[this_operand] = n;
14332 if (input_line_pointer[2] == 's')
14333 i.flags[this_operand] |= Operand_Signed;
14334 input_line_pointer = e + 1;
14338 SKIP_WHITESPACE ();
14339 if (*input_line_pointer)
14340 as_bad (_("junk `%s' after expression"), input_line_pointer);
14342 input_line_pointer = save_input_line_pointer;
14343 if (gotfree_input_line)
14345 free (gotfree_input_line);
14347 if (exp->X_op == O_constant)
14348 exp->X_op = O_illegal;
14351 if (exp_seg == reg_section)
14353 as_bad (_("illegal immediate register operand %s"), imm_start);
14354 return 0;
14357 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
14360 static int
14361 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
14362 i386_operand_type types, const char *imm_start)
14364 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
14366 if (imm_start)
14367 as_bad (_("missing or invalid immediate expression `%s'"),
14368 imm_start);
14369 return 0;
14371 else if (exp->X_op == O_constant)
14373 /* Size it properly later. */
14374 i.types[this_operand].bitfield.imm64 = 1;
14376 /* If not 64bit, sign/zero extend val, to account for wraparound
14377 when !BFD64. */
14378 if (expr_mode == expr_operator_present
14379 && flag_code != CODE_64BIT && !object_64bit)
14380 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
14382 #ifdef OBJ_AOUT
14383 else if (exp_seg != absolute_section
14384 && exp_seg != text_section
14385 && exp_seg != data_section
14386 && exp_seg != bss_section
14387 && exp_seg != undefined_section
14388 && !bfd_is_com_section (exp_seg))
14390 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
14391 return 0;
14393 #endif
14394 else
14396 /* This is an address. The size of the address will be
14397 determined later, depending on destination register,
14398 suffix, or the default for the section. */
14399 i.types[this_operand].bitfield.imm8 = 1;
14400 i.types[this_operand].bitfield.imm16 = 1;
14401 i.types[this_operand].bitfield.imm32 = 1;
14402 i.types[this_operand].bitfield.imm32s = 1;
14403 i.types[this_operand].bitfield.imm64 = 1;
14404 i.types[this_operand] = operand_type_and (i.types[this_operand],
14405 types);
14408 return 1;
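/* Illustrative only: for .insn, an immediate operand may carry the
   size/signedness suffix parsed in i386_immediate() above.  A
   (hypothetical) operand `$0x12{:u8}' records 8 in i.imm_bits[] and
   leaves Operand_Signed clear, while `$-1{:s8}' additionally sets
   Operand_Signed.  */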
14411 static char *
14412 i386_scale (char *scale)
14414 offsetT val;
14415 char *save = input_line_pointer;
14417 input_line_pointer = scale;
14418 val = get_absolute_expression ();
14420 switch (val)
14422 case 1:
14423 i.log2_scale_factor = 0;
14424 break;
14425 case 2:
14426 i.log2_scale_factor = 1;
14427 break;
14428 case 4:
14429 i.log2_scale_factor = 2;
14430 break;
14431 case 8:
14432 i.log2_scale_factor = 3;
14433 break;
14434 default:
14436 char sep = *input_line_pointer;
14438 *input_line_pointer = '\0';
14439 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
14440 scale);
14441 *input_line_pointer = sep;
14442 input_line_pointer = save;
14443 return NULL;
14446 if (i.log2_scale_factor != 0 && i.index_reg == 0)
14448 as_warn (_("scale factor of %d without an index register"),
14449 1 << i.log2_scale_factor);
14450 i.log2_scale_factor = 0;
14452 scale = input_line_pointer;
14453 input_line_pointer = save;
14454 return scale;
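/* Worked examples for the scale parser above: in
   `movl 4(%ebx,%esi,2), %eax' the `2' yields log2_scale_factor == 1;
   a scale of 3 is rejected with "expecting scale factor of 1, 2, 4,
   or 8"; and `movl (%ebx,2), %eax' (a scale without an index
   register) only draws a warning before the scale is dropped.  */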
14457 static int
14458 i386_displacement (char *disp_start, char *disp_end)
14460 expressionS *exp;
14461 segT exp_seg = 0;
14462 char *save_input_line_pointer;
14463 char *gotfree_input_line;
14464 int override;
14465 i386_operand_type bigdisp, types = anydisp;
14466 int ret;
14468 if (i.disp_operands == MAX_MEMORY_OPERANDS)
14470 as_bad (_("at most %d displacement operands are allowed"),
14471 MAX_MEMORY_OPERANDS);
14472 return 0;
14475 operand_type_set (&bigdisp, 0);
14476 if (i.jumpabsolute
14477 || i.types[this_operand].bitfield.baseindex
14478 || (current_templates.start->opcode_modifier.jump != JUMP
14479 && current_templates.start->opcode_modifier.jump != JUMP_DWORD))
14481 i386_addressing_mode ();
14482 override = (i.prefix[ADDR_PREFIX] != 0);
14483 if (flag_code == CODE_64BIT)
14485 bigdisp.bitfield.disp32 = 1;
14486 if (!override)
14487 bigdisp.bitfield.disp64 = 1;
14489 else if ((flag_code == CODE_16BIT) ^ override)
14490 bigdisp.bitfield.disp16 = 1;
14491 else
14492 bigdisp.bitfield.disp32 = 1;
14494 else
14496 /* For PC-relative branches, the width of the displacement may be
14497 dependent upon data size, but is never dependent upon address size.
14498 Also make sure to not unintentionally match against a non-PC-relative
14499 branch template. */
14500 const insn_template *t = current_templates.start;
14501 bool has_intel64 = false;
14503 while (++t < current_templates.end)
14505 if (t->opcode_modifier.jump
14506 != current_templates.start->opcode_modifier.jump)
14507 break;
14508 if ((t->opcode_modifier.isa64 >= INTEL64))
14509 has_intel64 = true;
14511 current_templates.end = t;
14513 override = (i.prefix[DATA_PREFIX] != 0);
14514 if (flag_code == CODE_64BIT)
14516 if ((override || i.suffix == WORD_MNEM_SUFFIX)
14517 && (!intel64 || !has_intel64))
14518 bigdisp.bitfield.disp16 = 1;
14519 else
14520 bigdisp.bitfield.disp32 = 1;
14522 else
14524 if (!override)
14525 override = (i.suffix == (flag_code != CODE_16BIT
14526 ? WORD_MNEM_SUFFIX
14527 : LONG_MNEM_SUFFIX));
14528 bigdisp.bitfield.disp32 = 1;
14529 if ((flag_code == CODE_16BIT) ^ override)
14531 bigdisp.bitfield.disp32 = 0;
14532 bigdisp.bitfield.disp16 = 1;
14536 i.types[this_operand] = operand_type_or (i.types[this_operand],
14537 bigdisp);
14539 exp = &disp_expressions[i.disp_operands];
14540 i.op[this_operand].disps = exp;
14541 i.disp_operands++;
14542 save_input_line_pointer = input_line_pointer;
14543 input_line_pointer = disp_start;
14544 END_STRING_AND_SAVE (disp_end);
14546 #ifndef GCC_ASM_O_HACK
14547 #define GCC_ASM_O_HACK 0
14548 #endif
14549 #if GCC_ASM_O_HACK
14550 END_STRING_AND_SAVE (disp_end + 1);
14551 if (i.types[this_operand].bitfield.baseIndex
14552 && displacement_string_end[-1] == '+')
14554 /* This hack is to avoid a warning when using the "o"
14555 constraint within gcc asm statements.
14556 For instance:
14558 #define _set_tssldt_desc(n,addr,limit,type) \
14559 __asm__ __volatile__ ( \
14560 "movw %w2,%0\n\t" \
14561 "movw %w1,2+%0\n\t" \
14562 "rorl $16,%1\n\t" \
14563 "movb %b1,4+%0\n\t" \
14564 "movb %4,5+%0\n\t" \
14565 "movb $0,6+%0\n\t" \
14566 "movb %h1,7+%0\n\t" \
14567 "rorl $16,%1" \
14568 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
14570 This works great except that the output assembler ends
14571 up looking a bit weird if it turns out that there is
14572 no offset. You end up producing code that looks like:
14574 #APP
14575 movw $235,(%eax)
14576 movw %dx,2+(%eax)
14577 rorl $16,%edx
14578 movb %dl,4+(%eax)
14579 movb $137,5+(%eax)
14580 movb $0,6+(%eax)
14581 movb %dh,7+(%eax)
14582 rorl $16,%edx
14583 #NO_APP
14585 So here we provide the missing zero. */
14587 *displacement_string_end = '0';
14589 #endif
14590 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
14591 if (gotfree_input_line)
14592 input_line_pointer = gotfree_input_line;
14594 expr_mode = expr_operator_none;
14595 exp_seg = expression (exp);
14597 SKIP_WHITESPACE ();
14598 if (*input_line_pointer)
14599 as_bad (_("junk `%s' after expression"), input_line_pointer);
14600 #if GCC_ASM_O_HACK
14601 RESTORE_END_STRING (disp_end + 1);
14602 #endif
14603 input_line_pointer = save_input_line_pointer;
14604 if (gotfree_input_line)
14606 free (gotfree_input_line);
14608 if (exp->X_op == O_constant || exp->X_op == O_register)
14609 exp->X_op = O_illegal;
14612 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
14614 RESTORE_END_STRING (disp_end);
14616 return ret;
14619 static int
14620 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
14621 i386_operand_type types, const char *disp_start)
14623 int ret = 1;
14625 /* We do this to make sure that the section symbol is in
14626 the symbol table. We will ultimately change the relocation
14627 to be relative to the beginning of the section. */
14628 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
14629 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
14630 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14632 if (exp->X_op != O_symbol)
14633 goto inv_disp;
14635 if (S_IS_LOCAL (exp->X_add_symbol)
14636 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
14637 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
14638 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
14639 exp->X_op = O_subtract;
14640 exp->X_op_symbol = GOT_symbol;
14641 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
14642 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
14643 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14644 i.reloc[this_operand] = BFD_RELOC_64;
14645 else
14646 i.reloc[this_operand] = BFD_RELOC_32;
14649 else if (exp->X_op == O_absent
14650 || exp->X_op == O_illegal
14651 || exp->X_op == O_big)
14653 inv_disp:
14654 as_bad (_("missing or invalid displacement expression `%s'"),
14655 disp_start);
14656 ret = 0;
14659 else if (exp->X_op == O_constant)
14661 /* Sizing gets taken care of by optimize_disp().
14663 If not 64bit, sign/zero extend val, to account for wraparound
14664 when !BFD64. */
14665 if (expr_mode == expr_operator_present
14666 && flag_code != CODE_64BIT && !object_64bit)
14667 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
14670 #ifdef OBJ_AOUT
14671 else if (exp_seg != absolute_section
14672 && exp_seg != text_section
14673 && exp_seg != data_section
14674 && exp_seg != bss_section
14675 && exp_seg != undefined_section
14676 && !bfd_is_com_section (exp_seg))
14678 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
14679 ret = 0;
14681 #endif
14683 else if (current_templates.start->opcode_modifier.jump == JUMP_BYTE)
14684 i.types[this_operand].bitfield.disp8 = 1;
14686 /* Check if this is a displacement only operand. */
14687 if (!i.types[this_operand].bitfield.baseindex)
14688 i.types[this_operand] =
14689 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
14690 operand_type_and (i.types[this_operand], types));
14692 return ret;
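/* A sketch of the GOT rewriting above: for 32-bit
   `movl foo@GOTOFF(%ebx), %eax' the operand arrives as
   BFD_RELOC_386_GOTOFF and is turned into the difference
   foo - _GLOBAL_OFFSET_TABLE_ with a plain BFD_RELOC_32, after making
   sure foo's section symbol exists in the symbol table.  */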
14695 /* Return the active addressing mode, taking address override and
14696 registers forming the address into consideration. Update the
14697 address override prefix if necessary. */
14699 static enum flag_code
14700 i386_addressing_mode (void)
14702 enum flag_code addr_mode;
14704 if (i.prefix[ADDR_PREFIX])
14705 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
14706 else if (flag_code == CODE_16BIT
14707 && is_cpu (current_templates.start, CpuMPX)
14708 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
14709 from md_assemble() by "is not a valid base/index expression"
14710 when there is a base and/or index. */
14711 && !i.types[this_operand].bitfield.baseindex)
14713 /* MPX insn memory operands with neither base nor index must be forced
14714 to use 32-bit addressing in 16-bit mode. */
14715 addr_mode = CODE_32BIT;
14716 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14717 ++i.prefixes;
14718 gas_assert (!i.types[this_operand].bitfield.disp16);
14719 gas_assert (!i.types[this_operand].bitfield.disp32);
14721 else
14723 addr_mode = flag_code;
14725 #if INFER_ADDR_PREFIX
14726 if (i.mem_operands == 0)
14728 /* Infer address prefix from the first memory operand. */
14729 const reg_entry *addr_reg = i.base_reg;
14731 if (addr_reg == NULL)
14732 addr_reg = i.index_reg;
14734 if (addr_reg)
14736 if (addr_reg->reg_type.bitfield.dword)
14737 addr_mode = CODE_32BIT;
14738 else if (flag_code != CODE_64BIT
14739 && addr_reg->reg_type.bitfield.word)
14740 addr_mode = CODE_16BIT;
14742 if (addr_mode != flag_code)
14744 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14745 i.prefixes += 1;
14746 /* Change the size of any displacement too. At most one
14747 of Disp16 or Disp32 is set.
14748 FIXME. There doesn't seem to be any real need for
14749 separate Disp16 and Disp32 flags. The same goes for
14750 Imm16 and Imm32. Removing them would probably clean
14751 up the code quite a lot. */
14752 if (flag_code != CODE_64BIT
14753 && (i.types[this_operand].bitfield.disp16
14754 || i.types[this_operand].bitfield.disp32))
14756 static const i386_operand_type disp16_32 = {
14757 .bitfield = { .disp16 = 1, .disp32 = 1 }
14760 i.types[this_operand]
14761 = operand_type_xor (i.types[this_operand], disp16_32);
14766 #endif
14769 return addr_mode;
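/* Illustrative cases for the mode logic above: in 32-bit code
   `mov (%bx,%si), %ax' uses word-sized address registers, so
   addr_mode becomes CODE_16BIT and an 0x67 address-size prefix is
   inferred; conversely, in 16-bit code an MPX memory operand with
   neither base nor index is forced to 32-bit addressing.  */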
14772 /* Make sure the memory operand we've been dealt is valid.
14773 Return 1 on success, 0 on a failure. */
14775 static int
14776 i386_index_check (const char *operand_string)
14778 const char *kind = "base/index";
14779 enum flag_code addr_mode = i386_addressing_mode ();
14780 const insn_template *t = current_templates.end - 1;
14782 if (t->opcode_modifier.isstring)
14784 /* Memory operands of string insns are special in that they only allow
14785 a single register (rDI, rSI, or rBX) as their memory address. */
14786 const reg_entry *expected_reg;
14787 static const char di_si[][2][4] =
14789 { "esi", "edi" },
14790 { "si", "di" },
14791 { "rsi", "rdi" }
14793 static const char bx[][4] = { "ebx", "bx", "rbx" };
14795 kind = "string address";
14797 if (t->opcode_modifier.prefixok == PrefixRep)
14799 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
14800 int op = 0;
14802 if (!t->operand_types[0].bitfield.baseindex
14803 || ((!i.mem_operands != !intel_syntax)
14804 && t->operand_types[1].bitfield.baseindex))
14805 op = 1;
14806 expected_reg
14807 = (const reg_entry *) str_hash_find (reg_hash,
14808 di_si[addr_mode][op == es_op]);
14810 else
14811 expected_reg
14812 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
14814 if (i.base_reg != expected_reg
14815 || i.index_reg
14816 || operand_type_check (i.types[this_operand], disp))
14818 /* The second memory operand must have the same size as
14819 the first one. */
14820 if (i.mem_operands
14821 && i.base_reg
14822 && !((addr_mode == CODE_64BIT
14823 && i.base_reg->reg_type.bitfield.qword)
14824 || (addr_mode == CODE_32BIT
14825 ? i.base_reg->reg_type.bitfield.dword
14826 : i.base_reg->reg_type.bitfield.word)))
14827 goto bad_address;
14829 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
14830 operand_string,
14831 intel_syntax ? '[' : '(',
14832 register_prefix,
14833 expected_reg->reg_name,
14834 intel_syntax ? ']' : ')');
14835 return 1;
14837 else
14838 return 1;
14840 bad_address:
14841 as_bad (_("`%s' is not a valid %s expression"),
14842 operand_string, kind);
14843 return 0;
14845 else
14847 t = current_templates.start;
14849 if (addr_mode != CODE_16BIT)
14851 /* 32-bit/64-bit checks. */
14852 if (pp.disp_encoding == disp_encoding_16bit)
14854 bad_disp:
14855 as_bad (_("invalid `%s' prefix"),
14856 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
14857 return 0;
14860 if ((i.base_reg
14861 && ((addr_mode == CODE_64BIT
14862 ? !i.base_reg->reg_type.bitfield.qword
14863 : !i.base_reg->reg_type.bitfield.dword)
14864 || (i.index_reg && i.base_reg->reg_num == RegIP)
14865 || i.base_reg->reg_num == RegIZ))
14866 || (i.index_reg
14867 && !i.index_reg->reg_type.bitfield.xmmword
14868 && !i.index_reg->reg_type.bitfield.ymmword
14869 && !i.index_reg->reg_type.bitfield.zmmword
14870 && ((addr_mode == CODE_64BIT
14871 ? !i.index_reg->reg_type.bitfield.qword
14872 : !i.index_reg->reg_type.bitfield.dword)
14873 || !i.index_reg->reg_type.bitfield.baseindex)))
14874 goto bad_address;
14876 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
14877 if (t->mnem_off == MN_bndmk
14878 || t->mnem_off == MN_bndldx
14879 || t->mnem_off == MN_bndstx
14880 || t->opcode_modifier.sib == SIBMEM)
14882 /* They cannot use RIP-relative addressing. */
14883 if (i.base_reg && i.base_reg->reg_num == RegIP)
14885 as_bad (_("`%s' cannot be used here"), operand_string);
14886 return 0;
14889 /* bndldx and bndstx ignore their scale factor. */
14890 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
14891 && i.log2_scale_factor)
14892 as_warn (_("register scaling is being ignored here"));
14895 else
14897 /* 16-bit checks. */
14898 if (pp.disp_encoding == disp_encoding_32bit)
14899 goto bad_disp;
14901 if ((i.base_reg
14902 && (!i.base_reg->reg_type.bitfield.word
14903 || !i.base_reg->reg_type.bitfield.baseindex))
14904 || (i.index_reg
14905 && (!i.index_reg->reg_type.bitfield.word
14906 || !i.index_reg->reg_type.bitfield.baseindex
14907 || !(i.base_reg
14908 && i.base_reg->reg_num < 6
14909 && i.index_reg->reg_num >= 6
14910 && i.log2_scale_factor == 0))))
14911 goto bad_address;
14914 return 1;
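/* Examples for the checks above: 16-bit addressing only allows the
   classic pairs, so `(%bx,%si)' passes while `(%si,%bx)' (the base
   must be rBX/rBP) and `(%bx,%si,2)' (no scaling) fail.  String insns
   accept only their fixed register forms, e.g.
   `movsb (%rsi), %es:(%rdi)'; deviating forms (a displacement, an
   index, or the wrong register) at best draw a diagnostic.  */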
14917 /* Handle vector immediates. */
14919 static int
14920 RC_SAE_immediate (const char *imm_start)
14922 const char *pstr = imm_start;
14924 if (*pstr != '{')
14925 return 0;
14927 pstr++;
14928 if (is_space_char (*pstr))
14929 pstr++;
14931 pstr = RC_SAE_specifier (pstr);
14932 if (pstr == NULL)
14933 return 0;
14935 if (is_space_char (*pstr))
14936 pstr++;
14938 if (*pstr++ != '}')
14940 as_bad (_("Missing '}': '%s'"), imm_start);
14941 return 0;
14943 /* RC/SAE immediate string should contain nothing more. */
14944 if (*pstr != 0)
14946 as_bad (_("Junk after '}': '%s'"), imm_start);
14947 return 0;
14950 /* Internally this doesn't count as an operand. */
14951 --i.operands;
14953 return 1;
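/* AT&T counterpart of the Intel-syntax handling above: the rounding
   control is written as a standalone first "operand", e.g.
   `vaddpd {rn-sae}, %zmm1, %zmm2, %zmm3'.  It is parsed like an
   operand but, as the `--i.operands' above shows, not counted as
   one.  */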
14956 static INLINE bool starts_memory_operand (char c)
14958 return ISDIGIT (c)
14959 || is_name_beginner (c)
14960 || strchr ("([\"+-!~", c);
14963 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
14964 on error. */
14966 static int
14967 i386_att_operand (char *operand_string)
14969 const reg_entry *r;
14970 char *end_op;
14971 char *op_string = operand_string;
14973 if (is_space_char (*op_string))
14974 ++op_string;
14976 /* We check for an absolute prefix (differentiating,
14977 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'. */
14978 if (*op_string == ABSOLUTE_PREFIX
14979 && current_templates.start->opcode_modifier.jump)
14981 ++op_string;
14982 if (is_space_char (*op_string))
14983 ++op_string;
14984 i.jumpabsolute = true;
14987 /* Check if operand is a register. */
14988 if ((r = parse_register (op_string, &end_op)) != NULL)
14990 i386_operand_type temp;
14992 if (r == &bad_reg)
14993 return 0;
14995 /* Check for a segment override by searching for ':' after a
14996 segment register. */
14997 op_string = end_op;
14998 if (is_space_char (*op_string))
14999 ++op_string;
15000 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
15002 i.seg[i.mem_operands] = r;
15004 /* Skip the ':' and whitespace. */
15005 ++op_string;
15006 if (is_space_char (*op_string))
15007 ++op_string;
15009 /* Handle case of %es:*foo. */
15010 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
15011 && current_templates.start->opcode_modifier.jump)
15013 ++op_string;
15014 if (is_space_char (*op_string))
15015 ++op_string;
15016 i.jumpabsolute = true;
15019 if (!starts_memory_operand (*op_string))
15021 as_bad (_("bad memory operand `%s'"), op_string);
15022 return 0;
15024 goto do_memory_reference;
15027 /* Handle vector operations. */
15028 if (*op_string == '{')
15030 op_string = check_VecOperations (op_string);
15031 if (op_string == NULL)
15032 return 0;
15035 if (*op_string)
15037 as_bad (_("junk `%s' after register"), op_string);
15038 return 0;
15041 /* Reject pseudo registers for .insn. */
15042 if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
15044 as_bad (_("`%s%s' cannot be used here"),
15045 register_prefix, r->reg_name);
15046 return 0;
15049 temp = r->reg_type;
15050 temp.bitfield.baseindex = 0;
15051 i.types[this_operand] = operand_type_or (i.types[this_operand],
15052 temp);
15053 i.types[this_operand].bitfield.unspecified = 0;
15054 i.op[this_operand].regs = r;
15055 i.reg_operands++;
15057 /* A GPR may follow an RC or SAE immediate only if a (vector) register
15058 operand was also present earlier on. */
15059 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
15060 && i.reg_operands == 1)
15062 unsigned int j;
15064 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
15065 if (i.rounding.type == RC_NamesTable[j].type)
15066 break;
15067 as_bad (_("`%s': misplaced `{%s}'"),
15068 insn_name (current_templates.start), RC_NamesTable[j].name);
15069 return 0;
15072 else if (*op_string == REGISTER_PREFIX)
15074 as_bad (_("bad register name `%s'"), op_string);
15075 return 0;
15077 else if (*op_string == IMMEDIATE_PREFIX)
15079 ++op_string;
15080 if (i.jumpabsolute)
15082 as_bad (_("immediate operand illegal with absolute jump"));
15083 return 0;
15085 if (!i386_immediate (op_string))
15086 return 0;
15087 if (i.rounding.type != rc_none)
15089 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
15090 insn_name (current_templates.start));
15091 return 0;
15094 else if (RC_SAE_immediate (operand_string))
15096 /* If it is a RC or SAE immediate, do the necessary placement check:
15097 Only another immediate or a GPR may precede it. */
15098 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
15099 || (i.reg_operands == 1
15100 && i.op[0].regs->reg_type.bitfield.class != Reg))
15102 as_bad (_("`%s': misplaced `%s'"),
15103 insn_name (current_templates.start), operand_string);
15104 return 0;
15107 else if (starts_memory_operand (*op_string))
15109 /* This is a memory reference of some sort. */
15110 char *base_string;
15112 /* Start and end of displacement string expression (if found). */
15113 char *displacement_string_start;
15114 char *displacement_string_end;
15116 do_memory_reference:
15117 /* Check for base index form. We detect the base index form by
15118 looking for an ')' at the end of the operand, searching
15119 for the '(' matching it, and finding a REGISTER_PREFIX or ','
15120 after the '('. */
15121 base_string = op_string + strlen (op_string);
15123 /* Handle vector operations. */
15124 --base_string;
15125 if (is_space_char (*base_string))
15126 --base_string;
15128 if (*base_string == '}')
15130 char *vop_start = NULL;
15132 while (base_string-- > op_string)
15134 if (*base_string == '"')
15135 break;
15136 if (*base_string != '{')
15137 continue;
15139 vop_start = base_string;
15141 --base_string;
15142 if (is_space_char (*base_string))
15143 --base_string;
15145 if (*base_string != '}')
15146 break;
15148 vop_start = NULL;
15151 if (!vop_start)
15153 as_bad (_("unbalanced curly braces"));
15154 return 0;
15157 if (check_VecOperations (vop_start) == NULL)
15158 return 0;
15161 /* If we only have a displacement, set-up for it to be parsed later. */
15162 displacement_string_start = op_string;
15163 displacement_string_end = base_string + 1;
15165 if (*base_string == ')')
15167 char *temp_string;
15168 unsigned int parens_not_balanced = 0;
15169 bool in_quotes = false;
15171 /* We've already checked that the counts of left & right ()'s are
15172 equal, and that there's a matching set of double quotes. */
15173 end_op = base_string;
15174 for (temp_string = op_string; temp_string < end_op; temp_string++)
15176 if (*temp_string == '\\' && temp_string[1] == '"')
15177 ++temp_string;
15178 else if (*temp_string == '"')
15179 in_quotes = !in_quotes;
15180 else if (!in_quotes)
15182 if (*temp_string == '(' && !parens_not_balanced++)
15183 base_string = temp_string;
15184 if (*temp_string == ')')
15185 --parens_not_balanced;
15189 temp_string = base_string;
15191 /* Skip past '(' and whitespace. */
15192 gas_assert (*base_string == '(');
15193 ++base_string;
15194 if (is_space_char (*base_string))
15195 ++base_string;
15197 if (*base_string == ','
15198 || ((i.base_reg = parse_register (base_string, &end_op))
15199 != NULL))
15201 displacement_string_end = temp_string;
15203 i.types[this_operand].bitfield.baseindex = 1;
15205 if (i.base_reg)
15207 if (i.base_reg == &bad_reg)
15208 return 0;
15209 base_string = end_op;
15210 if (is_space_char (*base_string))
15211 ++base_string;
15214 /* There may be an index reg or scale factor here. */
15215 if (*base_string == ',')
15217 ++base_string;
15218 if (is_space_char (*base_string))
15219 ++base_string;
15221 if ((i.index_reg = parse_register (base_string, &end_op))
15222 != NULL)
15224 if (i.index_reg == &bad_reg)
15225 return 0;
15226 base_string = end_op;
15227 if (is_space_char (*base_string))
15228 ++base_string;
15229 if (*base_string == ',')
15231 ++base_string;
15232 if (is_space_char (*base_string))
15233 ++base_string;
15235 else if (*base_string != ')')
15237 as_bad (_("expecting `,' or `)' "
15238 "after index register in `%s'"),
15239 operand_string);
15240 return 0;
15243 else if (*base_string == REGISTER_PREFIX)
15245 end_op = strchr (base_string, ',');
15246 if (end_op)
15247 *end_op = '\0';
15248 as_bad (_("bad register name `%s'"), base_string);
15249 return 0;
15252 /* Check for scale factor. */
15253 if (*base_string != ')')
15255 char *end_scale = i386_scale (base_string);
15257 if (!end_scale)
15258 return 0;
15260 base_string = end_scale;
15261 if (is_space_char (*base_string))
15262 ++base_string;
15263 if (*base_string != ')')
15265 as_bad (_("expecting `)' "
15266 "after scale factor in `%s'"),
15267 operand_string);
15268 return 0;
15271 else if (!i.index_reg)
15273 as_bad (_("expecting index register or scale factor "
15274 "after `,'; got '%c'"),
15275 *base_string);
15276 return 0;
15279 else if (*base_string != ')')
15281 as_bad (_("expecting `,' or `)' "
15282 "after base register in `%s'"),
15283 operand_string);
15284 return 0;
15287 else if (*base_string == REGISTER_PREFIX)
15289 end_op = strchr (base_string, ',');
15290 if (end_op)
15291 *end_op = '\0';
15292 as_bad (_("bad register name `%s'"), base_string);
15293 return 0;
15297 /* If there's an expression beginning the operand, parse it,
15298 assuming displacement_string_start and
15299 displacement_string_end are meaningful. */
15300 if (displacement_string_start != displacement_string_end)
15302 if (!i386_displacement (displacement_string_start,
15303 displacement_string_end))
15304 return 0;
15307 /* Special case for (%dx) while doing input/output op. */
15308 if (i.base_reg
15309 && i.base_reg->reg_type.bitfield.instance == RegD
15310 && i.base_reg->reg_type.bitfield.word
15311 && i.index_reg == 0
15312 && i.log2_scale_factor == 0
15313 && i.seg[i.mem_operands] == 0
15314 && !operand_type_check (i.types[this_operand], disp))
15316 i.types[this_operand] = i.base_reg->reg_type;
15317 i.op[this_operand].regs = i.base_reg;
15318 i.base_reg = NULL;
15319 i.input_output_operand = true;
15320 return 1;
15323 if (i386_index_check (operand_string) == 0)
15324 return 0;
15325 i.flags[this_operand] |= Operand_Mem;
15326 i.mem_operands++;
15328 else
15330 /* It's not a memory operand; argh! */
15331 as_bad (_("invalid char %s beginning operand %d `%s'"),
15332 output_invalid (*op_string),
15333 this_operand + 1,
15334 op_string);
15335 return 0;
15337 return 1; /* Normal return. */
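/* A quick tour of operand shapes i386_att_operand() accepts
   (illustrative):

       %eax                    register
       $42                     immediate
       label+8(%ebx,%esi,4)    disp(base,index,scale) memory operand
       %es:16(%ebp)            segment-overridden memory operand
       *%eax / *16(%ebp)       absolute jump targets (jump insns only)
       (%dx)                   special I/O port form for in/out  */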
15340 /* Calculate the maximum variable size (i.e., excluding fr_fix)
15341 that an rs_machine_dependent frag may reach. */
15343 unsigned int
15344 i386_frag_max_var (fragS *frag)
15346 /* The only relaxable frags are for jumps.
15347 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
15348 gas_assert (frag->fr_type == rs_machine_dependent);
15349 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
15352 #ifdef OBJ_ELF
15353 static int
15354 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
15356 /* STT_GNU_IFUNC symbol must go through PLT. */
15357 if ((symbol_get_bfdsym (fr_symbol)->flags
15358 & BSF_GNU_INDIRECT_FUNCTION) != 0)
15359 return 0;
15361 if (!S_IS_EXTERNAL (fr_symbol))
15362 /* Symbol may be weak or local. */
15363 return !S_IS_WEAK (fr_symbol);
15365 /* Global symbols with non-default visibility can't be preempted. */
15366 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
15367 return 1;
15369 if (fr_var != NO_RELOC)
15370 switch ((enum bfd_reloc_code_real) fr_var)
15372 case BFD_RELOC_386_PLT32:
15373 case BFD_RELOC_X86_64_PLT32:
15374 /* Symbol with PLT relocation may be preempted. */
15375 return 0;
15376 default:
15377 abort ();
15380 /* Global symbols with default visibility in a shared library may be
15381 preempted by another definition. */
15382 return !shared;
15384 #endif
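/* Consequences of the predicate above: a non-weak local symbol, or a
   global with non-default (e.g. hidden) visibility, resolves within
   its segment and so may be relaxed; a symbol reached through a PLT32
   relocation may be preempted and may not; a default-visibility
   global is relaxable only when not building a shared object.  */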
15386 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
15387 Note: this also works for Skylake and Cascade Lake.
15388 ---------------------------------------------------------------------
15389 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
15390 | ------ | ----------- | ------- | -------- |
15391 | Jo | N | N | Y |
15392 | Jno | N | N | Y |
15393 | Jc/Jb | Y | N | Y |
15394 | Jae/Jnb | Y | N | Y |
15395 | Je/Jz | Y | Y | Y |
15396 | Jne/Jnz | Y | Y | Y |
15397 | Jna/Jbe | Y | N | Y |
15398 | Ja/Jnbe | Y | N | Y |
15399 | Js | N | N | Y |
15400 | Jns | N | N | Y |
15401 | Jp/Jpe | N | N | Y |
15402 | Jnp/Jpo | N | N | Y |
15403 | Jl/Jnge | Y | Y | Y |
15404 | Jge/Jnl | Y | Y | Y |
15405 | Jle/Jng | Y | Y | Y |
15406 | Jg/Jnle | Y | Y | Y |
15407 --------------------------------------------------------------------- */
15408 static int
15409 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
15411 if (mf_cmp == mf_cmp_alu_cmp)
15412 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
15413 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
15414 if (mf_cmp == mf_cmp_incdec)
15415 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
15416 || mf_jcc == mf_jcc_jle);
15417 if (mf_cmp == mf_cmp_test_and)
15418 return 1;
15419 return 0;
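/* Reading the table above: `cmpl %eax, %ebx; jl label' is fusible
   (ADD/SUB/CMP row, Jl column = Y), while `decl %ecx; jb label' is
   not (INC/DEC with Jc/Jb = N); TEST/AND-style comparisons fuse with
   every Jcc.  */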
15422 /* Return the next non-empty frag. */
15424 static fragS *
15425 i386_next_non_empty_frag (fragS *fragP)
15427 /* There may be a frag with a ".fill 0" when there is no room in
15428 the current frag for frag_grow in output_insn. */
15429 for (fragP = fragP->fr_next;
15430 (fragP != NULL
15431 && fragP->fr_type == rs_fill
15432 && fragP->fr_fix == 0);
15433 fragP = fragP->fr_next)
15435 return fragP;
15438 /* Return the next jcc frag after BRANCH_PADDING. */
15440 static fragS *
15441 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
15443 fragS *branch_fragP;
15444 if (!pad_fragP)
15445 return NULL;
15447 if (pad_fragP->fr_type == rs_machine_dependent
15448 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
15449 == BRANCH_PADDING))
15451 branch_fragP = i386_next_non_empty_frag (pad_fragP);
15452 if (branch_fragP->fr_type != rs_machine_dependent)
15453 return NULL;
15454 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
15455 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
15456 pad_fragP->tc_frag_data.mf_type))
15457 return branch_fragP;
15460 return NULL;
15463 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
15465 static void
15466 i386_classify_machine_dependent_frag (fragS *fragP)
15468 fragS *cmp_fragP;
15469 fragS *pad_fragP;
15470 fragS *branch_fragP;
15471 fragS *next_fragP;
15472 unsigned int max_prefix_length;
15474 if (fragP->tc_frag_data.classified)
15475 return;
15477 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
15478 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
15479 for (next_fragP = fragP;
15480 next_fragP != NULL;
15481 next_fragP = next_fragP->fr_next)
15483 next_fragP->tc_frag_data.classified = 1;
15484 if (next_fragP->fr_type == rs_machine_dependent)
15485 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
15487 case BRANCH_PADDING:
15488 /* The BRANCH_PADDING frag must be followed by a branch
15489 frag. */
15490 branch_fragP = i386_next_non_empty_frag (next_fragP);
15491 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
15492 break;
15493 case FUSED_JCC_PADDING:
15494 /* Check if this is a fused jcc:
15495 FUSED_JCC_PADDING
15496 CMP like instruction
15497 BRANCH_PADDING
15498 COND_JUMP
15500 cmp_fragP = i386_next_non_empty_frag (next_fragP);
15501 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
15502 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
15503 if (branch_fragP)
15505 /* The BRANCH_PADDING frag is merged with the
15506 FUSED_JCC_PADDING frag. */
15507 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
15508 /* CMP like instruction size. */
15509 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
15510 frag_wane (pad_fragP);
15511 /* Skip to branch_fragP. */
15512 next_fragP = branch_fragP;
15514 else if (next_fragP->tc_frag_data.max_prefix_length)
15516 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
15517 a fused jcc. */
15518 next_fragP->fr_subtype
15519 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
15520 next_fragP->tc_frag_data.max_bytes
15521 = next_fragP->tc_frag_data.max_prefix_length;
15522 /* This will be updated in the BRANCH_PREFIX scan. */
15523 next_fragP->tc_frag_data.max_prefix_length = 0;
15525 else
15526 frag_wane (next_fragP);
15527 break;
15531 /* Stop if there is no BRANCH_PREFIX. */
15532 if (!align_branch_prefix_size)
15533 return;
15535 /* Scan for BRANCH_PREFIX. */
15536 for (; fragP != NULL; fragP = fragP->fr_next)
15538 if (fragP->fr_type != rs_machine_dependent
15539 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
15540 != BRANCH_PREFIX))
15541 continue;
15543 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
15544 COND_JUMP_PREFIX. */
15545 max_prefix_length = 0;
15546 for (next_fragP = fragP;
15547 next_fragP != NULL;
15548 next_fragP = next_fragP->fr_next)
15550 if (next_fragP->fr_type == rs_fill)
15551 /* Skip rs_fill frags. */
15552 continue;
15553 else if (next_fragP->fr_type != rs_machine_dependent)
15554 /* Stop for all other frags. */
15555 break;
15557 /* rs_machine_dependent frags. */
15558 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15559 == BRANCH_PREFIX)
15561 /* Count BRANCH_PREFIX frags. */
15562 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
15564 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
15565 frag_wane (next_fragP);
15567 else
15568 max_prefix_length
15569 += next_fragP->tc_frag_data.max_bytes;
15571 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15572 == BRANCH_PADDING)
15573 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15574 == FUSED_JCC_PADDING))
15576 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
15577 fragP->tc_frag_data.u.padding_fragP = next_fragP;
15578 break;
15580 else
15581 /* Stop for other rs_machine_dependent frags. */
15582 break;
15585 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
15587 /* Skip to the next frag. */
15588 fragP = next_fragP;
15592 /* Compute padding size for
15594 FUSED_JCC_PADDING
15595 CMP like instruction
15596 BRANCH_PADDING
15597 COND_JUMP/UNCOND_JUMP
15601 BRANCH_PADDING
15602 COND_JUMP/UNCOND_JUMP
15605 static int
15606 i386_branch_padding_size (fragS *fragP, offsetT address)
15608 unsigned int offset, size, padding_size;
15609 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
15611 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
15612 if (!address)
15613 address = fragP->fr_address;
15614 address += fragP->fr_fix;
15616 /* CMP like instruction size. */
15617 size = fragP->tc_frag_data.cmp_size;
15619 /* The base size of the branch frag. */
15620 size += branch_fragP->fr_fix;
15622 /* Add opcode and displacement bytes for the rs_machine_dependent
15623 branch frag. */
15624 if (branch_fragP->fr_type == rs_machine_dependent)
15625 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
15627 /* Check if branch is within boundary and doesn't end at the last
15628 byte. */
15629 offset = address & ((1U << align_branch_power) - 1);
15630 if ((offset + size) >= (1U << align_branch_power))
15631 /* Padding needed to avoid crossing boundary. */
15632 padding_size = (1U << align_branch_power) - offset;
15633 else
15634 /* No padding needed. */
15635 padding_size = 0;
15637 /* The return value may be saved in tc_frag_data.length, which is
15638 an unsigned byte. */
15639 if (!fits_in_unsigned_byte (padding_size))
15640 abort ();
15642 return padding_size;
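/* A worked example, assuming align_branch_power == 5 (32-byte
   boundaries): with a fused cmp+jcc pair occupying 7 bytes starting
   at offset 28 within its window, 28 + 7 >= 32, so padding_size is
   32 - 28 = 4 and the pair is pushed wholly past the boundary.  */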
15645 /* i386_generic_table_relax_frag()
15647 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
15648 grow/shrink padding to align branch frags. Hand others to
15649 relax_frag(). */
15651 long
15652 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
15654 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15655 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15657 long padding_size = i386_branch_padding_size (fragP, 0);
15658 long grow = padding_size - fragP->tc_frag_data.length;
15660 /* When the BRANCH_PREFIX frag is used, the computed address
15661 must match the actual address and there should be no padding. */
15662 if (fragP->tc_frag_data.padding_address
15663 && (fragP->tc_frag_data.padding_address != fragP->fr_address
15664 || padding_size))
15665 abort ();
15667 /* Update the padding size. */
15668 if (grow)
15669 fragP->tc_frag_data.length = padding_size;
15671 return grow;
15673 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15675 fragS *padding_fragP, *next_fragP;
15676 long padding_size, left_size, last_size;
15678 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15679 if (!padding_fragP)
15680 /* Use the padding set by the leading BRANCH_PREFIX frag. */
15681 return (fragP->tc_frag_data.length
15682 - fragP->tc_frag_data.last_length);
15684 /* Compute the relative address of the padding frag in the very
15685 first time where the BRANCH_PREFIX frag sizes are zero. */
15686 if (!fragP->tc_frag_data.padding_address)
15687 fragP->tc_frag_data.padding_address
15688 = padding_fragP->fr_address - (fragP->fr_address - stretch);
15690 /* First update the last length from the previous iteration. */
15691 left_size = fragP->tc_frag_data.prefix_length;
15692 for (next_fragP = fragP;
15693 next_fragP != padding_fragP;
15694 next_fragP = next_fragP->fr_next)
15695 if (next_fragP->fr_type == rs_machine_dependent
15696 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15697 == BRANCH_PREFIX))
15699 if (left_size)
15701 int max = next_fragP->tc_frag_data.max_bytes;
15702 if (max)
15704 int size;
15705 if (max > left_size)
15706 size = left_size;
15707 else
15708 size = max;
15709 left_size -= size;
15710 next_fragP->tc_frag_data.last_length = size;
15713 else
15714 next_fragP->tc_frag_data.last_length = 0;
15717 /* Check the padding size for the padding frag. */
15718 padding_size = i386_branch_padding_size
15719 (padding_fragP, (fragP->fr_address
15720 + fragP->tc_frag_data.padding_address));
15722 last_size = fragP->tc_frag_data.prefix_length;
15723 /* Check if there is a change from the last iteration. */
15724 if (padding_size == last_size)
15726 /* Update the expected address of the padding frag. */
15727 padding_fragP->tc_frag_data.padding_address
15728 = (fragP->fr_address + padding_size
15729 + fragP->tc_frag_data.padding_address);
15730 return 0;
15733 if (padding_size > fragP->tc_frag_data.max_prefix_length)
15735 /* No padding if there is no sufficient room. Clear the
15736 expected address of the padding frag. */
15737 padding_fragP->tc_frag_data.padding_address = 0;
15738 padding_size = 0;
15740 else
15741 /* Store the expected address of the padding frag. */
15742 padding_fragP->tc_frag_data.padding_address
15743 = (fragP->fr_address + padding_size
15744 + fragP->tc_frag_data.padding_address);
15746 fragP->tc_frag_data.prefix_length = padding_size;
15748 /* Update the length for the current iteration. */
15749 left_size = padding_size;
15750 for (next_fragP = fragP;
15751 next_fragP != padding_fragP;
15752 next_fragP = next_fragP->fr_next)
15753 if (next_fragP->fr_type == rs_machine_dependent
15754 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15755 == BRANCH_PREFIX))
15757 if (left_size)
15759 int max = next_fragP->tc_frag_data.max_bytes;
15760 if (max)
15762 int size;
15763 if (max > left_size)
15764 size = left_size;
15765 else
15766 size = max;
15767 left_size -= size;
15768 next_fragP->tc_frag_data.length = size;
15771 else
15772 next_fragP->tc_frag_data.length = 0;
15775 return (fragP->tc_frag_data.length
15776 - fragP->tc_frag_data.last_length);
15778 return relax_frag (segment, fragP, stretch);
15781 /* md_estimate_size_before_relax()
15783 Called just before relax() for rs_machine_dependent frags. The x86
15784 assembler uses these frags to handle variable size jump
15785 instructions.
15787 Any symbol that is now undefined will not become defined.
15788 Return the correct fr_subtype in the frag.
15789 Return the initial "guess for variable size of frag" to caller.
15790 The guess is actually the growth beyond the fixed part. Whatever
15791 we do to grow the fixed or variable part contributes to our
15792 returned value. */
15794 int
15795 md_estimate_size_before_relax (fragS *fragP, segT segment)
15797 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15798 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
15799 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15801 i386_classify_machine_dependent_frag (fragP);
15802 return fragP->tc_frag_data.length;
15805 /* We've already got fragP->fr_subtype right; all we have to do is
15806 check for un-relaxable symbols. On an ELF system, we can't relax
15807 an externally visible symbol, because it may be overridden by a
15808 shared library. */
15809 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
15810 #ifdef OBJ_ELF
15811 || !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
15812 fragP->fr_var)
15813 #endif
15814 #if defined (OBJ_COFF) && defined (TE_PE)
15815 || S_IS_WEAK (fragP->fr_symbol)
15816 #endif
15819 /* Symbol is undefined in this segment, or we need to keep a
15820 reloc so that weak symbols can be overridden. */
15821 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
15822 enum bfd_reloc_code_real reloc_type;
15823 unsigned char *opcode;
15824 int old_fr_fix;
15825 fixS *fixP = NULL;
15827 if (fragP->fr_var != NO_RELOC)
15828 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
15829 else if (size == 2)
15830 reloc_type = BFD_RELOC_16_PCREL;
15831 #ifdef OBJ_ELF
15832 else if (fragP->tc_frag_data.code == CODE_64BIT
15833 && fragP->fr_offset == 0
15834 && need_plt32_p (fragP->fr_symbol))
15835 reloc_type = BFD_RELOC_X86_64_PLT32;
15836 #endif
15837 else
15838 reloc_type = BFD_RELOC_32_PCREL;
15840 old_fr_fix = fragP->fr_fix;
15841 opcode = (unsigned char *) fragP->fr_opcode;
15843 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
15845 case UNCOND_JUMP:
15846 /* Make jmp (0xeb) a (d)word displacement jump. */
15847 opcode[0] = 0xe9;
15848 fragP->fr_fix += size;
15849 fixP = fix_new (fragP, old_fr_fix, size,
15850 fragP->fr_symbol,
15851 fragP->fr_offset, 1,
15852 reloc_type);
15853 break;
15855 case COND_JUMP86:
15856 if (size == 2
15857 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
15859 /* Negate the condition, and branch past an
15860 unconditional jump. */
15861 opcode[0] ^= 1;
15862 opcode[1] = 3;
15863 /* Insert an unconditional jump. */
15864 opcode[2] = 0xe9;
15865 /* We added two extra opcode bytes, and have a two byte
15866 offset. */
15867 fragP->fr_fix += 2 + 2;
15868 fix_new (fragP, old_fr_fix + 2, 2,
15869 fragP->fr_symbol,
15870 fragP->fr_offset, 1,
15871 reloc_type);
15872 break;
15874 /* Fall through. */
15876 case COND_JUMP:
15877 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
15879 fragP->fr_fix += 1;
15880 fixP = fix_new (fragP, old_fr_fix, 1,
15881 fragP->fr_symbol,
15882 fragP->fr_offset, 1,
15883 BFD_RELOC_8_PCREL);
15884 fixP->fx_signed = 1;
15885 break;
15888 /* This changes the byte-displacement jump 0x7N
15889 to the (d)word-displacement jump 0x0f,0x8N. */
15890 opcode[1] = opcode[0] + 0x10;
15891 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15892 /* We've added an opcode byte. */
15893 fragP->fr_fix += 1 + size;
15894 fixP = fix_new (fragP, old_fr_fix + 1, size,
15895 fragP->fr_symbol,
15896 fragP->fr_offset, 1,
15897 reloc_type);
15898 break;
15900 default:
15901 BAD_CASE (fragP->fr_subtype);
15902 break;
15905 /* All jumps handled here are signed, but don't unconditionally use a
15906 signed limit check for 32 and 16 bit jumps as we want to allow wrap
15907 around at 4G (outside of 64-bit mode) and 64k. */
15908 if (size == 4 && flag_code == CODE_64BIT)
15909 fixP->fx_signed = 1;
15911 frag_wane (fragP);
15912 return fragP->fr_fix - old_fr_fix;
15915 /* Guess size depending on current relax state. Initially the relax
15916 state will correspond to a short jump and we return 1, because
15917 the variable part of the frag (the branch offset) is one byte
15918 long. However, we can relax a section more than once and in that
15919 case we must either set fr_subtype back to the unrelaxed state,
15920 or return the value for the appropriate branch. */
15921 return md_relax_table[fragP->fr_subtype].rlx_length;
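/* An example of the promotion above: for `jmp ext_func' against a
   preemptible symbol, the short form (0xEB rel8) is rewritten in
   place to 0xE9 with a 4-byte pc-relative fixup, and the function
   reports the 4-byte growth via fr_fix - old_fr_fix.  */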
15924 /* Called after relax() is finished.
15926 In: Address of frag.
15927 fr_type == rs_machine_dependent.
15928 fr_subtype is what the address relaxed to.
15930 Out: Any fixSs and constants are set up.
15931 Caller will turn frag into a ".space 0". */
15933 void
15934 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
15935 fragS *fragP)
15937 unsigned char *opcode;
15938 unsigned char *where_to_put_displacement = NULL;
15939 offsetT target_address;
15940 offsetT opcode_address;
15941 unsigned int extension = 0;
15942 offsetT displacement_from_opcode_start;
15944 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15945 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
15946 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15948 /* Generate nop padding. */
15949 unsigned int size = fragP->tc_frag_data.length;
15950 if (size)
15952 if (size > fragP->tc_frag_data.max_bytes)
15953 abort ();
15955 if (flag_debug)
15957 const char *msg;
15958 const char *branch = "branch";
15959 const char *prefix = "";
15960 fragS *padding_fragP;
15961 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
15962 == BRANCH_PREFIX)
15964 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15965 switch (fragP->tc_frag_data.default_prefix)
15967 default:
15968 abort ();
15969 break;
15970 case CS_PREFIX_OPCODE:
15971 prefix = " cs";
15972 break;
15973 case DS_PREFIX_OPCODE:
15974 prefix = " ds";
15975 break;
15976 case ES_PREFIX_OPCODE:
15977 prefix = " es";
15978 break;
15979 case FS_PREFIX_OPCODE:
15980 prefix = " fs";
15981 break;
15982 case GS_PREFIX_OPCODE:
15983 prefix = " gs";
15984 break;
15985 case SS_PREFIX_OPCODE:
15986 prefix = " ss";
15987 break;
15989 if (padding_fragP)
15990 msg = _("%s:%u: add %d%s at 0x%llx to align "
15991 "%s within %d-byte boundary\n");
15992 else
15993 msg = _("%s:%u: add additional %d%s at 0x%llx to "
15994 "align %s within %d-byte boundary\n");
15996 else
15998 padding_fragP = fragP;
15999 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
16000 "%s within %d-byte boundary\n");
16003 if (padding_fragP)
16004 switch (padding_fragP->tc_frag_data.branch_type)
16006 case align_branch_jcc:
16007 branch = "jcc";
16008 break;
16009 case align_branch_fused:
16010 branch = "fused jcc";
16011 break;
16012 case align_branch_jmp:
16013 branch = "jmp";
16014 break;
16015 case align_branch_call:
16016 branch = "call";
16017 break;
16018 case align_branch_indirect:
16019 branch = "indirect branch";
16020 break;
16021 case align_branch_ret:
16022 branch = "ret";
16023 break;
16024 default:
16025 break;
16028 fprintf (stdout, msg,
16029 fragP->fr_file, fragP->fr_line, size, prefix,
16030 (long long) fragP->fr_address, branch,
16031 1 << align_branch_power);
16033 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
16034 memset (fragP->fr_opcode,
16035 fragP->tc_frag_data.default_prefix, size);
16036 else
16037 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
16038 size, 0);
16039 fragP->fr_fix += size;
16041 return;
16044 opcode = (unsigned char *) fragP->fr_opcode;
16046 /* Address we want to reach in file space. */
16047 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
16049 /* Address opcode resides at in file space. */
16050 opcode_address = fragP->fr_address + fragP->fr_fix;
16052 /* Displacement from opcode start to fill into instruction. */
16053 displacement_from_opcode_start = target_address - opcode_address;
16055 if ((fragP->fr_subtype & BIG) == 0)
16057 /* Don't have to change opcode. */
16058 extension = 1; /* 1 opcode + 1 displacement */
16059 where_to_put_displacement = &opcode[1];
16061 else
16063 if (no_cond_jump_promotion
16064 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
16065 as_warn_where (fragP->fr_file, fragP->fr_line,
16066 _("long jump required"));
16068 switch (fragP->fr_subtype)
16070 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
16071 extension = 4; /* 1 opcode + 4 displacement */
16072 opcode[0] = 0xe9;
16073 where_to_put_displacement = &opcode[1];
16074 break;
16076 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
16077 extension = 2; /* 1 opcode + 2 displacement */
16078 opcode[0] = 0xe9;
16079 where_to_put_displacement = &opcode[1];
16080 break;
16082 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
16083 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
16084 extension = 5; /* 2 opcode + 4 displacement */
16085 opcode[1] = opcode[0] + 0x10;
16086 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
16087 where_to_put_displacement = &opcode[2];
16088 break;
16090 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
16091 extension = 3; /* 2 opcode + 2 displacement */
16092 opcode[1] = opcode[0] + 0x10;
16093 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
16094 where_to_put_displacement = &opcode[2];
16095 break;
16097 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
16098 extension = 4;
16099 opcode[0] ^= 1;
16100 opcode[1] = 3;
16101 opcode[2] = 0xe9;
16102 where_to_put_displacement = &opcode[3];
16103 break;
16105 default:
16106 BAD_CASE (fragP->fr_subtype);
16107 break;
16111 /* If size is less than four we are sure that the operand fits,
16112 but if it's 4, then it could be that the displacement is larger
16113 than -/+ 2GB. */
16114 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
16115 && object_64bit
16116 && ((addressT) (displacement_from_opcode_start - extension
16117 + ((addressT) 1 << 31))
16118 > (((addressT) 2 << 31) - 1)))
16120 as_bad_where (fragP->fr_file, fragP->fr_line,
16121 _("jump target out of range"));
16122 /* Make us emit 0. */
16123 displacement_from_opcode_start = extension;
16125 /* Now put displacement after opcode. */
16126 md_number_to_chars ((char *) where_to_put_displacement,
16127 (valueT) (displacement_from_opcode_start - extension),
16128 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
16129 fragP->fr_fix += extension;
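/* The COND_JUMP BIG case above, in bytes: a relaxed `je label' turns
   0x74 <rel8> into 0x0F 0x84 <rel32>; opcode[1] becomes
   0x74 + 0x10 == 0x84 behind the 0x0F escape, and the 4-byte
   displacement lands at &opcode[2].  */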
16132 /* Apply a fixup (fixP) to segment data, once it has been determined
16133 by our caller that we have all the info we need to fix it up.
16135 Parameter valP is the pointer to the value of the bits.
16137 On the 386, immediates, displacements, and data pointers are all in
16138 the same (little-endian) format, so we don't need to care about which
16139 we are handling. */
16141 void
16142 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
16144 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
16145 valueT value = *valP;
16147 #if !defined (TE_Mach)
16148 if (fixP->fx_pcrel)
16150 switch (fixP->fx_r_type)
16152 default:
16153 break;
16155 case BFD_RELOC_64:
16156 fixP->fx_r_type = BFD_RELOC_64_PCREL;
16157 break;
16158 case BFD_RELOC_32:
16159 case BFD_RELOC_X86_64_32S:
16160 fixP->fx_r_type = BFD_RELOC_32_PCREL;
16161 break;
16162 case BFD_RELOC_16:
16163 fixP->fx_r_type = BFD_RELOC_16_PCREL;
16164 break;
16165 case BFD_RELOC_8:
16166 fixP->fx_r_type = BFD_RELOC_8_PCREL;
16167 break;
16171 if (fixP->fx_addsy != NULL
16172 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
16173 || fixP->fx_r_type == BFD_RELOC_64_PCREL
16174 || fixP->fx_r_type == BFD_RELOC_16_PCREL
16175 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
16176 && !use_rela_relocations)
16178 /* This is a hack. There should be a better way to handle this.
16179 This covers for the fact that bfd_install_relocation will
16180 subtract the current location (for partial_inplace, PC relative
16181 relocations); see more below. */
16182 #if defined (OBJ_ELF) || defined (TE_PE)
16183 value += fixP->fx_where + fixP->fx_frag->fr_address;
16184 #endif
16185 #ifdef OBJ_ELF
16186 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
16188 if ((sym_seg == seg
16189 || (symbol_section_p (fixP->fx_addsy)
16190 && sym_seg != absolute_section))
16191 && !generic_force_reloc (fixP))
16193 /* Yes, we add the values in twice. This is because
16194 bfd_install_relocation subtracts them out again. I think
16195 bfd_install_relocation is broken, but I don't dare change
16196 it. FIXME. */
16197 value += fixP->fx_where + fixP->fx_frag->fr_address;
16199 #endif
16200 #if defined (OBJ_COFF) && defined (TE_PE)
16201 /* For some reason, the PE format does not store a
16202 section address offset for a PC relative symbol. */
16203 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
16204 || S_IS_WEAK (fixP->fx_addsy))
16205 value += md_pcrel_from (fixP);
16206 #endif
16208 #if defined (OBJ_COFF) && defined (TE_PE)
16209 if (fixP->fx_addsy != NULL
16210 && S_IS_WEAK (fixP->fx_addsy)
16211 /* PR 16858: Do not modify weak function references. */
16212 && ! fixP->fx_pcrel)
16214 #if !defined (TE_PEP)
16215 /* For x86 PE, weak function symbols are neither PC-relative
16216 nor marked S_IS_FUNCTION. So the only reliable way
16217 to detect them is to check the flags of their containing
16218 section. */
16219 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
16220 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
16222 else
16223 #endif
16224 value -= S_GET_VALUE (fixP->fx_addsy);
16226 #endif
16228 /* Fix a few things - the dynamic linker expects certain values here,
16229 and we must not disappoint it. */
16230 #ifdef OBJ_ELF
16231 if (fixP->fx_addsy)
16232 switch (fixP->fx_r_type)
16234 case BFD_RELOC_386_PLT32:
16235 case BFD_RELOC_X86_64_PLT32:
16236 /* Make the jump instruction point to the address of the operand.
16237 At runtime we merely add the offset to the actual PLT entry.
16238 NB: Subtract the offset size only for jump instructions. */
16239 if (fixP->fx_pcrel)
16240 value = -4;
16241 break;
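/* Illustration: for "call foo@PLT" the fixup covers the 4-byte
   immediate of the 0xe8 call, while the CPU computes the target
   relative to the end of the instruction.  The -4 addend cancels the
   size of that field, so the linker's "L + A - P" lands exactly on
   the PLT entry.  */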
16243 case BFD_RELOC_386_TLS_GD:
16244 case BFD_RELOC_386_TLS_LDM:
16245 case BFD_RELOC_386_TLS_IE_32:
16246 case BFD_RELOC_386_TLS_IE:
16247 case BFD_RELOC_386_TLS_GOTIE:
16248 case BFD_RELOC_386_TLS_GOTDESC:
16249 case BFD_RELOC_X86_64_TLSGD:
16250 case BFD_RELOC_X86_64_TLSLD:
16251 case BFD_RELOC_X86_64_GOTTPOFF:
16252 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
16253 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
16254 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
16255 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
16256 value = 0; /* Fully resolved at runtime. No addend. */
16257 /* Fallthrough */
16258 case BFD_RELOC_386_TLS_LE:
16259 case BFD_RELOC_386_TLS_LDO_32:
16260 case BFD_RELOC_386_TLS_LE_32:
16261 case BFD_RELOC_X86_64_DTPOFF32:
16262 case BFD_RELOC_X86_64_DTPOFF64:
16263 case BFD_RELOC_X86_64_TPOFF32:
16264 case BFD_RELOC_X86_64_TPOFF64:
16265 S_SET_THREAD_LOCAL (fixP->fx_addsy);
16266 break;
16268 case BFD_RELOC_386_TLS_DESC_CALL:
16269 case BFD_RELOC_X86_64_TLSDESC_CALL:
16270 value = 0; /* Fully resolved at runtime. No addend. */
16271 S_SET_THREAD_LOCAL (fixP->fx_addsy);
16272 fixP->fx_done = 0;
16273 return;
16275 case BFD_RELOC_VTABLE_INHERIT:
16276 case BFD_RELOC_VTABLE_ENTRY:
16277 fixP->fx_done = 0;
16278 return;
16280 default:
16281 break;
16283 #endif /* OBJ_ELF */
16285 /* If not 64bit, massage the value to account for wraparound when !BFD64. */
16286 if (!object_64bit)
16287 value = extend_to_32bit_address (value);
16289 *valP = value;
16290 #endif /* !defined (TE_Mach) */
16292 /* Are we finished with this relocation now? */
16293 if (fixP->fx_addsy == NULL)
16295 fixP->fx_done = 1;
16296 switch (fixP->fx_r_type)
16298 case BFD_RELOC_X86_64_32S:
16299 fixP->fx_signed = 1;
16300 break;
16302 default:
16303 break;
16306 #if defined (OBJ_COFF) && defined (TE_PE)
16307 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
16309 fixP->fx_done = 0;
16310 /* Remember value for tc_gen_reloc. */
16311 fixP->fx_addnumber = value;
16312 /* Clear out the frag for now. */
16313 value = 0;
16315 #endif
16316 else if (use_rela_relocations)
16318 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
16319 fixP->fx_no_overflow = 1;
16320 /* Remember value for tc_gen_reloc. */
16321 fixP->fx_addnumber = value;
16322 value = 0;
16325 md_number_to_chars (p, value, fixP->fx_size);
16328 const char *
16329 md_atof (int type, char *litP, int *sizeP)
16331 /* This outputs the LITTLENUMs in REVERSE order;
16332 in accord with the little-endian 386. */
16333 return ieee_md_atof (type, litP, sizeP, false);
16336 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
16338 static char *
16339 output_invalid (int c)
16341 if (ISPRINT (c))
16342 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
16343 "'%c'", c);
16344 else
16345 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
16346 "(0x%x)", (unsigned char) c);
16347 return output_invalid_buf;
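/* E.g. output_invalid ('~') yields "'~'", while a non-printable byte
   such as 0x01 yields "(0x1)".  */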
16350 /* Verify that @r can be used in the current context. */
16352 static bool check_register (const reg_entry *r)
16354 if (allow_pseudo_reg)
16355 return true;
16357 if (operand_type_all_zero (&r->reg_type))
16358 return false;
16360 if ((r->reg_type.bitfield.dword
16361 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
16362 || r->reg_type.bitfield.class == RegCR
16363 || r->reg_type.bitfield.class == RegDR)
16364 && !cpu_arch_flags.bitfield.cpui386)
16365 return false;
16367 if (r->reg_type.bitfield.class == RegTR
16368 && (flag_code == CODE_64BIT
16369 || !cpu_arch_flags.bitfield.cpui386
16370 || cpu_arch_isa_flags.bitfield.cpui586
16371 || cpu_arch_isa_flags.bitfield.cpui686))
16372 return false;
16374 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
16375 return false;
16377 if (!cpu_arch_flags.bitfield.cpuavx512f)
16379 if (r->reg_type.bitfield.zmmword
16380 || r->reg_type.bitfield.class == RegMask)
16381 return false;
16383 if (!cpu_arch_flags.bitfield.cpuavx)
16385 if (r->reg_type.bitfield.ymmword)
16386 return false;
16388 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
16389 return false;
16393 if (r->reg_type.bitfield.zmmword)
16395 if (vector_size < VSZ512)
16396 return false;
16398 /* Don't update pp when not dealing with insn operands. */
16399 switch (current_templates.start ? pp.encoding : encoding_evex)
16401 case encoding_default:
16402 case encoding_egpr:
16403 pp.encoding = encoding_evex512;
16404 break;
16405 case encoding_evex:
16406 case encoding_evex512:
16407 break;
16408 default:
16409 pp.encoding = encoding_error;
16410 break;
16414 if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
16415 return false;
16417 if (r->reg_type.bitfield.tmmword
16418 && (!cpu_arch_flags.bitfield.cpuamx_tile
16419 || flag_code != CODE_64BIT))
16420 return false;
16422 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
16423 return false;
16425 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
16426 if (!allow_index_reg && r->reg_num == RegIZ)
16427 return false;
16429 /* Upper 16 vector registers are only available with VREX in 64bit
16430 mode, and require EVEX encoding. */
16431 if (r->reg_flags & RegVRex)
16433 if (!cpu_arch_flags.bitfield.cpuavx512f
16434 || flag_code != CODE_64BIT)
16435 return false;
16437 /* Don't update pp when not dealing with insn operands. */
16438 switch (current_templates.start ? pp.encoding : encoding_evex)
16440 case encoding_default:
16441 case encoding_egpr:
16442 case encoding_evex512:
16443 pp.encoding = encoding_evex;
16444 break;
16445 case encoding_evex:
16446 break;
16447 default:
16448 pp.encoding = encoding_error;
16449 break;
16453 if (r->reg_flags & RegRex2)
16455 if (!cpu_arch_flags.bitfield.cpuapx_f
16456 || flag_code != CODE_64BIT)
16457 return false;
16459 /* Don't update pp when not dealing with insn operands. */
16460 switch (current_templates.start ? pp.encoding : encoding_egpr)
16462 case encoding_default:
16463 pp.encoding = encoding_egpr;
16464 break;
16465 case encoding_egpr:
16466 case encoding_evex:
16467 case encoding_evex512:
16468 break;
16469 default:
16470 pp.encoding = encoding_error;
16471 break;
16475 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
16476 && (!cpu_arch_flags.bitfield.cpu64
16477 || r->reg_type.bitfield.class != RegCR
16478 || dot_insn ())
16479 && flag_code != CODE_64BIT)
16480 return false;
16482 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
16483 && !intel_syntax)
16484 return false;
16486 return true;
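/* Illustrative rejections: after ".arch i286", %eax fails the dword
   check above; outside 64-bit mode, %r8 (RegRex) and %xmm16 (RegVRex)
   are refused, the latter also forcing EVEX encoding where it is
   accepted.  */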
16489 /* REG_STRING starts *before* REGISTER_PREFIX. */
16491 static const reg_entry *
16492 parse_real_register (const char *reg_string, char **end_op)
16494 const char *s = reg_string;
16495 char *p;
16496 char reg_name_given[MAX_REG_NAME_SIZE + 1];
16497 const reg_entry *r;
16499 /* Skip possible REGISTER_PREFIX and possible whitespace. */
16500 if (*s == REGISTER_PREFIX)
16501 ++s;
16503 if (is_space_char (*s))
16504 ++s;
16506 p = reg_name_given;
16507 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
16509 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
16510 return (const reg_entry *) NULL;
16511 s++;
16514 if (is_part_of_name (*s))
16515 return (const reg_entry *) NULL;
16517 *end_op = (char *) s;
16519 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
16521 /* Handle floating point regs, allowing spaces in the (i) part. */
16522 if (r == reg_st0)
16524 if (!cpu_arch_flags.bitfield.cpu8087
16525 && !cpu_arch_flags.bitfield.cpu287
16526 && !cpu_arch_flags.bitfield.cpu387
16527 && !allow_pseudo_reg)
16528 return (const reg_entry *) NULL;
16530 if (is_space_char (*s))
16531 ++s;
16532 if (*s == '(')
16534 ++s;
16535 if (is_space_char (*s))
16536 ++s;
16537 if (*s >= '0' && *s <= '7')
16539 int fpr = *s - '0';
16540 ++s;
16541 if (is_space_char (*s))
16542 ++s;
16543 if (*s == ')')
16545 *end_op = (char *) s + 1;
16546 know (r[fpr].reg_num == fpr);
16547 return r + fpr;
16550 /* We have "%st(" then garbage. */
16551 return (const reg_entry *) NULL;
16555 return r && check_register (r) ? r : NULL;
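/* Hence "%st(1)" and "%st ( 1 )" resolve to the same reg_entry, with
   *end_op left just past the closing parenthesis.  */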
16558 /* REG_STRING starts *before* REGISTER_PREFIX. */
16560 static const reg_entry *
16561 parse_register (const char *reg_string, char **end_op)
16563 const reg_entry *r;
16565 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
16566 r = parse_real_register (reg_string, end_op);
16567 else
16568 r = NULL;
16569 if (!r)
16571 char *save = input_line_pointer;
16572 char *buf = xstrdup (reg_string), *name;
16573 symbolS *symbolP;
16575 input_line_pointer = buf;
16576 get_symbol_name (&name);
16577 symbolP = symbol_find (name);
16578 while (symbolP && symbol_equated_p (symbolP))
16580 const expressionS *e = symbol_get_value_expression (symbolP);
16582 if (e->X_add_number)
16583 break;
16584 symbolP = e->X_add_symbol;
16586 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
16588 const expressionS *e = symbol_get_value_expression (symbolP);
16590 if (e->X_op == O_register)
16592 know (e->X_add_number >= 0
16593 && (valueT) e->X_add_number < i386_regtab_size);
16594 r = i386_regtab + e->X_add_number;
16595 *end_op = (char *) reg_string + (input_line_pointer - buf);
16597 if (r && !check_register (r))
16599 as_bad (_("register '%s%s' cannot be used here"),
16600 register_prefix, r->reg_name);
16601 r = &bad_reg;
16604 input_line_pointer = save;
16605 free (buf);
16607 return r;
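/* The symbol lookup above makes register equates work: after
   ".set ptr, %rdi", an operand "ptr" is chased through the chain of
   zero-offset equates down to the reg_entry for %rdi.  */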
16611 i386_parse_name (char *name, expressionS *e, char *nextcharP)
16613 const reg_entry *r = NULL;
16614 char *end = input_line_pointer;
16616 /* We only know the terminating character here. If it is a double quote,
16617 it could be the closing one of a quoted symbol name, or an opening one
16618 from a following string (or another quoted symbol name). Since the latter
16619 can't be valid syntax for anything, bailing in either case is good enough. */
16620 if (*nextcharP == '"')
16621 return 0;
16623 *end = *nextcharP;
16624 if (*name == REGISTER_PREFIX || allow_naked_reg)
16625 r = parse_real_register (name, &input_line_pointer);
16626 if (r && end <= input_line_pointer)
16628 *nextcharP = *input_line_pointer;
16629 *input_line_pointer = 0;
16630 e->X_op = O_register;
16631 e->X_add_number = r - i386_regtab;
16632 return 1;
16634 input_line_pointer = end;
16635 *end = 0;
16636 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
16639 void
16640 md_operand (expressionS *e)
16642 char *end;
16643 const reg_entry *r;
16645 switch (*input_line_pointer)
16647 case REGISTER_PREFIX:
16648 r = parse_real_register (input_line_pointer, &end);
16649 if (r)
16651 e->X_op = O_register;
16652 e->X_add_number = r - i386_regtab;
16653 input_line_pointer = end;
16655 break;
16657 case '[':
16658 gas_assert (intel_syntax);
16659 end = input_line_pointer++;
16660 expression (e);
16661 if (*input_line_pointer == ']')
16663 ++input_line_pointer;
16664 e->X_op_symbol = make_expr_symbol (e);
16665 e->X_add_symbol = NULL;
16666 e->X_add_number = 0;
16667 e->X_op = O_index;
16669 else
16671 e->X_op = O_absent;
16672 input_line_pointer = end;
16674 break;
16678 #ifdef BFD64
16679 /* To maintain consistency with !BFD64 builds of gas, record whether any
16680 (binary) operator was involved in an expression. As expressions are
16681 evaluated in only 32 bits when !BFD64, we use this to decide whether to
16682 truncate results. */
16683 bool i386_record_operator (operatorT op,
16684 const expressionS *left,
16685 const expressionS *right)
16687 if (op == O_absent)
16688 return false;
16690 if (!left)
16692 /* Since the expression parser applies unary operators fine to bignum
16693 operands, we don't need to be concerned about respective operands not
16694 fitting in 32 bits. */
16695 if (right->X_op == O_constant && right->X_unsigned
16696 && !fits_in_unsigned_long (right->X_add_number))
16697 return false;
16699 /* This isn't entirely right: The pattern can also result when constant
16700 expressions are folded (e.g. 0xffffffff + 1). */
16701 else if ((left->X_op == O_constant && left->X_unsigned
16702 && !fits_in_unsigned_long (left->X_add_number))
16703 || (right->X_op == O_constant && right->X_unsigned
16704 && !fits_in_unsigned_long (right->X_add_number)))
16705 expr_mode = expr_large_value;
16707 if (expr_mode != expr_large_value)
16708 expr_mode = expr_operator_present;
16710 return false;
16712 #endif
16714 #ifdef OBJ_ELF
16715 const char md_shortopts[] = "kVQ:sqnO::";
16716 #else
16717 const char md_shortopts[] = "qnO::";
16718 #endif
16720 #define OPTION_32 (OPTION_MD_BASE + 0)
16721 #define OPTION_64 (OPTION_MD_BASE + 1)
16722 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
16723 #define OPTION_MARCH (OPTION_MD_BASE + 3)
16724 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
16725 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
16726 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
16727 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
16728 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
16729 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
16730 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
16731 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
16732 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
16733 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
16734 #define OPTION_X32 (OPTION_MD_BASE + 14)
16735 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
16736 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
16737 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
16738 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
16739 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
16740 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
16741 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
16742 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
16743 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
16744 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
16745 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
16746 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
16747 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
16748 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
16749 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
16750 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
16751 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
16752 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
16753 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
16754 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
16755 #define OPTION_MTLS_CHECK (OPTION_MD_BASE + 35)
16757 const struct option md_longopts[] =
16759 {"32", no_argument, NULL, OPTION_32},
16760 #if (defined (OBJ_ELF) || defined (TE_PE) || defined (OBJ_MACH_O)) \
16761 && defined (BFD64)
16762 {"64", no_argument, NULL, OPTION_64},
16763 #endif
16764 #ifdef OBJ_ELF
16765 # ifdef BFD64
16766 {"x32", no_argument, NULL, OPTION_X32},
16767 # endif
16768 {"mshared", no_argument, NULL, OPTION_MSHARED},
16769 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
16770 #endif
16771 {"divide", no_argument, NULL, OPTION_DIVIDE},
16772 {"march", required_argument, NULL, OPTION_MARCH},
16773 {"mtune", required_argument, NULL, OPTION_MTUNE},
16774 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
16775 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
16776 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
16777 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
16778 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
16779 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
16780 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
16781 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
16782 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
16783 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
16784 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
16785 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
16786 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
16787 # if defined (TE_PE) || defined (TE_PEP)
16788 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
16789 #endif
16790 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
16791 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
16792 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
16793 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
16794 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
16795 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
16796 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
16797 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
16798 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
16799 {"mlfence-before-indirect-branch", required_argument, NULL,
16800 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
16801 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
16802 {"mamd64", no_argument, NULL, OPTION_MAMD64},
16803 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
16804 {"mtls-check", required_argument, NULL, OPTION_MTLS_CHECK},
16805 {NULL, no_argument, NULL, 0}
16807 const size_t md_longopts_size = sizeof (md_longopts);
16810 md_parse_option (int c, const char *arg)
16812 unsigned int j;
16813 char *arch, *next, *saved, *type;
16815 switch (c)
16817 case 'n':
16818 optimize_align_code = 0;
16819 break;
16821 case 'q':
16822 quiet_warnings = 1;
16823 break;
16825 #ifdef OBJ_ELF
16826 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
16827 should be emitted or not. FIXME: Not implemented. */
16828 case 'Q':
16829 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
16830 return 0;
16831 break;
16833 /* -V: SVR4 argument to print version ID. */
16834 case 'V':
16835 print_version_id ();
16836 break;
16838 /* -k: Ignore for FreeBSD compatibility. */
16839 case 'k':
16840 break;
16842 case 's':
16843 /* -s: On i386 Solaris, this tells the native assembler to use
16844 .stab instead of .stab.excl. We always use .stab anyhow. */
16845 break;
16847 case OPTION_MSHARED:
16848 shared = 1;
16849 break;
16851 case OPTION_X86_USED_NOTE:
16852 if (strcasecmp (arg, "yes") == 0)
16853 x86_used_note = 1;
16854 else if (strcasecmp (arg, "no") == 0)
16855 x86_used_note = 0;
16856 else
16857 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
16858 break;
16859 #endif
16861 #ifdef BFD64
16863 #if (defined (OBJ_ELF) || defined (TE_PE) || defined (OBJ_MACH_O))
16864 case OPTION_64:
16866 const char **list, **l;
16868 list = bfd_target_list ();
16869 for (l = list; *l != NULL; l++)
16870 if (startswith (*l, "elf64-x86-64")
16871 || strcmp (*l, "coff-x86-64") == 0
16872 || strcmp (*l, "pe-x86-64") == 0
16873 || strcmp (*l, "pei-x86-64") == 0
16874 || strcmp (*l, "mach-o-x86-64") == 0)
16876 default_arch = "x86_64";
16877 break;
16879 if (*l == NULL)
16880 as_fatal (_("no compiled in support for x86_64"));
16881 free (list);
16883 break;
16884 #endif
16886 #ifdef OBJ_ELF
16887 case OPTION_X32:
16889 const char **list, **l;
16891 list = bfd_target_list ();
16892 for (l = list; *l != NULL; l++)
16893 if (startswith (*l, "elf32-x86-64"))
16895 default_arch = "x86_64:32";
16896 break;
16898 if (*l == NULL)
16899 as_fatal (_("no compiled in support for 32bit x86_64"));
16900 free (list);
16902 break;
16903 #endif
16905 #endif /* BFD64 */
16907 case OPTION_32:
16909 const char **list, **l;
16911 list = bfd_target_list ();
16912 for (l = list; *l != NULL; l++)
16913 if (strstr (*l, "-i386")
16914 || strstr (*l, "-go32"))
16916 default_arch = "i386";
16917 break;
16919 if (*l == NULL)
16920 as_fatal (_("no compiled in support for ix86"));
16921 free (list);
16923 break;
16925 case OPTION_DIVIDE:
16926 #ifdef SVR4_COMMENT_CHARS
16928 char *n, *t;
16929 const char *s;
16931 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
16932 t = n;
16933 for (s = i386_comment_chars; *s != '\0'; s++)
16934 if (*s != '/')
16935 *t++ = *s;
16936 *t = '\0';
16937 i386_comment_chars = n;
16939 #endif
16940 break;
16942 case OPTION_MARCH:
16943 saved = xstrdup (arg);
16944 arch = saved;
16945 /* Allow -march=+nosse. */
16946 if (*arch == '+')
16947 arch++;
16950 char *vsz;
16952 if (*arch == '.')
16953 as_fatal (_("invalid -march= option: `%s'"), arg);
16954 next = strchr (arch, '+');
16955 if (next)
16956 *next++ = '\0';
16957 vsz = strchr (arch, '/');
16958 if (vsz)
16959 *vsz++ = '\0';
16960 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16962 if (vsz && cpu_arch[j].vsz != vsz_set)
16963 continue;
16965 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
16966 && strcmp (arch, cpu_arch[j].name) == 0)
16968 /* Processor. */
16969 if (! cpu_arch[j].enable.bitfield.cpui386)
16970 continue;
16972 cpu_arch_name = cpu_arch[j].name;
16973 free (cpu_sub_arch_name);
16974 cpu_sub_arch_name = NULL;
16975 cpu_arch_flags = cpu_arch[j].enable;
16976 cpu_arch_isa = cpu_arch[j].type;
16977 cpu_arch_isa_flags = cpu_arch[j].enable;
16978 if (!cpu_arch_tune_set)
16979 cpu_arch_tune = cpu_arch_isa;
16980 vector_size = VSZ_DEFAULT;
16981 break;
16983 else if (cpu_arch[j].type == PROCESSOR_NONE
16984 && strcmp (arch, cpu_arch[j].name) == 0
16985 && !cpu_flags_all_zero (&cpu_arch[j].enable))
16987 /* ISA extension. */
16988 isa_enable (j);
16990 switch (cpu_arch[j].vsz)
16992 default:
16993 break;
16995 case vsz_set:
16996 if (vsz)
16998 char *end;
16999 unsigned long val = strtoul (vsz, &end, 0);
17001 if (*end)
17002 val = 0;
17003 switch (val)
17005 case 512: vector_size = VSZ512; break;
17006 case 256: vector_size = VSZ256; break;
17007 case 128: vector_size = VSZ128; break;
17008 default:
17009 as_warn (_("Unrecognized vector size specifier ignored"));
17010 break;
17012 break;
17014 /* Fall through. */
17015 case vsz_reset:
17016 vector_size = VSZ_DEFAULT;
17017 break;
17020 break;
17024 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
17026 /* Disable an ISA extension. */
17027 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
17028 if (cpu_arch[j].type == PROCESSOR_NONE
17029 && strcmp (arch + 2, cpu_arch[j].name) == 0)
17031 isa_disable (j);
17032 if (cpu_arch[j].vsz == vsz_set)
17033 vector_size = VSZ_DEFAULT;
17034 break;
17038 if (j >= ARRAY_SIZE (cpu_arch))
17039 as_fatal (_("invalid -march= option: `%s'"), arg);
17041 arch = next;
17043 while (next != NULL);
17044 free (saved);
17045 break;
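/* Accepted -march= shapes (illustrative): "-march=skylake+noavx"
   picks a processor then disables an extension, "-march=+nosse" only
   adjusts the current ISA, and entries with vsz_set accept a
   vector-size suffix, as in "-march=+avx10.1/256".  */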
17047 case OPTION_MTUNE:
17048 if (*arg == '.')
17049 as_fatal (_("invalid -mtune= option: `%s'"), arg);
17050 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
17052 if (cpu_arch[j].type != PROCESSOR_NONE
17053 && strcmp (arg, cpu_arch[j].name) == 0)
17055 cpu_arch_tune_set = 1;
17056 cpu_arch_tune = cpu_arch [j].type;
17057 break;
17060 if (j >= ARRAY_SIZE (cpu_arch))
17061 as_fatal (_("invalid -mtune= option: `%s'"), arg);
17062 break;
17064 case OPTION_MMNEMONIC:
17065 if (strcasecmp (arg, "att") == 0)
17066 intel_mnemonic = 0;
17067 else if (strcasecmp (arg, "intel") == 0)
17068 intel_mnemonic = 1;
17069 else
17070 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
17071 break;
17073 case OPTION_MSYNTAX:
17074 if (strcasecmp (arg, "att") == 0)
17075 _set_intel_syntax (0);
17076 else if (strcasecmp (arg, "intel") == 0)
17077 _set_intel_syntax (1);
17078 else
17079 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
17080 break;
17082 case OPTION_MINDEX_REG:
17083 allow_index_reg = 1;
17084 break;
17086 case OPTION_MNAKED_REG:
17087 allow_naked_reg = 1;
17088 register_prefix = "";
17089 break;
17091 case OPTION_MSSE2AVX:
17092 sse2avx = 1;
17093 break;
17095 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
17096 use_unaligned_vector_move = 1;
17097 break;
17099 case OPTION_MSSE_CHECK:
17100 if (strcasecmp (arg, "error") == 0)
17101 sse_check = check_error;
17102 else if (strcasecmp (arg, "warning") == 0)
17103 sse_check = check_warning;
17104 else if (strcasecmp (arg, "none") == 0)
17105 sse_check = check_none;
17106 else
17107 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
17108 break;
17110 case OPTION_MOPERAND_CHECK:
17111 if (strcasecmp (arg, "error") == 0)
17112 operand_check = check_error;
17113 else if (strcasecmp (arg, "warning") == 0)
17114 operand_check = check_warning;
17115 else if (strcasecmp (arg, "none") == 0)
17116 operand_check = check_none;
17117 else
17118 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
17119 break;
17121 case OPTION_MAVXSCALAR:
17122 if (strcasecmp (arg, "128") == 0)
17123 avxscalar = vex128;
17124 else if (strcasecmp (arg, "256") == 0)
17125 avxscalar = vex256;
17126 else
17127 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
17128 break;
17130 case OPTION_MVEXWIG:
17131 if (strcmp (arg, "0") == 0)
17132 vexwig = vexw0;
17133 else if (strcmp (arg, "1") == 0)
17134 vexwig = vexw1;
17135 else
17136 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
17137 break;
17139 case OPTION_MADD_BND_PREFIX:
17140 add_bnd_prefix = 1;
17141 break;
17143 case OPTION_MEVEXLIG:
17144 if (strcmp (arg, "128") == 0)
17145 evexlig = evexl128;
17146 else if (strcmp (arg, "256") == 0)
17147 evexlig = evexl256;
17148 else if (strcmp (arg, "512") == 0)
17149 evexlig = evexl512;
17150 else
17151 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
17152 break;
17154 case OPTION_MEVEXRCIG:
17155 if (strcmp (arg, "rne") == 0)
17156 evexrcig = rne;
17157 else if (strcmp (arg, "rd") == 0)
17158 evexrcig = rd;
17159 else if (strcmp (arg, "ru") == 0)
17160 evexrcig = ru;
17161 else if (strcmp (arg, "rz") == 0)
17162 evexrcig = rz;
17163 else
17164 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
17165 break;
17167 case OPTION_MEVEXWIG:
17168 if (strcmp (arg, "0") == 0)
17169 evexwig = evexw0;
17170 else if (strcmp (arg, "1") == 0)
17171 evexwig = evexw1;
17172 else
17173 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
17174 break;
17176 # if defined (TE_PE) || defined (TE_PEP)
17177 case OPTION_MBIG_OBJ:
17178 use_big_obj = 1;
17179 break;
17180 #endif
17182 case OPTION_MOMIT_LOCK_PREFIX:
17183 if (strcasecmp (arg, "yes") == 0)
17184 omit_lock_prefix = 1;
17185 else if (strcasecmp (arg, "no") == 0)
17186 omit_lock_prefix = 0;
17187 else
17188 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
17189 break;
17191 case OPTION_MFENCE_AS_LOCK_ADD:
17192 if (strcasecmp (arg, "yes") == 0)
17193 avoid_fence = 1;
17194 else if (strcasecmp (arg, "no") == 0)
17195 avoid_fence = 0;
17196 else
17197 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
17198 break;
17200 case OPTION_MLFENCE_AFTER_LOAD:
17201 if (strcasecmp (arg, "yes") == 0)
17202 lfence_after_load = 1;
17203 else if (strcasecmp (arg, "no") == 0)
17204 lfence_after_load = 0;
17205 else
17206 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
17207 break;
17209 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
17210 if (strcasecmp (arg, "all") == 0)
17212 lfence_before_indirect_branch = lfence_branch_all;
17213 if (lfence_before_ret == lfence_before_ret_none)
17214 lfence_before_ret = lfence_before_ret_shl;
17216 else if (strcasecmp (arg, "memory") == 0)
17217 lfence_before_indirect_branch = lfence_branch_memory;
17218 else if (strcasecmp (arg, "register") == 0)
17219 lfence_before_indirect_branch = lfence_branch_register;
17220 else if (strcasecmp (arg, "none") == 0)
17221 lfence_before_indirect_branch = lfence_branch_none;
17222 else
17223 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
17224 arg);
17225 break;
17227 case OPTION_MLFENCE_BEFORE_RET:
17228 if (strcasecmp (arg, "or") == 0)
17229 lfence_before_ret = lfence_before_ret_or;
17230 else if (strcasecmp (arg, "not") == 0)
17231 lfence_before_ret = lfence_before_ret_not;
17232 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
17233 lfence_before_ret = lfence_before_ret_shl;
17234 else if (strcasecmp (arg, "none") == 0)
17235 lfence_before_ret = lfence_before_ret_none;
17236 else
17237 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
17238 arg);
17239 break;
17241 case OPTION_MRELAX_RELOCATIONS:
17242 if (strcasecmp (arg, "yes") == 0)
17243 generate_relax_relocations = 1;
17244 else if (strcasecmp (arg, "no") == 0)
17245 generate_relax_relocations = 0;
17246 else
17247 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
17248 break;
17250 case OPTION_MALIGN_BRANCH_BOUNDARY:
17252 char *end;
17253 long int align = strtoul (arg, &end, 0);
17254 if (*end == '\0')
17256 if (align == 0)
17258 align_branch_power = 0;
17259 break;
17261 else if (align >= 16)
17263 int align_power;
17264 for (align_power = 0;
17265 (align & 1) == 0;
17266 align >>= 1, align_power++)
17267 continue;
17268 /* Limit alignment power to 31. */
17269 if (align == 1 && align_power < 32)
17271 align_branch_power = align_power;
17272 break;
17276 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
17278 break;
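/* E.g. "-malign-branch-boundary=32": 32 >= 16 and is a power of two,
   so the loop shifts it down to 1 in five steps and sets
   align_branch_power = 5.  A non-power-of-two such as 24 leaves
   align != 1 and is rejected.  */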
17280 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
17282 char *end;
17283 int align = strtoul (arg, &end, 0);
17284 /* Some processors only support 5 prefixes. */
17285 if (*end == '\0' && align >= 0 && align < 6)
17287 align_branch_prefix_size = align;
17288 break;
17290 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
17291 arg);
17293 break;
17295 case OPTION_MALIGN_BRANCH:
17296 align_branch = 0;
17297 saved = xstrdup (arg);
17298 type = saved;
17301 next = strchr (type, '+');
17302 if (next)
17303 *next++ = '\0';
17304 if (strcasecmp (type, "jcc") == 0)
17305 align_branch |= align_branch_jcc_bit;
17306 else if (strcasecmp (type, "fused") == 0)
17307 align_branch |= align_branch_fused_bit;
17308 else if (strcasecmp (type, "jmp") == 0)
17309 align_branch |= align_branch_jmp_bit;
17310 else if (strcasecmp (type, "call") == 0)
17311 align_branch |= align_branch_call_bit;
17312 else if (strcasecmp (type, "ret") == 0)
17313 align_branch |= align_branch_ret_bit;
17314 else if (strcasecmp (type, "indirect") == 0)
17315 align_branch |= align_branch_indirect_bit;
17316 else
17317 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
17318 type = next;
17320 while (next != NULL);
17321 free (saved);
17322 break;
17324 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
17325 align_branch_power = 5;
17326 align_branch_prefix_size = 5;
17327 align_branch = (align_branch_jcc_bit
17328 | align_branch_fused_bit
17329 | align_branch_jmp_bit);
17330 break;
17332 case OPTION_MAMD64:
17333 isa64 = amd64;
17334 break;
17336 case OPTION_MINTEL64:
17337 isa64 = intel64;
17338 break;
17340 case 'O':
17341 if (arg == NULL)
17343 optimize = 1;
17344 /* Turn off -Os. */
17345 optimize_for_space = 0;
17347 else if (*arg == 's')
17349 optimize_for_space = 1;
17350 /* Turn on all encoding optimizations. */
17351 optimize = INT_MAX;
17353 else
17355 optimize = atoi (arg);
17356 /* Turn off -Os. */
17357 optimize_for_space = 0;
17359 break;
17360 case OPTION_MTLS_CHECK:
17361 if (strcasecmp (arg, "yes") == 0)
17362 tls_check = true;
17363 else if (strcasecmp (arg, "no") == 0)
17364 tls_check = false;
17365 else
17366 as_fatal (_("invalid -mtls-check= option: `%s'"), arg);
17367 break;
17369 default:
17370 return 0;
17372 return 1;
17375 #define MESSAGE_TEMPLATE \
"                                                                                "
17378 static char *
17379 output_message (FILE *stream, char *p, char *message, char *start,
17380 int *left_p, const char *name, int len)
17382 int size = sizeof (MESSAGE_TEMPLATE);
17383 int left = *left_p;
17385 /* Reserve 2 spaces for ", " or ",\0" */
17386 left -= len + 2;
17388 /* Check if there is any room. */
17389 if (left >= 0)
17391 if (p != start)
17393 *p++ = ',';
17394 *p++ = ' ';
17396 p = mempcpy (p, name, len);
17398 else
17400 /* Output the current message now and start a new one. */
17401 *p++ = ',';
17402 *p = '\0';
17403 fprintf (stream, "%s\n", message);
17404 p = start;
17405 left = size - (start - message) - len - 2;
17407 gas_assert (left >= 0);
17409 p = mempcpy (p, name, len);
17412 *left_p = left;
17413 return p;
17416 static void
17417 show_arch (FILE *stream, int ext, int check)
17419 static char message[] = MESSAGE_TEMPLATE;
17420 char *start = message + 27;
17421 char *p;
17422 int size = sizeof (MESSAGE_TEMPLATE);
17423 int left;
17424 const char *name;
17425 int len;
17426 unsigned int j;
17428 p = start;
17429 left = size - (start - message);
17431 if (!ext && check)
17433 p = output_message (stream, p, message, start, &left,
17434 STRING_COMMA_LEN ("default"));
17435 p = output_message (stream, p, message, start, &left,
17436 STRING_COMMA_LEN ("push"));
17437 p = output_message (stream, p, message, start, &left,
17438 STRING_COMMA_LEN ("pop"));
17441 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
17443 /* Should it be skipped? */
17444 if (cpu_arch [j].skip)
17445 continue;
17447 name = cpu_arch [j].name;
17448 len = cpu_arch [j].len;
17449 if (cpu_arch[j].type == PROCESSOR_NONE)
17451 /* It is an extension. Skip if we aren't asked to show it. */
17452 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
17453 continue;
17455 else if (ext)
17457 /* It is a processor. Skip if we only show extensions. */
17458 continue;
17460 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
17462 /* It is an impossible processor - skip. */
17463 continue;
17466 p = output_message (stream, p, message, start, &left, name, len);
17469 /* Display disabled extensions. */
17470 if (ext)
17471 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
17473 char *str;
17475 if (cpu_arch[j].type != PROCESSOR_NONE
17476 || !cpu_flags_all_zero (&cpu_arch[j].enable))
17477 continue;
17478 str = xasprintf ("no%s", cpu_arch[j].name);
17479 p = output_message (stream, p, message, start, &left, str,
17480 strlen (str));
17481 free (str);
17484 *p = '\0';
17485 fprintf (stream, "%s\n", message);
17488 void
17489 md_show_usage (FILE *stream)
17491 #ifdef OBJ_ELF
17492 fprintf (stream, _("\
17493 -Qy, -Qn ignored\n\
17494 -V print assembler version number\n\
17495 -k ignored\n"));
17496 #endif
17497 fprintf (stream, _("\
17498 -n do not optimize code alignment\n\
17499 -O{012s} attempt some code optimizations\n\
17500 -q quieten some warnings\n"));
17501 #ifdef OBJ_ELF
17502 fprintf (stream, _("\
17503 -s ignored\n"));
17504 #endif
17505 #ifdef BFD64
17506 # ifdef OBJ_ELF
17507 fprintf (stream, _("\
17508 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
17509 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
17510 fprintf (stream, _("\
17511 --32/--64 generate 32bit/64bit object\n"));
17512 # endif
17513 #endif
17514 #ifdef SVR4_COMMENT_CHARS
17515 fprintf (stream, _("\
17516 --divide do not treat `/' as a comment character\n"));
17517 #else
17518 fprintf (stream, _("\
17519 --divide ignored\n"));
17520 #endif
17521 fprintf (stream, _("\
17522 -march=CPU[,+EXTENSION...]\n\
17523 generate code for CPU and EXTENSION, CPU is one of:\n"));
17524 show_arch (stream, 0, 1);
17525 fprintf (stream, _("\
17526 EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
17527 show_arch (stream, 1, 0);
17528 fprintf (stream, _("\
17529 -mtune=CPU optimize for CPU, CPU is one of:\n"));
17530 show_arch (stream, 0, 0);
17531 fprintf (stream, _("\
17532 -msse2avx encode SSE instructions with VEX prefix\n"));
17533 fprintf (stream, _("\
17534 -muse-unaligned-vector-move\n\
17535 encode aligned vector move as unaligned vector move\n"));
17536 fprintf (stream, _("\
17537 -msse-check=[none|error|warning] (default: none)\n\
17538 check SSE instructions\n"));
17539 fprintf (stream, _("\
17540 -moperand-check=[none|error|warning] (default: warning)\n\
17541 check operand combinations for validity\n"));
17542 fprintf (stream, _("\
17543 -mavxscalar=[128|256] (default: 128)\n\
17544 encode scalar AVX instructions with specific vector\n\
17545 length\n"));
17546 fprintf (stream, _("\
17547 -mvexwig=[0|1] (default: 0)\n\
17548 encode VEX instructions with specific VEX.W value\n\
17549 for VEX.W bit ignored instructions\n"));
17550 fprintf (stream, _("\
17551 -mevexlig=[128|256|512] (default: 128)\n\
17552 encode scalar EVEX instructions with specific vector\n\
17553 length\n"));
17554 fprintf (stream, _("\
17555 -mevexwig=[0|1] (default: 0)\n\
17556 encode EVEX instructions with specific EVEX.W value\n\
17557 for EVEX.W bit ignored instructions\n"));
17558 fprintf (stream, _("\
17559 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
17560 encode EVEX instructions with specific EVEX.RC value\n\
17561 for SAE-only ignored instructions\n"));
17562 fprintf (stream, _("\
17563 -mmnemonic=[att|intel] "));
17564 if (SYSV386_COMPAT)
17565 fprintf (stream, _("(default: att)\n"));
17566 else
17567 fprintf (stream, _("(default: intel)\n"));
17568 fprintf (stream, _("\
17569 use AT&T/Intel mnemonic (AT&T syntax only)\n"));
17570 fprintf (stream, _("\
17571 -msyntax=[att|intel] (default: att)\n\
17572 use AT&T/Intel syntax\n"));
17573 fprintf (stream, _("\
17574 -mindex-reg support pseudo index registers\n"));
17575 fprintf (stream, _("\
17576 -mnaked-reg don't require `%%' prefix for registers\n"));
17577 fprintf (stream, _("\
17578 -madd-bnd-prefix add BND prefix for all valid branches\n"));
17579 #ifdef OBJ_ELF
17580 fprintf (stream, _("\
17581 -mshared disable branch optimization for shared code\n"));
17582 fprintf (stream, _("\
17583 -mx86-used-note=[no|yes] "));
17584 if (DEFAULT_X86_USED_NOTE)
17585 fprintf (stream, _("(default: yes)\n"));
17586 else
17587 fprintf (stream, _("(default: no)\n"));
17588 fprintf (stream, _("\
17589 generate x86 used ISA and feature properties\n"));
17590 #endif
17591 #if defined (TE_PE) || defined (TE_PEP)
17592 fprintf (stream, _("\
17593 -mbig-obj generate big object files\n"));
17594 #endif
17595 fprintf (stream, _("\
17596 -momit-lock-prefix=[no|yes] (default: no)\n\
17597 strip all lock prefixes\n"));
17598 fprintf (stream, _("\
17599 -mfence-as-lock-add=[no|yes] (default: no)\n\
17600 encode lfence, mfence and sfence as\n\
17601 lock addl $0x0, (%%{re}sp)\n"));
17602 fprintf (stream, _("\
17603 -mrelax-relocations=[no|yes] "));
17604 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
17605 fprintf (stream, _("(default: yes)\n"));
17606 else
17607 fprintf (stream, _("(default: no)\n"));
17608 fprintf (stream, _("\
17609 generate relax relocations\n"));
17610 #ifdef OBJ_ELF
17611 fprintf (stream, _("\
17612 -mtls-check=[no|yes] "));
17613 if (DEFAULT_X86_TLS_CHECK)
17614 fprintf (stream, _("(default: yes)\n"));
17615 else
17616 fprintf (stream, _("(default: no)\n"));
17617 fprintf (stream, _("\
17618 check TLS relocation\n"));
17619 #endif
17620 fprintf (stream, _("\
17621 -malign-branch-boundary=NUM (default: 0)\n\
17622 align branches within NUM byte boundary\n"));
17623 fprintf (stream, _("\
17624 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
17625 TYPE is combination of jcc, fused, jmp, call, ret,\n\
17626 indirect\n\
17627 specify types of branches to align\n"));
17628 fprintf (stream, _("\
17629 -malign-branch-prefix-size=NUM (default: 5)\n\
17630 align branches with NUM prefixes per instruction\n"));
17631 fprintf (stream, _("\
17632 -mbranches-within-32B-boundaries\n\
17633 align branches within 32 byte boundary\n"));
17634 fprintf (stream, _("\
17635 -mlfence-after-load=[no|yes] (default: no)\n\
17636 generate lfence after load\n"));
17637 fprintf (stream, _("\
17638 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
17639 generate lfence before indirect near branch\n"));
17640 fprintf (stream, _("\
17641 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
17642 generate lfence before ret\n"));
17643 fprintf (stream, _("\
17644 -mamd64 accept only AMD64 ISA [default]\n"));
17645 fprintf (stream, _("\
17646 -mintel64 accept only Intel64 ISA\n"));
17649 #if (defined (OBJ_ELF) || defined (TE_PE) || defined (OBJ_MACH_O))
17651 /* Pick the target format to use. */
17653 const char *
17654 i386_target_format (void)
17656 if (startswith (default_arch, "x86_64"))
17658 update_code_flag (CODE_64BIT, 1);
17659 #ifdef OBJ_ELF
17660 if (default_arch[6] == '\0')
17661 x86_elf_abi = X86_64_ABI;
17662 else
17663 x86_elf_abi = X86_64_X32_ABI;
17664 #endif
17666 else if (!strcmp (default_arch, "i386"))
17667 update_code_flag (CODE_32BIT, 1);
17668 else if (!strcmp (default_arch, "iamcu"))
17670 update_code_flag (CODE_32BIT, 1);
17671 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
17673 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
17674 cpu_arch_name = "iamcu";
17675 free (cpu_sub_arch_name);
17676 cpu_sub_arch_name = NULL;
17677 cpu_arch_flags = iamcu_flags;
17678 cpu_arch_isa = PROCESSOR_IAMCU;
17679 cpu_arch_isa_flags = iamcu_flags;
17680 if (!cpu_arch_tune_set)
17681 cpu_arch_tune = PROCESSOR_IAMCU;
17683 else if (cpu_arch_isa != PROCESSOR_IAMCU)
17684 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
17685 cpu_arch_name);
17687 else
17688 as_fatal (_("unknown architecture"));
17690 #ifdef OBJ_ELF
17691 if (flag_synth_cfi && x86_elf_abi != X86_64_ABI)
17692 as_fatal (_("SCFI is not supported for this ABI"));
17693 #endif
17695 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
17696 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
17698 switch (OUTPUT_FLAVOR)
17700 #ifdef TE_PE
17701 case bfd_target_coff_flavour:
17702 if (flag_code == CODE_64BIT)
17704 object_64bit = 1;
17705 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
17707 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
17708 #endif
17709 #ifdef OBJ_ELF
17710 case bfd_target_elf_flavour:
17712 const char *format;
17714 switch (x86_elf_abi)
17716 default:
17717 format = ELF_TARGET_FORMAT;
17718 #ifndef TE_SOLARIS
17719 tls_get_addr = "___tls_get_addr";
17720 #endif
17721 break;
17722 case X86_64_ABI:
17723 use_rela_relocations = 1;
17724 object_64bit = 1;
17725 #ifndef TE_SOLARIS
17726 tls_get_addr = "__tls_get_addr";
17727 #endif
17728 format = ELF_TARGET_FORMAT64;
17729 break;
17730 case X86_64_X32_ABI:
17731 use_rela_relocations = 1;
17732 object_64bit = 1;
17733 #ifndef TE_SOLARIS
17734 tls_get_addr = "__tls_get_addr";
17735 #endif
17736 disallow_64bit_reloc = 1;
17737 format = ELF_TARGET_FORMAT32;
17738 break;
17740 if (cpu_arch_isa == PROCESSOR_IAMCU)
17742 if (x86_elf_abi != I386_ABI)
17743 as_fatal (_("Intel MCU is 32bit only"));
17744 return ELF_TARGET_IAMCU_FORMAT;
17746 else
17747 return format;
17749 #endif
17750 #if defined (OBJ_MACH_O)
17751 case bfd_target_mach_o_flavour:
17752 if (flag_code == CODE_64BIT)
17754 use_rela_relocations = 1;
17755 object_64bit = 1;
17756 return "mach-o-x86-64";
17758 else
17759 return "mach-o-i386";
17760 #endif
17761 default:
17762 abort ();
17763 return NULL;
17767 #endif /* ELF / PE / MACH_O */
17769 symbolS *
17770 md_undefined_symbol (char *name)
17772 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
17773 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
17774 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
17775 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
17777 if (!GOT_symbol)
17779 if (symbol_find (name))
17780 as_bad (_("GOT already in symbol table"));
17781 GOT_symbol = symbol_new (name, undefined_section,
17782 &zero_address_frag, 0);
17784 return GOT_symbol;
17786 return 0;
17789 #ifdef OBJ_AOUT
17790 /* Round up a section size to the appropriate boundary. */
17792 valueT
17793 md_section_align (segT segment, valueT size)
17795 /* For a.out, force the section size to be aligned. If we don't do
17796 this, BFD will align it for us, but it will not write out the
17797 final bytes of the section. This may be a bug in BFD, but it is
17798 easier to fix it here since that is how the other a.out targets
17799 work. */
17800 int align = bfd_section_alignment (segment);
17802 return ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
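/* E.g. with a section alignment (power) of 2, a size of 9 rounds up
   to (9 + 3) & ~3 == 12.  */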
17804 #endif
17806 /* On the i386, PC-relative offsets are relative to the start of the
17807 next instruction. That is, the address of the offset, plus its
17808 size, since the offset is always the last part of the insn. */
17810 long
17811 md_pcrel_from (fixS *fixP)
17813 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
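/* E.g. for a 5-byte call whose opcode is at address 0x100, the
   4-byte fixup starts at 0x101, so this returns 0x101 + 4 = 0x105,
   the first byte of the next instruction.  */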
17816 #ifdef OBJ_AOUT
17818 static void
17819 s_bss (int ignore ATTRIBUTE_UNUSED)
17821 int temp;
17823 temp = get_absolute_expression ();
17824 subseg_set (bss_section, (subsegT) temp);
17825 demand_empty_rest_of_line ();
17828 #endif
17830 /* Remember constant directive. */
17832 void
17833 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
17835 struct last_insn *last_insn
17836 = &seg_info(now_seg)->tc_segment_info_data.last_insn;
17838 if (bfd_section_flags (now_seg) & SEC_CODE)
17840 last_insn->kind = last_insn_directive;
17841 last_insn->name = "constant directive";
17842 last_insn->file = as_where (&last_insn->line);
17847 i386_validate_fix (fixS *fixp)
17849 if (fixp->fx_addsy && S_GET_SEGMENT (fixp->fx_addsy) == reg_section)
17851 reloc_howto_type *howto;
17853 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
17854 as_bad_where (fixp->fx_file, fixp->fx_line,
17855 _("invalid %s relocation against register"),
17856 howto ? howto->name : "<unknown>");
17857 return 0;
17860 #ifdef OBJ_ELF
17861 if (fixp->fx_r_type == BFD_RELOC_SIZE32
17862 || fixp->fx_r_type == BFD_RELOC_SIZE64)
17863 return fixp->fx_addsy
17864 && (!S_IS_DEFINED (fixp->fx_addsy)
17865 || S_IS_EXTERNAL (fixp->fx_addsy));
17867 /* BFD_RELOC_X86_64_GOTTPOFF:
17868 1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTTPOFF
17869 2. fx_tcbit2 -> BFD_RELOC_X86_64_CODE_6_GOTTPOFF
17870 BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17871 1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
17872 BFD_RELOC_32_PCREL:
17873 1. fx_tcbit -> BFD_RELOC_X86_64_GOTPCRELX
17874 2. fx_tcbit2 -> BFD_RELOC_X86_64_REX_GOTPCRELX
17875 3. fx_tcbit3 -> BFD_RELOC_X86_64_CODE_4_GOTPCRELX
17876 4. else -> BFD_RELOC_X86_64_GOTPCREL
17878 if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF)
17880 if (fixp->fx_tcbit)
17881 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTTPOFF;
17882 else if (fixp->fx_tcbit2)
17883 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_6_GOTTPOFF;
17885 else if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
17886 && fixp->fx_tcbit)
17887 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC;
17888 #endif
17890 if (fixp->fx_subsy)
17892 if (fixp->fx_subsy == GOT_symbol)
17894 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
17896 if (!object_64bit)
17897 abort ();
17898 #ifdef OBJ_ELF
17899 if (fixp->fx_tcbit)
17900 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCRELX;
17901 else if (fixp->fx_tcbit2)
17902 fixp->fx_r_type = BFD_RELOC_X86_64_REX_GOTPCRELX;
17903 else if (fixp->fx_tcbit3)
17904 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPCRELX;
17905 else
17906 #endif
17907 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
17909 else
17911 if (!object_64bit)
17912 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
17913 else
17914 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
17916 fixp->fx_subsy = 0;
17919 #ifdef OBJ_ELF
17920 else
17922 /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
17923 to section. Since PLT32 relocation must be against symbols,
17924 turn such PLT32 relocation into PC32 relocation. NB: We can
17925 turn PLT32 relocation into PC32 relocation only for PC-relative
17926 relocations since non-PC-relative relocations need PLT entries.
17928 if (fixp->fx_addsy
17929 && fixp->fx_pcrel
17930 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
17931 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
17932 && symbol_section_p (fixp->fx_addsy))
17933 fixp->fx_r_type = BFD_RELOC_32_PCREL;
17934 if (!object_64bit)
17936 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
17937 && fixp->fx_tcbit2)
17938 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
17941 #endif
17943 return 1;
17946 arelent *
17947 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
17949 arelent *rel;
17950 bfd_reloc_code_real_type code;
17952 switch (fixp->fx_r_type)
17954 #ifdef OBJ_ELF
17955 symbolS *sym;
17957 case BFD_RELOC_SIZE32:
17958 case BFD_RELOC_SIZE64:
17959 if (fixp->fx_addsy
17960 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
17961 && (!fixp->fx_subsy
17962 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
17963 sym = fixp->fx_addsy;
17964 else if (fixp->fx_subsy
17965 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
17966 && (!fixp->fx_addsy
17967 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
17968 sym = fixp->fx_subsy;
17969 else
17970 sym = NULL;
17971 if (sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
17973 /* Resolve size relocation against local symbol to size of
17974 the symbol plus addend. */
17975 valueT value = S_GET_SIZE (sym);
17977 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
17978 value = bfd_section_size (S_GET_SEGMENT (sym));
17979 if (sym == fixp->fx_subsy)
17981 value = -value;
17982 if (fixp->fx_addsy)
17983 value += S_GET_VALUE (fixp->fx_addsy);
17985 else if (fixp->fx_subsy)
17986 value -= S_GET_VALUE (fixp->fx_subsy);
17987 value += fixp->fx_offset;
17988 if (fixp->fx_r_type == BFD_RELOC_SIZE32
17989 && object_64bit
17990 && !fits_in_unsigned_long (value))
17991 as_bad_where (fixp->fx_file, fixp->fx_line,
17992 _("symbol size computation overflow"));
17993 fixp->fx_addsy = NULL;
17994 fixp->fx_subsy = NULL;
17995 md_apply_fix (fixp, (valueT *) &value, NULL);
17996 return NULL;
17998 if (!fixp->fx_addsy || fixp->fx_subsy)
18000 as_bad_where (fixp->fx_file, fixp->fx_line,
18001 "unsupported expression involving @size");
18002 return NULL;
18004 #endif
18005 /* Fall through. */
18007 case BFD_RELOC_X86_64_PLT32:
18008 case BFD_RELOC_X86_64_GOT32:
18009 case BFD_RELOC_X86_64_GOTPCREL:
18010 case BFD_RELOC_X86_64_GOTPCRELX:
18011 case BFD_RELOC_X86_64_REX_GOTPCRELX:
18012 case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
18013 case BFD_RELOC_386_PLT32:
18014 case BFD_RELOC_386_GOT32:
18015 case BFD_RELOC_386_GOT32X:
18016 case BFD_RELOC_386_GOTOFF:
18017 case BFD_RELOC_386_GOTPC:
18018 case BFD_RELOC_386_TLS_GD:
18019 case BFD_RELOC_386_TLS_LDM:
18020 case BFD_RELOC_386_TLS_LDO_32:
18021 case BFD_RELOC_386_TLS_IE_32:
18022 case BFD_RELOC_386_TLS_IE:
18023 case BFD_RELOC_386_TLS_GOTIE:
18024 case BFD_RELOC_386_TLS_LE_32:
18025 case BFD_RELOC_386_TLS_LE:
18026 case BFD_RELOC_386_TLS_GOTDESC:
18027 case BFD_RELOC_386_TLS_DESC_CALL:
18028 case BFD_RELOC_X86_64_TLSGD:
18029 case BFD_RELOC_X86_64_TLSLD:
18030 case BFD_RELOC_X86_64_DTPOFF32:
18031 case BFD_RELOC_X86_64_DTPOFF64:
18032 case BFD_RELOC_X86_64_GOTTPOFF:
18033 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
18034 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
18035 case BFD_RELOC_X86_64_TPOFF32:
18036 case BFD_RELOC_X86_64_TPOFF64:
18037 case BFD_RELOC_X86_64_GOTOFF64:
18038 case BFD_RELOC_X86_64_GOTPC32:
18039 case BFD_RELOC_X86_64_GOT64:
18040 case BFD_RELOC_X86_64_GOTPCREL64:
18041 case BFD_RELOC_X86_64_GOTPC64:
18042 case BFD_RELOC_X86_64_GOTPLT64:
18043 case BFD_RELOC_X86_64_PLTOFF64:
18044 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
18045 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
18046 case BFD_RELOC_X86_64_TLSDESC_CALL:
18047 case BFD_RELOC_RVA:
18048 case BFD_RELOC_VTABLE_ENTRY:
18049 case BFD_RELOC_VTABLE_INHERIT:
18050 #ifdef TE_PE
18051 case BFD_RELOC_32_SECREL:
18052 case BFD_RELOC_16_SECIDX:
18053 #endif
18054 code = fixp->fx_r_type;
18055 break;
18056 case BFD_RELOC_X86_64_32S:
18057 if (!fixp->fx_pcrel)
18059 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
18060 code = fixp->fx_r_type;
18061 break;
18063 /* Fall through. */
18064 default:
18065 if (fixp->fx_pcrel)
18067 switch (fixp->fx_size)
18069 default:
18070 as_bad_where (fixp->fx_file, fixp->fx_line,
18071 _("can not do %d byte pc-relative relocation"),
18072 fixp->fx_size);
18073 code = BFD_RELOC_32_PCREL;
18074 break;
18075 case 1: code = BFD_RELOC_8_PCREL; break;
18076 case 2: code = BFD_RELOC_16_PCREL; break;
18077 case 4: code = BFD_RELOC_32_PCREL; break;
18078 #ifdef BFD64
18079 case 8: code = BFD_RELOC_64_PCREL; break;
18080 #endif
18083 else
18085 switch (fixp->fx_size)
18087 default:
18088 as_bad_where (fixp->fx_file, fixp->fx_line,
18089 _("can not do %d byte relocation"),
18090 fixp->fx_size);
18091 code = BFD_RELOC_32;
18092 break;
18093 case 1: code = BFD_RELOC_8; break;
18094 case 2: code = BFD_RELOC_16; break;
18095 case 4: code = BFD_RELOC_32; break;
18096 #ifdef BFD64
18097 case 8: code = BFD_RELOC_64; break;
18098 #endif
18101 break;
18104 if ((code == BFD_RELOC_32
18105 || code == BFD_RELOC_32_PCREL
18106 || code == BFD_RELOC_X86_64_32S)
18107 && GOT_symbol
18108 && fixp->fx_addsy == GOT_symbol)
18110 if (!object_64bit)
18111 code = BFD_RELOC_386_GOTPC;
18112 else
18113 code = BFD_RELOC_X86_64_GOTPC32;
18115 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
18116 && GOT_symbol
18117 && fixp->fx_addsy == GOT_symbol)
18119 code = BFD_RELOC_X86_64_GOTPC64;
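/* This is what maps the classic PIC prologue reference onto a GOTPC
   reloc: "addl $_GLOBAL_OFFSET_TABLE_, %ebx" in 32-bit code becomes
   BFD_RELOC_386_GOTPC, a 32-bit reference in 64-bit code becomes
   BFD_RELOC_X86_64_GOTPC32, and a 64-bit one GOTPC64.  */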
18122 rel = XNEW (arelent);
18123 rel->sym_ptr_ptr = XNEW (asymbol *);
18124 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
18126 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
18128 if (!use_rela_relocations)
18130 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
18131 vtable entry to be used in the relocation's section offset. */
18132 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
18133 rel->address = fixp->fx_offset;
18134 #if defined (OBJ_COFF) && defined (TE_PE)
18135 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
18136 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
18137 else
18138 #endif
18139 rel->addend = 0;
18141 /* Use the rela in 64bit mode. */
18142 else
18144 if (disallow_64bit_reloc)
18145 switch (code)
18147 case BFD_RELOC_X86_64_DTPOFF64:
18148 case BFD_RELOC_X86_64_TPOFF64:
18149 case BFD_RELOC_64_PCREL:
18150 case BFD_RELOC_X86_64_GOTOFF64:
18151 case BFD_RELOC_X86_64_GOT64:
18152 case BFD_RELOC_X86_64_GOTPCREL64:
18153 case BFD_RELOC_X86_64_GOTPC64:
18154 case BFD_RELOC_X86_64_GOTPLT64:
18155 case BFD_RELOC_X86_64_PLTOFF64:
18156 as_bad_where (fixp->fx_file, fixp->fx_line,
18157 _("cannot represent relocation type %s in x32 mode"),
18158 bfd_get_reloc_code_name (code));
18159 break;
18160 default:
18161 break;
18164 if (!fixp->fx_pcrel)
18165 rel->addend = fixp->fx_offset;
18166 else
18167 switch (code)
18169 case BFD_RELOC_X86_64_PLT32:
18170 case BFD_RELOC_X86_64_GOT32:
18171 case BFD_RELOC_X86_64_GOTPCREL:
18172 case BFD_RELOC_X86_64_GOTPCRELX:
18173 case BFD_RELOC_X86_64_REX_GOTPCRELX:
18174 case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
18175 case BFD_RELOC_X86_64_TLSGD:
18176 case BFD_RELOC_X86_64_TLSLD:
18177 case BFD_RELOC_X86_64_GOTTPOFF:
18178 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
18179 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
18180 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
18181 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
18182 case BFD_RELOC_X86_64_TLSDESC_CALL:
18183 rel->addend = fixp->fx_offset - fixp->fx_size;
18184 break;
18185 default:
18186 rel->addend = (section->vma
18187 - fixp->fx_size
18188 + fixp->fx_addnumber
18189 + md_pcrel_from (fixp));
18190 break;
  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
		    _("cannot represent relocation type %s"),
		    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}
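/* tc-i386-intel.c implements the Intel-syntax operand parser.  It is
   #included here, rather than compiled separately, because it relies on
   this file's static state.  */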
#include "tc-i386-intel.c"
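/* Parse a register name as it appears in a CFI directive
   (e.g. ".cfi_offset %ebp, -8") into its DWARF2 register number.
   Register parsing is temporarily relaxed so that pseudo registers and
   names lacking the '%' prefix are accepted as well.  */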
void
tc_x86_parse_to_dw2regnum (expressionS *exp)
{
  int saved_naked_reg;
  char saved_register_dot;

  saved_naked_reg = allow_naked_reg;
  allow_naked_reg = 1;
  saved_register_dot = register_chars['.'];
  register_chars['.'] = '.';
  allow_pseudo_reg = 1;
  expression_and_evaluate (exp);
  allow_pseudo_reg = 0;
  register_chars['.'] = saved_register_dot;
  allow_naked_reg = saved_naked_reg;

  if (exp->X_op == O_register && exp->X_add_number >= 0)
    {
      exp->X_op = O_illegal;
      if ((addressT) exp->X_add_number < i386_regtab_size)
	{
	  exp->X_add_number = i386_regtab[exp->X_add_number]
			      .dw2_regnum[object_64bit];
	  if (exp->X_add_number != Dw2Inval)
	    exp->X_op = O_constant;
	}
    }
}
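/* Emit the initial CFI instructions for each CIE: the CFA starts out
   just above the pushed return address, and the return address itself
   is saved relative to the CFA.  */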
void
tc_x86_frame_initial_instructions (void)
{
  cfi_add_CFA_def_cfa (object_64bit ? REG_SP : 4, -x86_cie_data_alignment);
  cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
}
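/* Return the size of a DWARF address: 4 bytes for the x32 (ILP32)
   ABI, otherwise the output BFD's native address width.  */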
int
x86_dwarf2_addr_size (void)
{
#ifdef OBJ_ELF
  if (x86_elf_abi == X86_64_X32_ABI)
    return 4;
#endif
  return bfd_arch_bits_per_address (stdoutput) / 8;
}
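/* On PE/COFF, DWARF offsets to symbols are emitted as section-relative
   (secrel) expressions instead of absolute addresses.  */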
#ifdef TE_PE
void
tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
{
  expressionS exp;

  exp.X_op = O_secrel;
  exp.X_add_symbol = symbol;
  exp.X_add_number = 0;
  emit_expr (&exp, size);
}
#endif
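/* Translate a .section type string into an ELF section type.  In
   64-bit mode "@unwind" selects SHT_X86_64_UNWIND; returning -1 defers
   to the generic handling.  */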
#ifdef OBJ_ELF
int
i386_elf_section_type (const char *str, size_t len)
{
  if (flag_code == CODE_64BIT
      && len == sizeof ("unwind") - 1
      && startswith (str, "unwind"))
    return SHT_X86_64_UNWIND;

  return -1;
}
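/* Hook run on section or subsection changes.  The outgoing
   subsection's last-insn state is saved on a per-section list, and the
   state of the subsection being entered is restored, or cleared if the
   subsection is new.  */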
void
i386_elf_section_change_hook (void)
{
  struct i386_segment_info *info = &seg_info (now_seg)->tc_segment_info_data;
  struct i386_segment_info *curr, *prev;

  if (info->subseg == now_subseg)
    return;

  /* Find the (or make a) list entry to save state into.  */
  for (prev = info; (curr = prev->next) != NULL; prev = curr)
    if (curr->subseg == info->subseg)
      break;
  if (!curr)
    {
      curr = notes_alloc (sizeof (*curr));
      curr->subseg = info->subseg;
      curr->next = NULL;
      prev->next = curr;
    }
  curr->last_insn = info->last_insn;

  /* Find the list entry to load state from.  */
  for (curr = info->next; curr; curr = curr->next)
    if (curr->subseg == now_subseg)
      break;
  if (curr)
    info->last_insn = curr->last_insn;
  else
    memset (&info->last_insn, 0, sizeof (info->last_insn));
  info->subseg = now_subseg;
}
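/* Solaris expects the 64-bit .eh_frame section to carry the
   SHT_X86_64_UNWIND section type; adjust it here.  */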
#ifdef TE_SOLARIS
void
i386_solaris_fix_up_eh_frame (segT sec)
{
  if (flag_code == CODE_64BIT)
    elf_section_type (sec) = SHT_X86_64_UNWIND;
}
#endif
/* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */

bfd_vma
x86_64_section_letter (int letter, const char **ptr_msg)
{
  if (flag_code == CODE_64BIT)
    {
      if (letter == 'l')
	return SHF_X86_64_LARGE;

      *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
    }
  else
    *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
  return -1;
}
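/* Implement the .largecomm directive (e.g. ".largecomm sym, 4096, 32"):
   like .comm, but the symbol is placed in the large common section, or
   .lbss on the BSS side, so that large-model data need not fit the
   small code model's addressing range.  Outside 64-bit mode it degrades
   to a plain .comm with a warning.  */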
static void
handle_large_common (int small ATTRIBUTE_UNUSED)
{
  if (flag_code != CODE_64BIT)
    {
      s_comm_internal (0, elf_common_parse);
      as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
    }
  else
    {
      static segT lbss_section;
      asection *saved_com_section_ptr = elf_com_section_ptr;
      asection *saved_bss_section = bss_section;

      if (lbss_section == NULL)
	{
	  flagword applicable;
	  segT seg = now_seg;
	  subsegT subseg = now_subseg;

	  /* The .lbss section is for local .largecomm symbols.  */
	  lbss_section = subseg_new (".lbss", 0);
	  applicable = bfd_applicable_section_flags (stdoutput);
	  bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
	  seg_info (lbss_section)->bss = 1;

	  subseg_set (seg, subseg);
	}

      elf_com_section_ptr = &_bfd_elf_large_com_section;
      bss_section = lbss_section;

      s_comm_internal (0, elf_common_parse);

      elf_com_section_ptr = saved_com_section_ptr;
      bss_section = saved_bss_section;
    }
}
#endif /* OBJ_ELF */