lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20
  21 namespace llvm {
  22   class X86Subtarget;
  23   class X86TargetMachine;
  24
  25   namespace X86ISD {
  26     // X86 Specific DAG Nodes
  27     enum NodeType : unsigned {
  28       // Start the numbering where the builtin ops leave off.
  29       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  30
  31       /// Bit scan forward.
  32       BSF,
  33       /// Bit scan reverse.
  34       BSR,
  35
  36       /// Double shift instructions. These correspond to
  37       /// X86::SHLDxx and X86::SHRDxx instructions.
  38       SHLD,
  39       SHRD,
  40
  41       /// Bitwise logical AND of floating point values. This corresponds
  42       /// to X86::ANDPS or X86::ANDPD.
  43       FAND,
  44
  45       /// Bitwise logical OR of floating point values. This corresponds
  46       /// to X86::ORPS or X86::ORPD.
  47       FOR,
  48
  49       /// Bitwise logical XOR of floating point values. This corresponds
  50       /// to X86::XORPS or X86::XORPD.
  51       FXOR,
  52
  53       /// Bitwise logical ANDNOT of floating point values. This
  54       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  55       FANDN,
  56
  57       /// These operations represent an abstract X86 call
  58       /// instruction, which includes a bunch of information.  In particular the
  59       /// operands of these nodes are:
  60       ///
  61       ///     #0 - The incoming token chain
  62       ///     #1 - The callee
  63       ///     #2 - The number of arg bytes the caller pushes on the stack.
  64       ///     #3 - The number of arg bytes the callee pops off the stack.
  65       ///     #4 - The value to pass in AL/AX/EAX (optional)
  66       ///     #5 - The value to pass in DL/DX/EDX (optional)
  67       ///
  68       /// The result values of these nodes are:
  69       ///
  70       ///     #0 - The outgoing token chain
  71       ///     #1 - The first register result value (optional)
  72       ///     #2 - The second register result value (optional)
  73       ///
  74       CALL,
  75
  76       /// Same as call except it adds the NoTrack prefix.
  77       NT_CALL,
  78
  79       /// X86 compare and logical compare instructions.
  80       CMP, COMI, UCOMI,
  81
  82       /// X86 bit-test instructions.
  83       BT,
  84
  85       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  86       /// operand, usually produced by a CMP instruction.
  87       SETCC,
  88
  89       /// X86 Select
  90       SELECTS,
  91
  92       // Same as SETCC except it's materialized with a sbb and the value is all
  93       // ones or all zeros.
  94       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  95
  96       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  97       /// Operands are two FP values to compare; result is a mask of
  98       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
  99       FSETCC,
 100
 101       /// X86 FP SETCC, similar to above, but with output as an i1 mask,
 102       /// and a version with SAE.
 103       FSETCCM, FSETCCM_SAE,
 104
 105       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 106       /// to select from. Operand 2 is the condition code, and operand 3 is the
 107       /// flag operand produced by a CMP or TEST instruction.
 108       CMOV,
 109
 110       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 111       /// is the block to branch if condition is true, operand 2 is the
 112       /// condition code, and operand 3 is the flag operand produced by a CMP
 113       /// or TEST instruction.
 114       BRCOND,
 115
 116       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 117       /// operand 1 is the target address.
 118       NT_BRIND,
 119
 120       /// Return with a flag operand. Operand 0 is the chain operand, operand
 121       /// 1 is the number of bytes of stack to pop.
 122       RET_FLAG,
 123
 124       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 125       IRET,
 126
 127       /// Repeat fill, corresponds to X86::REP_STOSx.
 128       REP_STOS,
 129
 130       /// Repeat move, corresponds to X86::REP_MOVSx.
 131       REP_MOVS,
 132
 133       /// On Darwin, this node represents the result of the popl
 134       /// at function entry, used for PIC code.
 135       GlobalBaseReg,
 136
 137       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 138       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 139       /// MCSymbol and TargetBlockAddress.
 140       Wrapper,
 141
 142       /// Special wrapper used under X86-64 PIC mode for RIP
 143       /// relative displacements.
 144       WrapperRIP,
 145
 146       /// Copies a 64-bit value from an MMX vector to the low word
 147       /// of an XMM vector, with the high word zero filled.
 148       MOVQ2DQ,
 149
 150       /// Copies a 64-bit value from the low word of an XMM vector
 151       /// to an MMX vector.
 152       MOVDQ2Q,
 153
 154       /// Copies a 32-bit value from the low word of a MMX
 155       /// vector to a GPR.
 156       MMX_MOVD2W,
 157
 158       /// Copies a GPR into the low 32-bit word of a MMX vector
 159       /// and zero out the high word.
 160       MMX_MOVW2D,
 161
 162       /// Extract an 8-bit value from a vector and zero extend it to
 163       /// i32, corresponds to X86::PEXTRB.
 164       PEXTRB,
 165
 166       /// Extract a 16-bit value from a vector and zero extend it to
 167       /// i32, corresponds to X86::PEXTRW.
 168       PEXTRW,
 169
 170       /// Insert any element of a 4 x float vector into any element
 171       /// of a destination 4 x float vector.
 172       INSERTPS,
 173
 174       /// Insert the lower 8-bits of a 32-bit value to a vector,
 175       /// corresponds to X86::PINSRB.
 176       PINSRB,
 177
 178       /// Insert the lower 16-bits of a 32-bit value to a vector,
 179       /// corresponds to X86::PINSRW.
 180       PINSRW,
 181
 182       /// Shuffle 16 8-bit values within a vector.
 183       PSHUFB,
 184
 185       /// Compute Sum of Absolute Differences.
 186       PSADBW,
 187       /// Compute Double Block Packed Sum-Absolute-Differences
 188       DBPSADBW,
 189
 190       /// Bitwise Logical AND NOT of Packed FP values.
 191       ANDNP,
 192
 193       /// Blend where the selector is an immediate.
 194       BLENDI,
 195
 196       /// Dynamic (non-constant condition) vector blend where only the sign bits
 197       /// of the condition elements are used. This is used to enforce that the
 198       /// condition mask is not valid for generic VSELECT optimizations. This
 199       /// is also used to implement the intrinsics.
 200       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 201       BLENDV,
 202
 203       /// Combined add and sub on an FP vector.
 204       ADDSUB,
 205
 206       //  FP vector ops with rounding mode.
 207       FADD_RND, FADDS, FADDS_RND,
 208       FSUB_RND, FSUBS, FSUBS_RND,
 209       FMUL_RND, FMULS, FMULS_RND,
 210       FDIV_RND, FDIVS, FDIVS_RND,
 211       FMAX_SAE, FMAXS_SAE,
 212       FMIN_SAE, FMINS_SAE,
 213       FSQRT_RND, FSQRTS, FSQRTS_RND,
 214
 215       // FP vector get exponent.
 216       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 217       // Extract Normalized Mantissas.
 218       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 219       // FP Scale.
 220       SCALEF, SCALEF_RND,
 221       SCALEFS, SCALEFS_RND,
 222
 223       // Unsigned Integer average.
 224       AVG,
 225
 226       /// Integer horizontal add/sub.
 227       HADD,
 228       HSUB,
 229
 230       /// Floating point horizontal add/sub.
 231       FHADD,
 232       FHSUB,
 233
 234       // Detect Conflicts Within a Vector
 235       CONFLICT,
 236
 237       /// Floating point max and min.
 238       FMAX, FMIN,
 239
 240       /// Commutative FMIN and FMAX.
 241       FMAXC, FMINC,
 242
 243       /// Scalar intrinsic floating point max and min.
 244       FMAXS, FMINS,
 245
 246       /// Floating point reciprocal-sqrt and reciprocal approximation.
 247       /// Note that these typically require refinement
 248       /// in order to obtain suitable precision.
 249       FRSQRT, FRCP,
 250
 251       // AVX-512 reciprocal approximations with a little more precision.
 252       RSQRT14, RSQRT14S, RCP14, RCP14S,
 253
 254       // Thread Local Storage.
 255       TLSADDR,
 256
 257       // Thread Local Storage. A call to get the start address
 258       // of the TLS block for the current module.
 259       TLSBASEADDR,
 260
 261       // Thread Local Storage.  When calling to an OS provided
 262       // thunk at the address from an earlier relocation.
 263       TLSCALL,
 264
 265       // Exception Handling helpers.
 266       EH_RETURN,
 267
 268       // SjLj exception handling setjmp.
 269       EH_SJLJ_SETJMP,
 270
 271       // SjLj exception handling longjmp.
 272       EH_SJLJ_LONGJMP,
 273
 274       // SjLj exception handling dispatch.
 275       EH_SJLJ_SETUP_DISPATCH,
 276
 277       /// Tail call return. See X86TargetLowering::LowerCall for
 278       /// the list of operands.
 279       TC_RETURN,
 280
 281       // Vector move to low scalar and zero higher vector elements.
 282       VZEXT_MOVL,
 283
 284       // Vector integer truncate.
 285       VTRUNC,
 286       // Vector integer truncate with unsigned/signed saturation.
 287       VTRUNCUS, VTRUNCS,
 288
 289       // Masked version of the above. Used when less than a 128-bit result is
 290       // produced since the mask only applies to the lower elements and can't
 291       // be represented by a select.
 292       // SRC, PASSTHRU, MASK
 293       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 294
 295       // Vector FP extend.
 296       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 297
 298       // Vector FP round.
 299       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 300
 301       // Masked version of above. Used for v2f64->v4f32.
 302       // SRC, PASSTHRU, MASK
 303       VMFPROUND,
 304
 305       // 128-bit vector logical left / right shift
 306       VSHLDQ, VSRLDQ,
 307
 308       // Vector shift elements
 309       VSHL, VSRL, VSRA,
 310
 311       // Vector variable shift
 312       VSHLV, VSRLV, VSRAV,
 313
 314       // Vector shift elements by immediate
 315       VSHLI, VSRLI, VSRAI,
 316
 317       // Shifts of mask registers.
 318       KSHIFTL, KSHIFTR,
 319
 320       // Bit rotate by immediate
 321       VROTLI, VROTRI,
 322
 323       // Vector packed double/float comparison.
 324       CMPP,
 325
 326       // Vector integer comparisons.
 327       PCMPEQ, PCMPGT,
 328
 329       // v8i16 Horizontal minimum and position.
 330       PHMINPOS,
 331
 332       MULTISHIFT,
 333
 334       /// Vector comparison generating mask bits for fp and
 335       /// integer signed and unsigned data types.
 336       CMPM,
 337       // Vector comparison with SAE for FP values
 338       CMPM_SAE,
 339
 340       // Arithmetic operations with FLAGS results.
 341       ADD, SUB, ADC, SBB, SMUL, UMUL,
 342       OR, XOR, AND,
 343
 344       // Bit field extract.
 345       BEXTR,
 346
 347       // Zero High Bits Starting with Specified Bit Position.
 348       BZHI,
 349
 350       // X86-specific multiply by immediate.
 351       MUL_IMM,
 352
 353       // Vector sign bit extraction.
 354       MOVMSK,
 355
 356       // Vector bitwise comparisons.
 357       PTEST,
 358
 359       // Vector packed fp sign bitwise comparisons.
 360       TESTP,
 361
 362       // OR/AND test for masks.
 363       KORTEST,
 364       KTEST,
 365
 366       // ADD for masks.
 367       KADD,
 368
 369       // Several flavors of instructions with vector shuffle behaviors.
 370       // Saturated signed/unsigned packing.
 371       PACKSS,
 372       PACKUS,
 373       // Intra-lane alignr.
 374       PALIGNR,
 375       // AVX512 inter-lane alignr.
 376       VALIGN,
 377       PSHUFD,
 378       PSHUFHW,
 379       PSHUFLW,
 380       SHUFP,
 381       // VBMI2 Concat & Shift.
 382       VSHLD,
 383       VSHRD,
 384       VSHLDV,
 385       VSHRDV,
 386       // Shuffle Packed Values at 128-bit granularity.
 387       SHUF128,
 388       MOVDDUP,
 389       MOVSHDUP,
 390       MOVSLDUP,
 391       MOVLHPS,
 392       MOVHLPS,
 393       MOVSD,
 394       MOVSS,
 395       UNPCKL,
 396       UNPCKH,
 397       VPERMILPV,
 398       VPERMILPI,
 399       VPERMI,
 400       VPERM2X128,
 401
 402       // Variable Permute (VPERM).
 403       // Res = VPERMV MaskV, V0
 404       VPERMV,
 405
 406       // 3-op Variable Permute (VPERMT2).
 407       // Res = VPERMV3 V0, MaskV, V1
 408       VPERMV3,
 409
 410       // Bitwise ternary logic.
 411       VPTERNLOG,
 412       // Fix Up Special Packed Float32/64 values.
 413       VFIXUPIMM, VFIXUPIMM_SAE,
 414       VFIXUPIMMS, VFIXUPIMMS_SAE,
 415       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 416       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 417       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 418       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 419       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 420       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 421       // scaling part of the immediate.
 422       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 423       // Tests types of FP values for packed types.
 424       VFPCLASS,
 425       // Tests types of FP values for scalar types.
 426       VFPCLASSS,
 427
 428       // Broadcast (splat) scalar or element 0 of a vector. If the operand is
 429       // a vector, this node may change the vector length as part of the splat.
 430       VBROADCAST,
 431       // Broadcast mask to vector.
 432       VBROADCASTM,
 433       // Broadcast subvector to vector.
 434       SUBV_BROADCAST,
 435
 436       /// SSE4A Extraction and Insertion.
 437       EXTRQI, INSERTQI,
 438
 439       // XOP arithmetic/logical shifts.
 440       VPSHA, VPSHL,
 441       // XOP signed/unsigned integer comparisons.
 442       VPCOM, VPCOMU,
 443       // XOP packed permute bytes.
 444       VPPERM,
 445       // XOP two source permutation.
 446       VPERMIL2,
 447
 448       // Vector multiply packed unsigned doubleword integers.
 449       PMULUDQ,
 450       // Vector multiply packed signed doubleword integers.
 451       PMULDQ,
 452       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 453       MULHRS,
 454
 455       // Multiply and Add Packed Integers.
 456       VPMADDUBSW, VPMADDWD,
 457
 458       // AVX512IFMA multiply and add.
 459       // NOTE: These are different than the instruction and perform
 460       // op0 x op1 + op2.
 461       VPMADD52L, VPMADD52H,
 462
 463       // VNNI
 464       VPDPBUSD,
 465       VPDPBUSDS,
 466       VPDPWSSD,
 467       VPDPWSSDS,
 468
 469       // FMA nodes.
 470       // We use the target independent ISD::FMA for the non-inverted case.
 471       FNMADD,
 472       FMSUB,
 473       FNMSUB,
 474       FMADDSUB,
 475       FMSUBADD,
 476
 477       // FMA with rounding mode.
 478       FMADD_RND,
 479       FNMADD_RND,
 480       FMSUB_RND,
 481       FNMSUB_RND,
 482       FMADDSUB_RND,
 483       FMSUBADD_RND,
 484
 485       // Compress and expand.
 486       COMPRESS,
 487       EXPAND,
 488
 489       // Bits shuffle
 490       VPSHUFBITQMB,
 491
 492       // Convert Signed/Unsigned Integer to Floating-Point with rounding mode.
 493       SINT_TO_FP_RND, UINT_TO_FP_RND,
 494       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 495       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 496
 497       // Vector float/double to signed/unsigned integer.
 498       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 499       // Scalar float/double to signed/unsigned integer.
 500       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 501
 502       // Vector float/double to signed/unsigned integer with truncation.
 503       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 504       // Scalar float/double to signed/unsigned integer with truncation.
 505       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 506
 507       // Vector signed/unsigned integer to float/double.
 508       CVTSI2P, CVTUI2P,
 509
 510       // Masked versions of above. Used for v2f64->v4f32.
 511       // SRC, PASSTHRU, MASK
 512       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 513       MCVTSI2P, MCVTUI2P,
 514
 515       // Vector float to bfloat16.
 516       // Convert TWO packed single data to one packed BF16 data
 517       CVTNE2PS2BF16,
 518       // Convert packed single data to packed BF16 data
 519       CVTNEPS2BF16,
 520       // Masked version of above.
 521       // SRC, PASSTHRU, MASK
 522       MCVTNEPS2BF16,
 523
 524       // Dot product of BF16 pairs accumulated into
 525       // packed single precision.
 526       DPBF16PS,
 527
 528       // Save xmm argument registers to the stack, according to %al. An operator
 529       // is needed so that this can be expanded with control flow.
 530       VASTART_SAVE_XMM_REGS,
 531
 532       // Windows's _chkstk call to do stack probing.
 533       WIN_ALLOCA,
 534
 535       // For allocating variable amounts of stack space when using
 536       // segmented stacks. Check if the current stacklet has enough space, and
 537       // falls back to heap allocation if not.
 538       SEG_ALLOCA,
 539
 540       // Memory barriers.
 541       MEMBARRIER,
 542       MFENCE,
 543
 544       // Store FP status word into i16 register.
 545       FNSTSW16r,
 546
 547       // Store contents of %ah into %eflags.
 548       SAHF,
 549
 550       // Get a random integer and indicate whether it is valid in CF.
 551       RDRAND,
 552
 553       // Get a NIST SP800-90B & C compliant random integer and
 554       // indicate whether it is valid in CF.
 555       RDSEED,
 556
 557       // Protection keys
 558       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 559       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 560       // value for ECX.
 561       RDPKRU, WRPKRU,
 562
 563       // SSE42 string comparisons.
 564       // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
 565       // will emit one or two instructions based on which results are used. If
 566       // both flags and index/mask are used, this lets us use a single
 567       // instruction since we won't have to pick an opcode for flags. Instead we
 568       // can rely on the DAG to CSE everything and decide at isel.
 569       PCMPISTR,
 570       PCMPESTR,
 571
 572       // Test if in transactional execution.
 573       XTEST,
 574
 575       // ERI instructions.
 576       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 577       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 578
 579       // Conversions between float and half-float.
 580       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 581
 582       // Masked version of above.
 583       // SRC, RND, PASSTHRU, MASK
 584       MCVTPS2PH,
 585
 586       // Galois Field Arithmetic Instructions
 587       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 588
 589       // LWP insert record.
 590       LWPINS,
 591
 592       // User level wait
 593       UMWAIT, TPAUSE,
 594
 595       // Enqueue Stores Instructions
 596       ENQCMD, ENQCMDS,
 597
 598       // For avx512-vp2intersect
 599       VP2INTERSECT,
 600
 601       // Compare and swap.
 602       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 603       LCMPXCHG8_DAG,
 604       LCMPXCHG16_DAG,
 605       LCMPXCHG8_SAVE_EBX_DAG,
 606       LCMPXCHG16_SAVE_RBX_DAG,
 607
 608       /// LOCK-prefixed arithmetic read-modify-write instructions.
 609       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 610       LADD, LSUB, LOR, LXOR, LAND,
 611
 612       // Load, scalar_to_vector, and zero extend.
 613       VZEXT_LOAD,
 614
 615       // extract_vector_elt, store.
 616       VEXTRACT_STORE,
 617
 618       // scalar broadcast from memory
 619       VBROADCAST_LOAD,
 620
 621       // Store FP control word into i16 memory.
 622       FNSTCW16m,
 623
 624       /// This instruction implements FP_TO_SINT with the
 625       /// integer destination in memory and a FP reg source.  This corresponds
 626       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 627       /// has two inputs (token chain and address) and two outputs (int value
 628       /// and token chain). Memory VT specifies the type to store to.
 629       FP_TO_INT_IN_MEM,
 630
 631       /// This instruction implements SINT_TO_FP with the
 632       /// integer source in memory and FP reg result.  This corresponds to the
 633       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 634       /// and two outputs (FP value and token chain; FILD_FLAG also produces a
 635       /// flag). The integer source type is specified by the memory VT.
 636       FILD,
 637       FILD_FLAG,
 638
 639       /// This instruction implements a fp->int store from FP stack
 640       /// slots. This corresponds to the fist instruction. It takes a
 641       /// chain operand, value to store, address, and glue. The memory VT
 642       /// specifies the type to store as.
 643       FIST,
 644
 645       /// This instruction implements an extending load to FP stack slots.
 646       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 647       /// operand, and ptr to load from. The memory VT specifies the type to
 648       /// load from.
 649       FLD,
 650
 651       /// This instruction implements a truncating store from FP stack
 652       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 653       /// chain operand, value to store, address, and glue. The memory VT
 654       /// specifies the type to store as.
 655       FST,
 656
 657       /// This instruction grabs the address of the next argument
 658       /// from a va_list. (reads and modifies the va_list in memory)
 659       VAARG_64,
 660
 661       // Vector truncating store with unsigned/signed saturation
 662       VTRUNCSTOREUS, VTRUNCSTORES,
 663       // Vector truncating masked store with unsigned/signed saturation
 664       VMTRUNCSTOREUS, VMTRUNCSTORES,
 665
 666       // X86 specific gather and scatter
 667       MGATHER, MSCATTER,
 668
 669       // WARNING: Do not add anything at the end unless you want the node to
 670       // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 671       // opcodes are treated as target memory ops!
 672     };
 673   } // end namespace X86ISD
 674
 675   /// Predicates that are used for node matching.
 676   namespace X86 {
 677     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 678     bool isZeroNode(SDValue Elt);
 679
 680     /// Returns true if the given offset can fit into the displacement field
 681     /// of the instruction.
 682     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 683                                       bool hasSymbolicDisplacement = true);
 684
 685     /// Determines whether the callee is required to pop its own
 686     /// arguments. Callee pop is necessary to support tail calls.
 687     bool isCalleePop(CallingConv::ID CallingConv,
 688                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 689
 690     /// If Op is a constant whose elements are all the same constant or
 691     /// undefined, returns true and stores the constant value in \p SplatVal.
 692     bool isConstantSplat(SDValue Op, APInt &SplatVal);
 693   } // end namespace X86
 694
 695   //===--------------------------------------------------------------------===//
 696   //  X86 Implementation of the TargetLowering interface
 697   class X86TargetLowering final : public TargetLowering {
 698   public:
 699     explicit X86TargetLowering(const X86TargetMachine &TM,
 700                                const X86Subtarget &STI);
 701
 702     unsigned getJumpTableEncoding() const override;
 703     bool useSoftFloat() const override;
 704
 705     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 706                                ArgListTy &Args) const override;
 707
 708     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
           // Scalar shift amounts are always i8 here; neither the DataLayout nor
           // VT is consulted.
 709       return MVT::i8;
 710     }
 711
 712     const MCExpr *
 713     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 714                               const MachineBasicBlock *MBB, unsigned uid,
 715                               MCContext &Ctx) const override;
 716
 717     /// Returns relocation base for the given PIC jumptable.
 718     SDValue getPICJumpTableRelocBase(SDValue Table,
 719                                      SelectionDAG &DAG) const override;
 720     const MCExpr *
 721     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 722                                  unsigned JTI, MCContext &Ctx) const override;
 723
 724     /// Return the desired alignment for ByVal aggregate
 725     /// function arguments in the caller parameter area. For X86, aggregates
 726     /// that contain SSE vectors are placed at 16-byte boundaries while the rest
 727     /// are at 4-byte boundaries.
 728     unsigned getByValTypeAlignment(Type *Ty,
 729                                    const DataLayout &DL) const override;
 730
 731     /// Returns the target specific optimal type for load
 732     /// and store operations as a result of memset, memcpy, and memmove
 733     /// lowering. If DstAlign is zero that means the destination
 734     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 735     /// means there isn't a need to check it against alignment requirement,
 736     /// probably because the source does not need to be loaded. If 'IsMemset' is
 737     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 738     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 739     /// source is constant so it does not need to be loaded.
 740     /// It returns EVT::Other if the type should be determined using generic
 741     /// target-independent logic.
 742     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 743                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 744                             const AttributeList &FuncAttributes) const override;
 745
 746     /// Returns true if it's safe to use load / store of the
 747     /// specified type to expand memcpy / memset inline. This is mostly true
 748     /// for all types except for some special cases. For example, on X86
 749     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 750     /// also does type conversion. Note the specified type doesn't have to be
 751     /// legal as the hook is used before type legalization.
 752     bool isSafeMemOpType(MVT VT) const override;
 753
 754     /// Returns true if the target allows unaligned memory accesses of the
 755     /// specified type. Returns whether it is "fast" in the last argument.
 756     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 757                                         MachineMemOperand::Flags Flags,
 758                                         bool *Fast) const override;
 759
 760     /// Provide custom lowering hooks for some operations.
 761     ///
 762     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 763
 764     /// Places new result values for the node in Results (their number
 765     /// and types must exactly match those of the original return values of
 766     /// the node), or leaves Results empty, which indicates that the node is not
 767     /// to be custom lowered after all.
 768     void LowerOperationWrapper(SDNode *N,
 769                                SmallVectorImpl<SDValue> &Results,
 770                                SelectionDAG &DAG) const override;
 771
 772     /// Replace the results of node with an illegal result
 773     /// type with new values built out of custom code.
 774     ///
 775     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 776                             SelectionDAG &DAG) const override;
 777
 778     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 779
 780     // Return true if it is profitable to combine a BUILD_VECTOR with a
 781     // stride-pattern to a shuffle and a truncate.
 782     // Example of such a combine:
 783     // v4i32 build_vector((extract_elt V, 1),
 784     //                    (extract_elt V, 3),
 785     //                    (extract_elt V, 5),
 786     //                    (extract_elt V, 7))
 787     //  -->
 788     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 789     // v4i64)
 790     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 791         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 792
 793     /// Return true if the target has native support for
 794     /// the specified value type and it is 'desirable' to use the type for the
 795     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 796     /// instruction encodings are longer and some i16 instructions are slow.
 797     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 798
 799     /// Return true if the target has native support for the
 800     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 801     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 802     /// and some i16 instructions are slow.
 803     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 804
 805     /// Return 1 if we can compute the negated form of the specified expression
 806     /// for the same cost as the expression itself, or 2 if we can compute the
 807     /// negated form more cheaply than the expression itself. Else return 0.
 808     char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
 809                             bool ForCodeSize, unsigned Depth) const override;
 810
 811     /// If isNegatibleForFree returns true, return the newly negated expression.
 812     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 813                                  bool LegalOperations, bool ForCodeSize,
 814                                  unsigned Depth) const override;
 815
 816     MachineBasicBlock *
 817     EmitInstrWithCustomInserter(MachineInstr &MI,
 818                                 MachineBasicBlock *MBB) const override;
 819
 820     /// This method returns the name of a target specific DAG node.
 821     const char *getTargetNodeName(unsigned Opcode) const override;
 822
 823     /// Permit store merging after legalization only for non-vector types, since
 824     /// merging vector stores may conflict with x86-specific store splitting.
 825     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 826       return !MemVT.isVector();
 827     }
 828
 829     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 830                           const SelectionDAG &DAG) const override;
 831
         /// NOTE(review): the three queries below are defined out of line; judging
         /// by their names they report whether CTTZ / CTLZ are cheap enough to
         /// speculate and whether CTLZ is fast on this subtarget -- confirm against
         /// the definitions.
 832     bool isCheapToSpeculateCttz() const override;
 833
 834     bool isCheapToSpeculateCtlz() const override;
 835
 836     bool isCtlzFast() const override;
 837
 838     bool hasBitPreservingFPLogic(EVT VT) const override {
 839       // Every vector type qualifies; among the remaining types only f32 and f64
 840       // do.
           if (VT.isVector())
             return true;
           return VT == MVT::f32 || VT == MVT::f64;
         }
 841
 842     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 843       // A pair that mixes a float with an int is stored separately: this costs
 844       // one extra store but saves two bitwise instructions and one float-to-int
 845       // instruction, and it avoids the float->int domain switch for the input
 846       // value, which is potentially the more significant benefit.
 847       const bool FloatThenInt = LTy.isFloatingPoint() && HTy.isInteger();
 848       const bool IntThenFloat = LTy.isInteger() && HTy.isFloatingPoint();
 849
 850       // Everything else is declined. For an int-only pair the saving of two
 851       // bitwise instructions against one more store (and one more store buffer
 852       // entry) is too blurred, so such pairs are left out until a testcase
 853       // proves that they are a win.
 854       return FloatThenInt || IntThenFloat;
 855     }
 857
 858     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 859
         /// NOTE(review): hasAndNotCompare, hasAndNot and hasBitTest are defined out
         /// of line; judging by their names they report whether and-not (with and
         /// without a following compare) and bit-test operations are available for
         /// the given operands -- confirm against the definitions.
 860     bool hasAndNotCompare(SDValue Y) const override;
 861
 862     bool hasAndNot(SDValue Y) const override;
 863
 864     bool hasBitTest(SDValue X, SDValue Y) const override;
 865
         /// NOTE(review): the three hooks below are profitability queries for
         /// shift/mask combines; their exact contracts are set by the TargetLowering
         /// base declarations and are not visible here.
 866     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
 867         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
 868         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
 869         SelectionDAG &DAG) const override;
 870
 871     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
 872                                            CombineLevel Level) const override;
 873
 874     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 875
 876     bool
 877     shouldTransformSignedTruncationCheck(EVT XVT,
 878                                          unsigned KeptBits) const override {
 879       // There is no preference for vectors, so they are declined up front.
 880       if (XVT.isVector())
 881         return false;
 882
 883       // Only the scalar integer types i8, i16, i32 and i64 are accepted.
 884       auto IsAcceptedIntVT = [](EVT Ty) -> bool {
 885         return Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32 ||
 886                Ty == MVT::i64;
 887       };
 888
 889       // The kept width being a byte/word/dword is fine, that is what MOVS
 890       // supports. XVT will be larger than the KeptBits-wide type.
 891       const MVT KeptVT = MVT::getIntegerVT(KeptBits);
 892       if (!IsAcceptedIntVT(XVT))
             return false;
           return IsAcceptedIntVT(KeptVT);
         }
 893
 894     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 895
         /// NOTE(review): defined out of line; presumably reports whether an
         /// insert-element with a variable index into a vector of type VT should be
         /// turned into a splat-based sequence -- confirm against the definition.
 896     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 897
 898     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 899       // The conversion is requested for scalar integer types only.
 900       const bool IsScalarInt = VT.isScalarInteger();
           return IsScalarInt;
         }
 901
 902     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
         /// NOTE(review): presumably returns the type to use for a fast NumBits-wide
         /// equality compare, or an invalid type if there is none -- confirm against
         /// the definition.
 903     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 904
 905     /// Return the value type to use for ISD::SETCC.
 906     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 907                            EVT VT) const override;
 908
 909     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 910                                       TargetLoweringOpt &TLO) const override;
 911
 912     /// Determine which bits of Op are known to be either zero or one and return
 913     /// them in Known. NOTE(review): presumably only the vector elements selected
         /// by DemandedElts are considered -- confirm against the base declaration.
 914     void computeKnownBitsForTargetNode(const SDValue Op,
 915                                        KnownBits &Known,
 916                                        const APInt &DemandedElts,
 917                                        const SelectionDAG &DAG,
 918                                        unsigned Depth = 0) const override;
 919
 920     /// Determine the number of bits in the operation that are sign bits.
 921     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 922                                              const APInt &DemandedElts,
 923                                              const SelectionDAG &DAG,
 924                                              unsigned Depth) const override;
 925
         /// NOTE(review): the three SimplifyDemanded* hooks below are defined out of
         /// line; their contracts are set by the TargetLowering base declarations
         /// and are not visible here.
 926     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 927                                                  const APInt &DemandedElts,
 928                                                  APInt &KnownUndef,
 929                                                  APInt &KnownZero,
 930                                                  TargetLoweringOpt &TLO,
 931                                                  unsigned Depth) const override;
 932
 933     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 934                                            const APInt &DemandedBits,
 935                                            const APInt &DemandedElts,
 936                                            KnownBits &Known,
 937                                            TargetLoweringOpt &TLO,
 938                                            unsigned Depth) const override;
 939
 940     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
 941         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 942         SelectionDAG &DAG, unsigned Depth) const override;
 943
 944     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 945
 946     SDValue unwrapAddress(SDValue N) const override;
 947
         /// Not an override: this helper is specific to the X86 lowering class.
 948     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 949
 950     bool ExpandInlineAsm(CallInst *CI) const override;
 951
 952     ConstraintType getConstraintType(StringRef Constraint) const override;
 953
 954     /// Examine the constraint string and operand type and determine a weight
 955     /// value. The operand object must already have been set up with the operand
         /// type.
 956     ConstraintWeight
 957       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 958                                      const char *constraint) const override;
 959
 960     const char *LowerXConstraint(EVT ConstraintVT) const override;
 961
 962     /// Lower the specified operand into the Ops vector. If it is invalid, don't
 963     /// add anything to Ops. NOTE(review): a hasMemory flag (an 'm' constraint
 964     /// being present) was documented here but is not a parameter of this
         /// signature.
 965     void LowerAsmOperandForConstraint(SDValue Op,
 966                                       std::string &Constraint,
 967                                       std::vector<SDValue> &Ops,
 968                                       SelectionDAG &DAG) const override;
 969
 970     unsigned
 971     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 972       // The codes "i", "o", "v" and "X" map straight to their InlineAsm
 973       // constraint values; every other code is resolved by the base class.
 974       if (ConstraintCode == "i")
 975         return InlineAsm::Constraint_i;
 976       if (ConstraintCode == "o")
 977         return InlineAsm::Constraint_o;
 978       if (ConstraintCode == "v")
 979         return InlineAsm::Constraint_v;
 980       if (ConstraintCode == "X")
 981         return InlineAsm::Constraint_X;
           return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
         }
 982
 983     /// Handle lowering of flag assembly outputs.
 984     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
 985                                         const AsmOperandInfo &Constraint,
 986                                         SelectionDAG &DAG) const override;
 987
 988     /// Given a physical register constraint
 989     /// (e.g. {edx}), return the register number and the register class for the
 990     /// register.  This should only be used for C_Register constraints.  On
 991     /// error, this returns a register number of 0.
 992     std::pair<unsigned, const TargetRegisterClass *>
 993     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 994                                  StringRef Constraint, MVT VT) const override;
 995
 996     /// Return true if the addressing mode represented
 997     /// by AM is legal for this target, for a load/store of the specified type.
 998     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 999                                Type *Ty, unsigned AS,
1000                                Instruction *I = nullptr) const override;
1001
1002     /// Return true if the specified immediate is a legal
1003     /// icmp immediate, that is the target has icmp instructions which can
1004     /// compare a register against the immediate without having to materialize
1005     /// the immediate into a register.
1006     bool isLegalICmpImmediate(int64_t Imm) const override;
1007
1008     /// Return true if the specified immediate is a legal
1009     /// add immediate, that is the target has add instructions which can
1010     /// add a register and the immediate without having to materialize
1011     /// the immediate into a register.
1012     bool isLegalAddImmediate(int64_t Imm) const override;
1013
         /// NOTE(review): by analogy with the two queries above this presumably
         /// reports whether Imm can be stored without first being materialized into
         /// a register -- confirm against the definition.
1014     bool isLegalStoreImmediate(int64_t Imm) const override;
1015
1016     /// Return the cost of the scaling factor used in the addressing
1017     /// mode represented by AM for this target, for a load/store
1018     /// of the specified type.
1019     /// If the AM is supported, the return value must be >= 0.
1020     /// If the AM is not supported, it returns a negative value.
1021     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1022                              unsigned AS) const override;
1023
1024     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1025
1026     /// Add x86-specific opcodes to the default list.
1027     bool isBinOp(unsigned Opcode) const override;
1028
1029     /// Returns true if the opcode is a commutative binary operation.
1030     bool isCommutativeBinOp(unsigned Opcode) const override;
1031
1032     /// Return true if it's free to truncate a value of
1033     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1034     /// register EAX to i16 by referencing its sub-register AX.
1035     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1036     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1037
1038     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1039
1040     /// Return true if any actual instruction that defines a
1041     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1042     /// register. This does not necessarily include registers defined in
1043     /// unknown ways, such as incoming arguments, or copies from unknown
1044     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1045     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1046     /// all instructions that define 32-bit values implicitly zero-extend the
1047     /// result out to 64 bits.
1048     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1049     bool isZExtFree(EVT VT1, EVT VT2) const override;
1050     bool isZExtFree(SDValue Val, EVT VT2) const override;
1051
1052     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1053     /// extend node) is profitable. ExtVal is the unnamed SDValue parameter.
1054     bool isVectorLoadExtDesirable(SDValue) const override;
1055
1056     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1057     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1058     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1059     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1060
1061     /// Return true if it's profitable to narrow
1062     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1063     /// from i32 to i8 but not from i32 to i16.
1064     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1065
1066     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1067     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1068     /// true and stores the intrinsic information into the IntrinsicInfo that was
1069     /// passed to the function.
1070     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1071                             MachineFunction &MF,
1072                             unsigned Intrinsic) const override;
1073
1074     /// Returns true if the target can instruction select the
1075     /// specified FP immediate natively. If false, the legalizer will
1076     /// materialize the FP immediate as a load from a constant pool.
1077     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1078                       bool ForCodeSize) const override;
1079
1080     /// Targets can use this to indicate that they only support *some*
1081     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1082     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1083     /// be legal.
1084     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1085
1086     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1087     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1088     /// constant pool entry.
1089     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1090
1091     /// Returns true if lowering to a jump table is allowed.
1092     bool areJTsAllowed(const Function *Fn) const override;
1093
1094     /// Report whether instruction selection should seek to shrink an FP
1095     /// constant of the specified type to a smaller type in order to save space
1096     /// and / or reduce runtime.
1097     bool ShouldShrinkFPConstant(EVT VT) const override {
1098       // Long double constants are always worth shrinking since fldt is very
1099       // slow.
1100       if (VT == MVT::f80)
1101         return true;
1102       // Otherwise shrink only when SSE2 is not used for f64: with SSE2,
           // cvtss2sd is more expensive than a straight movsd, so the FP constant
           // pool entry is left alone.
           return !X86ScalarSSEf64;
         }
1103
1104     /// Return true if we believe it is correct and profitable to reduce the
1105     /// load node Load to the smaller type NewVT. NOTE(review): ExtTy presumably
         /// names the extension kind of the narrowed load -- confirm against the
         /// TargetLowering base declaration.
1106     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1107                                EVT NewVT) const override;
1108
1109     /// Report whether values of the scalar FP type VT are computed in an SSE
1110     /// register rather than on the X87 floating point stack.
1111     bool isScalarFPTypeInSSEReg(EVT VT) const {
1112       // f64 is in SSE registers when SSE2 is used for it.
1113       if (VT == MVT::f64)
1114         return X86ScalarSSEf64;
           // f32 is in SSE registers when SSE1 is used for it.
           if (VT == MVT::f32)
             return X86ScalarSSEf32;
           // No other type is reported as living in an SSE register.
           return false;
         }
1115
1116     /// Returns true if it is beneficial to convert a load of a constant
1117     /// to just the constant itself.
1118     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1119                                            Type *Ty) const override;
1120
         /// NOTE(review): the four hooks below are defined out of line; they are
         /// profitability queries whose contracts are set by the TargetLowering base
         /// declarations and are not visible here.
1121     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1122
1123     bool convertSelectOfConstantsToMath(EVT VT) const override;
1124
1125     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1126                                 SDValue C) const override;
1127
1128     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1129                                   bool IsSigned) const override;
1130
1131     /// Return true if EXTRACT_SUBVECTOR is cheap for the result type ResVT
1132     /// extracted from SrcVT at the given Index.
1133     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1134                                  unsigned Index) const override;
1135
1136     /// Scalar ops always have equal or better analysis/performance/power than
1137     /// the vector equivalent, so this always makes sense if the scalar op is
1138     /// supported.
1139     bool shouldScalarizeBinop(SDValue) const override;
1140
1141     /// Extracting a scalar FP value from index 0 of a vector is free.
1142     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1143       const EVT ScalarVT = VT.getScalarType();
1144       const bool IsF32OrF64 = ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
1145       return IsF32OrF64 && Index == 0;
         }
1146
1147     /// Overflow nodes should get combined/lowered to optimal instructions
1148     /// (they should allow eliminating explicit compares by getting flags from
1149     /// math ops). NOTE(review): which Opcode / VT combinations are accepted is
         /// decided by the out-of-line definition.
1150     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1151
1152     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1153                                       unsigned AddrSpace) const override {
1154       // Replacing three or more scalar stores still reduces the instruction
1155       // count once the added vector constant load is accounted for.
1156       constexpr unsigned MinStoresReplaced = 3;
1157       return NumElem >= MinStoresReplaced;
         }
1158
1159     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1160                                  const SelectionDAG &DAG,
1161                                  const MachineMemOperand &MMO) const override;
         ///< NOTE(review): defined out of line; presumably reports whether loading
         ///< as LoadVT and bitcasting to BitcastVT is worthwhile for the access
         ///< described by MMO -- confirm against the definition.
1162
1163     /// Intel processors have a unified instruction and data cache, so no
1164     /// clear-cache builtin is named here.
1165     const char *getClearCacheBuiltinName() const override {
1166       // Nothing to do, move along: a null name is reported.
           return nullptr;
         }
1167
1168     Register getRegisterByName(const char* RegName, EVT VT,
1169                                const MachineFunction &MF) const override;
1170
1171     /// If the exception address is received in a physical register, this
1172     /// returns that register; it is the one written on entry to an EH pad.
1173     unsigned
1174     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1175
1176     /// If the exception typeid is received in a physical register, this
1177     /// returns that register; it is the one written on entry to a landing pad.
1178     unsigned
1179     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1180
1181     bool needsFixedCatchObjects() const override;
         ///< NOTE(review): defined out of line; presumably reports whether catch
         ///< objects must be placed in fixed stack slots -- confirm against the
         ///< definition.
1182
1183     /// This method returns a target specific FastISel object,
1184     /// or null if the target does not support "fast" ISel.
1185     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1186                              const TargetLibraryInfo *libInfo) const override;
1187
1188     /// If the target has a standard location for the stack protector cookie,
1189     /// returns the address of that location. Otherwise, returns nullptr.
1190     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1191
         // Stack protector hooks. NOTE(review): all defined out of line; their
         // contracts are set by the TargetLowering base declarations.
1192     bool useLoadStackGuardNode() const override;
1193     bool useStackGuardXorFP() const override;
1194     void insertSSPDeclarations(Module &M) const override;
1195     Value *getSDagStackGuard(const Module &M) const override;
1196     Function *getSSPStackGuardCheck(const Module &M) const override;
1197     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1198                                 const SDLoc &DL) const override;
1199
1200
1201     /// Return true if the target stores SafeStack pointer at a fixed offset in
1202     /// some non-standard address space, and populates the address space and
1203     /// offset as appropriate.
         /// NOTE(review): the wording above says "return true", but the declared
         /// return type is Value *; presumably the location of the SafeStack pointer
         /// is returned -- confirm against the TargetLowering base declaration.
1204     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1205
         /// Not an override: this helper is specific to the X86 lowering class.
1206     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1207                       SelectionDAG &DAG) const;
1208
1209     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1210
1211     /// Customize the preferred legalization strategy for certain types.
1212     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1213
         // Calling-convention dependent register assignment queries.
1214     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1215                                       EVT VT) const override;
1216
1217     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1218                                            CallingConv::ID CC,
1219                                            EVT VT) const override;
1220
1221     unsigned getVectorTypeBreakdownForCallingConv(
1222         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1223         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1224
1225     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1226
1227     bool supportSwiftError() const override;
1228
1229     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1230
         /// Not an override: this query is specific to the X86 lowering class.
1231     unsigned getStackProbeSize(MachineFunction &MF) const;
1232
1233     bool hasVectorBlend() const override {
           // Reported unconditionally.
           return true;
         }
1234
1235     unsigned getMaxSupportedInterleaveFactor() const override {
           // The largest interleave factor reported as supported is 4.
           constexpr unsigned MaxFactor = 4;
           return MaxFactor;
         }
1236
1237     /// Lower interleaved load(s) into target-specific
1238     /// instructions/intrinsics.
1239     bool lowerInterleavedLoad(LoadInst *LI,
1240                               ArrayRef<ShuffleVectorInst *> Shuffles,
1241                               ArrayRef<unsigned> Indices,
1242                               unsigned Factor) const override;
1243
1244     /// Lower interleaved store(s) into target-specific
1245     /// instructions/intrinsics.
1246     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1247                                unsigned Factor) const override;
1248
         /// NOTE(review): defined out of line; presumably builds the DAG for an
         /// indirect jump-table branch to Addr -- confirm against the definition.
1249     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1250                                    SDValue Addr, SelectionDAG &DAG)
1251                                    const override;
1252
1253   protected:
         /// Returns a (register class, uint8_t) pair for the type VT.
         /// NOTE(review): presumably the representative register class and its
         /// cost, as defined by the TargetLowering base declaration -- confirm.
1254     std::pair<const TargetRegisterClass *, uint8_t>
1255     findRepresentativeClass(const TargetRegisterInfo *TRI,
1256                             MVT VT) const override;
1257
1258   private:
1259     /// Keep a reference to the X86Subtarget around so that we can
1260     /// make the right decision when generating code for different targets.
1261     const X86Subtarget &Subtarget;
1262
1263     /// Select between SSE or x87 floating point ops.
1264     /// When SSE is available, use it for f32 operations (X86ScalarSSEf32).
1265     /// When SSE2 is available, use it for f64 operations (X86ScalarSSEf64).
1266     bool X86ScalarSSEf32;
1267     bool X86ScalarSSEf64;
1268
1269     /// A list of legal FP immediates; addLegalFPImmediate appends to it.
1270     std::vector<APFloat> LegalFPImmediates;
1271
1272     /// Record Imm as an FP immediate that this x86 target can instruction
1273     /// select natively.
1274     void addLegalFPImmediate(const APFloat& Imm) {
1275       // A copy of Imm is appended to the list of legal FP immediates.
1276       LegalFPImmediates.emplace_back(Imm);
         }
1277
1278     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1279                             CallingConv::ID CallConv, bool isVarArg,
1280                             const SmallVectorImpl<ISD::InputArg> &Ins,
1281                             const SDLoc &dl, SelectionDAG &DAG,
1282                             SmallVectorImpl<SDValue> &InVals,
1283                             uint32_t *RegMask) const;
         // NOTE(review): judging by the names, LowerMemArgument handles an incoming
         // argument that lives in memory and LowerMemOpCallTo an outgoing one
         // relative to StackPtr -- confirm against the definitions.
1284     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1285                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1286                              const SDLoc &dl, SelectionDAG &DAG,
1287                              const CCValAssign &VA, MachineFrameInfo &MFI,
1288                              unsigned i) const;
1289     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1290                              const SDLoc &dl, SelectionDAG &DAG,
1291                              const CCValAssign &VA,
1292                              ISD::ArgFlagsTy Flags) const;
1293
1294     // Call lowering helpers.
1295
1296     /// Check whether the call is eligible for tail-call optimization. Targets
1297     /// that want to do tail-call optimization should implement this function.
1298     bool IsEligibleForTailCallOptimization(SDValue Callee,
1299                                            CallingConv::ID CalleeCC,
1300                                            bool isVarArg,
1301                                            bool isCalleeStructRet,
1302                                            bool isCallerStructRet,
1303                                            Type *RetTy,
1304                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1305                                     const SmallVectorImpl<SDValue> &OutVals,
1306                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1307                                            SelectionDAG& DAG) const;
1308     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1309                                     SDValue Chain, bool IsTailCall,
1310                                     bool Is64Bit, int FPDiff,
1311                                     const SDLoc &dl) const;
1312
1313     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1314                                          SelectionDAG &DAG) const;
1315
1316     unsigned getAddressSpace() const;
         ///< NOTE(review): defined out of line; which address space is reported
         ///< cannot be told from this declaration -- see the definition.
1317
1318     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1319
         // Per-operation lowering helpers. Each one takes the node to lower (Op)
         // and the DAG and returns an SDValue; all are defined out of line.
1320     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1323     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1324
1325     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1326                                   const unsigned char OpFlags = 0) const;
1327     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1328     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1329     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1330     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1331     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1332
1333     /// Creates target global address or external symbol nodes for calls
1334     /// (ForCall is true) or other uses.
1335     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1336                                   bool ForCall) const;
1337
1338     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1339     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1340     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1341     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1342     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1343     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1344     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1345     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1346     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1347     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1348     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1349     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1350     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1351     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1352     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1353     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1354     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1355     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1356     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1357     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1358     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1359     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1360     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1361     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1362     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1363     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1364     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1365     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1366     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1367
         // NOTE(review): presumably lowers Op to the runtime library call named by
         // Call -- confirm against the definition.
1368     SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1369                           RTLIB::Libcall Call) const;
1370
1371     SDValue
1372     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1373                          const SmallVectorImpl<ISD::InputArg> &Ins,
1374                          const SDLoc &dl, SelectionDAG &DAG,
1375                          SmallVectorImpl<SDValue> &InVals) const override;
         // LowerFormalArguments above and LowerCall / LowerReturn below override
         // the base-class calling-convention lowering hooks; all three are defined
         // out of line.
1376     SDValue LowerCall(CallLoweringInfo &CLI,
1377                       SmallVectorImpl<SDValue> &InVals) const override;
1378
1379     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1380                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1381                         const SmallVectorImpl<SDValue> &OutVals,
1382                         const SDLoc &dl, SelectionDAG &DAG) const override;
1383
1384     bool supportSplitCSR(MachineFunction *MF) const override {
1385       // Both conditions must hold: the function uses the CXX_FAST_TLS calling
1386       // convention and carries the NoUnwind attribute.
1387       const auto &Fn = MF->getFunction();
           if (Fn.getCallingConv() != CallingConv::CXX_FAST_TLS)
             return false;
           return Fn.hasFnAttribute(Attribute::NoUnwind);
         }
1388     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1389     void insertCopiesSplitCSR(
1390       MachineBasicBlock *Entry,
1391       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1392
1393     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1394
1395     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1396
1397     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1398                             ISD::NodeType ExtendKind) const override;
1399
1400     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1401                         bool isVarArg,
1402                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1403                         LLVMContext &Context) const override;
1404
1405     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1406
         // Atomic expansion hooks. NOTE(review): the LoadInst parameter of
         // shouldExpandAtomicLoadInIR is named SI although it is a load.
1407     TargetLoweringBase::AtomicExpansionKind
1408     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1409     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1410     TargetLoweringBase::AtomicExpansionKind
1411     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1412
1413     LoadInst *
1414     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1415
1416     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1417     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1418
         // Not an override. NOTE(review): presumably reports whether an access of
         // MemType needs a CMPXCHG8B/CMPXCHG16B style instruction -- confirm
         // against the definition.
1419     bool needsCmpXchgNb(Type *MemType) const;
1420
1421     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1422                                 MachineBasicBlock *DispatchBB, int FI) const;
1423
1424     /// Utility function to emit the low-level va_arg code for X86-64.
1425     MachineBasicBlock *
1426     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1427                                   MachineBasicBlock *MBB) const;
1428
1429     /// Utility function to emit the xmm reg save portion of va_start.
1430     MachineBasicBlock *
1431     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1432                                              MachineBasicBlock *BB) const;
1433
         // The helpers below each take the instruction(s) to expand and a basic
         // block and return a MachineBasicBlock pointer. NOTE(review): presumably
         // the block in which emission continues, as with
         // EmitInstrWithCustomInserter -- confirm against the definitions.
1434     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1435                                                  MachineInstr &MI2,
1436                                                  MachineBasicBlock *BB) const;
1437
1438     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1439                                          MachineBasicBlock *BB) const;
1440
1441     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1442                                            MachineBasicBlock *BB) const;
1443
1444     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1445                                            MachineBasicBlock *BB) const;
1446
1447     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1448                                            MachineBasicBlock *BB) const;
1449
1450     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1451                                             MachineBasicBlock *BB) const;
1452
1453     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1454                                           MachineBasicBlock *BB) const;
1455
1456     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1457                                           MachineBasicBlock *BB) const;
1458
1459     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1460                                             MachineBasicBlock *BB) const;
1461
1462     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1463                                         MachineBasicBlock *MBB) const;
1464
         // Unlike its neighbours this helper returns nothing.
1465     void emitSetJmpShadowStackFix(MachineInstr &MI,
1466                                   MachineBasicBlock *MBB) const;
1467
1468     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1469                                          MachineBasicBlock *MBB) const;
1470
1471     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1472                                                  MachineBasicBlock *MBB) const;
1473
1474     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1475                                      MachineBasicBlock *MBB) const;
1476
1477     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1478                                              MachineBasicBlock *MBB) const;
1479
1480     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1481     /// equivalent, for use with the given x86 condition code.
         /// The condition code is passed as a plain unsigned (\p X86CC); the result
         /// is returned as an SDValue built in \p DAG at location \p dl.
1482     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1483                     SelectionDAG &DAG) const;
1484
1485     /// Convert a comparison if required by the subtarget.
         /// Takes the comparison \p Cmp and returns an SDValue.
         /// NOTE(review): presumably \p Cmp is returned unchanged when no conversion
         /// is needed -- confirm against the definition.
1486     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1487
1488     /// Emit flags for the given setcc condition and operands. Also returns the
1489     /// corresponding X86 condition code constant in X86CC.
         /// \p X86CC is an output parameter (a non-const SDValue reference), unlike
         /// the unsigned \p X86CC input of EmitCmp.
1490     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1491                               ISD::CondCode CC, const SDLoc &dl,
1492                               SelectionDAG &DAG,
1493                               SDValue &X86CC) const;
1494
1495     /// Check if replacement of SQRT with RSQRT should be disabled.
         /// Virtual override; returns a bool for the given \p Operand.
1496     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1497
1498     /// Use rsqrt* to speed up sqrt calculations.
         /// Virtual override. \p RefinementSteps and \p UseOneConstNR are non-const
         /// references, so the implementation may write to them.
         /// NOTE(review): the meaning of \p Enabled and of an empty result is
         /// defined by the base-class hook, which is not visible here.
1499     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1500                             int &RefinementSteps, bool &UseOneConstNR,
1501                             bool Reciprocal) const override;
1502
1503     /// Use rcp* to speed up fdiv calculations.
         /// Virtual override. \p RefinementSteps is a non-const reference, so the
         /// implementation may write to it.
1504     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1505                              int &RefinementSteps) const override;
1506
1507     /// Reassociate floating point divisions into multiply by reciprocal.
         /// Virtual override taking no arguments and returning an unsigned.
         /// NOTE(review): presumably the minimum number of divisions sharing a
         /// divisor that makes the transform worthwhile -- confirm.
1508     unsigned combineRepeatedFPDivisors() const override;
1509
         /// Virtual override. Returns an SDValue for node \p N and the constant
         /// \p Divisor; \p Created is a caller-supplied list of SDNode pointers.
         /// NOTE(review): by its name this builds a signed division by a power of
         /// two, with \p Created collecting the nodes built along the way --
         /// confirm against the base-class hook.
1510     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1511                           SmallVectorImpl<SDNode *> &Created) const override;
1512   };
1513
1514   namespace X86 {
         /// Declaration only; the definition is not in this header chunk.
         /// Takes the per-function lowering state \p funcInfo and a library-info
         /// pointer \p libInfo, and returns a FastISel pointer.
         /// NOTE(review): presumably a factory for the X86 FastISel implementation
         /// whose result the caller owns -- confirm ownership at the call site.
1515     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1516                              const TargetLibraryInfo *libInfo);
1517   } // end namespace X86
1518
1519   // Base class for all X86 non-masked store operations.
1520   class X86StoreSDNode : public MemSDNode {
1521   public:
1522     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1523                    SDVTList VTs, EVT MemVT,
1524                    MachineMemOperand *MMO)
1525       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1526     const SDValue &getValue() const { return getOperand(1); }
1527     const SDValue &getBasePtr() const { return getOperand(2); }
1528
1529     static bool classof(const SDNode *N) {
1530       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1531         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1532     }
1533   };
1534
1535   // Base class for all X86 masked store operations.
1536   // The class has the same order of operands as MaskedStoreSDNode for
1537   // convenience.
1538   class X86MaskedStoreSDNode : public MemSDNode {
1539   public:
1540     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1541                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1542                          MachineMemOperand *MMO)
1543       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1544
1545     const SDValue &getValue()   const { return getOperand(1); }
1546     const SDValue &getBasePtr() const { return getOperand(2); }
1547     const SDValue &getMask()    const { return getOperand(3); }
1548
1549     static bool classof(const SDNode *N) {
1550       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1551         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1552     }
1553   };
1554
1555   // X86 Truncating Store with Signed saturation.
1556   class TruncSStoreSDNode : public X86StoreSDNode {
1557   public:
1558     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1559                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1560       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1561
1562     static bool classof(const SDNode *N) {
1563       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1564     }
1565   };
1566
1567   // X86 Truncating Store with Unsigned saturation.
1568   class TruncUSStoreSDNode : public X86StoreSDNode {
1569   public:
1570     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1571                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1572       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1573
1574     static bool classof(const SDNode *N) {
1575       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1576     }
1577   };
1578
1579   // X86 Truncating Masked Store with Signed saturation.
1580   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1581   public:
1582     MaskedTruncSStoreSDNode(unsigned Order,
1583                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1584                          MachineMemOperand *MMO)
1585       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1586
1587     static bool classof(const SDNode *N) {
1588       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1589     }
1590   };
1591
1592   // X86 Truncating Masked Store with Unsigned saturation.
1593   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1594   public:
1595     MaskedTruncUSStoreSDNode(unsigned Order,
1596                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1597                             MachineMemOperand *MMO)
1598       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1599
1600     static bool classof(const SDNode *N) {
1601       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1602     }
1603   };
1604
1605   // X86 specific Gather/Scatter nodes.
1606   // The class has the same order of operands as MaskedGatherScatterSDNode for
1607   // convenience.
1608   class X86MaskedGatherScatterSDNode : public MemSDNode {
1609   public:
1610     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1611                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1612                                  MachineMemOperand *MMO)
1613         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1614
1615     const SDValue &getBasePtr() const { return getOperand(3); }
1616     const SDValue &getIndex()   const { return getOperand(4); }
1617     const SDValue &getMask()    const { return getOperand(2); }
1618     const SDValue &getScale()   const { return getOperand(5); }
1619
1620     static bool classof(const SDNode *N) {
1621       return N->getOpcode() == X86ISD::MGATHER ||
1622              N->getOpcode() == X86ISD::MSCATTER;
1623     }
1624   };
1625
1626   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1627   public:
1628     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1629                           EVT MemVT, MachineMemOperand *MMO)
1630         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1631                                        MMO) {}
1632
1633     const SDValue &getPassThru() const { return getOperand(1); }
1634
1635     static bool classof(const SDNode *N) {
1636       return N->getOpcode() == X86ISD::MGATHER;
1637     }
1638   };
1639
1640   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1641   public:
1642     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1643                            EVT MemVT, MachineMemOperand *MMO)
1644         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1645                                        MMO) {}
1646
1647     const SDValue &getValue() const { return getOperand(1); }
1648
1649     static bool classof(const SDNode *N) {
1650       return N->getOpcode() == X86ISD::MSCATTER;
1651     }
1652   };
1653
1654   /// Generate unpacklo/unpackhi shuffle mask.
1655   template <typename T = int>
1656   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1657                                bool Unary) {
1658     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1659     int NumElts = VT.getVectorNumElements();
1660     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1661     for (int i = 0; i < NumElts; ++i) {
1662       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1663       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1664       Pos += (Unary ? 0 : NumElts * (i % 2));
1665       Pos += (Lo ? 0 : NumEltsInLane / 2);
1666       Mask.push_back(Pos);
1667     }
1668   }
1669
1670   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1671   /// mask index with the scaled sequential indices for an equivalent narrowed
1672   /// mask. This is the reverse process to canWidenShuffleElements, but can
1673   /// always succeed.
1674   template <typename T>
1675   void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
1676                         SmallVectorImpl<T> &ScaledMask) {
1677     assert(0 < Scale && "Unexpected scaling factor");
1678     size_t NumElts = Mask.size();
1679     ScaledMask.assign(NumElts * Scale, -1);
1680
1681     for (size_t i = 0; i != NumElts; ++i) {
1682       int M = Mask[i];
1683
1684       // Repeat sentinel values in every mask element.
1685       if (M < 0) {
1686         for (size_t s = 0; s != Scale; ++s)
1687           ScaledMask[(Scale * i) + s] = M;
1688         continue;
1689       }
1690
1691       // Scale mask element and increment across each mask element.
1692       for (size_t s = 0; s != Scale; ++s)
1693         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1694     }
1695   }
1696 } // end namespace llvm
1697
1698 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H