[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / X86 / X86ISelLowering.h
1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
21 namespace llvm {
22 class X86Subtarget;
23 class X86TargetMachine;
25 namespace X86ISD {
26 // X86 Specific DAG Nodes
27 enum NodeType : unsigned {
28 // Start the numbering where the builtin ops leave off.
29 FIRST_NUMBER = ISD::BUILTIN_OP_END,
31 /// Bit scan forward.
32 BSF,
33 /// Bit scan reverse.
34 BSR,
36 /// Double shift instructions. These correspond to
37 /// X86::SHLDxx and X86::SHRDxx instructions.
38 SHLD,
39 SHRD,
41 /// Bitwise logical AND of floating point values. This corresponds
42 /// to X86::ANDPS or X86::ANDPD.
43 FAND,
45 /// Bitwise logical OR of floating point values. This corresponds
46 /// to X86::ORPS or X86::ORPD.
47 FOR,
49 /// Bitwise logical XOR of floating point values. This corresponds
50 /// to X86::XORPS or X86::XORPD.
51 FXOR,
53 /// Bitwise logical ANDNOT of floating point values. This
54 /// corresponds to X86::ANDNPS or X86::ANDNPD.
55 FANDN,
57 /// These operations represent an abstract X86 call
58 /// instruction, which includes a bunch of information. In particular the
59 /// operands of these nodes are:
60 ///
61 /// #0 - The incoming token chain
62 /// #1 - The callee
63 /// #2 - The number of arg bytes the caller pushes on the stack.
64 /// #3 - The number of arg bytes the callee pops off the stack.
65 /// #4 - The value to pass in AL/AX/EAX (optional)
66 /// #5 - The value to pass in DL/DX/EDX (optional)
67 ///
68 /// The result values of these nodes are:
69 ///
70 /// #0 - The outgoing token chain
71 /// #1 - The first register result value (optional)
72 /// #2 - The second register result value (optional)
73 ///
74 CALL,
76 /// Same as call except it adds the NoTrack prefix.
77 NT_CALL,
79 /// X86 compare and logical compare instructions.
80 CMP, COMI, UCOMI,
82 /// X86 bit-test instructions.
83 BT,
85 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
86 /// operand, usually produced by a CMP instruction.
87 SETCC,
89 /// X86 Select
90 SELECTS,
92 // Same as SETCC except it's materialized with a sbb and the value is all
93 // ones or all zeros.
94 SETCC_CARRY, // R = carry_bit ? ~0 : 0
96 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
97 /// Operands are two FP values to compare; result is a mask of
98 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
99 FSETCC,
101 /// X86 FP SETCC, similar to above, but with output as an i1 mask,
102 /// and a version with SAE.
103 FSETCCM, FSETCCM_SAE,
105 /// X86 conditional moves. Operand 0 and operand 1 are the two values
106 /// to select from. Operand 2 is the condition code, and operand 3 is the
107 /// flag operand produced by a CMP or TEST instruction.
108 CMOV,
110 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
111 /// is the block to branch if condition is true, operand 2 is the
112 /// condition code, and operand 3 is the flag operand produced by a CMP
113 /// or TEST instruction.
114 BRCOND,
116 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
117 /// operand 1 is the target address.
118 NT_BRIND,
120 /// Return with a flag operand. Operand 0 is the chain operand, operand
121 /// 1 is the number of bytes of stack to pop.
122 RET_FLAG,
124 /// Return from interrupt. Operand 0 is the number of bytes to pop.
125 IRET,
127 /// Repeat fill, corresponds to X86::REP_STOSx.
128 REP_STOS,
130 /// Repeat move, corresponds to X86::REP_MOVSx.
131 REP_MOVS,
133 /// On Darwin, this node represents the result of the popl
134 /// at function entry, used for PIC code.
135 GlobalBaseReg,
137 /// A wrapper node for TargetConstantPool, TargetJumpTable,
138 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
139 /// MCSymbol and TargetBlockAddress.
140 Wrapper,
142 /// Special wrapper used under X86-64 PIC mode for RIP
143 /// relative displacements.
144 WrapperRIP,
146 /// Copies a 64-bit value from an MMX vector to the low word
147 /// of an XMM vector, with the high word zero filled.
148 MOVQ2DQ,
150 /// Copies a 64-bit value from the low word of an XMM vector
151 /// to an MMX vector.
152 MOVDQ2Q,
154 /// Copies a 32-bit value from the low word of an MMX
155 /// vector to a GPR.
156 MMX_MOVD2W,
158 /// Copies a GPR into the low 32-bit word of an MMX vector
159 /// and zeroes out the high word.
160 MMX_MOVW2D,
162 /// Extract an 8-bit value from a vector and zero extend it to
163 /// i32, corresponds to X86::PEXTRB.
164 PEXTRB,
166 /// Extract a 16-bit value from a vector and zero extend it to
167 /// i32, corresponds to X86::PEXTRW.
168 PEXTRW,
170 /// Insert any element of a 4 x float vector into any element
171 /// of a destination 4 x float vector.
172 INSERTPS,
174 /// Insert the lower 8-bits of a 32-bit value to a vector,
175 /// corresponds to X86::PINSRB.
176 PINSRB,
178 /// Insert the lower 16-bits of a 32-bit value to a vector,
179 /// corresponds to X86::PINSRW.
180 PINSRW,
182 /// Shuffle 16 8-bit values within a vector.
183 PSHUFB,
185 /// Compute Sum of Absolute Differences.
186 PSADBW,
187 /// Compute Double Block Packed Sum-Absolute-Differences
188 DBPSADBW,
190 /// Bitwise Logical AND NOT of Packed FP values.
191 ANDNP,
193 /// Blend where the selector is an immediate.
194 BLENDI,
196 /// Dynamic (non-constant condition) vector blend where only the sign bits
197 /// of the condition elements are used. This is used to enforce that the
198 /// condition mask is not valid for generic VSELECT optimizations. This
199 /// is also used to implement the intrinsics.
200 /// Operands are in VSELECT order: MASK, TRUE, FALSE
201 BLENDV,
203 /// Combined add and sub on an FP vector.
204 ADDSUB,
206 // FP vector ops with rounding mode.
207 FADD_RND, FADDS, FADDS_RND,
208 FSUB_RND, FSUBS, FSUBS_RND,
209 FMUL_RND, FMULS, FMULS_RND,
210 FDIV_RND, FDIVS, FDIVS_RND,
211 FMAX_SAE, FMAXS_SAE,
212 FMIN_SAE, FMINS_SAE,
213 FSQRT_RND, FSQRTS, FSQRTS_RND,
215 // FP vector get exponent.
216 FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
217 // Extract Normalized Mantissas.
218 VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
219 // FP Scale.
220 SCALEF, SCALEF_RND,
221 SCALEFS, SCALEFS_RND,
223 // Unsigned Integer average.
224 AVG,
226 /// Integer horizontal add/sub.
227 HADD,
228 HSUB,
230 /// Floating point horizontal add/sub.
231 FHADD,
232 FHSUB,
234 // Detect Conflicts Within a Vector
235 CONFLICT,
237 /// Floating point max and min.
238 FMAX, FMIN,
240 /// Commutative FMIN and FMAX.
241 FMAXC, FMINC,
243 /// Scalar intrinsic floating point max and min.
244 FMAXS, FMINS,
246 /// Floating point reciprocal-sqrt and reciprocal approximation.
247 /// Note that these typically require refinement
248 /// in order to obtain suitable precision.
249 FRSQRT, FRCP,
251 // AVX-512 reciprocal approximations with a little more precision.
252 RSQRT14, RSQRT14S, RCP14, RCP14S,
254 // Thread Local Storage.
255 TLSADDR,
257 // Thread Local Storage. A call to get the start address
258 // of the TLS block for the current module.
259 TLSBASEADDR,
261 // Thread Local Storage. A call to an OS-provided thunk at the
262 // address obtained from an earlier relocation.
263 TLSCALL,
265 // Exception Handling helpers.
266 EH_RETURN,
268 // SjLj exception handling setjmp.
269 EH_SJLJ_SETJMP,
271 // SjLj exception handling longjmp.
272 EH_SJLJ_LONGJMP,
274 // SjLj exception handling dispatch.
275 EH_SJLJ_SETUP_DISPATCH,
277 /// Tail call return. See X86TargetLowering::LowerCall for
278 /// the list of operands.
279 TC_RETURN,
281 // Vector move to low scalar and zero higher vector elements.
282 VZEXT_MOVL,
284 // Vector integer truncate.
285 VTRUNC,
286 // Vector integer truncate with unsigned/signed saturation.
287 VTRUNCUS, VTRUNCS,
289 // Masked version of the above. Used when less than a 128-bit result is
290 // produced since the mask only applies to the lower elements and can't
291 // be represented by a select.
292 // SRC, PASSTHRU, MASK
293 VMTRUNC, VMTRUNCUS, VMTRUNCS,
295 // Vector FP extend.
296 VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
298 // Vector FP round.
299 VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
301 // Masked version of above. Used for v2f64->v4f32.
302 // SRC, PASSTHRU, MASK
303 VMFPROUND,
305 // 128-bit vector logical left / right shift
306 VSHLDQ, VSRLDQ,
308 // Vector shift elements
309 VSHL, VSRL, VSRA,
311 // Vector variable shift
312 VSHLV, VSRLV, VSRAV,
314 // Vector shift elements by immediate
315 VSHLI, VSRLI, VSRAI,
317 // Shifts of mask registers.
318 KSHIFTL, KSHIFTR,
320 // Bit rotate by immediate
321 VROTLI, VROTRI,
323 // Vector packed double/float comparison.
324 CMPP,
326 // Vector integer comparisons.
327 PCMPEQ, PCMPGT,
329 // v8i16 Horizontal minimum and position.
330 PHMINPOS,
332 MULTISHIFT,
334 /// Vector comparison generating mask bits for fp and
335 /// integer signed and unsigned data types.
336 CMPM,
337 // Vector comparison with SAE for FP values
338 CMPM_SAE,
340 // Arithmetic operations with FLAGS results.
341 ADD, SUB, ADC, SBB, SMUL, UMUL,
342 OR, XOR, AND,
344 // Bit field extract.
345 BEXTR,
347 // Zero High Bits Starting with Specified Bit Position.
348 BZHI,
350 // X86-specific multiply by immediate.
351 MUL_IMM,
353 // Vector sign bit extraction.
354 MOVMSK,
356 // Vector bitwise comparisons.
357 PTEST,
359 // Vector packed fp sign bitwise comparisons.
360 TESTP,
362 // OR/AND test for masks.
363 KORTEST,
364 KTEST,
366 // ADD for masks.
367 KADD,
369 // Several flavors of instructions with vector shuffle behaviors.
370 // Saturated signed/unsigned packing.
371 PACKSS,
372 PACKUS,
373 // Intra-lane alignr.
374 PALIGNR,
375 // AVX512 inter-lane alignr.
376 VALIGN,
377 PSHUFD,
378 PSHUFHW,
379 PSHUFLW,
380 SHUFP,
381 // VBMI2 Concat & Shift.
382 VSHLD,
383 VSHRD,
384 VSHLDV,
385 VSHRDV,
386 // Shuffle Packed Values at 128-bit granularity.
387 SHUF128,
388 MOVDDUP,
389 MOVSHDUP,
390 MOVSLDUP,
391 MOVLHPS,
392 MOVHLPS,
393 MOVSD,
394 MOVSS,
395 UNPCKL,
396 UNPCKH,
397 VPERMILPV,
398 VPERMILPI,
399 VPERMI,
400 VPERM2X128,
402 // Variable Permute (VPERM).
403 // Res = VPERMV MaskV, V0
404 VPERMV,
406 // 3-op Variable Permute (VPERMT2).
407 // Res = VPERMV3 V0, MaskV, V1
408 VPERMV3,
410 // Bitwise ternary logic.
411 VPTERNLOG,
412 // Fix Up Special Packed Float32/64 values.
413 VFIXUPIMM, VFIXUPIMM_SAE,
414 VFIXUPIMMS, VFIXUPIMMS_SAE,
415 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
416 VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
417 // Reduce - Perform Reduction Transformation on scalar/packed FP.
418 VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
419 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
420 // Also used by the legacy (V)ROUND intrinsics where we mask out the
421 // scaling part of the immediate.
422 VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
423 // Tests the type of packed FP values.
424 VFPCLASS,
425 // Tests the type of a scalar FP value.
426 VFPCLASSS,
428 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
429 // a vector, this node may change the vector length as part of the splat.
430 VBROADCAST,
431 // Broadcast mask to vector.
432 VBROADCASTM,
433 // Broadcast subvector to vector.
434 SUBV_BROADCAST,
436 /// SSE4A Extraction and Insertion.
437 EXTRQI, INSERTQI,
439 // XOP arithmetic/logical shifts.
440 VPSHA, VPSHL,
441 // XOP signed/unsigned integer comparisons.
442 VPCOM, VPCOMU,
443 // XOP packed permute bytes.
444 VPPERM,
445 // XOP two source permutation.
446 VPERMIL2,
448 // Vector multiply packed unsigned doubleword integers.
449 PMULUDQ,
450 // Vector multiply packed signed doubleword integers.
451 PMULDQ,
452 // Vector Multiply Packed Signed Integers with Round and Scale (PMULHRSW).
453 MULHRS,
455 // Multiply and Add Packed Integers.
456 VPMADDUBSW, VPMADDWD,
458 // AVX512IFMA multiply and add.
459 // NOTE: These are different than the instruction and perform
460 // op0 x op1 + op2.
461 VPMADD52L, VPMADD52H,
463 // VNNI
464 VPDPBUSD,
465 VPDPBUSDS,
466 VPDPWSSD,
467 VPDPWSSDS,
469 // FMA nodes.
470 // We use the target independent ISD::FMA for the non-inverted case.
471 FNMADD,
472 FMSUB,
473 FNMSUB,
474 FMADDSUB,
475 FMSUBADD,
477 // FMA with rounding mode.
478 FMADD_RND,
479 FNMADD_RND,
480 FMSUB_RND,
481 FNMSUB_RND,
482 FMADDSUB_RND,
483 FMSUBADD_RND,
485 // Compress and expand.
486 COMPRESS,
487 EXPAND,
489 // Bits shuffle
490 VPSHUFBITQMB,
492 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
493 SINT_TO_FP_RND, UINT_TO_FP_RND,
494 SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
495 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
497 // Vector float/double to signed/unsigned integer.
498 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
499 // Scalar float/double to signed/unsigned integer.
500 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
502 // Vector float/double to signed/unsigned integer with truncation.
503 CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
504 // Scalar float/double to signed/unsigned integer with truncation.
505 CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
507 // Vector signed/unsigned integer to float/double.
508 CVTSI2P, CVTUI2P,
510 // Masked versions of above. Used for v2f64->v4f32.
511 // SRC, PASSTHRU, MASK
512 MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
513 MCVTSI2P, MCVTUI2P,
515 // Vector float to bfloat16.
516 // Convert TWO packed single data to one packed BF16 data
517 CVTNE2PS2BF16,
518 // Convert packed single data to packed BF16 data
519 CVTNEPS2BF16,
520 // Masked version of above.
521 // SRC, PASSTHRU, MASK
522 MCVTNEPS2BF16,
524 // Dot product of BF16 pairs accumulated into
525 // packed single precision.
526 DPBF16PS,
528 // Save xmm argument registers to the stack, according to %al. An operator
529 // is needed so that this can be expanded with control flow.
530 VASTART_SAVE_XMM_REGS,
532 // Windows's _chkstk call to do stack probing.
533 WIN_ALLOCA,
535 // For allocating variable amounts of stack space when using
536 // segmented stacks. Check if the current stacklet has enough space, and
537 // falls back to heap allocation if not.
538 SEG_ALLOCA,
540 // Memory barriers.
541 MEMBARRIER,
542 MFENCE,
544 // Store FP status word into i16 register.
545 FNSTSW16r,
547 // Store contents of %ah into %eflags.
548 SAHF,
550 // Get a random integer and indicate whether it is valid in CF.
551 RDRAND,
553 // Get a NIST SP800-90B & C compliant random integer and
554 // indicate whether it is valid in CF.
555 RDSEED,
557 // Protection keys
558 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
559 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
560 // value for ECX.
561 RDPKRU, WRPKRU,
563 // SSE42 string comparisons.
564 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
565 // will emit one or two instructions based on which results are used. If
566 // both flags and index/mask are used, this allows us to use a single
567 // instruction since we won't have to pick an opcode for flags. Instead we
568 // can rely on the DAG to CSE everything and decide at isel.
569 PCMPISTR,
570 PCMPESTR,
572 // Test if in transactional execution.
573 XTEST,
575 // ERI instructions.
576 RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
577 RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
579 // Conversions between float and half-float.
580 CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
582 // Masked version of above.
583 // SRC, RND, PASSTHRU, MASK
584 MCVTPS2PH,
586 // Galois Field Arithmetic Instructions
587 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
589 // LWP insert record.
590 LWPINS,
592 // User level wait
593 UMWAIT, TPAUSE,
595 // Enqueue Stores Instructions
596 ENQCMD, ENQCMDS,
598 // For avx512-vp2intersect
599 VP2INTERSECT,
601 // Compare and swap.
602 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
603 LCMPXCHG8_DAG,
604 LCMPXCHG16_DAG,
605 LCMPXCHG8_SAVE_EBX_DAG,
606 LCMPXCHG16_SAVE_RBX_DAG,
608 /// LOCK-prefixed arithmetic read-modify-write instructions.
609 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
610 LADD, LSUB, LOR, LXOR, LAND,
612 // Load, scalar_to_vector, and zero extend.
613 VZEXT_LOAD,
615 // extract_vector_elt, store.
616 VEXTRACT_STORE,
618 // Store FP control word into i16 memory.
619 FNSTCW16m,
621 /// This instruction implements FP_TO_SINT with the
622 /// integer destination in memory and a FP reg source. This corresponds
623 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
624 /// has two inputs (token chain and address) and two outputs (int value
625 /// and token chain). Memory VT specifies the type to store to.
626 FP_TO_INT_IN_MEM,
628 /// This instruction implements SINT_TO_FP with the
629 /// integer source in memory and FP reg result. This corresponds to the
630 /// X86::FILD*m instructions. It has two inputs (token chain and address)
631 /// and two outputs (FP value and token chain). FILD_FLAG also produces a
632 /// flag. The integer source type is specified by the memory VT.
633 FILD,
634 FILD_FLAG,
636 /// This instruction implements a fp->int store from FP stack
637 /// slots. This corresponds to the fist instruction. It takes a
638 /// chain operand, value to store, address, and glue. The memory VT
639 /// specifies the type to store as.
640 FIST,
642 /// This instruction implements an extending load to FP stack slots.
643 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
644 /// operand, and ptr to load from. The memory VT specifies the type to
645 /// load from.
646 FLD,
648 /// This instruction implements a truncating store from FP stack
649 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
650 /// chain operand, value to store, address, and glue. The memory VT
651 /// specifies the type to store as.
652 FST,
654 /// This instruction grabs the address of the next argument
655 /// from a va_list. (reads and modifies the va_list in memory)
656 VAARG_64,
658 // Vector truncating store with unsigned/signed saturation
659 VTRUNCSTOREUS, VTRUNCSTORES,
660 // Vector truncating masked store with unsigned/signed saturation
661 VMTRUNCSTOREUS, VMTRUNCSTORES,
663 // X86 specific gather and scatter
664 MGATHER, MSCATTER,
666 // WARNING: Do not add anything at the end unless you want the node to
667 // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
668 // opcodes will be treated as target memory ops!
669 };
670 } // end namespace X86ISD
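// Illustrative sketch (not part of this header): target-specific opcodes such
// as the horizontal-math nodes above are created in X86ISelLowering.cpp with
// the ordinary SelectionDAG API. Assuming LHS/RHS are v4f32 SDValues, DL is an
// SDLoc, and DAG is the SelectionDAG, a floating point horizontal add (the
// kind of node involved in the PR43402 fix named in the commit title) could be
// built roughly as:
//
//   SDValue HAdd = DAG.getNode(X86ISD::FHADD, DL, MVT::v4f32, LHS, RHS);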
672 /// Define some predicates that are used for node matching.
673 namespace X86 {
674 /// Returns true if Elt is a constant zero or floating point constant +0.0.
675 bool isZeroNode(SDValue Elt);
677 /// Returns true if the given offset can
678 /// fit into the displacement field of the instruction.
679 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
680 bool hasSymbolicDisplacement = true);
682 /// Determines whether the callee is required to pop its
683 /// own arguments. Callee pop is necessary to support tail calls.
684 bool isCalleePop(CallingConv::ID CallingConv,
685 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
687 /// If Op is a constant whose elements are all the same constant or
688 /// undefined, return true and return the constant value in \p SplatVal.
689 bool isConstantSplat(SDValue Op, APInt &SplatVal);
690 } // end namespace X86
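// Illustrative sketch (not part of this header): a DAG combine might use the
// X86::isConstantSplat predicate above roughly as follows, where N is an
// assumed SDNode whose operand 1 may be a constant splat:
//
//   APInt SplatVal;
//   if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
//       SplatVal.isOneValue()) {
//     // Operand 1 is a splat of the constant 1 (undef lanes allowed).
//   }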
692 //===--------------------------------------------------------------------===//
693 // X86 Implementation of the TargetLowering interface
694 class X86TargetLowering final : public TargetLowering {
695 public:
696 explicit X86TargetLowering(const X86TargetMachine &TM,
697 const X86Subtarget &STI);
699 unsigned getJumpTableEncoding() const override;
700 bool useSoftFloat() const override;
702 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
703 ArgListTy &Args) const override;
705 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
706 return MVT::i8;
707 }
709 const MCExpr *
710 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
711 const MachineBasicBlock *MBB, unsigned uid,
712 MCContext &Ctx) const override;
714 /// Returns relocation base for the given PIC jumptable.
715 SDValue getPICJumpTableRelocBase(SDValue Table,
716 SelectionDAG &DAG) const override;
717 const MCExpr *
718 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
719 unsigned JTI, MCContext &Ctx) const override;
721 /// Return the desired alignment for ByVal aggregate
722 /// function arguments in the caller parameter area. For X86, aggregates
723 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
724 /// 4-byte boundaries.
725 unsigned getByValTypeAlignment(Type *Ty,
726 const DataLayout &DL) const override;
728 /// Returns the target specific optimal type for load
729 /// and store operations as a result of memset, memcpy, and memmove
730 /// lowering. If DstAlign is zero that means it's safe to assume that the
731 /// destination alignment can satisfy any constraint. Similarly, if SrcAlign
732 /// is zero it means there is no need to check it against the alignment requirement,
733 /// probably because the source does not need to be loaded. If 'IsMemset' is
734 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
735 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
736 /// source is constant so it does not need to be loaded.
737 /// It returns EVT::Other if the type should be determined using generic
738 /// target-independent logic.
739 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
740 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
741 const AttributeList &FuncAttributes) const override;
743 /// Returns true if it's safe to use load / store of the
744 /// specified type to expand memcpy / memset inline. This is mostly true
745 /// for all types except for some special cases. For example, on X86
746 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
747 /// also does type conversion. Note the specified type doesn't have to be
748 /// legal as the hook is used before type legalization.
749 bool isSafeMemOpType(MVT VT) const override;
751 /// Returns true if the target allows unaligned memory accesses of the
752 /// specified type. Returns whether it is "fast" in the last argument.
753 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
754 MachineMemOperand::Flags Flags,
755 bool *Fast) const override;
757 /// Provide custom lowering hooks for some operations.
759 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
761 /// Places new result values for the node in Results (their number
762 /// and types must exactly match those of the original return values of
763 /// the node), or leaves Results empty, which indicates that the node is not
764 /// to be custom lowered after all.
765 void LowerOperationWrapper(SDNode *N,
766 SmallVectorImpl<SDValue> &Results,
767 SelectionDAG &DAG) const override;
769 /// Replace the results of node with an illegal result
770 /// type with new values built out of custom code.
772 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
773 SelectionDAG &DAG) const override;
775 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
777 // Return true if it is profitable to combine a BUILD_VECTOR with a
778 // stride-pattern to a shuffle and a truncate.
779 // Example of such a combine:
780 // v4i32 build_vector((extract_elt V, 1),
781 // (extract_elt V, 3),
782 // (extract_elt V, 5),
783 // (extract_elt V, 7))
784 // -->
785 // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
786 // v4i64)
787 bool isDesirableToCombineBuildVectorToShuffleTruncate(
788 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
790 /// Return true if the target has native support for
791 /// the specified value type and it is 'desirable' to use the type for the
792 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
793 /// instruction encodings are longer and some i16 instructions are slow.
794 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
796 /// Return true if the target has native support for the
797 /// specified value type and it is 'desirable' to use the type. e.g. On x86
798 /// i16 is legal, but undesirable since i16 instruction encodings are longer
799 /// and some i16 instructions are slow.
800 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
802 /// Return 1 if we can compute the negated form of the specified expression
803 /// for the same cost as the expression itself, or 2 if we can compute the
804 /// negated form more cheaply than the expression itself. Else return 0.
805 char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
806 bool ForCodeSize, unsigned Depth) const override;
808 /// If isNegatibleForFree returns true, return the newly negated expression.
809 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
810 bool LegalOperations, bool ForCodeSize,
811 unsigned Depth) const override;
813 MachineBasicBlock *
814 EmitInstrWithCustomInserter(MachineInstr &MI,
815 MachineBasicBlock *MBB) const override;
817 /// This method returns the name of a target specific DAG node.
818 const char *getTargetNodeName(unsigned Opcode) const override;
820 /// Do not merge vector stores after legalization because that may conflict
821 /// with x86-specific store splitting optimizations.
822 bool mergeStoresAfterLegalization(EVT MemVT) const override {
823 return !MemVT.isVector();
824 }
826 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
827 const SelectionDAG &DAG) const override;
829 bool isCheapToSpeculateCttz() const override;
831 bool isCheapToSpeculateCtlz() const override;
833 bool isCtlzFast() const override;
835 bool hasBitPreservingFPLogic(EVT VT) const override {
836 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
837 }
839 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
840 // If the pair to store is a mixture of float and int values, we will
841 // save two bitwise instructions and one float-to-int instruction and
842 // add one store instruction. There is potentially a more
843 // significant benefit because it avoids the float->int domain switch
844 // for the input value. So it is more likely a win.
845 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
846 (LTy.isInteger() && HTy.isFloatingPoint()))
847 return true;
848 // If the pair only contains int values, we will save two bitwise
849 // instructions and add one store instruction (costing one more
850 // store buffer entry). Since the benefit is less clear, we leave
851 // such pairs out until we have a testcase proving it is a win.
852 return false;
853 }
855 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
857 bool hasAndNotCompare(SDValue Y) const override;
859 bool hasAndNot(SDValue Y) const override;
861 bool hasBitTest(SDValue X, SDValue Y) const override;
863 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
864 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
865 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
866 SelectionDAG &DAG) const override;
868 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
869 CombineLevel Level) const override;
871 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
873 bool
874 shouldTransformSignedTruncationCheck(EVT XVT,
875 unsigned KeptBits) const override {
876 // For vectors, we don't have a preference.
877 if (XVT.isVector())
878 return false;
880 auto VTIsOk = [](EVT VT) -> bool {
881 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
882 VT == MVT::i64;
883 };
885 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports.
886 // XVT will be larger than KeptBitsVT.
887 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
888 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
889 }
891 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
893 bool shouldSplatInsEltVarIndex(EVT VT) const override;
895 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
896 return VT.isScalarInteger();
897 }
899 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
900 MVT hasFastEqualityCompare(unsigned NumBits) const override;
902 /// Return the value type to use for ISD::SETCC.
903 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
904 EVT VT) const override;
906 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
907 TargetLoweringOpt &TLO) const override;
909 /// Determine which of the bits specified in Mask are known to be either
910 /// zero or one and return them in the KnownZero/KnownOne bitsets.
911 void computeKnownBitsForTargetNode(const SDValue Op,
912 KnownBits &Known,
913 const APInt &DemandedElts,
914 const SelectionDAG &DAG,
915 unsigned Depth = 0) const override;
917 /// Determine the number of bits in the operation that are sign bits.
918 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
919 const APInt &DemandedElts,
920 const SelectionDAG &DAG,
921 unsigned Depth) const override;
923 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
924 const APInt &DemandedElts,
925 APInt &KnownUndef,
926 APInt &KnownZero,
927 TargetLoweringOpt &TLO,
928 unsigned Depth) const override;
930 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
931 const APInt &DemandedBits,
932 const APInt &DemandedElts,
933 KnownBits &Known,
934 TargetLoweringOpt &TLO,
935 unsigned Depth) const override;
937 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
938 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
939 SelectionDAG &DAG, unsigned Depth) const override;
941 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
943 SDValue unwrapAddress(SDValue N) const override;
945 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
947 bool ExpandInlineAsm(CallInst *CI) const override;
949 ConstraintType getConstraintType(StringRef Constraint) const override;
951 /// Examine constraint string and operand type and determine a weight value.
952 /// The operand object must already have been set up with the operand type.
953 ConstraintWeight
954 getSingleConstraintMatchWeight(AsmOperandInfo &info,
955 const char *constraint) const override;
957 const char *LowerXConstraint(EVT ConstraintVT) const override;
959 /// Lower the specified operand into the Ops vector. If it is invalid, don't
960 /// add anything to Ops. If hasMemory is true it means one of the asm
961 /// constraints of the inline asm instruction being processed is 'm'.
962 void LowerAsmOperandForConstraint(SDValue Op,
963 std::string &Constraint,
964 std::vector<SDValue> &Ops,
965 SelectionDAG &DAG) const override;
967 unsigned
968 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
969 if (ConstraintCode == "i")
970 return InlineAsm::Constraint_i;
971 else if (ConstraintCode == "o")
972 return InlineAsm::Constraint_o;
973 else if (ConstraintCode == "v")
974 return InlineAsm::Constraint_v;
975 else if (ConstraintCode == "X")
976 return InlineAsm::Constraint_X;
977 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
978 }
980 /// Handle Lowering flag assembly outputs.
981 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
982 const AsmOperandInfo &Constraint,
983 SelectionDAG &DAG) const override;
985 /// Given a physical register constraint
986 /// (e.g. {edx}), return the register number and the register class for the
987 /// register. This should only be used for C_Register constraints. On
988 /// error, this returns a register number of 0.
989 std::pair<unsigned, const TargetRegisterClass *>
990 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
991 StringRef Constraint, MVT VT) const override;
993 /// Return true if the addressing mode represented
994 /// by AM is legal for this target, for a load/store of the specified type.
995 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
996 Type *Ty, unsigned AS,
997 Instruction *I = nullptr) const override;
999 /// Return true if the specified immediate is a legal
1000 /// icmp immediate, that is, the target has icmp instructions which can
1001 /// compare a register against the immediate without having to materialize
1002 /// the immediate into a register.
1003 bool isLegalICmpImmediate(int64_t Imm) const override;
1005 /// Return true if the specified immediate is a legal
1006 /// add immediate, that is, the target has add instructions which can
1007 /// add a register and the immediate without having to materialize
1008 /// the immediate into a register.
1009 bool isLegalAddImmediate(int64_t Imm) const override;
1011 bool isLegalStoreImmediate(int64_t Imm) const override;
1013 /// Return the cost of the scaling factor used in the addressing
1014 /// mode represented by AM for this target, for a load/store
1015 /// of the specified type.
1016 /// If the AM is supported, the return value must be >= 0.
1017 /// If the AM is not supported, it returns a negative value.
1018 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1019 unsigned AS) const override;
1021 bool isVectorShiftByScalarCheap(Type *Ty) const override;
1023 /// Add x86-specific opcodes to the default list.
1024 bool isBinOp(unsigned Opcode) const override;
1026 /// Returns true if the opcode is a commutative binary operation.
1027 bool isCommutativeBinOp(unsigned Opcode) const override;
1029 /// Return true if it's free to truncate a value of
1030 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1031 /// register EAX to i16 by referencing its sub-register AX.
1032 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1033 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1035 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1037 /// Return true if any actual instruction that defines a
1038 /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1039 /// register. This does not necessarily include registers defined in
1040 /// unknown ways, such as incoming arguments, or copies from unknown
1041 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1042 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1043 /// all instructions that define 32-bit values implicitly zero-extend the
1044 /// result out to 64 bits.
1045 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1046 bool isZExtFree(EVT VT1, EVT VT2) const override;
1047 bool isZExtFree(SDValue Val, EVT VT2) const override;
1049 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1050 /// extend node) is profitable.
1051 bool isVectorLoadExtDesirable(SDValue) const override;
1053 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1054 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1055 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1056 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1058 /// Return true if it's profitable to narrow
1059 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1060 /// from i32 to i8 but not from i32 to i16.
1061 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1063 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1064 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1065 /// true and stores the intrinsic information into the IntrinsicInfo that was
1066 /// passed to the function.
1067 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1068 MachineFunction &MF,
1069 unsigned Intrinsic) const override;
1071 /// Returns true if the target can instruction select the
1072 /// specified FP immediate natively. If false, the legalizer will
1073 /// materialize the FP immediate as a load from a constant pool.
1074 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1075 bool ForCodeSize) const override;
1077 /// Targets can use this to indicate that they only support *some*
1078 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1079 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1080 /// be legal.
1081 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1083 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1084 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1085 /// constant pool entry.
1086 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1088 /// Returns true if lowering to a jump table is allowed.
1089 bool areJTsAllowed(const Function *Fn) const override;
1091 /// If true, then instruction selection should
1092 /// seek to shrink the FP constant of the specified type to a smaller type
1093 /// in order to save space and / or reduce runtime.
1094 bool ShouldShrinkFPConstant(EVT VT) const override {
1095 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1096 // expensive than a straight movsd. On the other hand, it's important to
1097 // shrink long double fp constant since fldt is very slow.
1098 return !X86ScalarSSEf64 || VT == MVT::f80;
1099 }
1101 /// Return true if we believe it is correct and profitable to reduce the
1102 /// load node to a smaller type.
1103 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1104 EVT NewVT) const override;
1106 /// Return true if the specified scalar FP type is computed in an SSE
1107 /// register, not on the X87 floating point stack.
1108 bool isScalarFPTypeInSSEReg(EVT VT) const {
1109 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1110 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
1111 }
1113 /// Returns true if it is beneficial to convert a load of a constant
1114 /// to just the constant itself.
1115 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1116 Type *Ty) const override;
1118 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1120 bool convertSelectOfConstantsToMath(EVT VT) const override;
1122 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1123 SDValue C) const override;
1125 bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1126 bool IsSigned) const override;
1128 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1129 /// with this index.
1130 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1131 unsigned Index) const override;
1133 /// Scalar ops always have equal or better analysis/performance/power than
1134 /// the vector equivalent, so this always makes sense if the scalar op is
1135 /// supported.
1136 bool shouldScalarizeBinop(SDValue) const override;
1138 /// Extract of a scalar FP value from index 0 of a vector is free.
1139 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1140 EVT EltVT = VT.getScalarType();
1142 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1143 }
1144 /// Overflow nodes should get combined/lowered to optimal instructions
1145 /// (they should allow eliminating explicit compares by getting flags from
1146 /// math ops).
1147 bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1149 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1150 unsigned AddrSpace) const override {
1151 // If we can replace more than 2 scalar stores, there will be a reduction
1152 // in instructions even after we add a vector constant load.
1153 return NumElem > 2;
1154 }
1156 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1157 const SelectionDAG &DAG,
1158 const MachineMemOperand &MMO) const override;
1160 /// Intel processors have a unified instruction and data cache
1161 const char * getClearCacheBuiltinName() const override {
1162 return nullptr; // nothing to do, move along.
1163 }
1165 unsigned getRegisterByName(const char* RegName, EVT VT,
1166 SelectionDAG &DAG) const override;
1168 /// If a physical register, this returns the register that receives the
1169 /// exception address on entry to an EH pad.
1170 unsigned
1171 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1173 /// If a physical register, this returns the register that receives the
1174 /// exception typeid on entry to a landing pad.
1175 unsigned
1176 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1178 virtual bool needsFixedCatchObjects() const override;
1180 /// This method returns a target specific FastISel object,
1181 /// or null if the target does not support "fast" ISel.
1182 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1183 const TargetLibraryInfo *libInfo) const override;
1185 /// If the target has a standard location for the stack protector cookie,
1186 /// returns the address of that location. Otherwise, returns nullptr.
1187 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1189 bool useLoadStackGuardNode() const override;
1190 bool useStackGuardXorFP() const override;
1191 void insertSSPDeclarations(Module &M) const override;
1192 Value *getSDagStackGuard(const Module &M) const override;
1193 Function *getSSPStackGuardCheck(const Module &M) const override;
1194 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1195 const SDLoc &DL) const override;
1198 /// Return true if the target stores SafeStack pointer at a fixed offset in
1199 /// some non-standard address space, and populates the address space and
1200 /// offset as appropriate.
1201 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1203 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1204 SelectionDAG &DAG) const;
1206 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1208 /// Customize the preferred legalization strategy for certain types.
1209 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1211 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1212 EVT VT) const override;
1214 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1215 CallingConv::ID CC,
1216 EVT VT) const override;
1218 unsigned getVectorTypeBreakdownForCallingConv(
1219 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1220 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1222 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1224 bool supportSwiftError() const override;
1226 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1228 unsigned getStackProbeSize(MachineFunction &MF) const;
1230 bool hasVectorBlend() const override { return true; }
1232 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1234 /// Lower interleaved load(s) into target specific
1235 /// instructions/intrinsics.
1236 bool lowerInterleavedLoad(LoadInst *LI,
1237 ArrayRef<ShuffleVectorInst *> Shuffles,
1238 ArrayRef<unsigned> Indices,
1239 unsigned Factor) const override;
1241 /// Lower interleaved store(s) into target specific
1242 /// instructions/intrinsics.
1243 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1244 unsigned Factor) const override;
1246 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1247 SDValue Addr, SelectionDAG &DAG)
1248 const override;
1250 protected:
1251 std::pair<const TargetRegisterClass *, uint8_t>
1252 findRepresentativeClass(const TargetRegisterInfo *TRI,
1253 MVT VT) const override;
1255 private:
1256 /// Keep a reference to the X86Subtarget around so that we can
1257 /// make the right decision when generating code for different targets.
1258 const X86Subtarget &Subtarget;
1260 /// Select between SSE or x87 floating point ops.
1261 /// When SSE is available, use it for f32 operations.
1262 /// When SSE2 is available, use it for f64 operations.
1263 bool X86ScalarSSEf32;
1264 bool X86ScalarSSEf64;
1266 /// A list of legal FP immediates.
1267 std::vector<APFloat> LegalFPImmediates;
1269 /// Indicate that this x86 target can instruction
1270 /// select the specified FP immediate natively.
1271 void addLegalFPImmediate(const APFloat& Imm) {
1272 LegalFPImmediates.push_back(Imm);
1273 }
1275 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1276 CallingConv::ID CallConv, bool isVarArg,
1277 const SmallVectorImpl<ISD::InputArg> &Ins,
1278 const SDLoc &dl, SelectionDAG &DAG,
1279 SmallVectorImpl<SDValue> &InVals,
1280 uint32_t *RegMask) const;
1281 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1282 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1283 const SDLoc &dl, SelectionDAG &DAG,
1284 const CCValAssign &VA, MachineFrameInfo &MFI,
1285 unsigned i) const;
1286 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1287 const SDLoc &dl, SelectionDAG &DAG,
1288 const CCValAssign &VA,
1289 ISD::ArgFlagsTy Flags) const;
1291 // Call lowering helpers.
1293 /// Check whether the call is eligible for tail call optimization. Targets
1294 /// that want to do tail call optimization should implement this function.
1295 bool IsEligibleForTailCallOptimization(SDValue Callee,
1296 CallingConv::ID CalleeCC,
1297 bool isVarArg,
1298 bool isCalleeStructRet,
1299 bool isCallerStructRet,
1300 Type *RetTy,
1301 const SmallVectorImpl<ISD::OutputArg> &Outs,
1302 const SmallVectorImpl<SDValue> &OutVals,
1303 const SmallVectorImpl<ISD::InputArg> &Ins,
1304 SelectionDAG& DAG) const;
1305 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1306 SDValue Chain, bool IsTailCall,
1307 bool Is64Bit, int FPDiff,
1308 const SDLoc &dl) const;
1310 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1311 SelectionDAG &DAG) const;
1313 unsigned getAddressSpace(void) const;
1315 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1317 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1318 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1319 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1320 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1322 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1323 const unsigned char OpFlags = 0) const;
1324 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1325 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1326 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1327 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1328 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1330 /// Creates target global address or external symbol nodes for calls or
1331 /// other uses.
1332 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1333 bool ForCall) const;
1335 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1336 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1337 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1338 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1339 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1340 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1341 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1342 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1343 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1344 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1345 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1346 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1347 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1348 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1349 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1350 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1351 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1352 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1353 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1354 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1355 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1356 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1357 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1358 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1359 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1360 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1361 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1362 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1363 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1365 SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1366 RTLIB::Libcall Call) const;
1368 SDValue
1369 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1370 const SmallVectorImpl<ISD::InputArg> &Ins,
1371 const SDLoc &dl, SelectionDAG &DAG,
1372 SmallVectorImpl<SDValue> &InVals) const override;
1373 SDValue LowerCall(CallLoweringInfo &CLI,
1374 SmallVectorImpl<SDValue> &InVals) const override;
1376 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1377 const SmallVectorImpl<ISD::OutputArg> &Outs,
1378 const SmallVectorImpl<SDValue> &OutVals,
1379 const SDLoc &dl, SelectionDAG &DAG) const override;
1381 bool supportSplitCSR(MachineFunction *MF) const override {
1382 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1383 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1384 }
1385 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1386 void insertCopiesSplitCSR(
1387 MachineBasicBlock *Entry,
1388 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1390 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1392 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1394 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1395 ISD::NodeType ExtendKind) const override;
1397 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1398 bool isVarArg,
1399 const SmallVectorImpl<ISD::OutputArg> &Outs,
1400 LLVMContext &Context) const override;
1402 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1404 TargetLoweringBase::AtomicExpansionKind
1405 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1406 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1407 TargetLoweringBase::AtomicExpansionKind
1408 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1410 LoadInst *
1411 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1413 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1414 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1416 bool needsCmpXchgNb(Type *MemType) const;
1418 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1419 MachineBasicBlock *DispatchBB, int FI) const;
1421 // Utility function to emit the low-level va_arg code for X86-64.
1422 MachineBasicBlock *
1423 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1424 MachineBasicBlock *MBB) const;
1426 /// Utility function to emit the xmm reg save portion of va_start.
1427 MachineBasicBlock *
1428 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1429 MachineBasicBlock *BB) const;
1431 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1432 MachineInstr &MI2,
1433 MachineBasicBlock *BB) const;
1435 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1436 MachineBasicBlock *BB) const;
1438 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1439 MachineBasicBlock *BB) const;
1441 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1442 MachineBasicBlock *BB) const;
1444 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1445 MachineBasicBlock *BB) const;
1447 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1448 MachineBasicBlock *BB) const;
1450 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1451 MachineBasicBlock *BB) const;
1453 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1454 MachineBasicBlock *BB) const;
1456 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1457 MachineBasicBlock *BB) const;
1459 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1460 MachineBasicBlock *MBB) const;
1462 void emitSetJmpShadowStackFix(MachineInstr &MI,
1463 MachineBasicBlock *MBB) const;
1465 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1466 MachineBasicBlock *MBB) const;
1468 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1469 MachineBasicBlock *MBB) const;
1471 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1472 MachineBasicBlock *MBB) const;
1474 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1475 MachineBasicBlock *MBB) const;
1477 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1478 /// equivalent, for use with the given x86 condition code.
1479 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1480 SelectionDAG &DAG) const;
1482 /// Convert a comparison if required by the subtarget.
1483 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1485 /// Emit flags for the given setcc condition and operands. Also returns the
1486 /// corresponding X86 condition code constant in X86CC.
1487 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1488 ISD::CondCode CC, const SDLoc &dl,
1489 SelectionDAG &DAG,
1490 SDValue &X86CC) const;
1492 /// Check if replacement of SQRT with RSQRT should be disabled.
1493 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1495 /// Use rsqrt* to speed up sqrt calculations.
1496 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1497 int &RefinementSteps, bool &UseOneConstNR,
1498 bool Reciprocal) const override;
1500 /// Use rcp* to speed up fdiv calculations.
1501 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1502 int &RefinementSteps) const override;
1504 /// Reassociate floating point divisions into multiply by reciprocal.
1505 unsigned combineRepeatedFPDivisors() const override;
1507 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1508 SmallVectorImpl<SDNode *> &Created) const override;
1509 };
1511 namespace X86 {
1512 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1513 const TargetLibraryInfo *libInfo);
1514 } // end namespace X86
1516 // Base class for all X86 non-masked store operations.
1517 class X86StoreSDNode : public MemSDNode {
1518 public:
1519 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1520 SDVTList VTs, EVT MemVT,
1521 MachineMemOperand *MMO)
1522 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1523 const SDValue &getValue() const { return getOperand(1); }
1524 const SDValue &getBasePtr() const { return getOperand(2); }
1526 static bool classof(const SDNode *N) {
1527 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1528 N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1529 }
1530 };
1532 // Base class for all X86 masked store operations.
1533 // The class has the same order of operands as MaskedStoreSDNode for
1534 // convenience.
1535 class X86MaskedStoreSDNode : public MemSDNode {
1536 public:
1537 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1538 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1539 MachineMemOperand *MMO)
1540 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1542 const SDValue &getValue() const { return getOperand(1); }
1543 const SDValue &getBasePtr() const { return getOperand(2); }
1544 const SDValue &getMask() const { return getOperand(3); }
1546 static bool classof(const SDNode *N) {
1547 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1548 N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1549 }
1550 };
1552 // X86 Truncating Store with Signed saturation.
1553 class TruncSStoreSDNode : public X86StoreSDNode {
1554 public:
1555 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1556 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1557 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1559 static bool classof(const SDNode *N) {
1560 return N->getOpcode() == X86ISD::VTRUNCSTORES;
1561 }
1562 };
1564 // X86 Truncating Store with Unsigned saturation.
1565 class TruncUSStoreSDNode : public X86StoreSDNode {
1566 public:
1567 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1568 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1569 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1571 static bool classof(const SDNode *N) {
1572 return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1573 }
1574 };
1576 // X86 Truncating Masked Store with Signed saturation.
1577 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1578 public:
1579 MaskedTruncSStoreSDNode(unsigned Order,
1580 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1581 MachineMemOperand *MMO)
1582 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1584 static bool classof(const SDNode *N) {
1585 return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1586 }
1587 };
1589 // X86 Truncating Masked Store with Unsigned saturation.
1590 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1591 public:
1592 MaskedTruncUSStoreSDNode(unsigned Order,
1593 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1594 MachineMemOperand *MMO)
1595 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1597 static bool classof(const SDNode *N) {
1598 return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1599 }
1600 };
1602 // X86 specific Gather/Scatter nodes.
1603 // The class has the same order of operands as MaskedGatherScatterSDNode for
1604 // convenience.
1605 class X86MaskedGatherScatterSDNode : public MemSDNode {
1606 public:
1607 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1608 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1609 MachineMemOperand *MMO)
1610 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1612 const SDValue &getBasePtr() const { return getOperand(3); }
1613 const SDValue &getIndex() const { return getOperand(4); }
1614 const SDValue &getMask() const { return getOperand(2); }
1615 const SDValue &getScale() const { return getOperand(5); }
1617 static bool classof(const SDNode *N) {
1618 return N->getOpcode() == X86ISD::MGATHER ||
1619 N->getOpcode() == X86ISD::MSCATTER;
1620 }
1621 };
1623 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1624 public:
1625 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1626 EVT MemVT, MachineMemOperand *MMO)
1627 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1628 MMO) {}
1630 const SDValue &getPassThru() const { return getOperand(1); }
1632 static bool classof(const SDNode *N) {
1633 return N->getOpcode() == X86ISD::MGATHER;
1634 }
1635 };
1637 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1638 public:
1639 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1640 EVT MemVT, MachineMemOperand *MMO)
1641 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1642 MMO) {}
1644 const SDValue &getValue() const { return getOperand(1); }
1646 static bool classof(const SDNode *N) {
1647 return N->getOpcode() == X86ISD::MSCATTER;
1648 }
1649 };
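// Illustrative sketch (not part of this header): thanks to the classof
// methods above, these nodes are typically inspected through LLVM's cast
// machinery. Assuming N is an arbitrary SDNode*:
//
//   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
//     SDValue Index = Gather->getIndex();   // operand 4
//     SDValue Mask = Gather->getMask();     // operand 2
//   }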
1651 /// Generate unpacklo/unpackhi shuffle mask.
1652 template <typename T = int>
1653 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1654 bool Unary) {
1655 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1656 int NumElts = VT.getVectorNumElements();
1657 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1658 for (int i = 0; i < NumElts; ++i) {
1659 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1660 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1661 Pos += (Unary ? 0 : NumElts * (i % 2));
1662 Pos += (Lo ? 0 : NumEltsInLane / 2);
1663 Mask.push_back(Pos);
1664 }
1665 }
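// Illustrative sketch (not part of this header): for MVT::v4i32 with
// Lo = true and Unary = false, the loop above produces the mask
// <0, 4, 1, 5>, i.e. the PUNPCKLDQ interleave of the two sources:
//
//   SmallVector<int, 4> Mask;
//   createUnpackShuffleMask(MVT::v4i32, Mask, /*Lo=*/true, /*Unary=*/false);
//   // Mask == {0, 4, 1, 5}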
1667 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1668 /// mask index with the scaled sequential indices for an equivalent narrowed
1669 /// mask. This is the reverse process to canWidenShuffleElements, but can
1670 /// always succeed.
1671 template <typename T>
1672 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1673 SmallVectorImpl<T> &ScaledMask) {
1674 assert(0 < Scale && "Unexpected scaling factor");
1675 size_t NumElts = Mask.size();
1676 ScaledMask.assign(NumElts * Scale, -1);
1678 for (int i = 0; i != (int)NumElts; ++i) {
1679 int M = Mask[i];
1681 // Repeat sentinel values in every mask element.
1682 if (M < 0) {
1683 for (int s = 0; s != Scale; ++s)
1684 ScaledMask[(Scale * i) + s] = M;
1685 continue;
1686 }
1688 // Scale mask element and increment across each mask element.
1689 for (int s = 0; s != Scale; ++s)
1690 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1691 }
1692 }
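// Illustrative sketch (not part of this header): scaling the v2i64-style mask
// <1, 0> by a factor of 2 yields the equivalent v4i32-style mask <2, 3, 0, 1>:
//
//   SmallVector<int, 4> Scaled;
//   int Wide[] = {1, 0};
//   scaleShuffleMask<int>(2, makeArrayRef(Wide, 2), Scaled);
//   // Scaled == {2, 3, 0, 1}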
1693 } // end namespace llvm
1695 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H