lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20 #include "llvm/Target/TargetOptions.h"
  21
  22 namespace llvm {
  23   class X86Subtarget;
  24   class X86TargetMachine;
  25
  26   namespace X86ISD {
  27     // X86-specific DAG node opcodes.
  28     enum NodeType : unsigned {
  29       // Start the numbering where the builtin ops leave off.
  30       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  31
  32       /// Bit scan forward.
  33       BSF,
  34       /// Bit scan reverse.
  35       BSR,
  36
  37       /// Double shift instructions. These correspond to
  38       /// X86::SHLDxx and X86::SHRDxx instructions.
  39       SHLD,
  40       SHRD,
  41
  42       /// Bitwise logical AND of floating point values. This corresponds
  43       /// to X86::ANDPS or X86::ANDPD.
  44       FAND,
  45
  46       /// Bitwise logical OR of floating point values. This corresponds
  47       /// to X86::ORPS or X86::ORPD.
  48       FOR,
  49
  50       /// Bitwise logical XOR of floating point values. This corresponds
  51       /// to X86::XORPS or X86::XORPD.
  52       FXOR,
  53
  54       /// Bitwise logical ANDNOT of floating point values. This
  55       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  56       FANDN,
  57
  58       /// These operations represent an abstract X86 call
  59       /// instruction, which includes a bunch of information.  In particular the
  60       /// operands of these nodes are:
  61       ///
  62       ///     #0 - The incoming token chain
  63       ///     #1 - The callee
  64       ///     #2 - The number of arg bytes the caller pushes on the stack.
  65       ///     #3 - The number of arg bytes the callee pops off the stack.
  66       ///     #4 - The value to pass in AL/AX/EAX (optional)
  67       ///     #5 - The value to pass in DL/DX/EDX (optional)
  68       ///
  69       /// The result values of these nodes are:
  70       ///
  71       ///     #0 - The outgoing token chain
  72       ///     #1 - The first register result value (optional)
  73       ///     #2 - The second register result value (optional)
  74       ///
  75       CALL,
  76
  77       /// Same as call except it adds the NoTrack prefix.
  78       NT_CALL,
  79
  80       /// X86 compare and logical compare instructions.
  81       CMP, COMI, UCOMI,
  82
  83       /// X86 bit-test instructions.
  84       BT,
  85
  86       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  87       /// operand, usually produced by a CMP instruction.
  88       SETCC,
  89
  90       /// X86 Select
  91       SELECTS,
  92
  93       // Same as SETCC except it's materialized with a sbb and the value is all
  94       // ones or all zeros.
  95       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  96
  97       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  98       /// Operands are two FP values to compare; result is a mask of
  99       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 100       FSETCC,
 101
 102       /// X86 FP SETCC, similar to above, but with output as an i1 mask,
 103       /// and a version with SAE.
 104       FSETCCM, FSETCCM_SAE,
 105
 106       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 107       /// to select from. Operand 2 is the condition code, and operand 3 is the
 108       /// flag operand produced by a CMP or TEST instruction.
 109       CMOV,
 110
 111       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 112       /// is the block to branch to if the condition is true, operand 2 is the
 113       /// condition code, and operand 3 is the flag operand produced by a CMP
 114       /// or TEST instruction.
 115       BRCOND,
 116
 117       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 118       /// operand 1 is the target address.
 119       NT_BRIND,
 120
 121       /// Return with a flag operand. Operand 0 is the chain operand, operand
 122       /// 1 is the number of bytes of stack to pop.
 123       RET_FLAG,
 124
 125       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 126       IRET,
 127
 128       /// Repeat fill, corresponds to X86::REP_STOSx.
 129       REP_STOS,
 130
 131       /// Repeat move, corresponds to X86::REP_MOVSx.
 132       REP_MOVS,
 133
 134       /// On Darwin, this node represents the result of the popl
 135       /// at function entry, used for PIC code.
 136       GlobalBaseReg,
 137
 138       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 139       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 140       /// MCSymbol and TargetBlockAddress.
 141       Wrapper,
 142
 143       /// Special wrapper used under X86-64 PIC mode for RIP
 144       /// relative displacements.
 145       WrapperRIP,
 146
 147       /// Copies a 64-bit value from the low word of an XMM vector
 148       /// to an MMX vector.
 149       MOVDQ2Q,
 150
 151       /// Copies a 32-bit value from the low word of an MMX
 152       /// vector to a GPR.
 153       MMX_MOVD2W,
 154
 155       /// Copies a GPR into the low 32-bit word of an MMX vector
 156       /// and zeroes out the high word.
 157       MMX_MOVW2D,
 158
 159       /// Extract an 8-bit value from a vector and zero extend it to
 160       /// i32, corresponds to X86::PEXTRB.
 161       PEXTRB,
 162
 163       /// Extract a 16-bit value from a vector and zero extend it to
 164       /// i32, corresponds to X86::PEXTRW.
 165       PEXTRW,
 166
 167       /// Insert any element of a 4 x float vector into any element
 168       /// of a destination 4 x float vector.
 169       INSERTPS,
 170
 171       /// Insert the lower 8-bits of a 32-bit value to a vector,
 172       /// corresponds to X86::PINSRB.
 173       PINSRB,
 174
 175       /// Insert the lower 16-bits of a 32-bit value to a vector,
 176       /// corresponds to X86::PINSRW.
 177       PINSRW,
 178
 179       /// Shuffle 16 8-bit values within a vector.
 180       PSHUFB,
 181
 182       /// Compute Sum of Absolute Differences.
 183       PSADBW,
 184       /// Compute Double Block Packed Sum-Absolute-Differences
 185       DBPSADBW,
 186
 187       /// Bitwise Logical AND NOT of Packed FP values.
 188       ANDNP,
 189
 190       /// Blend where the selector is an immediate.
 191       BLENDI,
 192
 193       /// Dynamic (non-constant condition) vector blend where only the sign bits
 194       /// of the condition elements are used. This is used to enforce that the
 195       /// condition mask is not valid for generic VSELECT optimizations. This
 196       /// is also used to implement the intrinsics.
 197       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 198       BLENDV,
 199
 200       /// Combined add and sub on an FP vector.
 201       ADDSUB,
 202
 203       // FP vector ops with rounding mode.
 204       FADD_RND, FADDS, FADDS_RND,
 205       FSUB_RND, FSUBS, FSUBS_RND,
 206       FMUL_RND, FMULS, FMULS_RND,
 207       FDIV_RND, FDIVS, FDIVS_RND,
 208       FMAX_SAE, FMAXS_SAE,
 209       FMIN_SAE, FMINS_SAE,
 210       FSQRT_RND, FSQRTS, FSQRTS_RND,
 211
 212       // FP vector get exponent.
 213       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 214       // Extract Normalized Mantissas.
 215       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 216       // FP Scale.
 217       SCALEF, SCALEF_RND,
 218       SCALEFS, SCALEFS_RND,
 219
 220       // Unsigned Integer average.
 221       AVG,
 222
 223       /// Integer horizontal add/sub.
 224       HADD,
 225       HSUB,
 226
 227       /// Floating point horizontal add/sub.
 228       FHADD,
 229       FHSUB,
 230
 231       // Detect Conflicts Within a Vector
 232       CONFLICT,
 233
 234       /// Floating point max and min.
 235       FMAX, FMIN,
 236
 237       /// Commutative FMIN and FMAX.
 238       FMAXC, FMINC,
 239
 240       /// Scalar intrinsic floating point max and min.
 241       FMAXS, FMINS,
 242
 243       /// Floating point reciprocal-sqrt and reciprocal approximation.
 244       /// Note that these typically require refinement
 245       /// in order to obtain suitable precision.
 246       FRSQRT, FRCP,
 247
 248       // AVX-512 reciprocal approximations with a little more precision.
 249       RSQRT14, RSQRT14S, RCP14, RCP14S,
 250
 251       // Thread Local Storage.
 252       TLSADDR,
 253
 254       // Thread Local Storage. A call to get the start address
 255       // of the TLS block for the current module.
 256       TLSBASEADDR,
 257
 258       // Thread Local Storage.  When calling to an OS provided
 259       // thunk at the address from an earlier relocation.
 260       TLSCALL,
 261
 262       // Exception Handling helpers.
 263       EH_RETURN,
 264
 265       // SjLj exception handling setjmp.
 266       EH_SJLJ_SETJMP,
 267
 268       // SjLj exception handling longjmp.
 269       EH_SJLJ_LONGJMP,
 270
 271       // SjLj exception handling dispatch.
 272       EH_SJLJ_SETUP_DISPATCH,
 273
 274       /// Tail call return. See X86TargetLowering::LowerCall for
 275       /// the list of operands.
 276       TC_RETURN,
 277
 278       // Vector move to low scalar and zero higher vector elements.
 279       VZEXT_MOVL,
 280
 281       // Vector integer truncate.
 282       VTRUNC,
 283       // Vector integer truncate with unsigned/signed saturation.
 284       VTRUNCUS, VTRUNCS,
 285
 286       // Masked version of the above. Used when less than a 128-bit result is
 287       // produced since the mask only applies to the lower elements and can't
 288       // be represented by a select.
 289       // SRC, PASSTHRU, MASK
 290       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 291
 292       // Vector FP extend.
 293       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 294
 295       // Vector FP round.
 296       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 297
 298       // Masked version of above. Used for v2f64->v4f32.
 299       // SRC, PASSTHRU, MASK
 300       VMFPROUND,
 301
 302       // 128-bit vector logical left / right shift
 303       VSHLDQ, VSRLDQ,
 304
 305       // Vector shift elements
 306       VSHL, VSRL, VSRA,
 307
 308       // Vector variable shift
 309       VSHLV, VSRLV, VSRAV,
 310
 311       // Vector shift elements by immediate
 312       VSHLI, VSRLI, VSRAI,
 313
 314       // Shifts of mask registers.
 315       KSHIFTL, KSHIFTR,
 316
 317       // Bit rotate by immediate
 318       VROTLI, VROTRI,
 319
 320       // Vector packed double/float comparison.
 321       CMPP,
 322
 323       // Vector integer comparisons.
 324       PCMPEQ, PCMPGT,
 325
 326       // v8i16 Horizontal minimum and position.
 327       PHMINPOS,
 328
 329       MULTISHIFT,
 330
 331       /// Vector comparison generating mask bits for fp and
 332       /// integer signed and unsigned data types.
 333       CMPM,
 334       // Vector comparison with SAE for FP values
 335       CMPM_SAE,
 336
 337       // Arithmetic operations with FLAGS results.
 338       ADD, SUB, ADC, SBB, SMUL, UMUL,
 339       OR, XOR, AND,
 340
 341       // Bit field extract.
 342       BEXTR,
 343
 344       // Zero High Bits Starting with Specified Bit Position.
 345       BZHI,
 346
 347       // X86-specific multiply by immediate.
 348       MUL_IMM,
 349
 350       // Vector sign bit extraction.
 351       MOVMSK,
 352
 353       // Vector bitwise comparisons.
 354       PTEST,
 355
 356       // Vector packed fp sign bitwise comparisons.
 357       TESTP,
 358
 359       // OR/AND test for masks.
 360       KORTEST,
 361       KTEST,
 362
 363       // ADD for masks.
 364       KADD,
 365
 366       // Several flavors of instructions with vector shuffle behaviors.
 367       // Saturated signed/unsigned packing.
 368       PACKSS,
 369       PACKUS,
 370       // Intra-lane alignr.
 371       PALIGNR,
 372       // AVX512 inter-lane alignr.
 373       VALIGN,
 374       PSHUFD,
 375       PSHUFHW,
 376       PSHUFLW,
 377       SHUFP,
 378       // VBMI2 Concat & Shift.
 379       VSHLD,
 380       VSHRD,
 381       VSHLDV,
 382       VSHRDV,
 383       // Shuffle Packed Values at 128-bit granularity.
 384       SHUF128,
 385       MOVDDUP,
 386       MOVSHDUP,
 387       MOVSLDUP,
 388       MOVLHPS,
 389       MOVHLPS,
 390       MOVSD,
 391       MOVSS,
 392       UNPCKL,
 393       UNPCKH,
 394       VPERMILPV,
 395       VPERMILPI,
 396       VPERMI,
 397       VPERM2X128,
 398
 399       // Variable Permute (VPERM).
 400       // Res = VPERMV MaskV, V0
 401       VPERMV,
 402
 403       // 3-op Variable Permute (VPERMT2).
 404       // Res = VPERMV3 V0, MaskV, V1
 405       VPERMV3,
 406
 407       // Bitwise ternary logic.
 408       VPTERNLOG,
 409       // Fix Up Special Packed Float32/64 values.
 410       VFIXUPIMM, VFIXUPIMM_SAE,
 411       VFIXUPIMMS, VFIXUPIMMS_SAE,
 412       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 413       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 414       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 415       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 416       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 417       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 418       // scaling part of the immediate.
 419       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 420       // Tests the types of FP values for packed types.
 421       VFPCLASS,
 422       // Tests the types of FP values for scalar types.
 423       VFPCLASSS,
 424
 425       // Broadcast scalar to vector.
 426       VBROADCAST,
 427       // Broadcast mask to vector.
 428       VBROADCASTM,
 429       // Broadcast subvector to vector.
 430       SUBV_BROADCAST,
 431
 432       /// SSE4A Extraction and Insertion.
 433       EXTRQI, INSERTQI,
 434
 435       // XOP arithmetic/logical shifts.
 436       VPSHA, VPSHL,
 437       // XOP signed/unsigned integer comparisons.
 438       VPCOM, VPCOMU,
 439       // XOP packed permute bytes.
 440       VPPERM,
 441       // XOP two source permutation.
 442       VPERMIL2,
 443
 444       // Vector multiply packed unsigned doubleword integers.
 445       PMULUDQ,
 446       // Vector multiply packed signed doubleword integers.
 447       PMULDQ,
 448       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 449       MULHRS,
 450
 451       // Multiply and Add Packed Integers.
 452       VPMADDUBSW, VPMADDWD,
 453
 454       // AVX512IFMA multiply and add.
 455       // NOTE: These are different than the instruction and perform
 456       // op0 x op1 + op2.
 457       VPMADD52L, VPMADD52H,
 458
 459       // VNNI
 460       VPDPBUSD,
 461       VPDPBUSDS,
 462       VPDPWSSD,
 463       VPDPWSSDS,
 464
 465       // FMA nodes.
 466       // We use the target independent ISD::FMA for the non-inverted case.
 467       FNMADD,
 468       FMSUB,
 469       FNMSUB,
 470       FMADDSUB,
 471       FMSUBADD,
 472
 473       // FMA with rounding mode.
 474       FMADD_RND,
 475       FNMADD_RND,
 476       FMSUB_RND,
 477       FNMSUB_RND,
 478       FMADDSUB_RND,
 479       FMSUBADD_RND,
 480
 481       // Compress and expand.
 482       COMPRESS,
 483       EXPAND,
 484
 485       // Bits shuffle
 486       VPSHUFBITQMB,
 487
 488       // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
 489       SINT_TO_FP_RND, UINT_TO_FP_RND,
 490       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 491       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 492
 493       // Vector float/double to signed/unsigned integer.
 494       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 495       // Scalar float/double to signed/unsigned integer.
 496       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 497
 498       // Vector float/double to signed/unsigned integer with truncation.
 499       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 500       // Scalar float/double to signed/unsigned integer with truncation.
 501       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 502
 503       // Vector signed/unsigned integer to float/double.
 504       CVTSI2P, CVTUI2P,
 505
 506       // Masked versions of above. Used for v2f64->v4f32.
 507       // SRC, PASSTHRU, MASK
 508       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 509       MCVTSI2P, MCVTUI2P,
 510
 511       // Vector float to bfloat16.
 512       // Convert TWO packed single data to one packed BF16 data
 513       CVTNE2PS2BF16,
 514       // Convert packed single data to packed BF16 data
 515       CVTNEPS2BF16,
 516       // Masked version of above.
 517       // SRC, PASSTHRU, MASK
 518       MCVTNEPS2BF16,
 519
 520       // Dot product of BF16 pairs accumulated into
 521       // packed single precision.
 522       DPBF16PS,
 523
 524       // Save xmm argument registers to the stack, according to %al. An operator
 525       // is needed so that this can be expanded with control flow.
 526       VASTART_SAVE_XMM_REGS,
 527
 528       // Windows' _chkstk call to do stack probing.
 529       WIN_ALLOCA,
 530
 531       // For allocating variable amounts of stack space when using
 532       // segmented stacks. Check if the current stacklet has enough space, and
 533       // falls back to heap allocation if not.
 534       SEG_ALLOCA,
 535
 536       // Memory barriers.
 537       MEMBARRIER,
 538       MFENCE,
 539
 540       // Store FP status word into i16 register.
 541       FNSTSW16r,
 542
 543       // Store contents of %ah into %eflags.
 544       SAHF,
 545
 546       // Get a random integer and indicate whether it is valid in CF.
 547       RDRAND,
 548
 549       // Get a NIST SP800-90B & C compliant random integer and
 550       // indicate whether it is valid in CF.
 551       RDSEED,
 552
 553       // Protection keys
 554       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 555       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 556       // value for ECX.
 557       RDPKRU, WRPKRU,
 558
 559       // SSE42 string comparisons.
 560       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 561       // will emit one or two instructions based on which results are used. If
 562       // flags and index/mask are both used, a single instruction suffices since
 563       // we won't have to pick an opcode for flags. Instead we can rely on the
 564       // DAG to CSE everything and decide at isel.
 565       PCMPISTR,
 566       PCMPESTR,
 567
 568       // Test if in transactional execution.
 569       XTEST,
 570
 571       // ERI instructions.
 572       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 573       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 574
 575       // Conversions between float and half-float.
 576       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 577
 578       // Masked version of above.
 579       // SRC, RND, PASSTHRU, MASK
 580       MCVTPS2PH,
 581
 582       // Galois Field Arithmetic Instructions
 583       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 584
 585       // LWP insert record.
 586       LWPINS,
 587
 588       // User level wait
 589       UMWAIT, TPAUSE,
 590
 591       // Enqueue Stores Instructions
 592       ENQCMD, ENQCMDS,
 593
 594       // For avx512-vp2intersect
 595       VP2INTERSECT,
 596
 597       // Compare and swap.
 598       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 599       LCMPXCHG8_DAG,
 600       LCMPXCHG16_DAG,
 601       LCMPXCHG8_SAVE_EBX_DAG,
 602       LCMPXCHG16_SAVE_RBX_DAG,
 603
 604       /// LOCK-prefixed arithmetic read-modify-write instructions.
 605       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 606       LADD, LSUB, LOR, LXOR, LAND,
 607
 608       // Load, scalar_to_vector, and zero extend.
 609       VZEXT_LOAD,
 610
 611       // extract_vector_elt, store.
 612       VEXTRACT_STORE,
 613
 614       // Store FP control word into i16 memory.
 615       FNSTCW16m,
 616
 617       /// This instruction implements FP_TO_SINT with the
 618       /// integer destination in memory and a FP reg source.  This corresponds
 619       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 620       /// has two inputs (token chain and address) and two outputs (int value
 621       /// and token chain). Memory VT specifies the type to store to.
 622       FP_TO_INT_IN_MEM,
 623
 624       /// This instruction implements SINT_TO_FP with the
 625       /// integer source in memory and FP reg result.  This corresponds to the
 626       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 627       /// and two outputs (FP value and token chain; FILD_FLAG also produces a
 628       /// flag). The integer source type is specified by the memory VT.
 629       FILD,
 630       FILD_FLAG,
 631
 632       /// This instruction implements a fp->int store from FP stack
 633       /// slots. This corresponds to the fist instruction. It takes a
 634       /// chain operand, value to store, address, and glue. The memory VT
 635       /// specifies the type to store as.
 636       FIST,
 637
 638       /// This instruction implements an extending load to FP stack slots.
 639       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 640       /// operand, and ptr to load from. The memory VT specifies the type to
 641       /// load from.
 642       FLD,
 643
 644       /// This instruction implements a truncating store from FP stack
 645       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 646       /// chain operand, value to store, address, and glue. The memory VT
 647       /// specifies the type to store as.
 648       FST,
 649
 650       /// This instruction grabs the address of the next argument
 651       /// from a va_list. (reads and modifies the va_list in memory)
 652       VAARG_64,
 653
 654       // Vector truncating store with unsigned/signed saturation
 655       VTRUNCSTOREUS, VTRUNCSTORES,
 656       // Vector truncating masked store with unsigned/signed saturation
 657       VMTRUNCSTOREUS, VMTRUNCSTORES,
 658
 659       // X86 specific gather and scatter
 660       MGATHER, MSCATTER,
 661
 662       // WARNING: Do not add anything at the end unless you want the node to
 663       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 664       // opcodes will be thought of as target memory ops!
 665     };
 666   } // end namespace X86ISD
 667
 668   /// Define some predicates that are used for node matching.
 669   namespace X86 {
 670     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 671     bool isZeroNode(SDValue Elt);
 672
 673     /// Returns true if the given offset can
 674     /// fit into the displacement field of the instruction.
 675     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 676                                       bool hasSymbolicDisplacement = true);
 677
 678     /// Determines whether the callee is required to pop its
 679     /// own arguments. Callee pop is necessary to support tail calls.
 680     bool isCalleePop(CallingConv::ID CallingConv,
 681                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 682
 683   } // end namespace X86
 684
 685   //===--------------------------------------------------------------------===//
 686   //  X86 Implementation of the TargetLowering interface
 687   class X86TargetLowering final : public TargetLowering {
 688   public:
 689     explicit X86TargetLowering(const X86TargetMachine &TM,
 690                                const X86Subtarget &STI);
 691
 692     unsigned getJumpTableEncoding() const override;
 693     bool useSoftFloat() const override;
 694
 695     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 696                                ArgListTy &Args) const override;
 697
 698     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 699       return MVT(MVT::i8); // i8 for every VT; the DataLayout is not consulted.
 700     }
 701
 702     const MCExpr *
 703     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 704                               const MachineBasicBlock *MBB, unsigned uid,
 705                               MCContext &Ctx) const override;
 706
 707     /// Returns relocation base for the given PIC jumptable.
 708     SDValue getPICJumpTableRelocBase(SDValue Table,
 709                                      SelectionDAG &DAG) const override;
 710     const MCExpr *
 711     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 712                                  unsigned JTI, MCContext &Ctx) const override;
 713
 714     /// Return the desired alignment for ByVal aggregate
 715     /// function arguments in the caller parameter area. For X86, aggregates
 716     /// that contain SSE vectors are placed at 16-byte boundaries while the
 717     /// rest are at 4-byte boundaries.
 718     unsigned getByValTypeAlignment(Type *Ty,
 719                                    const DataLayout &DL) const override;
 720
 721     /// Returns the target specific optimal type for load
 722     /// and store operations as a result of memset, memcpy, and memmove
 723     /// lowering. If DstAlign is zero that means the destination
 724     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 725     /// means there isn't a need to check it against alignment requirement,
 726     /// probably because the source does not need to be loaded. If 'IsMemset' is
 727     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 728     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 729     /// source is constant so it does not need to be loaded.
 730     /// It returns EVT::Other if the type should be determined using generic
 731     /// target-independent logic.
 732     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 733                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 734                             const AttributeList &FuncAttributes) const override;
 735
 736     /// Returns true if it's safe to use load / store of the
 737     /// specified type to expand memcpy / memset inline. This is mostly true
 738     /// for all types except for some special cases. For example, on X86
 739     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 740     /// also does type conversion. Note the specified type doesn't have to be
 741     /// legal as the hook is used before type legalization.
 742     bool isSafeMemOpType(MVT VT) const override;
 743
 744     /// Returns true if the target allows unaligned memory accesses of the
 745     /// specified type. Returns whether it is "fast" in the last argument.
 746     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 747                                         MachineMemOperand::Flags Flags,
 748                                         bool *Fast) const override;
 749
 750     /// Provide custom lowering hooks for some operations.
 751     ///
 752     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 753
 754     /// Places new result values for the node in Results (their number
 755     /// and types must exactly match those of the original return values of
 756     /// the node), or leaves Results empty, which indicates that the node is not
 757     /// to be custom lowered after all.
 758     void LowerOperationWrapper(SDNode *N,
 759                                SmallVectorImpl<SDValue> &Results,
 760                                SelectionDAG &DAG) const override;
 761
 762     /// Replace the results of node with an illegal result
 763     /// type with new values built out of custom code.
 764     ///
 765     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 766                             SelectionDAG &DAG) const override;
 767
 768     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 769
 770     // Return true if it is profitable to combine a BUILD_VECTOR with a
 771     // stride-pattern to a shuffle and a truncate.
 772     // Example of such a combine:
 773     // v4i32 build_vector((extract_elt V, 1),
 774     //                    (extract_elt V, 3),
 775     //                    (extract_elt V, 5),
 776     //                    (extract_elt V, 7))
 777     //  -->
 778     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 779     // v4i64)
 780     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 781         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 782
 783     /// Return true if the target has native support for
 784     /// the specified value type and it is 'desirable' to use the type for the
 785     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 786     /// instruction encodings are longer and some i16 instructions are slow.
 787     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 788
 789     /// Return true if the target has native support for the
 790     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 791     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 792     /// and some i16 instructions are slow.
 793     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 794
 795     MachineBasicBlock *
 796     EmitInstrWithCustomInserter(MachineInstr &MI,
 797                                 MachineBasicBlock *MBB) const override;
 798
 799     /// This method returns the name of a target specific DAG node.
 800     const char *getTargetNodeName(unsigned Opcode) const override;
 801
 802     /// Vector stores are not merged after legalization, as doing so may
 803     /// conflict with x86-specific store splitting optimizations.
 804     bool mergeStoresAfterLegalization(EVT MemVT) const override {
           const bool IsVectorStore = MemVT.isVector();
 805       return !IsVectorStore;
 806     }
 807
 808     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 809                           const SelectionDAG &DAG) const override;
 810
 811     bool isCheapToSpeculateCttz() const override;
 812
 813     bool isCheapToSpeculateCtlz() const override;
 814
 815     bool isCtlzFast() const override;
 816
 817     bool hasBitPreservingFPLogic(EVT VT) const override {
           // Holds for every vector type and for the scalar types f32 and f64.
 818       return VT.isVector() || VT == MVT::f64 || VT == MVT::f32;
 819     }
 820
 821     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 822       // A mixed float/int pair is the profitable case: splitting saves two
 823       // bitwise instructions and one float-to-int instruction at the cost of
 824       // one additional store. Avoiding the float->int domain switch for the
 825       // input value is potentially an even more significant benefit, so the
 826       // split is likely a win.
 827       const bool FloatLowIntHigh = LTy.isFloatingPoint() && HTy.isInteger();
 828       const bool IntLowFloatHigh = LTy.isInteger() && HTy.isFloatingPoint();
 829
 830       // For an all-int pair the split would save two bitwise instructions but
 831       // add one store (costing one more store buffer). That benefit is less
 832       // clear-cut, so such pairs are excluded until a testcase proves a win.
 833       // Every pair that is not mixed float/int therefore yields false.
 834       return FloatLowIntHigh || IntLowFloatHigh;
 835     }
 836
    /// Profitability query for folding a mask-and-compare-with-zero rooted at
    /// the 'and' instruction \p AndI. NOTE(review): criteria are defined out
    /// of line.
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    /// Whether an and-not feeding a compare is available for operand \p Y.
    /// NOTE(review): presumably keyed on subtarget features and the type of
    /// \p Y -- confirm in the implementation.
    bool hasAndNotCompare(SDValue Y) const override;

    /// Whether an and-not operation is available for operand \p Y.
    /// NOTE(review): see the implementation for the exact conditions.
    bool hasAndNot(SDValue Y) const override;

    /// Whether a bit-test of \p X by \p Y is available.
    /// NOTE(review): see the implementation for the exact conditions.
    bool hasBitTest(SDValue X, SDValue Y) const override;

    /// Combiner hook named for hoisting a constant out of a shift on the LHS
    /// of an 'and'. NOTE(review): parameter roles follow the base-class
    /// contract, which is not visible from this header.
    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    /// Whether a pair of constant shifts at node \p N should be folded into a
    /// mask at combine level \p Level. NOTE(review): policy is out of line.
    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    /// Whether a mask should be folded into a pair of variable shifts for
    /// operand \p Y. NOTE(review): policy is out of line.
    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 854
 855     bool
 856     shouldTransformSignedTruncationCheck(EVT XVT,
 857                                          unsigned KeptBits) const override {
 858       // For vectors, we don't have a preference..
 859       if (XVT.isVector())
 860         return false;
 861
 862       auto VTIsOk = [](EVT VT) -> bool {
 863         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 864                VT == MVT::i64;
 865       };
 866
 867       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 868       // XVT will be larger than KeptBitsVT.
 869       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 870       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 871     }
 872
 873     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
 874       if (DAG.getMachineFunction().getFunction().hasMinSize())
 875         return false;
 876       return true;
 877     }
 878
 879     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 880
 881     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 882       return VT.isScalarInteger();
 883     }
 884
 885     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 886     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 887
 888     /// Return the value type to use for ISD::SETCC.
 889     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 890                            EVT VT) const override;
 891
 892     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 893                                       TargetLoweringOpt &TLO) const override;
 894
    /// Determine which bits of the target node \p Op are known to be either
    /// zero or one and return them in \p Known.
    /// NOTE(review): \p DemandedElts presumably selects the vector elements
    /// the caller cares about and \p Depth the current recursion depth --
    /// confirm against the base-class documentation.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;
 902
 903     /// Determine the number of bits in the operation that are sign bits.
 904     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 905                                              const APInt &DemandedElts,
 906                                              const SelectionDAG &DAG,
 907                                              unsigned Depth) const override;
 908
    /// Target hook for simplifying the target node \p Op given the demanded
    /// vector elements. \p KnownUndef and \p KnownZero are output parameters.
    /// NOTE(review): the meaning of the boolean result follows the base-class
    /// contract -- confirm there.
    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    /// Target hook for simplifying the target node \p Op given the demanded
    /// bits and elements. \p Known is an output parameter.
    /// NOTE(review): the meaning of the boolean result follows the base-class
    /// contract -- confirm there.
    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    /// Multiple-use variant of the demanded-bits hook; returns an SDValue
    /// rather than a flag. NOTE(review): presumably a null SDValue means "no
    /// simplification" -- confirm in the implementation.
    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;
 926
 927     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 928
 929     SDValue unwrapAddress(SDValue N) const override;
 930
 931     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 932
 933     bool ExpandInlineAsm(CallInst *CI) const override;
 934
 935     ConstraintType getConstraintType(StringRef Constraint) const override;
 936
 937     /// Examine constraint string and operand type and determine a weight value.
 938     /// The operand object must already have been set up with the operand type.
 939     ConstraintWeight
 940       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 941                                      const char *constraint) const override;
 942
 943     const char *LowerXConstraint(EVT ConstraintVT) const override;
 944
    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops.
    /// NOTE(review): earlier wording described a 'hasMemory' flag (true when
    /// one of the inline asm constraints is 'm'); this signature has no such
    /// parameter, so that sentence no longer applies here.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;
 952
 953     unsigned
 954     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 955       if (ConstraintCode == "i")
 956         return InlineAsm::Constraint_i;
 957       else if (ConstraintCode == "o")
 958         return InlineAsm::Constraint_o;
 959       else if (ConstraintCode == "v")
 960         return InlineAsm::Constraint_v;
 961       else if (ConstraintCode == "X")
 962         return InlineAsm::Constraint_X;
 963       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 964     }
 965
 966     /// Handle Lowering flag assembly outputs.
 967     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
 968                                         const AsmOperandInfo &Constraint,
 969                                         SelectionDAG &DAG) const override;
 970
 971     /// Given a physical register constraint
 972     /// (e.g. {edx}), return the register number and the register class for the
 973     /// register.  This should only be used for C_Register constraints.  On
 974     /// error, this returns a register number of 0.
 975     std::pair<unsigned, const TargetRegisterClass *>
 976     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 977                                  StringRef Constraint, MVT VT) const override;
 978
 979     /// Return true if the addressing mode represented
 980     /// by AM is legal for this target, for a load/store of the specified type.
 981     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 982                                Type *Ty, unsigned AS,
 983                                Instruction *I = nullptr) const override;
 984
 985     /// Return true if the specified immediate is legal
 986     /// icmp immediate, that is the target has icmp instructions which can
 987     /// compare a register against the immediate without having to materialize
 988     /// the immediate into a register.
 989     bool isLegalICmpImmediate(int64_t Imm) const override;
 990
 991     /// Return true if the specified immediate is legal
 992     /// add immediate, that is the target has add instructions which can
 993     /// add a register and the immediate without having to materialize
 994     /// the immediate into a register.
 995     bool isLegalAddImmediate(int64_t Imm) const override;
 996
 997     bool isLegalStoreImmediate(int64_t Imm) const override;
 998
 999     /// Return the cost of the scaling factor used in the addressing
1000     /// mode represented by AM for this target, for a load/store
1001     /// of the specified type.
1002     /// If the AM is supported, the return value must be >= 0.
1003     /// If the AM is not supported, it returns a negative value.
1004     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1005                              unsigned AS) const override;
1006
1007     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1008
1009     /// Add x86-specific opcodes to the default list.
1010     bool isBinOp(unsigned Opcode) const override;
1011
1012     /// Returns true if the opcode is a commutative binary operation.
1013     bool isCommutativeBinOp(unsigned Opcode) const override;
1014
1015     /// Return true if it's free to truncate a value of
1016     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1017     /// register EAX to i16 by referencing its sub-register AX.
1018     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1019     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1020
1021     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1022
1023     /// Return true if any actual instruction that defines a
1024     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1025     /// register. This does not necessarily include registers defined in
1026     /// unknown ways, such as incoming arguments, or copies from unknown
1027     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1028     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1029     /// all instructions that define 32-bit values implicit zero-extend the
1030     /// result out to 64 bits.
1031     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1032     bool isZExtFree(EVT VT1, EVT VT2) const override;
1033     bool isZExtFree(SDValue Val, EVT VT2) const override;
1034
1035     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1036     /// extend node) is profitable.
1037     bool isVectorLoadExtDesirable(SDValue) const override;
1038
1039     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1040     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1041     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1042     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1043
1044     /// Return true if it's profitable to narrow
1045     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1046     /// from i32 to i8 but not from i32 to i16.
1047     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1048
1049     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1050     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1051     /// true and stores the intrinsic information into the IntrinsicInfo that was
1052     /// passed to the function.
1053     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1054                             MachineFunction &MF,
1055                             unsigned Intrinsic) const override;
1056
1057     /// Returns true if the target can instruction select the
1058     /// specified FP immediate natively. If false, the legalizer will
1059     /// materialize the FP immediate as a load from a constant pool.
1060     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1061                       bool ForCodeSize) const override;
1062
1063     /// Targets can use this to indicate that they only support *some*
1064     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1065     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1066     /// be legal.
1067     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1068
1069     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1070     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1071     /// constant pool entry.
1072     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1073
1074     /// Returns true if lowering to a jump table is allowed.
1075     bool areJTsAllowed(const Function *Fn) const override;
1076
1077     /// If true, then instruction selection should
1078     /// seek to shrink the FP constant of the specified type to a smaller type
1079     /// in order to save space and / or reduce runtime.
1080     bool ShouldShrinkFPConstant(EVT VT) const override {
1081       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1082       // expensive than a straight movsd. On the other hand, it's important to
1083       // shrink long double fp constant since fldt is very slow.
1084       return !X86ScalarSSEf64 || VT == MVT::f80;
1085     }
1086
1087     /// Return true if we believe it is correct and profitable to reduce the
1088     /// load node to a smaller type.
1089     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1090                                EVT NewVT) const override;
1091
1092     /// Return true if the specified scalar FP type is computed in an SSE
1093     /// register, not on the X87 floating point stack.
1094     bool isScalarFPTypeInSSEReg(EVT VT) const {
1095       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1096              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1097     }
1098
1099     /// Returns true if it is beneficial to convert a load of a constant
1100     /// to just the constant itself.
1101     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1102                                            Type *Ty) const override;
1103
1104     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1105
1106     bool convertSelectOfConstantsToMath(EVT VT) const override;
1107
1108     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1109                                 SDValue C) const override;
1110
1111     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1112                                   bool IsSigned) const override;
1113
1114     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1115     /// with this index.
1116     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1117                                  unsigned Index) const override;
1118
1119     /// Scalar ops always have equal or better analysis/performance/power than
1120     /// the vector equivalent, so this always makes sense if the scalar op is
1121     /// supported.
1122     bool shouldScalarizeBinop(SDValue) const override;
1123
1124     /// Extract of a scalar FP value from index 0 of a vector is free.
1125     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1126       EVT EltVT = VT.getScalarType();
1127       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1128     }
1129
1130     /// Overflow nodes should get combined/lowered to optimal instructions
1131     /// (they should allow eliminating explicit compares by getting flags from
1132     /// math ops).
1133     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1134
1135     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1136                                       unsigned AddrSpace) const override {
1137       // If we can replace more than 2 scalar stores, there will be a reduction
1138       // in instructions even after we add a vector constant load.
1139       return NumElem > 2;
1140     }
1141
1142     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1143                                  const SelectionDAG &DAG,
1144                                  const MachineMemOperand &MMO) const override;
1145
1146     /// Intel processors have a unified instruction and data cache
1147     const char * getClearCacheBuiltinName() const override {
1148       return nullptr; // nothing to do, move along.
1149     }
1150
1151     unsigned getRegisterByName(const char* RegName, EVT VT,
1152                                SelectionDAG &DAG) const override;
1153
1154     /// If a physical register, this returns the register that receives the
1155     /// exception address on entry to an EH pad.
1156     unsigned
1157     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1158
1159     /// If a physical register, this returns the register that receives the
1160     /// exception typeid on entry to a landing pad.
1161     unsigned
1162     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1163
1164     virtual bool needsFixedCatchObjects() const override;
1165
1166     /// This method returns a target specific FastISel object,
1167     /// or null if the target does not support "fast" ISel.
1168     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1169                              const TargetLibraryInfo *libInfo) const override;
1170
1171     /// If the target has a standard location for the stack protector cookie,
1172     /// returns the address of that location. Otherwise, returns nullptr.
1173     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1174
1175     bool useLoadStackGuardNode() const override;
1176     bool useStackGuardXorFP() const override;
1177     void insertSSPDeclarations(Module &M) const override;
1178     Value *getSDagStackGuard(const Module &M) const override;
1179     Function *getSSPStackGuardCheck(const Module &M) const override;
1180     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1181                                 const SDLoc &DL) const override;
1182
1183
    /// Returns a Value describing where the SafeStack pointer is stored, for
    /// targets that keep it at a fixed offset in some non-standard address
    /// space.
    /// NOTE(review): the previous wording said "return true ... and populates
    /// the address space and offset", but this signature returns a Value * and
    /// has no output parameters; confirm the fallback behaviour in the
    /// implementation.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1188
1189     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1190                       SelectionDAG &DAG) const;
1191
1192     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1193
1194     /// Customize the preferred legalization strategy for certain types.
1195     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1196
1197     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1198                                       EVT VT) const override;
1199
1200     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1201                                            CallingConv::ID CC,
1202                                            EVT VT) const override;
1203
1204     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1205
1206     bool supportSwiftError() const override;
1207
1208     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1209
1210     unsigned getStackProbeSize(MachineFunction &MF) const;
1211
1212     bool hasVectorBlend() const override { return true; }
1213
1214     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1215
1216     /// Lower interleaved load(s) into target specific
1217     /// instructions/intrinsics.
1218     bool lowerInterleavedLoad(LoadInst *LI,
1219                               ArrayRef<ShuffleVectorInst *> Shuffles,
1220                               ArrayRef<unsigned> Indices,
1221                               unsigned Factor) const override;
1222
1223     /// Lower interleaved store(s) into target specific
1224     /// instructions/intrinsics.
1225     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1226                                unsigned Factor) const override;
1227
1228     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1229                                    SDValue Addr, SelectionDAG &DAG)
1230                                    const override;
1231
1232   protected:
1233     std::pair<const TargetRegisterClass *, uint8_t>
1234     findRepresentativeClass(const TargetRegisterInfo *TRI,
1235                             MVT VT) const override;
1236
1237   private:
1238     /// Keep a reference to the X86Subtarget around so that we can
1239     /// make the right decision when generating code for different targets.
1240     const X86Subtarget &Subtarget;
1241
1242     /// Select between SSE or x87 floating point ops.
1243     /// When SSE is available, use it for f32 operations.
1244     /// When SSE2 is available, use it for f64 operations.
1245     bool X86ScalarSSEf32;
1246     bool X86ScalarSSEf64;
1247
1248     /// A list of legal FP immediates.
1249     std::vector<APFloat> LegalFPImmediates;
1250
1251     /// Indicate that this x86 target can instruction
1252     /// select the specified FP immediate natively.
1253     void addLegalFPImmediate(const APFloat& Imm) {
1254       LegalFPImmediates.push_back(Imm);
1255     }
1256
    /// Helper for lowering the results of a call described by \p Ins; the
    /// lowered values are appended to \p InVals.
    /// NOTE(review): the roles of \p InFlag and \p RegMask are defined by the
    /// out-of-line implementation -- confirm there.
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    /// Helper for lowering an incoming argument that is passed in memory.
    /// NOTE(review): \p i is presumably the index of the argument in
    /// \p ArgInfo -- confirm in the implementation.
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    /// Helper for lowering an outgoing call argument \p Arg that is passed in
    /// memory relative to \p StackPtr.
    /// NOTE(review): exact addressing is defined out of line.
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;
1272
1273     // Call lowering helpers.
1274
1275     /// Check whether the call is eligible for tail call optimization. Targets
1276     /// that want to do tail call optimization should implement this function.
1277     bool IsEligibleForTailCallOptimization(SDValue Callee,
1278                                            CallingConv::ID CalleeCC,
1279                                            bool isVarArg,
1280                                            bool isCalleeStructRet,
1281                                            bool isCallerStructRet,
1282                                            Type *RetTy,
1283                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1284                                     const SmallVectorImpl<SDValue> &OutVals,
1285                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1286                                            SelectionDAG& DAG) const;
1287     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1288                                     SDValue Chain, bool IsTailCall,
1289                                     bool Is64Bit, int FPDiff,
1290                                     const SDLoc &dl) const;
1291
1292     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1293                                          SelectionDAG &DAG) const;
1294
1295     unsigned getAddressSpace(void) const;
1296
1297     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1298
1299     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1300     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1301     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1302     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1303
1304     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1305                                   const unsigned char OpFlags = 0) const;
1306     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1307     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1308     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1309     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1310     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1311
1312     /// Creates target global address or external symbol nodes for calls or
1313     /// other uses.
1314     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1315                                   bool ForCall) const;
1316
    // Per-operation lowering helpers. Each one receives the node to lower
    // (Op) together with the DAG and returns the resulting SDValue.
    // NOTE(review): the names suggest one helper per opcode, presumably
    // dispatched from the class's custom-lowering entry point -- confirm in
    // the implementation file.
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1343
1344     SDValue
1345     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1346                          const SmallVectorImpl<ISD::InputArg> &Ins,
1347                          const SDLoc &dl, SelectionDAG &DAG,
1348                          SmallVectorImpl<SDValue> &InVals) const override;
1349     SDValue LowerCall(CallLoweringInfo &CLI,
1350                       SmallVectorImpl<SDValue> &InVals) const override;
1351
1352     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1353                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1354                         const SmallVectorImpl<SDValue> &OutVals,
1355                         const SDLoc &dl, SelectionDAG &DAG) const override;
1356
1357     bool supportSplitCSR(MachineFunction *MF) const override {
1358       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1359           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1360     }
1361     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1362     void insertCopiesSplitCSR(
1363       MachineBasicBlock *Entry,
1364       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1365
1366     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1367
1368     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1369
1370     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1371                             ISD::NodeType ExtendKind) const override;
1372
1373     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1374                         bool isVarArg,
1375                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1376                         LLVMContext &Context) const override;
1377
1378     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1379
1380     TargetLoweringBase::AtomicExpansionKind
1381     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1382     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1383     TargetLoweringBase::AtomicExpansionKind
1384     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1385
1386     LoadInst *
1387     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1388
1389     bool needsCmpXchgNb(Type *MemType) const;
1390
1391     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1392                                 MachineBasicBlock *DispatchBB, int FI) const;
1393
1394     // Utility function to emit the low-level va_arg code for X86-64.
1395     MachineBasicBlock *
1396     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1397                                   MachineBasicBlock *MBB) const;
1398
1399     /// Utility function to emit the xmm reg save portion of va_start.
1400     MachineBasicBlock *
1401     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1402                                              MachineBasicBlock *BB) const;
1403
1404     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1405                                                  MachineInstr &MI2,
1406                                                  MachineBasicBlock *BB) const;
1407
1408     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1409                                          MachineBasicBlock *BB) const;
1410
1411     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1412                                            MachineBasicBlock *BB) const;
1413
1414     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1415                                            MachineBasicBlock *BB) const;
1416
1417     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1418                                            MachineBasicBlock *BB) const;
1419
1420     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1421                                             MachineBasicBlock *BB) const;
1422
1423     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1424                                           MachineBasicBlock *BB) const;
1425
1426     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1427                                           MachineBasicBlock *BB) const;
1428
1429     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1430                                             MachineBasicBlock *BB) const;
1431
1432     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1433                                         MachineBasicBlock *MBB) const;
1434
1435     void emitSetJmpShadowStackFix(MachineInstr &MI,
1436                                   MachineBasicBlock *MBB) const;
1437
1438     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1439                                          MachineBasicBlock *MBB) const;
1440
1441     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1442                                                  MachineBasicBlock *MBB) const;
1443
    /// Emission helper for the instruction \p MI with basic block \p MBB;
    /// returns a MachineBasicBlock pointer.
    /// NOTE(review): presumably handles an FMA3 (fused multiply-add)
    /// instruction -- inferred from the name only; confirm.
    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;
1446
    /// Emission helper for the instruction \p MI with basic block \p MBB;
    /// returns a MachineBasicBlock pointer.
    /// NOTE(review): presumably builds the dispatch block used by SjLj
    /// exception handling -- inferred from the name only; confirm.
    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;
1449
    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    ///
    /// \param Op0   first operand of the comparison.
    /// \param Op1   second operand of the comparison.
    /// \param X86CC the x86 condition code the comparison is intended for.
    /// \param dl    location object supplied for the emitted nodes
    ///              (NOTE(review): exact use lives in the definition).
    /// \param DAG   the SelectionDAG passed in for node creation.
    /// \returns an SDValue for the emitted comparison.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;
1454
    /// Convert a comparison if required by the subtarget.
    ///
    /// \param Cmp the comparison value to inspect.
    /// \param DAG the SelectionDAG passed in for node creation.
    /// \returns an SDValue; presumably \p Cmp itself when no conversion is
    ///          needed (TODO confirm against the definition).
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1457
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    ///
    /// \param Op0   first operand of the setcc.
    /// \param Op1   second operand of the setcc.
    /// \param CC    the target-independent condition code of the setcc.
    /// \param dl    location object supplied for the emitted nodes.
    /// \param DAG   the SelectionDAG passed in for node creation.
    /// \param X86CC output parameter (taken by non-const reference) that
    ///              receives the X86 condition code constant.
    /// \returns an SDValue for the emitted flags.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;
1464
    /// Check if replacement of SQRT with RSQRT should be disabled.
    ///
    /// Overrides a virtual hook declared in a base class.
    /// \param Operand the value whose square root is being considered.
    /// \param DAG     the SelectionDAG passed in by the caller.
    /// \returns a bool; presumably true means a plain sqrt is cheap enough that
    ///          the estimate-based replacement is skipped (TODO confirm).
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1467
    /// Use rsqrt* to speed up sqrt calculations.
    ///
    /// Overrides a virtual hook declared in a base class.
    /// \param Operand         the value to take the (reciprocal) square root of.
    /// \param DAG             the SelectionDAG passed in for node creation.
    /// \param Enabled         integer setting supplied by the caller
    ///                        (NOTE(review): encoding is defined by the base
    ///                        class hook -- confirm there).
    /// \param RefinementSteps taken by non-const reference, so the callee may
    ///                        update it.
    /// \param UseOneConstNR   taken by non-const reference, so the callee may
    ///                        update it.
    /// \param Reciprocal      presumably selects a 1/sqrt(x) estimate rather
    ///                        than sqrt(x) (TODO confirm).
    /// \returns an SDValue for the estimate (NOTE(review): the value returned
    ///          when no estimate is produced is not visible here).
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;
1472
    /// Use rcp* to speed up fdiv calculations.
    ///
    /// Overrides a virtual hook declared in a base class.
    /// \param Operand         the value whose reciprocal is estimated.
    /// \param DAG             the SelectionDAG passed in for node creation.
    /// \param Enabled         integer setting supplied by the caller
    ///                        (NOTE(review): encoding is defined by the base
    ///                        class hook -- confirm there).
    /// \param RefinementSteps taken by non-const reference, so the callee may
    ///                        update it.
    /// \returns an SDValue for the estimate (NOTE(review): the value returned
    ///          when no estimate is produced is not visible here).
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;
1476
    /// Reassociate floating point divisions into multiply by reciprocal.
    ///
    /// Overrides a virtual hook declared in a base class and returns an
    /// unsigned value.
    /// NOTE(review): the meaning of the returned number is defined by the base
    /// class hook -- presumably a threshold on how many divisions must share a
    /// divisor before the transform applies; confirm there.
    unsigned combineRepeatedFPDivisors() const override;
1479   };
1480
  namespace X86 {
    /// Declaration of the X86 FastISel factory function.
    ///
    /// \param funcInfo function lowering information, passed by reference.
    /// \param libInfo  target library information, passed as a pointer to
    ///                 const.
    /// \returns a FastISel pointer (NOTE(review): ownership of the returned
    ///          object is not visible from this declaration -- confirm at the
    ///          definition and call sites).
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1485
1486   // Base class for all X86 non-masked store operations.
1487   class X86StoreSDNode : public MemSDNode {
1488   public:
1489     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1490                    SDVTList VTs, EVT MemVT,
1491                    MachineMemOperand *MMO)
1492       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1493     const SDValue &getValue() const { return getOperand(1); }
1494     const SDValue &getBasePtr() const { return getOperand(2); }
1495
1496     static bool classof(const SDNode *N) {
1497       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1498         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1499     }
1500   };
1501
1502   // Base class for all X86 masked store operations.
1503   // The class has the same order of operands as MaskedStoreSDNode for
1504   // convenience.
1505   class X86MaskedStoreSDNode : public MemSDNode {
1506   public:
1507     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1508                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1509                          MachineMemOperand *MMO)
1510       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1511
1512     const SDValue &getValue()   const { return getOperand(1); }
1513     const SDValue &getBasePtr() const { return getOperand(2); }
1514     const SDValue &getMask()    const { return getOperand(3); }
1515
1516     static bool classof(const SDNode *N) {
1517       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1518         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1519     }
1520   };
1521
1522   // X86 Truncating Store with Signed saturation.
1523   class TruncSStoreSDNode : public X86StoreSDNode {
1524   public:
1525     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1526                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1527       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1528
1529     static bool classof(const SDNode *N) {
1530       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1531     }
1532   };
1533
1534   // X86 Truncating Store with Unsigned saturation.
1535   class TruncUSStoreSDNode : public X86StoreSDNode {
1536   public:
1537     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1538                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1539       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1540
1541     static bool classof(const SDNode *N) {
1542       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1543     }
1544   };
1545
1546   // X86 Truncating Masked Store with Signed saturation.
1547   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1548   public:
1549     MaskedTruncSStoreSDNode(unsigned Order,
1550                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1551                          MachineMemOperand *MMO)
1552       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1553
1554     static bool classof(const SDNode *N) {
1555       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1556     }
1557   };
1558
1559   // X86 Truncating Masked Store with Unsigned saturation.
1560   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1561   public:
1562     MaskedTruncUSStoreSDNode(unsigned Order,
1563                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1564                             MachineMemOperand *MMO)
1565       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1566
1567     static bool classof(const SDNode *N) {
1568       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1569     }
1570   };
1571
1572   // X86 specific Gather/Scatter nodes.
1573   // The class has the same order of operands as MaskedGatherScatterSDNode for
1574   // convenience.
1575   class X86MaskedGatherScatterSDNode : public MemSDNode {
1576   public:
1577     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1578                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1579                                  MachineMemOperand *MMO)
1580         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1581
1582     const SDValue &getBasePtr() const { return getOperand(3); }
1583     const SDValue &getIndex()   const { return getOperand(4); }
1584     const SDValue &getMask()    const { return getOperand(2); }
1585     const SDValue &getScale()   const { return getOperand(5); }
1586
1587     static bool classof(const SDNode *N) {
1588       return N->getOpcode() == X86ISD::MGATHER ||
1589              N->getOpcode() == X86ISD::MSCATTER;
1590     }
1591   };
1592
1593   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1594   public:
1595     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1596                           EVT MemVT, MachineMemOperand *MMO)
1597         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1598                                        MMO) {}
1599
1600     const SDValue &getPassThru() const { return getOperand(1); }
1601
1602     static bool classof(const SDNode *N) {
1603       return N->getOpcode() == X86ISD::MGATHER;
1604     }
1605   };
1606
1607   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1608   public:
1609     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1610                            EVT MemVT, MachineMemOperand *MMO)
1611         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1612                                        MMO) {}
1613
1614     const SDValue &getValue() const { return getOperand(1); }
1615
1616     static bool classof(const SDNode *N) {
1617       return N->getOpcode() == X86ISD::MSCATTER;
1618     }
1619   };
1620
1621   /// Generate unpacklo/unpackhi shuffle mask.
1622   template <typename T = int>
1623   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1624                                bool Unary) {
1625     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1626     int NumElts = VT.getVectorNumElements();
1627     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1628     for (int i = 0; i < NumElts; ++i) {
1629       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1630       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1631       Pos += (Unary ? 0 : NumElts * (i % 2));
1632       Pos += (Lo ? 0 : NumEltsInLane / 2);
1633       Mask.push_back(Pos);
1634     }
1635   }
1636
1637   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1638   /// mask index with the scaled sequential indices for an equivalent narrowed
1639   /// mask. This is the reverse process to canWidenShuffleElements, but can
1640   /// always succeed.
1641   template <typename T>
1642   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1643                         SmallVectorImpl<T> &ScaledMask) {
1644     assert(0 < Scale && "Unexpected scaling factor");
1645     size_t NumElts = Mask.size();
1646     ScaledMask.assign(NumElts * Scale, -1);
1647
1648     for (int i = 0; i != (int)NumElts; ++i) {
1649       int M = Mask[i];
1650
1651       // Repeat sentinel values in every mask element.
1652       if (M < 0) {
1653         for (int s = 0; s != Scale; ++s)
1654           ScaledMask[(Scale * i) + s] = M;
1655         continue;
1656       }
1657
1658       // Scale mask element and increment across each mask element.
1659       for (int s = 0; s != Scale; ++s)
1660         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1661     }
1662   }
1663 } // end namespace llvm
1664
1665 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H