lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20
  21 namespace llvm {
  22   class X86Subtarget;
  23   class X86TargetMachine;
  24
  25   namespace X86ISD {
  26     // X86 Specific DAG Nodes
  27     enum NodeType : unsigned {
  28       // Start the numbering where the builtin ops leave off.
  29       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  30
  31       /// Bit scan forward.
  32       BSF,
  33       /// Bit scan reverse.
  34       BSR,
  35
  36       /// Double shift instructions. These correspond to
  37       /// X86::SHLDxx and X86::SHRDxx instructions.
  38       SHLD,
  39       SHRD,
  40
  41       /// Bitwise logical AND of floating point values. This corresponds
  42       /// to X86::ANDPS or X86::ANDPD.
  43       FAND,
  44
  45       /// Bitwise logical OR of floating point values. This corresponds
  46       /// to X86::ORPS or X86::ORPD.
  47       FOR,
  48
  49       /// Bitwise logical XOR of floating point values. This corresponds
  50       /// to X86::XORPS or X86::XORPD.
  51       FXOR,
  52
  53       /// Bitwise logical ANDNOT of floating point values. This
  54       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  55       FANDN,
  56
  57       /// These operations represent an abstract X86 call
  58       /// instruction, which includes a bunch of information.  In particular the
  59       /// operands of these nodes are:
  60       ///
  61       ///     #0 - The incoming token chain
  62       ///     #1 - The callee
  63       ///     #2 - The number of arg bytes the caller pushes on the stack.
  64       ///     #3 - The number of arg bytes the callee pops off the stack.
  65       ///     #4 - The value to pass in AL/AX/EAX (optional)
  66       ///     #5 - The value to pass in DL/DX/EDX (optional)
  67       ///
  68       /// The result values of these nodes are:
  69       ///
  70       ///     #0 - The outgoing token chain
  71       ///     #1 - The first register result value (optional)
  72       ///     #2 - The second register result value (optional)
  73       ///
  74       CALL,
  75
  76       /// Same as call except it adds the NoTrack prefix.
  77       NT_CALL,
  78
  79       /// X86 compare and logical compare instructions.
  80       CMP, COMI, UCOMI,
  81
  82       /// X86 bit-test instructions.
  83       BT,
  84
  85       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  86       /// operand, usually produced by a CMP instruction.
  87       SETCC,
  88
  89       /// X86 Select
  90       SELECTS,
  91
  92       // Same as SETCC except it's materialized with a sbb and the value is all
  93       // one's or all zero's.
  94       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  95
  96       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  97       /// Operands are two FP values to compare; result is a mask of
  98       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
  99       FSETCC,
 100
 101       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 102       /// a version with SAE.
 103       FSETCCM, FSETCCM_SAE,
 104
 105       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 106       /// to select from. Operand 2 is the condition code, and operand 3 is the
 107       /// flag operand produced by a CMP or TEST instruction.
 108       CMOV,
 109
 110       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 111       /// is the block to branch if condition is true, operand 2 is the
 112       /// condition code, and operand 3 is the flag operand produced by a CMP
 113       /// or TEST instruction.
 114       BRCOND,
 115
 116       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 117       /// operand 1 is the target address.
 118       NT_BRIND,
 119
 120       /// Return with a flag operand. Operand 0 is the chain operand, operand
 121       /// 1 is the number of bytes of stack to pop.
 122       RET_FLAG,
 123
 124       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 125       IRET,
 126
 127       /// Repeat fill, corresponds to X86::REP_STOSx.
 128       REP_STOS,
 129
 130       /// Repeat move, corresponds to X86::REP_MOVSx.
 131       REP_MOVS,
 132
 133       /// On Darwin, this node represents the result of the popl
 134       /// at function entry, used for PIC code.
 135       GlobalBaseReg,
 136
 137       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 138       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 139       /// MCSymbol and TargetBlockAddress.
 140       Wrapper,
 141
 142       /// Special wrapper used under X86-64 PIC mode for RIP
 143       /// relative displacements.
 144       WrapperRIP,
 145
 146       /// Copies a 64-bit value from an MMX vector to the low word
 147       /// of an XMM vector, with the high word zero filled.
 148       MOVQ2DQ,
 149
 150       /// Copies a 64-bit value from the low word of an XMM vector
 151       /// to an MMX vector.
 152       MOVDQ2Q,
 153
 154       /// Copies a 32-bit value from the low word of a MMX
 155       /// vector to a GPR.
 156       MMX_MOVD2W,
 157
 158       /// Copies a GPR into the low 32-bit word of a MMX vector
 159       /// and zero out the high word.
 160       MMX_MOVW2D,
 161
 162       /// Extract an 8-bit value from a vector and zero extend it to
 163       /// i32, corresponds to X86::PEXTRB.
 164       PEXTRB,
 165
 166       /// Extract a 16-bit value from a vector and zero extend it to
 167       /// i32, corresponds to X86::PEXTRW.
 168       PEXTRW,
 169
 170       /// Insert any element of a 4 x float vector into any element
 171       /// of a destination 4 x float vector.
 172       INSERTPS,
 173
 174       /// Insert the lower 8-bits of a 32-bit value to a vector,
 175       /// corresponds to X86::PINSRB.
 176       PINSRB,
 177
 178       /// Insert the lower 16-bits of a 32-bit value to a vector,
 179       /// corresponds to X86::PINSRW.
 180       PINSRW,
 181
 182       /// Shuffle 16 8-bit values within a vector.
 183       PSHUFB,
 184
 185       /// Compute Sum of Absolute Differences.
 186       PSADBW,
 187       /// Compute Double Block Packed Sum-Absolute-Differences
 188       DBPSADBW,
 189
 190       /// Bitwise Logical AND NOT of Packed FP values.
 191       ANDNP,
 192
 193       /// Blend where the selector is an immediate.
 194       BLENDI,
 195
 196       /// Dynamic (non-constant condition) vector blend where only the sign bits
 197       /// of the condition elements are used. This is used to enforce that the
 198       /// condition mask is not valid for generic VSELECT optimizations. This
 199       /// is also used to implement the intrinsics.
 200       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 201       BLENDV,
 202
 203       /// Combined add and sub on an FP vector.
 204       ADDSUB,
 205
 206       //  FP vector ops with rounding mode.
 207       FADD_RND, FADDS, FADDS_RND,
 208       FSUB_RND, FSUBS, FSUBS_RND,
 209       FMUL_RND, FMULS, FMULS_RND,
 210       FDIV_RND, FDIVS, FDIVS_RND,
 211       FMAX_SAE, FMAXS_SAE,
 212       FMIN_SAE, FMINS_SAE,
 213       FSQRT_RND, FSQRTS, FSQRTS_RND,
 214
 215       // FP vector get exponent.
 216       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 217       // Extract Normalized Mantissas.
 218       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 219       // FP Scale.
 220       SCALEF, SCALEF_RND,
 221       SCALEFS, SCALEFS_RND,
 222
 223       // Unsigned Integer average.
 224       AVG,
 225
 226       /// Integer horizontal add/sub.
 227       HADD,
 228       HSUB,
 229
 230       /// Floating point horizontal add/sub.
 231       FHADD,
 232       FHSUB,
 233
 234       // Detect Conflicts Within a Vector
 235       CONFLICT,
 236
 237       /// Floating point max and min.
 238       FMAX, FMIN,
 239
 240       /// Commutative FMIN and FMAX.
 241       FMAXC, FMINC,
 242
 243       /// Scalar intrinsic floating point max and min.
 244       FMAXS, FMINS,
 245
 246       /// Floating point reciprocal-sqrt and reciprocal approximation.
 247       /// Note that these typically require refinement
 248       /// in order to obtain suitable precision.
 249       FRSQRT, FRCP,
 250
 251       // AVX-512 reciprocal approximations with a little more precision.
 252       RSQRT14, RSQRT14S, RCP14, RCP14S,
 253
 254       // Thread Local Storage.
 255       TLSADDR,
 256
 257       // Thread Local Storage. A call to get the start address
 258       // of the TLS block for the current module.
 259       TLSBASEADDR,
 260
 261       // Thread Local Storage.  When calling to an OS provided
 262       // thunk at the address from an earlier relocation.
 263       TLSCALL,
 264
 265       // Exception Handling helpers.
 266       EH_RETURN,
 267
 268       // SjLj exception handling setjmp.
 269       EH_SJLJ_SETJMP,
 270
 271       // SjLj exception handling longjmp.
 272       EH_SJLJ_LONGJMP,
 273
 274       // SjLj exception handling dispatch.
 275       EH_SJLJ_SETUP_DISPATCH,
 276
 277       /// Tail call return. See X86TargetLowering::LowerCall for
 278       /// the list of operands.
 279       TC_RETURN,
 280
 281       // Vector move to low scalar and zero higher vector elements.
 282       VZEXT_MOVL,
 283
 284       // Vector integer truncate.
 285       VTRUNC,
 286       // Vector integer truncate with unsigned/signed saturation.
 287       VTRUNCUS, VTRUNCS,
 288
 289       // Masked version of the above. Used when less than a 128-bit result is
 290       // produced since the mask only applies to the lower elements and can't
 291       // be represented by a select.
 292       // SRC, PASSTHRU, MASK
 293       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 294
 295       // Vector FP extend.
 296       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 297
 298       // Vector FP round.
 299       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 300
 301       // Masked version of above. Used for v2f64->v4f32.
 302       // SRC, PASSTHRU, MASK
 303       VMFPROUND,
 304
 305       // 128-bit vector logical left / right shift
 306       VSHLDQ, VSRLDQ,
 307
 308       // Vector shift elements
 309       VSHL, VSRL, VSRA,
 310
 311       // Vector variable shift
 312       VSHLV, VSRLV, VSRAV,
 313
 314       // Vector shift elements by immediate
 315       VSHLI, VSRLI, VSRAI,
 316
 317       // Shifts of mask registers.
 318       KSHIFTL, KSHIFTR,
 319
 320       // Bit rotate by immediate
 321       VROTLI, VROTRI,
 322
 323       // Vector packed double/float comparison.
 324       CMPP,
 325
 326       // Vector integer comparisons.
 327       PCMPEQ, PCMPGT,
 328
 329       // v8i16 Horizontal minimum and position.
 330       PHMINPOS,
 331
 332       MULTISHIFT,
 333
 334       /// Vector comparison generating mask bits for fp and
 335       /// integer signed and unsigned data types.
 336       CMPM,
 337       // Vector comparison with SAE for FP values
 338       CMPM_SAE,
 339
 340       // Arithmetic operations with FLAGS results.
 341       ADD, SUB, ADC, SBB, SMUL, UMUL,
 342       OR, XOR, AND,
 343
 344       // Bit field extract.
 345       BEXTR,
 346
 347       // Zero High Bits Starting with Specified Bit Position.
 348       BZHI,
 349
 350       // X86-specific multiply by immediate.
 351       MUL_IMM,
 352
 353       // Vector sign bit extraction.
 354       MOVMSK,
 355
 356       // Vector bitwise comparisons.
 357       PTEST,
 358
 359       // Vector packed fp sign bitwise comparisons.
 360       TESTP,
 361
 362       // OR/AND test for masks.
 363       KORTEST,
 364       KTEST,
 365
 366       // ADD for masks.
 367       KADD,
 368
 369       // Several flavors of instructions with vector shuffle behaviors.
 370       // Saturated signed/unsigned packing.
 371       PACKSS,
 372       PACKUS,
 373       // Intra-lane alignr.
 374       PALIGNR,
 375       // AVX512 inter-lane alignr.
 376       VALIGN,
 377       PSHUFD,
 378       PSHUFHW,
 379       PSHUFLW,
 380       SHUFP,
 381       // VBMI2 Concat & Shift.
 382       VSHLD,
 383       VSHRD,
 384       VSHLDV,
 385       VSHRDV,
 386       // Shuffle Packed Values at 128-bit granularity.
 387       SHUF128,
 388       MOVDDUP,
 389       MOVSHDUP,
 390       MOVSLDUP,
 391       MOVLHPS,
 392       MOVHLPS,
 393       MOVSD,
 394       MOVSS,
 395       UNPCKL,
 396       UNPCKH,
 397       VPERMILPV,
 398       VPERMILPI,
 399       VPERMI,
 400       VPERM2X128,
 401
 402       // Variable Permute (VPERM).
 403       // Res = VPERMV MaskV, V0
 404       VPERMV,
 405
 406       // 3-op Variable Permute (VPERMT2).
 407       // Res = VPERMV3 V0, MaskV, V1
 408       VPERMV3,
 409
 410       // Bitwise ternary logic.
 411       VPTERNLOG,
 412       // Fix Up Special Packed Float32/64 values.
 413       VFIXUPIMM, VFIXUPIMM_SAE,
 414       VFIXUPIMMS, VFIXUPIMMS_SAE,
 415       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 416       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 417       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 418       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 419       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 420       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 421       // scaling part of the immediate.
 422       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 423       // Tests types of FP values for packed types.
 424       VFPCLASS,
 425       // Tests types of FP values for scalar types.
 426       VFPCLASSS,
 427
 428       // Broadcast scalar to vector.
 429       VBROADCAST,
 430       // Broadcast mask to vector.
 431       VBROADCASTM,
 432       // Broadcast subvector to vector.
 433       SUBV_BROADCAST,
 434
 435       /// SSE4A Extraction and Insertion.
 436       EXTRQI, INSERTQI,
 437
 438       // XOP arithmetic/logical shifts.
 439       VPSHA, VPSHL,
 440       // XOP signed/unsigned integer comparisons.
 441       VPCOM, VPCOMU,
 442       // XOP packed permute bytes.
 443       VPPERM,
 444       // XOP two source permutation.
 445       VPERMIL2,
 446
 447       // Vector multiply packed unsigned doubleword integers.
 448       PMULUDQ,
 449       // Vector multiply packed signed doubleword integers.
 450       PMULDQ,
 451       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 452       MULHRS,
 453
 454       // Multiply and Add Packed Integers.
 455       VPMADDUBSW, VPMADDWD,
 456
 457       // AVX512IFMA multiply and add.
 458       // NOTE: These are different than the instruction and perform
 459       // op0 x op1 + op2.
 460       VPMADD52L, VPMADD52H,
 461
 462       // VNNI
 463       VPDPBUSD,
 464       VPDPBUSDS,
 465       VPDPWSSD,
 466       VPDPWSSDS,
 467
 468       // FMA nodes.
 469       // We use the target independent ISD::FMA for the non-inverted case.
 470       FNMADD,
 471       FMSUB,
 472       FNMSUB,
 473       FMADDSUB,
 474       FMSUBADD,
 475
 476       // FMA with rounding mode.
 477       FMADD_RND,
 478       FNMADD_RND,
 479       FMSUB_RND,
 480       FNMSUB_RND,
 481       FMADDSUB_RND,
 482       FMSUBADD_RND,
 483
 484       // Compress and expand.
 485       COMPRESS,
 486       EXPAND,
 487
 488       // Bits shuffle
 489       VPSHUFBITQMB,
 490
 491       // Convert signed/unsigned integer to floating-point with rounding mode.
 492       SINT_TO_FP_RND, UINT_TO_FP_RND,
 493       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 494       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 495
 496       // Vector float/double to signed/unsigned integer.
 497       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 498       // Scalar float/double to signed/unsigned integer.
 499       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 500
 501       // Vector float/double to signed/unsigned integer with truncation.
 502       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 503       // Scalar float/double to signed/unsigned integer with truncation.
 504       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 505
 506       // Vector signed/unsigned integer to float/double.
 507       CVTSI2P, CVTUI2P,
 508
 509       // Masked versions of above. Used for v2f64->v4f32.
 510       // SRC, PASSTHRU, MASK
 511       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 512       MCVTSI2P, MCVTUI2P,
 513
 514       // Vector float to bfloat16.
 515       // Convert TWO packed single data to one packed BF16 data
 516       CVTNE2PS2BF16,
 517       // Convert packed single data to packed BF16 data
 518       CVTNEPS2BF16,
 519       // Masked version of above.
 520       // SRC, PASSTHRU, MASK
 521       MCVTNEPS2BF16,
 522
 523       // Dot product of BF16 pairs accumulated into
 524       // packed single precision.
 525       DPBF16PS,
 526
 527       // Save xmm argument registers to the stack, according to %al. An operator
 528       // is needed so that this can be expanded with control flow.
 529       VASTART_SAVE_XMM_REGS,
 530
 531       // Windows's _chkstk call to do stack probing.
 532       WIN_ALLOCA,
 533
 534       // For allocating variable amounts of stack space when using
 535       // segmented stacks. Check if the current stacklet has enough space, and
 536       // falls back to heap allocation if not.
 537       SEG_ALLOCA,
 538
 539       // Memory barriers.
 540       MEMBARRIER,
 541       MFENCE,
 542
 543       // Store FP status word into i16 register.
 544       FNSTSW16r,
 545
 546       // Store contents of %ah into %eflags.
 547       SAHF,
 548
 549       // Get a random integer and indicate whether it is valid in CF.
 550       RDRAND,
 551
 552       // Get a NIST SP800-90B & C compliant random integer and
 553       // indicate whether it is valid in CF.
 554       RDSEED,
 555
 556       // Protection keys
 557       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 558       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 559       // value for ECX.
 560       RDPKRU, WRPKRU,
 561
 562       // SSE42 string comparisons.
 563       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 564       // will emit one or two instructions based on which results are used. If
 565       // flags and index/mask this allows us to use a single instruction since
 566       // we won't have to pick an opcode for flags. Instead we can rely on the
 567       // DAG to CSE everything and decide at isel.
 568       PCMPISTR,
 569       PCMPESTR,
 570
 571       // Test if in transactional execution.
 572       XTEST,
 573
 574       // ERI instructions.
 575       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 576       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 577
 578       // Conversions between float and half-float.
 579       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 580
 581       // Masked version of above.
 582       // SRC, RND, PASSTHRU, MASK
 583       MCVTPS2PH,
 584
 585       // Galois Field Arithmetic Instructions
 586       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 587
 588       // LWP insert record.
 589       LWPINS,
 590
 591       // User level wait
 592       UMWAIT, TPAUSE,
 593
 594       // Enqueue Stores Instructions
 595       ENQCMD, ENQCMDS,
 596
 597       // For avx512-vp2intersect
 598       VP2INTERSECT,
 599
 600       // Compare and swap.
 601       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 602       LCMPXCHG8_DAG,
 603       LCMPXCHG16_DAG,
 604       LCMPXCHG8_SAVE_EBX_DAG,
 605       LCMPXCHG16_SAVE_RBX_DAG,
 606
 607       /// LOCK-prefixed arithmetic read-modify-write instructions.
 608       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 609       LADD, LSUB, LOR, LXOR, LAND,
 610
 611       // Load, scalar_to_vector, and zero extend.
 612       VZEXT_LOAD,
 613
 614       // extract_vector_elt, store.
 615       VEXTRACT_STORE,
 616
 617       // Store FP control word into i16 memory.
 618       FNSTCW16m,
 619
 620       /// This instruction implements FP_TO_SINT with the
 621       /// integer destination in memory and a FP reg source.  This corresponds
 622       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 623       /// has two inputs (token chain and address) and two outputs (int value
 624       /// and token chain). Memory VT specifies the type to store to.
 625       FP_TO_INT_IN_MEM,
 626
 627       /// This instruction implements SINT_TO_FP with the
 628       /// integer source in memory and FP reg result.  This corresponds to the
 629       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 630       /// and two outputs (FP value and token chain). FILD_FLAG also produces a
 631       /// flag. The integer source type is specified by the memory VT.
 632       FILD,
 633       FILD_FLAG,
 634
 635       /// This instruction implements a fp->int store from FP stack
 636       /// slots. This corresponds to the fist instruction. It takes a
 637       /// chain operand, value to store, address, and glue. The memory VT
 638       /// specifies the type to store as.
 639       FIST,
 640
 641       /// This instruction implements an extending load to FP stack slots.
 642       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 643       /// operand, and ptr to load from. The memory VT specifies the type to
 644       /// load from.
 645       FLD,
 646
 647       /// This instruction implements a truncating store from FP stack
 648       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 649       /// chain operand, value to store, address, and glue. The memory VT
 650       /// specifies the type to store as.
 651       FST,
 652
 653       /// This instruction grabs the address of the next argument
 654       /// from a va_list. (reads and modifies the va_list in memory)
 655       VAARG_64,
 656
 657       // Vector truncating store with unsigned/signed saturation
 658       VTRUNCSTOREUS, VTRUNCSTORES,
 659       // Vector truncating masked store with unsigned/signed saturation
 660       VMTRUNCSTOREUS, VMTRUNCSTORES,
 661
 662       // X86 specific gather and scatter
 663       MGATHER, MSCATTER,
 664
 665       // WARNING: Do not add anything in the end unless you want the node to
 666       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 667       // opcodes will be thought as target memory ops!
 668     };
 669   } // end namespace X86ISD
 670
 671   /// Define some predicates that are used for node matching.
 672   namespace X86 {
 673     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 674     bool isZeroNode(SDValue Elt);
 675
 676     /// Returns true if the given offset can be
 677     /// fit into the displacement field of the instruction.
 678     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 679                                       bool hasSymbolicDisplacement = true);
 680
 681     /// Determines whether the callee is required to pop its
 682     /// own arguments. Callee pop is necessary to support tail calls.
 683     bool isCalleePop(CallingConv::ID CallingConv,
 684                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 685
 686   } // end namespace X86
 687
 688   //===--------------------------------------------------------------------===//
 689   //  X86 Implementation of the TargetLowering interface
 690   class X86TargetLowering final : public TargetLowering {
 691   public:
 692     explicit X86TargetLowering(const X86TargetMachine &TM,
 693                                const X86Subtarget &STI);
 694
 695     unsigned getJumpTableEncoding() const override;
 696     bool useSoftFloat() const override;
 697
 698     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 699                                ArgListTy &Args) const override;
 700
 701     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 702       return MVT::i8; // Shift amounts are i8 whatever VT is.
 703     }
 704
 705     const MCExpr *
 706     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 707                               const MachineBasicBlock *MBB, unsigned uid,
 708                               MCContext &Ctx) const override;
 709
 710     /// Returns relocation base for the given PIC jumptable.
 711     SDValue getPICJumpTableRelocBase(SDValue Table,
 712                                      SelectionDAG &DAG) const override;
 713     const MCExpr *
 714     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 715                                  unsigned JTI, MCContext &Ctx) const override;
 716
 717     /// Return the desired alignment for ByVal aggregate
 718     /// function arguments in the caller parameter area. For X86, aggregates
 719     /// that contain SSE vectors are placed at 16-byte boundaries while the
 720     /// rest are at 4-byte boundaries.
 721     unsigned getByValTypeAlignment(Type *Ty,
 722                                    const DataLayout &DL) const override;
 723
 724     /// Returns the target specific optimal type for load
 725     /// and store operations as a result of memset, memcpy, and memmove
 726     /// lowering. If DstAlign is zero that means the destination
 727     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 728     /// means there isn't a need to check it against alignment requirement,
 729     /// probably because the source does not need to be loaded. If 'IsMemset' is
 730     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 731     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 732     /// source is constant so it does not need to be loaded.
 733     /// It returns EVT::Other if the type should be determined using generic
 734     /// target-independent logic.
 735     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 736                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 737                             const AttributeList &FuncAttributes) const override;
 738
 739     /// Returns true if it's safe to use load / store of the
 740     /// specified type to expand memcpy / memset inline. This is mostly true
 741     /// for all types except for some special cases. For example, on X86
 742     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 743     /// also does type conversion. Note the specified type doesn't have to be
 744     /// legal as the hook is used before type legalization.
 745     bool isSafeMemOpType(MVT VT) const override;
 746
 747     /// Returns true if the target allows unaligned memory accesses of the
 748     /// specified type. Returns whether it is "fast" in the last argument.
 749     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 750                                         MachineMemOperand::Flags Flags,
 751                                         bool *Fast) const override;
 752
 753     /// Provide custom lowering hooks for some operations.
 754     ///
 755     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 756
 757     /// Places new result values for the node in Results (their number
 758     /// and types must exactly match those of the original return values of
 759     /// the node), or leaves Results empty, which indicates that the node is not
 760     /// to be custom lowered after all.
 761     void LowerOperationWrapper(SDNode *N,
 762                                SmallVectorImpl<SDValue> &Results,
 763                                SelectionDAG &DAG) const override;
 764
 765     /// Replace the results of node with an illegal result
 766     /// type with new values built out of custom code.
 767     ///
 768     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 769                             SelectionDAG &DAG) const override;
 770
 771     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 772
 773     // Return true if it is profitable to combine a BUILD_VECTOR with a
 774     // stride-pattern to a shuffle and a truncate.
 775     // Example of such a combine:
 776     // v4i32 build_vector((extract_elt V, 1),
 777     //                    (extract_elt V, 3),
 778     //                    (extract_elt V, 5),
 779     //                    (extract_elt V, 7))
 780     //  -->
 781     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 782     // v4i64)
 783     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 784         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 785
 786     /// Return true if the target has native support for
 787     /// the specified value type and it is 'desirable' to use the type for the
 788     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 789     /// instruction encodings are longer and some i16 instructions are slow.
 790     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 791
 792     /// Return true if the target has native support for the
 793     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 794     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 795     /// and some i16 instructions are slow.
 796     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 797
 798     MachineBasicBlock *
 799     EmitInstrWithCustomInserter(MachineInstr &MI,
 800                                 MachineBasicBlock *MBB) const override;
 801
 802     /// This method returns the name of a target specific DAG node.
 803     const char *getTargetNodeName(unsigned Opcode) const override;
 804
 805     /// Do not merge vector stores after legalization because that may conflict
 806     /// with x86-specific store splitting optimizations.
 807     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 808       return !MemVT.isVector(); // True only for non-vector memory types.
 809     }
 810
 811     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 812                           const SelectionDAG &DAG) const override;
 813
 814     bool isCheapToSpeculateCttz() const override;
 815
 816     bool isCheapToSpeculateCtlz() const override;
 817
 818     bool isCtlzFast() const override;
 819
 820     bool hasBitPreservingFPLogic(EVT VT) const override {
 821       return VT.isVector() || VT == MVT::f64 || VT == MVT::f32;
 822     }
 823
 824     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 825       // A pair mixing a float and an int value is the likely win: storing
 826       // the halves separately saves two bitwise instructions and one
 827       // float-to-int instruction for the price of one additional store, and
 828       // it potentially matters even more that the input value avoids the
 829       // float->int domain switch.
 830       const bool FloatLowIntHigh = LTy.isFloatingPoint() && HTy.isInteger();
 831       const bool IntLowFloatHigh = LTy.isInteger() && HTy.isFloatingPoint();
 832       // A pair holding only int values would save two bitwise instructions
 833       // and add one store instruction (costing one more store buffer). That
 834       // benefit is more blurred, so such a pair is left out until a testcase
 835       // proves it is a win; every pair that is not a float/int mixture
 836       // therefore yields false.
 837       return FloatLowIntHigh || IntLowFloatHigh;
 838     }
 839
         /// Presumably reports whether folding a mask-and-compare-with-zero built
         /// on AndI is beneficial - confirm against the out-of-line definition.
 840     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 841
         /// Presumably reports whether an and-not feeding a compare is available
         /// for Y - confirm against the out-of-line definition.
 842     bool hasAndNotCompare(SDValue Y) const override;
 843
         /// Presumably reports whether a general and-not is available for Y -
         /// confirm against the out-of-line definition.
 844     bool hasAndNot(SDValue Y) const override;
 845
         /// Presumably reports whether a bit test of X by Y is available -
         /// confirm against the out-of-line definition.
 846     bool hasBitTest(SDValue X, SDValue Y) const override;
 847
         /// Profitability query; the name suggests it decides whether a constant
         /// should be hoisted out of a shift feeding an AND. NOTE(review): the
         /// roles of the parameters are not documented in this header.
 848     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
 849         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
 850         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
 851         SelectionDAG &DAG) const override;
 852
         /// Presumably decides whether a pair of constant shifts rooted at N
         /// should be folded into a mask at the given combine Level - confirm.
 853     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
 854                                            CombineLevel Level) const override;
 855
         /// Presumably the reverse decision: whether a mask should be turned
         /// into a pair of variable shifts for Y - confirm in the .cpp.
 856     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 857
 858     bool
 859     shouldTransformSignedTruncationCheck(EVT XVT,
 860                                          unsigned KeptBits) const override {
 861       // For vectors, we don't have a preference..
 862       if (XVT.isVector())
 863         return false;
 864
 865       auto VTIsOk = [](EVT VT) -> bool {
 866         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 867                VT == MVT::i64;
 868       };
 869
 870       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 871       // XVT will be larger than KeptBitsVT.
 872       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 873       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 874     }
 875
 876     /// Shifts are expanded unless the function is optimized for minimum size.
 877     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
 878       const Function &Fn = DAG.getMachineFunction().getFunction();
 879       return !Fn.hasMinSize();
 880     }
 881
         /// Presumably decides, for vector type VT, whether an insert-element
         /// with a variable index should be lowered via a splat - confirm
         /// against the out-of-line definition.
 882     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 883
 884     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 885       return VT.isScalarInteger();
 886     }
 887
 888     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
         /// Returns an MVT for a comparison of NumBits bits. NOTE(review): the
         /// value returned when no fast compare exists is not visible here.
 889     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 890
 891     /// Return the value type to use for ISD::SETCC.
 892     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 893                            EVT VT) const override;
 894
         /// Presumably lets the target pick a preferable constant for Op given
         /// the Demanded bits, reporting changes through TLO - confirm in the
         /// out-of-line definition.
 895     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 896                                       TargetLoweringOpt &TLO) const override;
 897
 898     /// Determine which bits of the target node Op are known to be either zero
 899     /// or one and return them in Known. NOTE(review): DemandedElts presumably
         /// selects the vector elements of interest - confirm in the .cpp.
 900     void computeKnownBitsForTargetNode(const SDValue Op,
 901                                        KnownBits &Known,
 902                                        const APInt &DemandedElts,
 903                                        const SelectionDAG &DAG,
 904                                        unsigned Depth = 0) const override;
 905
 906     /// Determine the number of bits in the operation that are sign bits.
 907     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 908                                              const APInt &DemandedElts,
 909                                              const SelectionDAG &DAG,
 910                                              unsigned Depth) const override;
 911
         /// Demanded-vector-elements simplification hook for target nodes; the
         /// KnownUndef and KnownZero masks are passed by reference.
         /// NOTE(review): presumably returns true when Op was simplified through
         /// TLO - confirm against the out-of-line definition.
 912     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 913                                                  const APInt &DemandedElts,
 914                                                  APInt &KnownUndef,
 915                                                  APInt &KnownZero,
 916                                                  TargetLoweringOpt &TLO,
 917                                                  unsigned Depth) const override;
 918
         /// Demanded-bits simplification hook for target nodes; Known is passed
         /// by reference. NOTE(review): presumably returns true when Op was
         /// simplified through TLO - confirm against the out-of-line definition.
 919     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 920                                            const APInt &DemandedBits,
 921                                            const APInt &DemandedElts,
 922                                            KnownBits &Known,
 923                                            TargetLoweringOpt &TLO,
 924                                            unsigned Depth) const override;
 925
         /// Multiple-use variant of the demanded-bits hook; returns an SDValue.
         /// NOTE(review): presumably an empty SDValue means "no simplification"
         /// - confirm against the out-of-line definition.
 926     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
 927         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 928         SelectionDAG &DAG, unsigned Depth) const override;
 929
         /// Presumably returns the constant read by the load LD when it can be
         /// determined - confirm (including the failure value) in the .cpp.
 930     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 931
         /// Presumably strips a target-specific address wrapper node from N -
         /// confirm against the out-of-line definition.
 932     SDValue unwrapAddress(SDValue N) const override;
 933
         /// Returns an SDValue for the return-address frame index, built with
         /// DAG. Not an override. NOTE(review): semantics inferred from the name.
 934     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 935
         /// Inline-asm expansion hook for the call CI. NOTE(review): presumably
         /// returns true when the asm was replaced - confirm in the .cpp.
 936     bool ExpandInlineAsm(CallInst *CI) const override;
 937
         /// Classify the inline-asm constraint string Constraint.
 938     ConstraintType getConstraintType(StringRef Constraint) const override;
 939
 940     /// Examine constraint string and operand type and determine a weight value.
 941     /// The operand object must already have been set up with the operand type.
 942     ConstraintWeight
 943       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 944                                      const char *constraint) const override;
 945
         /// Lowering hook for the 'X' constraint on a value of type ConstraintVT;
         /// returns a C string. NOTE(review): presumably the replacement
         /// constraint - confirm against the out-of-line definition.
 946     const char *LowerXConstraint(EVT ConstraintVT) const override;
 947
 948     /// Lower the specified operand Op into the Ops vector according to the
 949     /// given Constraint string. If the operand is invalid, don't add anything
 950     /// to Ops.
 951     void LowerAsmOperandForConstraint(SDValue Op,
 952                                       std::string &Constraint,
 953                                       std::vector<SDValue> &Ops,
 954                                       SelectionDAG &DAG) const override;
 955
 956     unsigned
 957     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 958       if (ConstraintCode == "i")
 959         return InlineAsm::Constraint_i;
 960       else if (ConstraintCode == "o")
 961         return InlineAsm::Constraint_o;
 962       else if (ConstraintCode == "v")
 963         return InlineAsm::Constraint_v;
 964       else if (ConstraintCode == "X")
 965         return InlineAsm::Constraint_X;
 966       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 967     }
 968
 969     /// Handle lowering of flag assembly outputs. Chain and Flag are taken by
         /// reference (presumably updated by the lowering - confirm in the .cpp).
 970     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
 971                                         const AsmOperandInfo &Constraint,
 972                                         SelectionDAG &DAG) const override;
 973
 974     /// Given a physical register constraint
 975     /// (e.g. {edx}), return the register number and the register class for the
 976     /// register.  This should only be used for C_Register constraints.  On
 977     /// error, this returns a register number of 0.
 978     std::pair<unsigned, const TargetRegisterClass *>
 979     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 980                                  StringRef Constraint, MVT VT) const override;
 981
 982     /// Return true if the addressing mode represented
 983     /// by AM is legal for this target, for a load/store of the specified type.
 984     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 985                                Type *Ty, unsigned AS,
 986                                Instruction *I = nullptr) const override;
 987
 988     /// Return true if the specified immediate is legal
 989     /// icmp immediate, that is the target has icmp instructions which can
 990     /// compare a register against the immediate without having to materialize
 991     /// the immediate into a register.
 992     bool isLegalICmpImmediate(int64_t Imm) const override;
 993
 994     /// Return true if the specified immediate is legal
 995     /// add immediate, that is the target has add instructions which can
 996     /// add a register and the immediate without having to materialize
 997     /// the immediate into a register.
 998     bool isLegalAddImmediate(int64_t Imm) const override;
 999
         /// Presumably the store counterpart of the immediate queries above:
         /// whether Imm can be stored without first being materialized into a
         /// register - confirm against the out-of-line definition.
1000     bool isLegalStoreImmediate(int64_t Imm) const override;
1001
1002     /// Return the cost of the scaling factor used in the addressing
1003     /// mode represented by AM for this target, for a load/store
1004     /// of the specified type.
1005     /// If the AM is supported, the return value must be >= 0.
1006     /// If the AM is not supported, it returns a negative value.
1007     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1008                              unsigned AS) const override;
1009
         /// Presumably reports whether shifting a vector of type Ty by a scalar
         /// amount is cheap - confirm against the out-of-line definition.
1010     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1011
1012     /// Add x86-specific opcodes to the default list.
1013     bool isBinOp(unsigned Opcode) const override;
1014
1015     /// Returns true if the opcode is a commutative binary operation.
1016     bool isCommutativeBinOp(unsigned Opcode) const override;
1017
1018     /// Return true if it's free to truncate a value of
1019     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1020     /// register EAX to i16 by referencing its sub-register AX.
1021     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1022     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1023
         /// Presumably reports whether a truncation from Ty1 to Ty2 may be
         /// ignored when deciding on a tail call - confirm in the .cpp.
1024     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1025
1026     /// Return true if any actual instruction that defines a
1027     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1028     /// register. This does not necessarily include registers defined in
1029     /// unknown ways, such as incoming arguments, or copies from unknown
1030     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1031     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1032     /// all instructions that define 32-bit values implicitly zero-extend the
1033     /// result out to 64 bits.
1034     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1035     bool isZExtFree(EVT VT1, EVT VT2) const override;
1036     bool isZExtFree(SDValue Val, EVT VT2) const override;
1037
1038     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1039     /// extend node) is profitable. ExtVal is the unnamed SDValue parameter.
1040     bool isVectorLoadExtDesirable(SDValue) const override;
1041
1042     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1043     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1044     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1045     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1046
1047     /// Return true if it's profitable to narrow
1048     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1049     /// from i32 to i8 but not from i32 to i16.
1050     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1051
1052     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1053     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1054     /// true and stores the intrinsic information into the IntrinsicInfo that was
1055     /// passed to the function (the Info parameter).
1056     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1057                             MachineFunction &MF,
1058                             unsigned Intrinsic) const override;
1059
1060     /// Returns true if the target can instruction select the
1061     /// specified FP immediate natively. If false, the legalizer will
1062     /// materialize the FP immediate as a load from a constant pool.
1063     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1064                       bool ForCodeSize) const override;
1065
1066     /// Targets can use this to indicate that they only support *some*
1067     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1068     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1069     /// be legal.
1070     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1071
1072     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1073     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1074     /// constant pool entry.
1075     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1076
1077     /// Returns true if lowering to a jump table is allowed for function Fn.
1078     bool areJTsAllowed(const Function *Fn) const override;
1079
1080     /// Tell instruction selection whether it should try to shrink an FP
1081     /// constant of type VT to a smaller type, saving space and/or run time.
1082     bool ShouldShrinkFPConstant(EVT VT) const override {
1083       // With SSE2 a cvtss2sd costs more than a straight movsd, so only long
1084       // double constants are still shrunk then (fldt is very slow).
1085       if (!X86ScalarSSEf64)
1086         return true;
1087       return VT == MVT::f80;
1088     }
1089
1090     /// Return true if we believe it is correct and profitable to reduce the
1091     /// load node Load to the smaller type NewVT.
1092     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1093                                EVT NewVT) const override;
1094
1095     /// Tell whether scalar FP values of type VT are computed in an SSE
1096     /// register rather than on the X87 floating point stack.
1097     bool isScalarFPTypeInSSEReg(EVT VT) const {
1098       if (VT == MVT::f64)
1099         return X86ScalarSSEf64; // f64 uses SSE registers when SSE2
           // f32 uses SSE registers when SSE1; no other type does.
           return VT == MVT::f32 && X86ScalarSSEf32;
1100     }
1101
1102     /// Returns true if it is beneficial to convert a load of a constant
1103     /// to just the constant itself.
1104     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1105                                            Type *Ty) const override;
1106
         /// Presumably decides whether a select between two FP constant loads
         /// should be reduced to a single load; IsFPSetCC describes the
         /// condition - confirm against the out-of-line definition.
1107     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1108
         /// Presumably decides whether a select of constants of type VT should
         /// be turned into arithmetic - confirm in the .cpp.
1109     bool convertSelectOfConstantsToMath(EVT VT) const override;
1110
         /// Presumably decides whether a multiply of a VT value by the constant
         /// C should be decomposed into cheaper operations - confirm in the
         /// out-of-line definition.
1111     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1112                                 SDValue C) const override;
1113
         /// Presumably decides whether the strict form of an FP-to-integer
         /// conversion from FpVT to IntVT should be used - confirm in the .cpp.
1114     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1115                                   bool IsSigned) const override;
1116
1117     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1118     /// with this index.
1119     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1120                                  unsigned Index) const override;
1121
1122     /// Scalar ops always have equal or better analysis/performance/power than
1123     /// the vector equivalent, so this always makes sense if the scalar op is
1124     /// supported.
1125     bool shouldScalarizeBinop(SDValue) const override;
1126
1127     /// Only extracting a scalar f32/f64 from index 0 of a vector is free.
1128     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1129       if (Index != 0)
1130         return false;
           const EVT ScalarVT = VT.getScalarType();
           return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
1131     }
1132
1133     /// Overflow nodes should get combined/lowered to optimal instructions
1134     /// (they should allow eliminating explicit compares by getting flags from
1135     /// math ops). The query is made per overflow Opcode and value type VT.
1136     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1137
1138     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1139                                       unsigned AddrSpace) const override {
1140       // If we can replace more than 2 scalar stores, there will be a reduction
1141       // in instructions even after we add a vector constant load.
1142       return NumElem > 2;
1143     }
1144
         /// Presumably decides whether a load of LoadVT followed by a bitcast to
         /// BitcastVT should be turned into a load of BitcastVT - confirm
         /// against the out-of-line definition.
1145     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1146                                  const SelectionDAG &DAG,
1147                                  const MachineMemOperand &MMO) const override;
1148
1149     /// Intel processors have a unified instruction and data cache, so no
         /// clear-cache builtin name is provided: this always returns nullptr.
1150     const char * getClearCacheBuiltinName() const override {
1151       return nullptr; // nothing to do, move along.
1152     }
1153
         /// Look up a register by its name RegName for a value of type VT and
         /// return it as an unsigned. NOTE(review): the handling of unknown
         /// names is not visible in this header.
1154     unsigned getRegisterByName(const char* RegName, EVT VT,
1155                                SelectionDAG &DAG) const override;
1156
1157     /// If a physical register, this returns the register that receives the
1158     /// exception address on entry to an EH pad.
1159     unsigned
1160     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1161
1162     /// If a physical register, this returns the register that receives the
1163     /// exception typeid on entry to a landing pad.
1164     unsigned
1165     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1166
         /// Presumably reports whether catch objects need fixed stack locations
         /// on this target - confirm against the out-of-line definition.
1167     virtual bool needsFixedCatchObjects() const override;
1168
1169     /// This method returns a target specific FastISel object,
1170     /// or null if the target does not support "fast" ISel.
1171     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1172                              const TargetLibraryInfo *libInfo) const override;
1173
1174     /// If the target has a standard location for the stack protector cookie,
1175     /// returns the address of that location. Otherwise, returns nullptr.
1176     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1177
         /// Stack-protector hooks, all defined out of line. NOTE(review): their
         /// exact roles are inferred from the names only - confirm against the
         /// base-class documentation.
1178     bool useLoadStackGuardNode() const override;
1179     bool useStackGuardXorFP() const override;
1180     void insertSSPDeclarations(Module &M) const override;
1181     Value *getSDagStackGuard(const Module &M) const override;
1182     Function *getSSPStackGuardCheck(const Module &M) const override;
1183     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1184                                 const SDLoc &DL) const override;
1185
1186
1187     /// If the target stores the SafeStack pointer at a fixed offset in some
1188     /// non-standard address space, this presumably returns that location as a
1189     /// Value built with IRB - confirm against the out-of-line definition.
1190     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1191
         /// Build a FILD node from Op, its source type SrcVT, the Chain and a
         /// StackSlot. Not an override. NOTE(review): inferred from the name and
         /// signature; confirm against the out-of-line definition.
1192     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1193                       SelectionDAG &DAG) const;
1194
         /// Presumably reports whether a cast from address space SrcAS to DestAS
         /// is a no-op - confirm against the out-of-line definition.
1195     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1196
1197     /// Customize the preferred legalization strategy for certain types.
1198     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1199
         /// Return the register type used for a value of type VT under calling
         /// convention CC. NOTE(review): inferred from the name and signature.
1200     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1201                                       EVT VT) const override;
1202
         /// Return the number of registers used for a value of type VT under
         /// calling convention CC. NOTE(review): inferred from the name.
1203     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1204                                            CallingConv::ID CC,
1205                                            EVT VT) const override;
1206
         /// Presumably reports whether integer division of type VT is cheap
         /// given the attributes Attr - confirm in the .cpp.
1207     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1208
         /// Presumably reports whether the swifterror attribute is supported -
         /// confirm against the out-of-line definition.
1209     bool supportSwiftError() const override;
1210
         /// Return the name of the stack probe symbol to use for function MF.
         /// NOTE(review): inferred from the name; defined out of line.
1211     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1212
         /// Return the stack probe size for function MF. Not an override.
         /// NOTE(review): the unit is not visible in this header.
1213     unsigned getStackProbeSize(MachineFunction &MF) const;
1214
         /// Always true for x86.
1215     bool hasVectorBlend() const override { return true; }
1216
         /// The maximum supported interleave factor is fixed at 4.
1217     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1218
1219     /// Lower interleaved load(s) into target specific
1220     /// instructions/intrinsics.
1221     bool lowerInterleavedLoad(LoadInst *LI,
1222                               ArrayRef<ShuffleVectorInst *> Shuffles,
1223                               ArrayRef<unsigned> Indices,
1224                               unsigned Factor) const override;
1225
1226     /// Lower interleaved store(s) into target specific
1227     /// instructions/intrinsics.
1228     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1229                                unsigned Factor) const override;
1230
         /// Presumably expands an indirect jump-table branch to Addr into DAG
         /// nodes - confirm against the out-of-line definition.
1231     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1232                                    SDValue Addr, SelectionDAG &DAG)
1233                                    const override;
1234
1235   protected:
         /// Return a (register class, uint8_t) pair for the value type VT.
         /// NOTE(review): presumably the representative register class and its
         /// cost - confirm against the base-class documentation.
1236     std::pair<const TargetRegisterClass *, uint8_t>
1237     findRepresentativeClass(const TargetRegisterInfo *TRI,
1238                             MVT VT) const override;
1239
1240   private:
1241     /// Keep a reference to the X86Subtarget around so that we can
1242     /// make the right decision when generating code for different targets.
1243     const X86Subtarget &Subtarget;
1244
1245     /// Select between SSE or x87 floating point ops.
1246     /// When SSE is available, use it for f32 operations.
1247     /// When SSE2 is available, use it for f64 operations.
         /// Both flags are read by isScalarFPTypeInSSEReg; X86ScalarSSEf64 is also
         /// read by ShouldShrinkFPConstant.
1248     bool X86ScalarSSEf32;
1249     bool X86ScalarSSEf64;
1250
1251     /// A list of legal FP immediates, appended to by addLegalFPImmediate.
1252     std::vector<APFloat> LegalFPImmediates;
1253
1254     /// Record Imm as an FP immediate that this x86 target can instruction
1255     /// select natively, by appending a copy to LegalFPImmediates.
1256     void addLegalFPImmediate(const APFloat& Imm) {
1257       LegalFPImmediates.emplace_back(Imm);
1258     }
1259
         /// Helper for lowering the results of a call; InVals is an output
         /// vector. NOTE(review): presumably copies the values described by Ins
         /// out of their assigned locations - confirm in the .cpp.
1260     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1261                             CallingConv::ID CallConv, bool isVarArg,
1262                             const SmallVectorImpl<ISD::InputArg> &Ins,
1263                             const SDLoc &dl, SelectionDAG &DAG,
1264                             SmallVectorImpl<SDValue> &InVals,
1265                             uint32_t *RegMask) const;
         /// Helper for lowering an incoming argument located in memory, as
         /// described by VA. NOTE(review): i is presumably the index into
         /// ArgInfo - confirm against the out-of-line definition.
1266     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1267                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1268                              const SDLoc &dl, SelectionDAG &DAG,
1269                              const CCValAssign &VA, MachineFrameInfo &MFI,
1270                              unsigned i) const;
         /// Helper for lowering an outgoing call argument Arg that is passed in
         /// memory relative to StackPtr. NOTE(review): inferred from the name
         /// and signature; confirm against the out-of-line definition.
1271     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1272                              const SDLoc &dl, SelectionDAG &DAG,
1273                              const CCValAssign &VA,
1274                              ISD::ArgFlagsTy Flags) const;
1275
1276     // Call lowering helpers.
1277
1278     /// Check whether the call is eligible for tail call optimization. Targets
1279     /// that want to do tail call optimization should implement this function.
1280     bool IsEligibleForTailCallOptimization(SDValue Callee,
1281                                            CallingConv::ID CalleeCC,
1282                                            bool isVarArg,
1283                                            bool isCalleeStructRet,
1284                                            bool isCallerStructRet,
1285                                            Type *RetTy,
1286                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1287                                     const SmallVectorImpl<SDValue> &OutVals,
1288                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1289                                            SelectionDAG& DAG) const;
         /// Tail-call helper; OutRetAddr is passed by reference. NOTE(review):
         /// presumably loads the return address into OutRetAddr when the frame
         /// has to move by FPDiff - confirm in the .cpp.
1290     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1291                                     SDValue Chain, bool IsTailCall,
1292                                     bool Is64Bit, int FPDiff,
1293                                     const SDLoc &dl) const;
1294
         /// Return an aligned version of the argument stack size StackSize.
         /// NOTE(review): the alignment rule is not visible in this header.
1295     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1296                                          SelectionDAG &DAG) const;
1297
         /// Return an address space number. NOTE(review): which address space
         /// this denotes is not visible in this header.
1298     unsigned getAddressSpace(void) const;
1299
         /// Helper for FP-to-integer lowering of Op; isSigned selects the
         /// signedness. NOTE(review): inferred from the name and signature.
1300     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1301
         /// Custom lowering helpers. Apart from getGlobalWrapperKind and
         /// LowerGlobalOrExternal, each routine below takes the node Op and
         /// the DAG and returns an SDValue; all are defined out of line.
         /// NOTE(review): the name suffix presumably identifies the node kind.
1302     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1303     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1304     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1305     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1306
         /// Return a wrapper kind as an unsigned for an optional global GV and
         /// operand flags OpFlags. NOTE(review): presumably a wrapper node
         /// opcode - confirm against the out-of-line definition.
1307     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1308                                   const unsigned char OpFlags = 0) const;
         // Lowering of address-like nodes.
1309     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1310     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1311     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1312     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1313     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1314
1315     /// Creates target global address or external symbol nodes for calls or
1316     /// other uses. ForCall distinguishes the two cases.
1317     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1318                                   bool ForCall) const;
1319
         // Remaining per-node lowering routines.
1320     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1323     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1324     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1325     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1326     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1327     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1328     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1329     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1330     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1331     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1332     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1333     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1334     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1335     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1336     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1337     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1338     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1339     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1340     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1341     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1342     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1343     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1344     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1345     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1346
         /// Override that lowers the incoming formal arguments described by Ins;
         /// InVals is an output vector. NOTE(review): presumably filled with one
         /// value per entry of Ins - confirm against the base-class docs.
1347     SDValue
1348     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1349                          const SmallVectorImpl<ISD::InputArg> &Ins,
1350                          const SDLoc &dl, SelectionDAG &DAG,
1351                          SmallVectorImpl<SDValue> &InVals) const override;
         /// Override that lowers the call described by CLI; InVals is an output
         /// vector (presumably receiving the call results - confirm).
1352     SDValue LowerCall(CallLoweringInfo &CLI,
1353                       SmallVectorImpl<SDValue> &InVals) const override;
1354
         /// Override that lowers a return of the values OutVals described by
         /// Outs. NOTE(review): inferred from the name and signature.
1355     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1356                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1357                         const SmallVectorImpl<SDValue> &OutVals,
1358                         const SDLoc &dl, SelectionDAG &DAG) const override;
1359
1360     bool supportSplitCSR(MachineFunction *MF) const override {
1361       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1362           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1363     }
         /// Split-CSR hooks operating on the Entry block and the Exits blocks.
         /// NOTE(review): defined out of line; presumably only used when
         /// supportSplitCSR returns true - confirm.
1364     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1365     void insertCopiesSplitCSR(
1366       MachineBasicBlock *Entry,
1367       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1368
         /// Presumably reports whether N is used only by a return, passing the
         /// relevant chain back through Chain - confirm in the .cpp.
1369     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1370
         /// Presumably reports whether the call CI may be emitted as a tail
         /// call - confirm against the out-of-line definition.
1371     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1372
         /// Return the type to use for a returned value of type VT extended
         /// with ExtendKind. NOTE(review): inferred from the name and signature.
1373     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1374                             ISD::NodeType ExtendKind) const override;
1375
         /// Presumably reports whether the return values described by Outs can
         /// be lowered for calling convention CallConv - confirm in the .cpp.
1376     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1377                         bool isVarArg,
1378                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1379                         LLVMContext &Context) const override;
1380
         /// Return a pointer to MCPhysReg scratch registers for calling
         /// convention CC. NOTE(review): the list terminator is not visible here.
1381     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1382
         /// Atomic expansion policy hooks for loads, stores and RMW operations.
         /// NOTE(review): the x86 policy is defined out of line.
1383     TargetLoweringBase::AtomicExpansionKind
1384     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1385     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1386     TargetLoweringBase::AtomicExpansionKind
1387     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1388
         /// Presumably replaces the idempotent RMW operation AI with a fenced
         /// load and returns that load - confirm (including the failure value)
         /// against the out-of-line definition.
1389     LoadInst *
1390     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1391
1392     bool needsCmpXchgNb(Type *MemType) const;
1393
1394     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1395                                 MachineBasicBlock *DispatchBB, int FI) const;
1396
1397     // Utility function to emit the low-level va_arg code for X86-64.
1398     MachineBasicBlock *
1399     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1400                                   MachineBasicBlock *MBB) const;
1401
1402     /// Utility function to emit the xmm reg save portion of va_start.
1403     MachineBasicBlock *
1404     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1405                                              MachineBasicBlock *BB) const;
1406
1407     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1408                                                  MachineInstr &MI2,
1409                                                  MachineBasicBlock *BB) const;
1410
1411     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1412                                          MachineBasicBlock *BB) const;
1413
1414     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1415                                            MachineBasicBlock *BB) const;
1416
1417     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1418                                            MachineBasicBlock *BB) const;
1419
1420     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1421                                            MachineBasicBlock *BB) const;
1422
1423     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1424                                             MachineBasicBlock *BB) const;
1425
1426     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1427                                           MachineBasicBlock *BB) const;
1428
1429     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1430                                           MachineBasicBlock *BB) const;
1431
1432     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1433                                             MachineBasicBlock *BB) const;
1434
1435     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1436                                         MachineBasicBlock *MBB) const;
1437
1438     void emitSetJmpShadowStackFix(MachineInstr &MI,
1439                                   MachineBasicBlock *MBB) const;
1440
1441     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1442                                          MachineBasicBlock *MBB) const;
1443
    /// Takes the instruction \p MI and the basic block \p MBB and returns a
    /// basic block pointer.
    /// NOTE(review): presumably emits the shadow-stack adjustment that
    /// accompanies the longjmp expansion -- confirm against the definition.
    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;
1446
    /// Takes the instruction \p MI and the basic block \p MBB and returns a
    /// basic block pointer.
    /// NOTE(review): presumably rewrites or selects an FMA3 instruction form
    /// for \p MI -- confirm against the definition.
    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;
1449
    /// Takes the instruction \p MI and the basic block \p MBB and returns a
    /// basic block pointer.
    /// NOTE(review): presumably builds the dispatch block used by SjLj
    /// exception handling -- confirm against the definition.
    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;
1452
    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    ///
    /// \param Op0   First operand of the comparison.
    /// \param Op1   Second operand of the comparison.
    /// \param X86CC The x86 condition code the comparison is intended for.
    /// \param dl    Source location (presumably attached to the emitted nodes
    ///              -- confirm against the definition).
    /// \param DAG   The selection DAG passed in by the caller.
    /// \returns The SDValue produced for the comparison.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;
1457
    /// Convert a comparison if required by the subtarget.
    ///
    /// \param Cmp The comparison value to examine.
    /// \param DAG The selection DAG passed in by the caller.
    /// \returns An SDValue for the comparison.
    /// NOTE(review): presumably \p Cmp itself is returned when no conversion
    /// is needed -- confirm against the definition.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1460
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    ///
    /// \param Op0   First operand of the setcc.
    /// \param Op1   Second operand of the setcc.
    /// \param CC    The target-independent condition code of the setcc.
    /// \param dl    Source location (presumably attached to the emitted nodes
    ///              -- confirm against the definition).
    /// \param DAG   The selection DAG passed in by the caller.
    /// \param X86CC Output parameter: receives the X86 condition code constant.
    /// \returns The SDValue carrying the emitted flags.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;
1467
    /// Check if replacement of SQRT with RSQRT should be disabled.
    ///
    /// Overrides a virtual hook from a base class.
    /// \param Operand The value whose square root is being considered.
    /// \param DAG     The selection DAG passed in by the caller.
    /// NOTE(review): presumably a true result means "sqrt is cheap, keep it"
    /// and therefore disables the replacement -- confirm against the
    /// base-class documentation.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1470
    /// Use rsqrt* to speed up sqrt calculations.
    ///
    /// Overrides a virtual hook from a base class.
    /// \param Operand         The value to estimate the square root of.
    /// \param DAG             The selection DAG passed in by the caller.
    /// \param Enabled         Integer enable setting; its encoding is not
    ///                        visible here (TODO confirm in the base class).
    /// \param RefinementSteps Passed by non-const reference, so the callee may
    ///                        update it.
    /// \param UseOneConstNR   Passed by non-const reference, so the callee may
    ///                        update it.
    /// \param Reciprocal      NOTE(review): presumably selects a reciprocal
    ///                        square root estimate -- confirm.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;
1475
    /// Use rcp* to speed up fdiv calculations.
    ///
    /// Overrides a virtual hook from a base class.
    /// \param Operand         The value to estimate the reciprocal of.
    /// \param DAG             The selection DAG passed in by the caller.
    /// \param Enabled         Integer enable setting; its encoding is not
    ///                        visible here (TODO confirm in the base class).
    /// \param RefinementSteps Passed by non-const reference, so the callee may
    ///                        update it.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;
1479
    /// Reassociate floating point divisions into multiply by reciprocal.
    ///
    /// Overrides a virtual hook from a base class and returns an unsigned
    /// value.
    /// NOTE(review): presumably the result is the minimum number of divisions
    /// by the same divisor that makes the transform worthwhile -- confirm
    /// against the base-class documentation.
    unsigned combineRepeatedFPDivisors() const override;
1482   };
1483
  namespace X86 {
    /// Factory entry point for FastISel; only the declaration lives here.
    ///
    /// \param funcInfo Function lowering state supplied by the caller.
    /// \param libInfo  Target library information supplied by the caller.
    /// \returns A FastISel pointer.
    /// NOTE(review): presumably a newly created X86-specific instance; the
    /// ownership contract is not visible here -- confirm against the
    /// definition.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1488
1489   // Base class for all X86 non-masked store operations.
1490   class X86StoreSDNode : public MemSDNode {
1491   public:
1492     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1493                    SDVTList VTs, EVT MemVT,
1494                    MachineMemOperand *MMO)
1495       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1496     const SDValue &getValue() const { return getOperand(1); }
1497     const SDValue &getBasePtr() const { return getOperand(2); }
1498
1499     static bool classof(const SDNode *N) {
1500       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1501         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1502     }
1503   };
1504
1505   // Base class for all X86 masked store operations.
1506   // The class has the same order of operands as MaskedStoreSDNode for
1507   // convenience.
1508   class X86MaskedStoreSDNode : public MemSDNode {
1509   public:
1510     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1511                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1512                          MachineMemOperand *MMO)
1513       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1514
1515     const SDValue &getValue()   const { return getOperand(1); }
1516     const SDValue &getBasePtr() const { return getOperand(2); }
1517     const SDValue &getMask()    const { return getOperand(3); }
1518
1519     static bool classof(const SDNode *N) {
1520       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1521         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1522     }
1523   };
1524
1525   // X86 Truncating Store with Signed saturation.
1526   class TruncSStoreSDNode : public X86StoreSDNode {
1527   public:
1528     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1529                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1530       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1531
1532     static bool classof(const SDNode *N) {
1533       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1534     }
1535   };
1536
1537   // X86 Truncating Store with Unsigned saturation.
1538   class TruncUSStoreSDNode : public X86StoreSDNode {
1539   public:
1540     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1541                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1542       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1543
1544     static bool classof(const SDNode *N) {
1545       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1546     }
1547   };
1548
1549   // X86 Truncating Masked Store with Signed saturation.
1550   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1551   public:
1552     MaskedTruncSStoreSDNode(unsigned Order,
1553                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1554                          MachineMemOperand *MMO)
1555       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1556
1557     static bool classof(const SDNode *N) {
1558       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1559     }
1560   };
1561
1562   // X86 Truncating Masked Store with Unsigned saturation.
1563   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1564   public:
1565     MaskedTruncUSStoreSDNode(unsigned Order,
1566                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1567                             MachineMemOperand *MMO)
1568       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1569
1570     static bool classof(const SDNode *N) {
1571       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1572     }
1573   };
1574
1575   // X86 specific Gather/Scatter nodes.
1576   // The class has the same order of operands as MaskedGatherScatterSDNode for
1577   // convenience.
1578   class X86MaskedGatherScatterSDNode : public MemSDNode {
1579   public:
1580     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1581                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1582                                  MachineMemOperand *MMO)
1583         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1584
1585     const SDValue &getBasePtr() const { return getOperand(3); }
1586     const SDValue &getIndex()   const { return getOperand(4); }
1587     const SDValue &getMask()    const { return getOperand(2); }
1588     const SDValue &getScale()   const { return getOperand(5); }
1589
1590     static bool classof(const SDNode *N) {
1591       return N->getOpcode() == X86ISD::MGATHER ||
1592              N->getOpcode() == X86ISD::MSCATTER;
1593     }
1594   };
1595
1596   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1597   public:
1598     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1599                           EVT MemVT, MachineMemOperand *MMO)
1600         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1601                                        MMO) {}
1602
1603     const SDValue &getPassThru() const { return getOperand(1); }
1604
1605     static bool classof(const SDNode *N) {
1606       return N->getOpcode() == X86ISD::MGATHER;
1607     }
1608   };
1609
1610   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1611   public:
1612     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1613                            EVT MemVT, MachineMemOperand *MMO)
1614         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1615                                        MMO) {}
1616
1617     const SDValue &getValue() const { return getOperand(1); }
1618
1619     static bool classof(const SDNode *N) {
1620       return N->getOpcode() == X86ISD::MSCATTER;
1621     }
1622   };
1623
1624   /// Generate unpacklo/unpackhi shuffle mask.
1625   template <typename T = int>
1626   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1627                                bool Unary) {
1628     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1629     int NumElts = VT.getVectorNumElements();
1630     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1631     for (int i = 0; i < NumElts; ++i) {
1632       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1633       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1634       Pos += (Unary ? 0 : NumElts * (i % 2));
1635       Pos += (Lo ? 0 : NumEltsInLane / 2);
1636       Mask.push_back(Pos);
1637     }
1638   }
1639
1640   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1641   /// mask index with the scaled sequential indices for an equivalent narrowed
1642   /// mask. This is the reverse process to canWidenShuffleElements, but can
1643   /// always succeed.
1644   template <typename T>
1645   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1646                         SmallVectorImpl<T> &ScaledMask) {
1647     assert(0 < Scale && "Unexpected scaling factor");
1648     size_t NumElts = Mask.size();
1649     ScaledMask.assign(NumElts * Scale, -1);
1650
1651     for (int i = 0; i != (int)NumElts; ++i) {
1652       int M = Mask[i];
1653
1654       // Repeat sentinel values in every mask element.
1655       if (M < 0) {
1656         for (int s = 0; s != Scale; ++s)
1657           ScaledMask[(Scale * i) + s] = M;
1658         continue;
1659       }
1660
1661       // Scale mask element and increment across each mask element.
1662       for (int s = 0; s != Scale; ++s)
1663         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1664     }
1665   }
1666 } // end namespace llvm
1667
1668 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H