lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20
  21 namespace llvm {
  22   class X86Subtarget;
  23   class X86TargetMachine;
  24
  25   namespace X86ISD {
  26     // X86 Specific DAG Nodes
  27     enum NodeType : unsigned {
  28       // Start the numbering where the builtin ops leave off.
  29       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  30
  31       /// Bit scan forward.
  32       BSF,
  33       /// Bit scan reverse.
  34       BSR,
  35
  36       /// Double shift instructions. These correspond to
  37       /// X86::SHLDxx and X86::SHRDxx instructions.
  38       SHLD,
  39       SHRD,
  40
  41       /// Bitwise logical AND of floating point values. This corresponds
  42       /// to X86::ANDPS or X86::ANDPD.
  43       FAND,
  44
  45       /// Bitwise logical OR of floating point values. This corresponds
  46       /// to X86::ORPS or X86::ORPD.
  47       FOR,
  48
  49       /// Bitwise logical XOR of floating point values. This corresponds
  50       /// to X86::XORPS or X86::XORPD.
  51       FXOR,
  52
  53       /// Bitwise logical ANDNOT of floating point values. This
  54       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  55       FANDN,
  56
  57       /// These operations represent an abstract X86 call
  58       /// instruction, which includes a bunch of information.  In particular the
  59       /// operands of these nodes are:
  60       ///
  61       ///     #0 - The incoming token chain
  62       ///     #1 - The callee
  63       ///     #2 - The number of arg bytes the caller pushes on the stack.
  64       ///     #3 - The number of arg bytes the callee pops off the stack.
  65       ///     #4 - The value to pass in AL/AX/EAX (optional)
  66       ///     #5 - The value to pass in DL/DX/EDX (optional)
  67       ///
  68       /// The result values of these nodes are:
  69       ///
  70       ///     #0 - The outgoing token chain
  71       ///     #1 - The first register result value (optional)
  72       ///     #2 - The second register result value (optional)
  73       ///
  74       CALL,
  75
  76       /// Same as call except it adds the NoTrack prefix.
  77       NT_CALL,
  78
  79       /// X86 compare and logical compare instructions.
  80       CMP, COMI, UCOMI,
  81
  82       /// X86 bit-test instructions.
  83       BT,
  84
  85       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  86       /// operand, usually produced by a CMP instruction.
  87       SETCC,
  88
  89       /// X86 Select
  90       SELECTS,
  91
  92       // Same as SETCC except it's materialized with a sbb and the value is all
  93       // ones or all zeros.
  94       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  95
  96       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  97       /// Operands are two FP values to compare; result is a mask of
  98       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
  99       FSETCC,
 100
 101       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 102       /// a version with SAE.
 103       FSETCCM, FSETCCM_SAE,
 104
 105       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 106       /// to select from. Operand 2 is the condition code, and operand 3 is the
 107       /// flag operand produced by a CMP or TEST instruction.
 108       CMOV,
 109
 110       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 111       /// is the block to branch if condition is true, operand 2 is the
 112       /// condition code, and operand 3 is the flag operand produced by a CMP
 113       /// or TEST instruction.
 114       BRCOND,
 115
 116       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 117       /// operand 1 is the target address.
 118       NT_BRIND,
 119
 120       /// Return with a flag operand. Operand 0 is the chain operand, operand
 121       /// 1 is the number of bytes of stack to pop.
 122       RET_FLAG,
 123
 124       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 125       IRET,
 126
 127       /// Repeat fill, corresponds to X86::REP_STOSx.
 128       REP_STOS,
 129
 130       /// Repeat move, corresponds to X86::REP_MOVSx.
 131       REP_MOVS,
 132
 133       /// On Darwin, this node represents the result of the popl
 134       /// at function entry, used for PIC code.
 135       GlobalBaseReg,
 136
 137       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 138       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 139       /// MCSymbol and TargetBlockAddress.
 140       Wrapper,
 141
 142       /// Special wrapper used under X86-64 PIC mode for RIP
 143       /// relative displacements.
 144       WrapperRIP,
 145
 146       /// Copies a 64-bit value from an MMX vector to the low word
 147       /// of an XMM vector, with the high word zero filled.
 148       MOVQ2DQ,
 149
 150       /// Copies a 64-bit value from the low word of an XMM vector
 151       /// to an MMX vector.
 152       MOVDQ2Q,
 153
 154       /// Copies a 32-bit value from the low word of a MMX
 155       /// vector to a GPR.
 156       MMX_MOVD2W,
 157
 158       /// Copies a GPR into the low 32-bit word of a MMX vector
 159       /// and zero out the high word.
 160       MMX_MOVW2D,
 161
 162       /// Extract an 8-bit value from a vector and zero extend it to
 163       /// i32, corresponds to X86::PEXTRB.
 164       PEXTRB,
 165
 166       /// Extract a 16-bit value from a vector and zero extend it to
 167       /// i32, corresponds to X86::PEXTRW.
 168       PEXTRW,
 169
 170       /// Insert any element of a 4 x float vector into any element
 171       /// of a destination 4 x float vector.
 172       INSERTPS,
 173
 174       /// Insert the lower 8-bits of a 32-bit value to a vector,
 175       /// corresponds to X86::PINSRB.
 176       PINSRB,
 177
 178       /// Insert the lower 16-bits of a 32-bit value to a vector,
 179       /// corresponds to X86::PINSRW.
 180       PINSRW,
 181
 182       /// Shuffle 16 8-bit values within a vector.
 183       PSHUFB,
 184
 185       /// Compute Sum of Absolute Differences.
 186       PSADBW,
 187       /// Compute Double Block Packed Sum-Absolute-Differences
 188       DBPSADBW,
 189
 190       /// Bitwise Logical AND NOT of Packed FP values.
 191       ANDNP,
 192
 193       /// Blend where the selector is an immediate.
 194       BLENDI,
 195
 196       /// Dynamic (non-constant condition) vector blend where only the sign bits
 197       /// of the condition elements are used. This is used to enforce that the
 198       /// condition mask is not valid for generic VSELECT optimizations. This
 199       /// is also used to implement the intrinsics.
 200       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 201       BLENDV,
 202
 203       /// Combined add and sub on an FP vector.
 204       ADDSUB,
 205
 206       //  FP vector ops with rounding mode.
 207       FADD_RND, FADDS, FADDS_RND,
 208       FSUB_RND, FSUBS, FSUBS_RND,
 209       FMUL_RND, FMULS, FMULS_RND,
 210       FDIV_RND, FDIVS, FDIVS_RND,
 211       FMAX_SAE, FMAXS_SAE,
 212       FMIN_SAE, FMINS_SAE,
 213       FSQRT_RND, FSQRTS, FSQRTS_RND,
 214
 215       // FP vector get exponent.
 216       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 217       // Extract Normalized Mantissas.
 218       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 219       // FP Scale.
 220       SCALEF, SCALEF_RND,
 221       SCALEFS, SCALEFS_RND,
 222
 223       // Unsigned Integer average.
 224       AVG,
 225
 226       /// Integer horizontal add/sub.
 227       HADD,
 228       HSUB,
 229
 230       /// Floating point horizontal add/sub.
 231       FHADD,
 232       FHSUB,
 233
 234       // Detect Conflicts Within a Vector
 235       CONFLICT,
 236
 237       /// Floating point max and min.
 238       FMAX, FMIN,
 239
 240       /// Commutative FMIN and FMAX.
 241       FMAXC, FMINC,
 242
 243       /// Scalar intrinsic floating point max and min.
 244       FMAXS, FMINS,
 245
 246       /// Floating point reciprocal-sqrt and reciprocal approximation.
 247       /// Note that these typically require refinement
 248       /// in order to obtain suitable precision.
 249       FRSQRT, FRCP,
 250
 251       // AVX-512 reciprocal approximations with a little more precision.
 252       RSQRT14, RSQRT14S, RCP14, RCP14S,
 253
 254       // Thread Local Storage.
 255       TLSADDR,
 256
 257       // Thread Local Storage. A call to get the start address
 258       // of the TLS block for the current module.
 259       TLSBASEADDR,
 260
 261       // Thread Local Storage.  When calling to an OS provided
 262       // thunk at the address from an earlier relocation.
 263       TLSCALL,
 264
 265       // Exception Handling helpers.
 266       EH_RETURN,
 267
 268       // SjLj exception handling setjmp.
 269       EH_SJLJ_SETJMP,
 270
 271       // SjLj exception handling longjmp.
 272       EH_SJLJ_LONGJMP,
 273
 274       // SjLj exception handling dispatch.
 275       EH_SJLJ_SETUP_DISPATCH,
 276
 277       /// Tail call return. See X86TargetLowering::LowerCall for
 278       /// the list of operands.
 279       TC_RETURN,
 280
 281       // Vector move to low scalar and zero higher vector elements.
 282       VZEXT_MOVL,
 283
 284       // Vector integer truncate.
 285       VTRUNC,
 286       // Vector integer truncate with unsigned/signed saturation.
 287       VTRUNCUS, VTRUNCS,
 288
 289       // Masked version of the above. Used when less than a 128-bit result is
 290       // produced since the mask only applies to the lower elements and can't
 291       // be represented by a select.
 292       // SRC, PASSTHRU, MASK
 293       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 294
 295       // Vector FP extend.
 296       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 297
 298       // Vector FP round.
 299       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 300
 301       // Masked version of above. Used for v2f64->v4f32.
 302       // SRC, PASSTHRU, MASK
 303       VMFPROUND,
 304
 305       // 128-bit vector logical left / right shift
 306       VSHLDQ, VSRLDQ,
 307
 308       // Vector shift elements
 309       VSHL, VSRL, VSRA,
 310
 311       // Vector variable shift
 312       VSHLV, VSRLV, VSRAV,
 313
 314       // Vector shift elements by immediate
 315       VSHLI, VSRLI, VSRAI,
 316
 317       // Shifts of mask registers.
 318       KSHIFTL, KSHIFTR,
 319
 320       // Bit rotate by immediate
 321       VROTLI, VROTRI,
 322
 323       // Vector packed double/float comparison.
 324       CMPP,
 325
 326       // Vector integer comparisons.
 327       PCMPEQ, PCMPGT,
 328
 329       // v8i16 Horizontal minimum and position.
 330       PHMINPOS,
 331
 332       MULTISHIFT,
 333
 334       /// Vector comparison generating mask bits for fp and
 335       /// integer signed and unsigned data types.
 336       CMPM,
 337       // Vector comparison with SAE for FP values
 338       CMPM_SAE,
 339
 340       // Arithmetic operations with FLAGS results.
 341       ADD, SUB, ADC, SBB, SMUL, UMUL,
 342       OR, XOR, AND,
 343
 344       // Bit field extract.
 345       BEXTR,
 346
 347       // Zero High Bits Starting with Specified Bit Position.
 348       BZHI,
 349
 350       // X86-specific multiply by immediate.
 351       MUL_IMM,
 352
 353       // Vector sign bit extraction.
 354       MOVMSK,
 355
 356       // Vector bitwise comparisons.
 357       PTEST,
 358
 359       // Vector packed fp sign bitwise comparisons.
 360       TESTP,
 361
 362       // OR/AND test for masks.
 363       KORTEST,
 364       KTEST,
 365
 366       // ADD for masks.
 367       KADD,
 368
 369       // Several flavors of instructions with vector shuffle behaviors.
 370       // Saturated signed/unsigned packing.
 371       PACKSS,
 372       PACKUS,
 373       // Intra-lane alignr.
 374       PALIGNR,
 375       // AVX512 inter-lane alignr.
 376       VALIGN,
 377       PSHUFD,
 378       PSHUFHW,
 379       PSHUFLW,
 380       SHUFP,
 381       // VBMI2 Concat & Shift.
 382       VSHLD,
 383       VSHRD,
 384       VSHLDV,
 385       VSHRDV,
 386       // Shuffle Packed Values at 128-bit granularity.
 387       SHUF128,
 388       MOVDDUP,
 389       MOVSHDUP,
 390       MOVSLDUP,
 391       MOVLHPS,
 392       MOVHLPS,
 393       MOVSD,
 394       MOVSS,
 395       UNPCKL,
 396       UNPCKH,
 397       VPERMILPV,
 398       VPERMILPI,
 399       VPERMI,
 400       VPERM2X128,
 401
 402       // Variable Permute (VPERM).
 403       // Res = VPERMV MaskV, V0
 404       VPERMV,
 405
 406       // 3-op Variable Permute (VPERMT2).
 407       // Res = VPERMV3 V0, MaskV, V1
 408       VPERMV3,
 409
 410       // Bitwise ternary logic.
 411       VPTERNLOG,
 412       // Fix Up Special Packed Float32/64 values.
 413       VFIXUPIMM, VFIXUPIMM_SAE,
 414       VFIXUPIMMS, VFIXUPIMMS_SAE,
 415       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 416       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 417       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 418       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 419       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 420       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 421       // scaling part of the immediate.
 422       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 423       // Tests types of FP values for packed types.
 424       VFPCLASS,
 425       // Tests types of FP values for scalar types.
 426       VFPCLASSS,
 427
 428       // Broadcast scalar to vector.
 429       VBROADCAST,
 430       // Broadcast mask to vector.
 431       VBROADCASTM,
 432       // Broadcast subvector to vector.
 433       SUBV_BROADCAST,
 434
 435       /// SSE4A Extraction and Insertion.
 436       EXTRQI, INSERTQI,
 437
 438       // XOP arithmetic/logical shifts.
 439       VPSHA, VPSHL,
 440       // XOP signed/unsigned integer comparisons.
 441       VPCOM, VPCOMU,
 442       // XOP packed permute bytes.
 443       VPPERM,
 444       // XOP two source permutation.
 445       VPERMIL2,
 446
 447       // Vector multiply packed unsigned doubleword integers.
 448       PMULUDQ,
 449       // Vector multiply packed signed doubleword integers.
 450       PMULDQ,
 451       // Vector Multiply Packed UnsignedIntegers with Round and Scale.
 452       MULHRS,
 453
 454       // Multiply and Add Packed Integers.
 455       VPMADDUBSW, VPMADDWD,
 456
 457       // AVX512IFMA multiply and add.
 458       // NOTE: These are different than the instruction and perform
 459       // op0 x op1 + op2.
 460       VPMADD52L, VPMADD52H,
 461
 462       // VNNI
 463       VPDPBUSD,
 464       VPDPBUSDS,
 465       VPDPWSSD,
 466       VPDPWSSDS,
 467
 468       // FMA nodes.
 469       // We use the target independent ISD::FMA for the non-inverted case.
 470       FNMADD,
 471       FMSUB,
 472       FNMSUB,
 473       FMADDSUB,
 474       FMSUBADD,
 475
 476       // FMA with rounding mode.
 477       FMADD_RND,
 478       FNMADD_RND,
 479       FMSUB_RND,
 480       FNMSUB_RND,
 481       FMADDSUB_RND,
 482       FMSUBADD_RND,
 483
 484       // Compress and expand.
 485       COMPRESS,
 486       EXPAND,
 487
 488       // Bits shuffle
 489       VPSHUFBITQMB,
 490
 491       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
 492       SINT_TO_FP_RND, UINT_TO_FP_RND,
 493       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 494       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 495
 496       // Vector float/double to signed/unsigned integer.
 497       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 498       // Scalar float/double to signed/unsigned integer.
 499       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 500
 501       // Vector float/double to signed/unsigned integer with truncation.
 502       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 503       // Scalar float/double to signed/unsigned integer with truncation.
 504       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 505
 506       // Vector signed/unsigned integer to float/double.
 507       CVTSI2P, CVTUI2P,
 508
 509       // Masked versions of above. Used for v2f64->v4f32.
 510       // SRC, PASSTHRU, MASK
 511       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 512       MCVTSI2P, MCVTUI2P,
 513
 514       // Vector float to bfloat16.
 515       // Convert TWO packed single data to one packed BF16 data
 516       CVTNE2PS2BF16,
 517       // Convert packed single data to packed BF16 data
 518       CVTNEPS2BF16,
 519       // Masked version of above.
 520       // SRC, PASSTHRU, MASK
 521       MCVTNEPS2BF16,
 522
 523       // Dot product of BF16 pairs accumulated into
 524       // packed single precision.
 525       DPBF16PS,
 526
 527       // Save xmm argument registers to the stack, according to %al. An operator
 528       // is needed so that this can be expanded with control flow.
 529       VASTART_SAVE_XMM_REGS,
 530
 531       // Windows's _chkstk call to do stack probing.
 532       WIN_ALLOCA,
 533
 534       // For allocating variable amounts of stack space when using
 535       // segmented stacks. Check if the current stacklet has enough space, and
 536       // falls back to heap allocation if not.
 537       SEG_ALLOCA,
 538
 539       // Memory barriers.
 540       MEMBARRIER,
 541       MFENCE,
 542
 543       // Store FP status word into i16 register.
 544       FNSTSW16r,
 545
 546       // Store contents of %ah into %eflags.
 547       SAHF,
 548
 549       // Get a random integer and indicate whether it is valid in CF.
 550       RDRAND,
 551
 552       // Get a NIST SP800-90B & C compliant random integer and
 553       // indicate whether it is valid in CF.
 554       RDSEED,
 555
 556       // Protection keys
 557       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 558       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 559       // value for ECX.
 560       RDPKRU, WRPKRU,
 561
 562       // SSE42 string comparisons.
 563       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 564       // will emit one or two instructions based on which results are used. If
 565       // flags and index/mask are both used, one instruction is enough since
 566       // we won't have to pick an opcode for flags. Instead we can rely on the
 567       // DAG to CSE everything and decide at isel.
 568       PCMPISTR,
 569       PCMPESTR,
 570
 571       // Test if in transactional execution.
 572       XTEST,
 573
 574       // ERI instructions.
 575       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 576       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 577
 578       // Conversions between float and half-float.
 579       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 580
 581       // Masked version of above.
 582       // SRC, RND, PASSTHRU, MASK
 583       MCVTPS2PH,
 584
 585       // Galois Field Arithmetic Instructions
 586       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 587
 588       // LWP insert record.
 589       LWPINS,
 590
 591       // User level wait
 592       UMWAIT, TPAUSE,
 593
 594       // Enqueue Stores Instructions
 595       ENQCMD, ENQCMDS,
 596
 597       // For avx512-vp2intersect
 598       VP2INTERSECT,
 599
 600       // Compare and swap.
 601       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 602       LCMPXCHG8_DAG,
 603       LCMPXCHG16_DAG,
 604       LCMPXCHG8_SAVE_EBX_DAG,
 605       LCMPXCHG16_SAVE_RBX_DAG,
 606
 607       /// LOCK-prefixed arithmetic read-modify-write instructions.
 608       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 609       LADD, LSUB, LOR, LXOR, LAND,
 610
 611       // Load, scalar_to_vector, and zero extend.
 612       VZEXT_LOAD,
 613
 614       // extract_vector_elt, store.
 615       VEXTRACT_STORE,
 616
 617       // Store FP control word into i16 memory.
 618       FNSTCW16m,
 619
 620       /// This instruction implements FP_TO_SINT with the
 621       /// integer destination in memory and a FP reg source.  This corresponds
 622       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 623       /// has two inputs (token chain and address) and two outputs (int value
 624       /// and token chain). Memory VT specifies the type to store to.
 625       FP_TO_INT_IN_MEM,
 626
 627       /// This instruction implements SINT_TO_FP with the
 628       /// integer source in memory and FP reg result.  This corresponds to the
 629       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 630       /// and two outputs (FP value and token chain). FILD_FLAG also produces a
 631       /// flag. The integer source type is specified by the memory VT.
 632       FILD,
 633       FILD_FLAG,
 634
 635       /// This instruction implements a fp->int store from FP stack
 636       /// slots. This corresponds to the fist instruction. It takes a
 637       /// chain operand, value to store, address, and glue. The memory VT
 638       /// specifies the type to store as.
 639       FIST,
 640
 641       /// This instruction implements an extending load to FP stack slots.
 642       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 643       /// operand, and ptr to load from. The memory VT specifies the type to
 644       /// load from.
 645       FLD,
 646
 647       /// This instruction implements a truncating store from FP stack
 648       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 649       /// chain operand, value to store, address, and glue. The memory VT
 650       /// specifies the type to store as.
 651       FST,
 652
 653       /// This instruction grabs the address of the next argument
 654       /// from a va_list. (reads and modifies the va_list in memory)
 655       VAARG_64,
 656
 657       // Vector truncating store with unsigned/signed saturation
 658       VTRUNCSTOREUS, VTRUNCSTORES,
 659       // Vector truncating masked store with unsigned/signed saturation
 660       VMTRUNCSTOREUS, VMTRUNCSTORES,
 661
 662       // X86 specific gather and scatter
 663       MGATHER, MSCATTER,
 664
 665       // WARNING: Do not add anything at the end unless you want the node to
 666       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 667       // opcodes will be treated as target memory ops!
 668     };
 669   } // end namespace X86ISD
 670
 671   /// Define some predicates that are used for node matching.
 672   namespace X86 {
 673     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 674     bool isZeroNode(SDValue Elt);
 675
 676     /// Returns true if the given offset can fit into the displacement field
 677     /// of the instruction.
 678     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 679                                       bool hasSymbolicDisplacement = true);
 680
 681     /// Determines whether the callee is required to pop its
 682     /// own arguments. Callee pop is necessary to support tail calls.
 683     bool isCalleePop(CallingConv::ID CallingConv,
 684                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 685
 686     /// If Op is a constant whose elements are all the same constant or
 687     /// undefined, return true and return the constant value in \p SplatVal.
 688     bool isConstantSplat(SDValue Op, APInt &SplatVal);
 689   } // end namespace X86
 690
 691   //===--------------------------------------------------------------------===//
 692   //  X86 Implementation of the TargetLowering interface
 693   class X86TargetLowering final : public TargetLowering {
 694   public:
 695     explicit X86TargetLowering(const X86TargetMachine &TM,
 696                                const X86Subtarget &STI);
 697
 698     unsigned getJumpTableEncoding() const override;
 699     bool useSoftFloat() const override;
 700
 701     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 702                                ArgListTy &Args) const override;
 703
 704     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 705       return MVT::i8; // Always i8; the DataLayout and VT arguments are unused.
 706     }
 707
 708     const MCExpr *
 709     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 710                               const MachineBasicBlock *MBB, unsigned uid,
 711                               MCContext &Ctx) const override;
 712
 713     /// Returns relocation base for the given PIC jumptable.
 714     SDValue getPICJumpTableRelocBase(SDValue Table,
 715                                      SelectionDAG &DAG) const override;
 716     const MCExpr *
 717     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 718                                  unsigned JTI, MCContext &Ctx) const override;
 719
 720     /// Return the desired alignment for ByVal aggregate
 721     /// function arguments in the caller parameter area. For X86, aggregates
 722     /// that contain SSE vectors are placed at 16-byte boundaries while the rest
 723     /// are at 4-byte boundaries.
 724     unsigned getByValTypeAlignment(Type *Ty,
 725                                    const DataLayout &DL) const override;
 726
 727     /// Returns the target specific optimal type for load
 728     /// and store operations as a result of memset, memcpy, and memmove
 729     /// lowering. If DstAlign is zero that means the destination
 730     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 731     /// means there isn't a need to check it against alignment requirement,
 732     /// probably because the source does not need to be loaded. If 'IsMemset' is
 733     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 734     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 735     /// source is constant so it does not need to be loaded.
 736     /// It returns MVT::Other if the type should be determined using generic
 737     /// target-independent logic.
 738     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 739                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 740                             const AttributeList &FuncAttributes) const override;
 741
 742     /// Returns true if it's safe to use load / store of the
 743     /// specified type to expand memcpy / memset inline. This is mostly true
 744     /// for all types except for some special cases. For example, on X86
 745     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 746     /// also does type conversion. Note the specified type doesn't have to be
 747     /// legal as the hook is used before type legalization.
 748     bool isSafeMemOpType(MVT VT) const override;
 749
 750     /// Returns true if the target allows unaligned memory accesses of the
 751     /// specified type. Returns whether it is "fast" in the last argument.
 752     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 753                                         MachineMemOperand::Flags Flags,
 754                                         bool *Fast) const override;
 755
 756     /// Provide custom lowering hooks for some operations.
 757     ///
 758     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 759
 760     /// Places new result values for the node in Results (their number
 761     /// and types must exactly match those of the original return values of
 762     /// the node), or leaves Results empty, which indicates that the node is not
 763     /// to be custom lowered after all.
 764     void LowerOperationWrapper(SDNode *N,
 765                                SmallVectorImpl<SDValue> &Results,
 766                                SelectionDAG &DAG) const override;
 767
 768     /// Replace the results of node with an illegal result
 769     /// type with new values built out of custom code.
 770     ///
 771     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 772                             SelectionDAG &DAG) const override;
 773
 774     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 775
 776     // Return true if it is profitable to combine a BUILD_VECTOR with a
 777     // stride-pattern to a shuffle and a truncate.
 778     // Example of such a combine:
 779     // v4i32 build_vector((extract_elt V, 1),
 780     //                    (extract_elt V, 3),
 781     //                    (extract_elt V, 5),
 782     //                    (extract_elt V, 7))
 783     //  -->
 784     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 785     // v4i64)
 786     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 787         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 788
 789     /// Return true if the target has native support for
 790     /// the specified value type and it is 'desirable' to use the type for the
 791     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 792     /// instruction encodings are longer and some i16 instructions are slow.
 793     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 794
 795     /// Return true if the target has native support for the
 796     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 797     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 798     /// and some i16 instructions are slow.
 799     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 800
 801     MachineBasicBlock *
 802     EmitInstrWithCustomInserter(MachineInstr &MI,
 803                                 MachineBasicBlock *MBB) const override;
 804
 805     /// This method returns the name of a target specific DAG node.
 806     const char *getTargetNodeName(unsigned Opcode) const override;
 807
 808     /// Allow store merging after legalization only for non-vector types; merging
 809     /// vector stores may conflict with x86-specific store splitting optimizations.
 810     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 811       return !MemVT.isVector();
 812     }
 813
 814     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 815                           const SelectionDAG &DAG) const override;
 816
 817     bool isCheapToSpeculateCttz() const override;
 818
 819     bool isCheapToSpeculateCtlz() const override;
 820
 821     bool isCtlzFast() const override;
 822
 823     bool hasBitPreservingFPLogic(EVT VT) const override { // f32, f64, vectors.
 824       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
 825     }
 826
 827     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 828       // Splitting into two stores only pays off for a mixed float/int pair:
 829       // it trades two bitwise instructions and one float-to-int instruction
 830       // for one additional store, and it may also avoid a float->int domain
 831       // switch on the input value.
 832       //
 833       // An int-only pair would likewise save two bitwise instructions for one
 834       // extra store (and store-buffer entry), but that benefit is less clear,
 835       // so such pairs are left out until a testcase proves it is a win.
 836       const bool LowFPHighInt = LTy.isFloatingPoint() && HTy.isInteger();
 837       if (LowFPHighInt)
 838         return true;
 839       const bool LowIntHighFP = LTy.isInteger() && HTy.isFloatingPoint();
 840       return LowIntHighFP;
 841     }
 842
 843     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 844
         /// NOTE(review): per the TargetLowering hook of the same name, this
         /// presumably asks whether (X & Y) == Y should become (~X & Y) == 0 --
         /// confirm against the base class documentation.
 845     bool hasAndNotCompare(SDValue Y) const override;
 846
         /// NOTE(review): presumably true when a bitwise and-not operation is
         /// available for the type of Y -- confirm against the definition.
 847     bool hasAndNot(SDValue Y) const override;
 848
         /// NOTE(review): presumably true when a bit-test instruction can be used
         /// for the operands X and Y -- confirm against the definition.
 849     bool hasBitTest(SDValue X, SDValue Y) const override;
 850
 851     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
 852         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
 853         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
 854         SelectionDAG &DAG) const override;
 855
         /// NOTE(review): presumably decides, for the shift node N at the given
         /// combine level, whether a constant shift pair should become a mask.
 856     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
 857                                            CombineLevel Level) const override;
 858
         /// NOTE(review): the inverse preference of the hook above (mask turned
         /// into a variable shift pair), judging by the name -- confirm.
 859     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 860
 861     bool
 862     shouldTransformSignedTruncationCheck(EVT XVT,
 863                                          unsigned KeptBits) const override {
 864       // Only the scalar integer widths i8/i16/i32/i64 are acceptable here.
 865       const auto IsLegalScalarWidth = [](EVT Ty) {
 866         return Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32 ||
 867                Ty == MVT::i64;
 868       };
 869
 870       // We have no preference for vectors, so decline the transform.
 871       // Otherwise both XVT and the type holding the kept bits must qualify:
 872       // KeptBitsVT being byte/word/dword is what MOVS supports, and XVT will
 873       // be larger than KeptBitsVT.
 874       if (XVT.isVector() || !IsLegalScalarWidth(XVT))
 875         return false;
 876       return IsLegalScalarWidth(MVT::getIntegerVT(KeptBits));
 877     }
 878
 879     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 880
         /// NOTE(review): presumably reports whether an insert-element with a
         /// variable index into a vector of type VT should be built from a splat
         /// -- inferred from the hook name, confirm against the definition.
 881     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 882
 883     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
           // The answer is yes exactly when VT is a scalar integer type.
 884       return VT.isScalarInteger();
 885     }
 886
 887     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
         /// NOTE(review): presumably returns the type to perform a NumBits-wide
         /// equality comparison in -- confirm against the definition.
 888     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 889
 890     /// Return the value type to use for the result of an ISD::SETCC node.
 891     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 892                            EVT VT) const override;
 893
         /// NOTE(review): target hook for shrinking a constant operand of Op given
         /// the Demanded bits; the result presumably says whether TLO was updated.
 894     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 895                                       TargetLoweringOpt &TLO) const override;
 896
 897     /// Determine which bits of Op are known to be either zero or one for the
 898     /// vector elements selected by DemandedElts, and return them in Known.
 899     void computeKnownBitsForTargetNode(const SDValue Op,
 900                                        KnownBits &Known,
 901                                        const APInt &DemandedElts,
 902                                        const SelectionDAG &DAG,
 903                                        unsigned Depth = 0) const override;
 904
 905     /// Determine the number of bits in the result of Op that are sign bits.
 906     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 907                                              const APInt &DemandedElts,
 908                                              const SelectionDAG &DAG,
 909                                              unsigned Depth) const override;
 910
         /// Target-node hook for demanded-vector-element simplification of Op.
         /// NOTE(review): KnownUndef/KnownZero look like per-element outputs and
         /// the result presumably reports a change made through TLO -- confirm.
 911     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 912                                                  const APInt &DemandedElts,
 913                                                  APInt &KnownUndef,
 914                                                  APInt &KnownZero,
 915                                                  TargetLoweringOpt &TLO,
 916                                                  unsigned Depth) const override;
 917
         /// Target-node hook for demanded-bits simplification of Op.
         /// NOTE(review): Known is presumably filled in as an output -- confirm.
 918     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 919                                            const APInt &DemandedBits,
 920                                            const APInt &DemandedElts,
 921                                            KnownBits &Known,
 922                                            TargetLoweringOpt &TLO,
 923                                            unsigned Depth) const override;
 924
 925     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
 926         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 927         SelectionDAG &DAG, unsigned Depth) const override;
 928
 929     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 930
 931     SDValue unwrapAddress(SDValue N) const override;
 932
         /// Not an override: x86-specific helper that takes only the DAG.
         /// NOTE(review): presumably yields the frame index node of the return
         /// address slot -- confirm against the definition.
 933     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 934
 935     bool ExpandInlineAsm(CallInst *CI) const override;
 936
 937     ConstraintType getConstraintType(StringRef Constraint) const override;
 938
 939     /// Examine the constraint string and operand type and determine a weight
 940     /// value. The operand object must already be set up with the operand type.
 941     ConstraintWeight
 942       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 943                                      const char *constraint) const override;
 944
 945     const char *LowerXConstraint(EVT ConstraintVT) const override;
 946
 947     /// Lower the specified operand into the Ops vector according to
 948     /// Constraint. If the operand is invalid for the constraint, nothing is
 949     /// added to Ops.
 950     void LowerAsmOperandForConstraint(SDValue Op,
 951                                       std::string &Constraint,
 952                                       std::vector<SDValue> &Ops,
 953                                       SelectionDAG &DAG) const override;
 954
 955     unsigned
 956     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
           // The codes "i", "o", "v" and "X" map straight to their InlineAsm
           // constants; any other code is left to the TargetLowering default.
 957       if (ConstraintCode == "i")
 958         return InlineAsm::Constraint_i;
 959       if (ConstraintCode == "o")
 960         return InlineAsm::Constraint_o;
 961       if (ConstraintCode == "v")
 962         return InlineAsm::Constraint_v;
 963       if (ConstraintCode == "X")
 964         return InlineAsm::Constraint_X;
 965       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 966     }
 967
 968     /// Handle lowering of flag outputs of inline assembly.
 969     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
 970                                         const AsmOperandInfo &Constraint,
 971                                         SelectionDAG &DAG) const override;
 972
 973     /// Given a physical register constraint (e.g. {edx}), return the register
 974     /// number and the register class for the register. This should only be
 975     /// used for C_Register constraints. On error, this returns a register
 976     /// number of 0.
 977     std::pair<unsigned, const TargetRegisterClass *>
 978     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 979                                  StringRef Constraint, MVT VT) const override;
 980
 981     /// Return true if the addressing mode represented by AM is legal for
 982     /// this target, for a load/store of the specified type.
 983     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 984                                Type *Ty, unsigned AS,
 985                                Instruction *I = nullptr) const override;
 986
 987     /// Return true if the specified immediate is a legal icmp immediate,
 988     /// that is, the target has icmp instructions which can
 989     /// compare a register against the immediate without having to materialize
 990     /// the immediate into a register.
 991     bool isLegalICmpImmediate(int64_t Imm) const override;
 992
 993     /// Return true if the specified immediate is a legal add immediate,
 994     /// that is, the target has add instructions which can
 995     /// add a register and the immediate without having to materialize
 996     /// the immediate into a register.
 997     bool isLegalAddImmediate(int64_t Imm) const override;
 998
         /// NOTE(review): by analogy with the two hooks above, presumably true if
         /// Imm can be stored without materializing it in a register -- confirm.
 999     bool isLegalStoreImmediate(int64_t Imm) const override;
1000
1001     /// Return the cost of the scaling factor used in the addressing
1002     /// mode represented by AM for this target, for a load/store
1003     /// of the specified type.
1004     /// If the AM is supported, the return value is >= 0.
1005     /// If the AM is not supported, the return value is negative.
1006     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1007                              unsigned AS) const override;
1008
1009     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1010
1011     /// Return true if Opcode is a binary operation; this override adds
1012     bool isBinOp(unsigned Opcode) const override;
1013
1014     /// Returns true if the opcode is a commutative binary operation.
1015     bool isCommutativeBinOp(unsigned Opcode) const override;
1016
1017     /// Return true if it's free to truncate a value of
1018     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1019     /// register EAX to i16 by referencing its sub-register AX.
1020     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1021     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1022
1023     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1024
1025     /// Return true if any actual instruction that defines a
1026     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1027     /// register. This does not necessarily include registers defined in
1028     /// unknown ways, such as incoming arguments, or copies from unknown
1029     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1030     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1031     /// all instructions that define 32-bit values implicitly zero-extend the
1032     /// result out to 64 bits.
1033     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1034     bool isZExtFree(EVT VT1, EVT VT2) const override;
1035     bool isZExtFree(SDValue Val, EVT VT2) const override;
1036
1037     /// Return true if folding a vector load into the given extend node (a
1038     /// sign, zero, or any extend) is profitable.
1039     bool isVectorLoadExtDesirable(SDValue) const override;
1040
1041     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1042     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1043     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1044     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1045
1046     /// Return true if it's profitable to narrow
1047     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1048     /// from i32 to i8 but not from i32 to i16.
1049     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1050
1051     /// Given an intrinsic, check whether on the target it will need to map
1052     /// to a MemIntrinsicNode (touches memory). If this is the case, return
1053     /// true and store the intrinsic information into the IntrinsicInfo that was
1054     /// passed to the function.
1055     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1056                             MachineFunction &MF,
1057                             unsigned Intrinsic) const override;
1058
1059     /// Returns true if the target can instruction select the
1060     /// specified FP immediate natively. If false, the legalizer will
1061     /// materialize the FP immediate as a load from a constant pool.
1062     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1063                       bool ForCodeSize) const override;
1064
1065     /// Targets can use this to indicate that they only support *some*
1066     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1067     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1068     /// be legal.
1069     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1070
1071     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1072     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1073     /// constant pool entry.
1074     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1075
1076     /// Returns true if lowering to a jump table is allowed for function Fn.
1077     bool areJTsAllowed(const Function *Fn) const override;
1078
1079     /// Return true if instruction selection should try to shrink an FP
1080     /// constant of type VT to a smaller type to save space and/or runtime.
1081     bool ShouldShrinkFPConstant(EVT VT) const override {
1082       // Long double constants are always worth shrinking: fldt is very slow.
1083       if (VT == MVT::f80)
1084         return true;
1085       // With SSE2, cvtss2sd costs more than a straight movsd: do not shrink.
1086       return !X86ScalarSSEf64;
1087     }
1088
1089     /// Return true if we believe it is correct and profitable to reduce the
1090     /// load node Load to the smaller type NewVT.
1091     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1092                                EVT NewVT) const override;
1093
1094     /// Return true if scalar FP values of type VT are computed in an SSE
1095     /// register rather than on the X87 floating point stack.
1096     bool isScalarFPTypeInSSEReg(EVT VT) const {
1097       return (X86ScalarSSEf32 && VT == MVT::f32) || // f32 is when SSE1
1098              (X86ScalarSSEf64 && VT == MVT::f64);   // f64 is when SSE2
1099     }
1100
1101     /// Returns true if it is beneficial to convert a load of a constant
1102     /// to just the constant itself.
1103     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1104                                            Type *Ty) const override;
1105
1106     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1107
1108     bool convertSelectOfConstantsToMath(EVT VT) const override;
1109
         /// NOTE(review): presumably true if a multiply of a VT value by the
         /// constant C should be decomposed into cheaper operations -- confirm.
1110     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1111                                 SDValue C) const override;
1112
1113     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1114                                   bool IsSigned) const override;
1115
1116     /// Return true if EXTRACT_SUBVECTOR of a ResVT result from a SrcVT source
1117     /// is cheap at the given Index.
1118     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1119                                  unsigned Index) const override;
1120
1121     /// Scalar ops always have equal or better analysis/performance/power than
1122     /// the vector equivalent, so this always makes sense if the scalar op is
1123     /// supported.
1124     bool shouldScalarizeBinop(SDValue) const override;
1125
1126     /// Only extracting a scalar f32/f64 from index 0 of a vector is free.
1127     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1128       const EVT ScalarVT = VT.getScalarType();
1129       return Index == 0 && (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
1130     }
1131
1132     /// Overflow nodes should get combined/lowered to optimal instructions
1133     /// (they should allow eliminating explicit compares by getting flags from
1134     /// math ops). Opcode and VT describe the candidate overflow operation.
1135     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1136
1137     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1138                                       unsigned AddrSpace) const override {
1139       // Even after adding a vector constant load, the instruction count goes
1140       // down as soon as more than 2 scalar stores can be replaced.
1141       return NumElem >= 3;
1142     }
1143
         /// NOTE(review): presumably true if a load of LoadVT followed by a bitcast
         /// to BitcastVT is better done as a load of BitcastVT; MMO describes the
         /// memory access -- confirm against the definition.
1144     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1145                                  const SelectionDAG &DAG,
1146                                  const MachineMemOperand &MMO) const override;
1147
1148     /// Return nullptr: Intel processors have a unified instruction and data
1149     /// cache, so there is nothing to do here.
1150     const char *
1151     getClearCacheBuiltinName() const override { return nullptr; }
1152
1153     unsigned getRegisterByName(const char* RegName, EVT VT,
1154                                SelectionDAG &DAG) const override;
1155
1156     /// If a physical register, this returns the register that receives the
1157     /// exception address on entry to an EH pad. PersonalityFn is the
         /// personality function of the enclosing function.
1158     unsigned
1159     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1160
1161     /// If a physical register, this returns the register that receives the
1162     /// exception typeid on entry to a landing pad. PersonalityFn is the
         /// personality function of the enclosing function.
1163     unsigned
1164     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1165
         /// NOTE(review): presumably reports whether catch objects need fixed
         /// stack slots -- inferred from the name, confirm against the definition.
1166     bool needsFixedCatchObjects() const override;
1167
1168     /// This method returns a target specific FastISel object,
1169     /// or null if the target does not support "fast" ISel.
1170     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1171                              const TargetLibraryInfo *libInfo) const override;
1172
1173     /// If the target has a standard location for the stack protector cookie,
1174     /// returns the address of that location. Otherwise, returns nullptr.
1175     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1176
         /// Stack protector hooks. NOTE(review): the names suggest they select how
         /// the stack guard is loaded, declared and checked -- confirm each one
         /// against its definition.
1177     bool useLoadStackGuardNode() const override;
1178     bool useStackGuardXorFP() const override;
1179     void insertSSPDeclarations(Module &M) const override;
1180     Value *getSDagStackGuard(const Module &M) const override;
1181     Function *getSSPStackGuardCheck(const Module &M) const override;
1182     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1183                                 const SDLoc &DL) const override;
1184
1185
1186     /// Return the location of the SafeStack pointer. NOTE(review): presumably
1187     /// this covers targets that keep it at a fixed offset in some non-standard
1188     /// address space, with a default otherwise -- confirm in the definition.
1189     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1190
1191     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1192                       SelectionDAG &DAG) const;
1193
1194     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1195
1196     /// Customize the preferred legalization strategy for certain types.
1197     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1198
1199     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1200                                       EVT VT) const override;
1201
1202     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1203                                            CallingConv::ID CC,
1204                                            EVT VT) const override;
1205
1206     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1207
1208     bool supportSwiftError() const override;
1209
1210     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1211
         /// Not an override: x86-specific query of the stack probe size for MF.
1212     unsigned getStackProbeSize(MachineFunction &MF) const;
1213
1214     bool hasVectorBlend() const override { return true; }
1215
         /// The largest interleave factor reported for x86 is 4.
1216     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1217
1218     /// Lower interleaved load(s) into target specific
1219     /// instructions/intrinsics.
1220     bool lowerInterleavedLoad(LoadInst *LI,
1221                               ArrayRef<ShuffleVectorInst *> Shuffles,
1222                               ArrayRef<unsigned> Indices,
1223                               unsigned Factor) const override;
1224
1225     /// Lower interleaved store(s) into target specific
1226     /// instructions/intrinsics.
1227     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1228                                unsigned Factor) const override;
1229
1230     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1231                                    SDValue Addr, SelectionDAG &DAG)
1232                                    const override;
1233
1234   protected:
         /// NOTE(review): presumably maps VT to a representative register class
         /// plus a cost value (the uint8_t) -- confirm against the definition.
1235     std::pair<const TargetRegisterClass *, uint8_t>
1236     findRepresentativeClass(const TargetRegisterInfo *TRI,
1237                             MVT VT) const override;
1238
1239   private:
1240     /// Keep a reference to the X86Subtarget around so that we can
1241     /// make the right decision when generating code for different targets.
1242     const X86Subtarget &Subtarget;
1243
1244     /// Select between SSE and x87 floating point ops.
1245     /// When SSE is available, use it for f32 operations.
1246     /// When SSE2 is available, use it for f64 operations.
1247     bool X86ScalarSSEf32;
1248     bool X86ScalarSSEf64;
1249
1250     /// A list of legal FP immediates, filled in by addLegalFPImmediate.
1251     std::vector<APFloat> LegalFPImmediates;
1252
1253     /// Record Imm in LegalFPImmediates, i.e. indicate that this x86 target
1254     /// can instruction select the specified FP immediate natively.
1255     void addLegalFPImmediate(const APFloat &Imm) {
1256       LegalFPImmediates.emplace_back(Imm);
1257     }
1258
1259     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1260                             CallingConv::ID CallConv, bool isVarArg,
1261                             const SmallVectorImpl<ISD::InputArg> &Ins,
1262                             const SDLoc &dl, SelectionDAG &DAG,
1263                             SmallVectorImpl<SDValue> &InVals,
1264                             uint32_t *RegMask) const;
         /// NOTE(review): presumably lowers the incoming argument with index i
         /// that was assigned to memory (described by VA) -- confirm.
1265     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1266                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1267                              const SDLoc &dl, SelectionDAG &DAG,
1268                              const CCValAssign &VA, MachineFrameInfo &MFI,
1269                              unsigned i) const;
         /// NOTE(review): presumably emits the store of the outgoing argument Arg
         /// relative to StackPtr for the location VA -- confirm.
1270     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1271                              const SDLoc &dl, SelectionDAG &DAG,
1272                              const CCValAssign &VA,
1273                              ISD::ArgFlagsTy Flags) const;
1274
1275     // Call lowering helpers.
1276
1277     /// Check whether the call is eligible for tail call optimization. Targets
1278     /// that want to do tail call optimization should implement this function.
1279     bool IsEligibleForTailCallOptimization(SDValue Callee,
1280                                            CallingConv::ID CalleeCC,
1281                                            bool isVarArg,
1282                                            bool isCalleeStructRet,
1283                                            bool isCallerStructRet,
1284                                            Type *RetTy,
1285                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1286                                     const SmallVectorImpl<SDValue> &OutVals,
1287                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1288                                            SelectionDAG& DAG) const;
         /// NOTE(review): presumably loads the return address for a tail call,
         /// handing it back through OutRetAddr -- confirm against the definition.
1289     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1290                                     SDValue Chain, bool IsTailCall,
1291                                     bool Is64Bit, int FPDiff,
1292                                     const SDLoc &dl) const;
1293
         /// NOTE(review): presumably rounds StackSize up to the stack alignment
         /// required for arguments -- confirm against the definition.
1294     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1295                                          SelectionDAG &DAG) const;
1296
         /// Takes no arguments. NOTE(review): which address space is returned is
         /// decided in the out-of-line definition -- confirm there.
1297     unsigned getAddressSpace() const;
1298
1299     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1300
         /// Custom lowering routines; the suffix names the operation handled.
         /// NOTE(review): presumably dispatched from the operation lowering entry
         /// point in the .cpp file -- confirm there.
1301     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1302     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1303     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1304     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1305
1306     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1307                                   const unsigned char OpFlags = 0) const;
1308     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1309     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1310     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1311     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1312     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1313
1314     /// Creates target global address or external symbol nodes for calls or
1315     /// other uses; ForCall distinguishes the two situations.
1316     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1317                                   bool ForCall) const;
1318
         /// Custom lowering routines; the suffix names the operation handled.
1319     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1320     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1323     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1324     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1325     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1326     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1327     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1328     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1329     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1330     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1331     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1332     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1333     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1334     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1335     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1336     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1337     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1338     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1339     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1340     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1341     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1342     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1343     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1344     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1345
1346     SDValue
1347     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1348                          const SmallVectorImpl<ISD::InputArg> &Ins,
1349                          const SDLoc &dl, SelectionDAG &DAG,
1350                          SmallVectorImpl<SDValue> &InVals) const override;
         /// Lower the call described by CLI. NOTE(review): InVals presumably
         /// receives the values produced by the call -- confirm.
1351     SDValue LowerCall(CallLoweringInfo &CLI,
1352                       SmallVectorImpl<SDValue> &InVals) const override;
1353
         /// Lower a return of the values OutVals, which are described by Outs.
1354     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1355                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1356                         const SmallVectorImpl<SDValue> &OutVals,
1357                         const SDLoc &dl, SelectionDAG &DAG) const override;
1358
1359     bool supportSplitCSR(MachineFunction *MF) const override {
           // Requires a nounwind function that uses the CXX_FAST_TLS convention.
           const auto &Fn = MF->getFunction();
1360       return Fn.getCallingConv() == CallingConv::CXX_FAST_TLS &&
1361              Fn.hasFnAttribute(Attribute::NoUnwind);
1362     }
1363     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
         /// Companion of initializeSplitCSR, taking the entry block and all exit
         /// blocks. NOTE(review): presumably inserts the callee-saved register
         /// copies there -- confirm against the definition.
1364     void insertCopiesSplitCSR(
1365       MachineBasicBlock *Entry,
1366       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1367
1368     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1369
1370     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1371
1372     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1373                             ISD::NodeType ExtendKind) const override;
1374
         /// NOTE(review): presumably true if the return values described by Outs
         /// can be lowered under calling convention CallConv -- confirm.
1375     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1376                         bool isVarArg,
1377                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1378                         LLVMContext &Context) const override;
1379
1380     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1381
         /// Choose how the atomic load LI is to be expanded at the IR level; the
         /// answer is one of the AtomicExpansionKind values.
1382     TargetLoweringBase::AtomicExpansionKind
1383     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1384     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
         /// Choose how the atomic read-modify-write AI is to be expanded at the IR
         /// level; the answer is one of the AtomicExpansionKind values.
1385     TargetLoweringBase::AtomicExpansionKind
1386     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1387
1388     LoadInst *
1389     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1390
         /// Not an override. NOTE(review): presumably true if an access of type
         /// MemType needs a cmpxchg8b/cmpxchg16b style instruction -- confirm.
1391     bool needsCmpXchgNb(Type *MemType) const;
1392
1393     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1394                                 MachineBasicBlock *DispatchBB, int FI) const;
1395
1396     /// Utility function to emit the low-level va_arg code for X86-64.
1397     MachineBasicBlock *
1398     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1399                                   MachineBasicBlock *MBB) const;
1400
1401     /// Utility function to emit the xmm reg save portion of va_start.
1402     MachineBasicBlock *
1403     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1404                                              MachineBasicBlock *BB) const;
1405
         /// Variant of EmitLoweredSelect that takes two instructions, MI1 and MI2.
         /// NOTE(review): how the two are combined is decided in the definition.
1406     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1407                                                  MachineInstr &MI2,
1408                                                  MachineBasicBlock *BB) const;
1409
1410     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1411                                          MachineBasicBlock *BB) const;
1412
1413     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1414                                            MachineBasicBlock *BB) const;
1415
1416     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1417                                            MachineBasicBlock *BB) const;
1418
1419     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1420                                            MachineBasicBlock *BB) const;
1421
1422     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1423                                             MachineBasicBlock *BB) const;
1424
1425     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1426                                           MachineBasicBlock *BB) const;
1427
1428     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1429                                           MachineBasicBlock *BB) const;
1430
1431     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1432                                             MachineBasicBlock *BB) const;
1433
1434     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1435                                         MachineBasicBlock *MBB) const;
1436
1437     void emitSetJmpShadowStackFix(MachineInstr &MI,
1438                                   MachineBasicBlock *MBB) const;
1439
1440     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1441                                          MachineBasicBlock *MBB) const;
1442
         /// Takes instruction \p MI and block \p MBB; returns a MachineBasicBlock
         /// pointer (the setjmp counterpart, emitSetJmpShadowStackFix, returns void).
         /// NOTE(review): declaration only. By its name this presumably emits a
         /// shadow-stack fix-up as part of longjmp lowering -- confirm against the
         /// definition.
1443     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1444                                                  MachineBasicBlock *MBB) const;
1445
         /// Takes instruction \p MI and block \p MBB; returns a MachineBasicBlock
         /// pointer.
         /// NOTE(review): declaration only. By its name this presumably emits an
         /// FMA3 (fused multiply-add) instruction for \p MI -- confirm against the
         /// definition.
1446     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1447                                      MachineBasicBlock *MBB) const;
1448
         /// Takes instruction \p MI and block \p MBB; returns a MachineBasicBlock
         /// pointer.
         /// NOTE(review): declaration only. By its name this presumably builds the
         /// dispatch block used by SjLj exception handling -- confirm against the
         /// definition.
1449     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1450                                              MachineBasicBlock *MBB) const;
1451
1452     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1453     /// equivalent, for use with the given x86 condition code.
         /// \p Op0 and \p Op1 are the values to compare and \p X86CC is that
         /// condition code. \p dl and \p DAG are taken by reference. The result is
         /// returned as an SDValue.
         /// NOTE(review): what "something equivalent" covers is decided in the
         /// definition, which is not visible from this header.
1454     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1455                     SelectionDAG &DAG) const;
1456
1457     /// Convert a comparison if required by the subtarget.
         /// Takes the comparison \p Cmp by value and returns an SDValue.
         /// NOTE(review): which subtargets need a conversion, and whether \p Cmp is
         /// returned unchanged otherwise, cannot be told from this declaration --
         /// confirm against the definition.
1458     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1459
1460     /// Emit flags for the given setcc condition and operands. Also returns the
1461     /// corresponding X86 condition code constant in X86CC.
         /// \p Op0 and \p Op1 are the setcc operands and \p CC the ISD condition
         /// code. \p X86CC is an output parameter: it is passed by non-const
         /// reference so the implementation can store the condition code there.
         /// The flags are the SDValue return value.
1462     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1463                               ISD::CondCode CC, const SDLoc &dl,
1464                               SelectionDAG &DAG,
1465                               SDValue &X86CC) const;
1466
1467     /// Check if replacement of SQRT with RSQRT should be disabled.
         /// Marked `override`, so this implements a virtual hook declared in a base
         /// class.
         /// NOTE(review): presumably a `true` result is what disables the
         /// replacement for \p Operand -- confirm against the base-class
         /// documentation.
1468     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1469
1470     /// Use rsqrt* to speed up sqrt calculations.
         /// Marked `override`, so this implements a virtual hook declared in a base
         /// class. \p RefinementSteps and \p UseOneConstNR are non-const references,
         /// so the implementation is able to modify the caller's variables.
         /// NOTE(review): the meaning of \p Enabled and \p Reciprocal, and what is
         /// returned when no estimate is produced, are defined by the base-class
         /// hook -- confirm there.
1471     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1472                             int &RefinementSteps, bool &UseOneConstNR,
1473                             bool Reciprocal) const override;
1474
1475     /// Use rcp* to speed up fdiv calculations.
         /// Marked `override`, so this implements a virtual hook declared in a base
         /// class. \p RefinementSteps is a non-const reference, so the
         /// implementation is able to modify the caller's variable.
         /// NOTE(review): the meaning of \p Enabled, and what is returned when no
         /// estimate is produced, are defined by the base-class hook -- confirm
         /// there.
1476     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1477                              int &RefinementSteps) const override;
1478
1479     /// Reassociate floating point divisions into multiply by reciprocal.
         /// Takes no arguments and returns an unsigned value; marked `override`, so
         /// it implements a virtual hook declared in a base class.
         /// NOTE(review): the returned number is presumably a threshold on how many
         /// divisions must share a divisor before the rewrite applies -- confirm
         /// against the base-class documentation.
1480     unsigned combineRepeatedFPDivisors() const override;
1481
         /// Marked `override`, so this implements a virtual hook declared in a base
         /// class. Takes node \p N, the constant \p Divisor and \p DAG, and returns
         /// an SDValue. \p Created is a non-const reference to a vector of node
         /// pointers, so the implementation is able to append to it.
         /// NOTE(review): by its name this presumably builds a replacement for a
         /// signed division by a power of two and records the new nodes in
         /// \p Created -- confirm against the base-class documentation.
1482     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1483                           SmallVectorImpl<SDNode *> &Created) const override;
1484   };
1485
1486   namespace X86 {
         /// Free function declared in llvm::X86. It takes \p funcInfo by reference
         /// and \p libInfo as a pointer to const, and returns a FastISel pointer.
         /// NOTE(review): presumably a factory for the X86 fast instruction
         /// selector; ownership of the returned object is not visible here --
         /// confirm against the definition.
1487     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1488                              const TargetLibraryInfo *libInfo);
1489   } // end namespace X86
1490
1491   // Common parent of the X86-specific store nodes that take no mask operand.
1492   class X86StoreSDNode : public MemSDNode {
1493   public:
         // Every argument is handed straight through to the MemSDNode constructor.
1494     X86StoreSDNode(unsigned Opcode, unsigned Order,
1495                    const DebugLoc &dl, SDVTList VTs,
1496                    EVT MemVT, MachineMemOperand *MMO)
1497         : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

         // Operand 1 is the value being stored; operand 2 is the base pointer.
1498     const SDValue &getValue() const { return getOperand(1); }
1499     const SDValue &getBasePtr() const { return getOperand(2); }
1500
         // Matches exactly the two saturating truncating-store opcodes.
1501     static bool classof(const SDNode *N) {
1502       const auto Opc = N->getOpcode();
1503       return Opc == X86ISD::VTRUNCSTORES || Opc == X86ISD::VTRUNCSTOREUS;
1504     }
1505   };
1506
1507   // Common parent of the X86-specific masked store nodes.
1508   // Operands follow the same order as in MaskedStoreSDNode, purely for
1509   // convenience.
1510   class X86MaskedStoreSDNode : public MemSDNode {
1511   public:
         // Every argument is handed straight through to the MemSDNode constructor.
1512     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1513                          const DebugLoc &dl, SDVTList VTs,
1514                          EVT MemVT, MachineMemOperand *MMO)
1515         : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1516
         // Operand 1: stored value, operand 2: base pointer, operand 3: mask.
1517     const SDValue &getValue() const { return getOperand(1); }
1518     const SDValue &getBasePtr() const { return getOperand(2); }
1519     const SDValue &getMask() const { return getOperand(3); }
1520
         // Matches exactly the two masked saturating truncating-store opcodes.
1521     static bool classof(const SDNode *N) {
1522       const auto Opc = N->getOpcode();
1523       return Opc == X86ISD::VMTRUNCSTORES || Opc == X86ISD::VMTRUNCSTOREUS;
1524     }
1525   };
1526
1527   // Truncating store with signed saturation (opcode X86ISD::VTRUNCSTORES).
1528   class TruncSStoreSDNode : public X86StoreSDNode {
1529   public:
         // Pins the opcode to VTRUNCSTORES; everything else goes to the base class.
1530     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1531                       EVT MemVT, MachineMemOperand *MMO)
1532         : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1533
         // Matches the VTRUNCSTORES opcode only.
1534     static bool classof(const SDNode *N) {
           const auto Opc = N->getOpcode();
1535       return Opc == X86ISD::VTRUNCSTORES;
1536     }
1537   };
1538
1539   // Truncating store with unsigned saturation (opcode X86ISD::VTRUNCSTOREUS).
1540   class TruncUSStoreSDNode : public X86StoreSDNode {
1541   public:
         // Pins the opcode to VTRUNCSTOREUS; everything else goes to the base class.
1542     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1543                        EVT MemVT, MachineMemOperand *MMO)
1544         : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1545
         // Matches the VTRUNCSTOREUS opcode only.
1546     static bool classof(const SDNode *N) {
           const auto Opc = N->getOpcode();
1547       return Opc == X86ISD::VTRUNCSTOREUS;
1548     }
1549   };
1550
1551   // Masked truncating store with signed saturation (X86ISD::VMTRUNCSTORES).
1552   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1553   public:
         // Pins the opcode to VMTRUNCSTORES; everything else goes to the base class.
1554     MaskedTruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1555                             SDVTList VTs, EVT MemVT,
1556                             MachineMemOperand *MMO)
1557         : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT,
                                    MMO) {}
1558
         // Matches the VMTRUNCSTORES opcode only.
1559     static bool classof(const SDNode *N) {
           const auto Opc = N->getOpcode();
1560       return Opc == X86ISD::VMTRUNCSTORES;
1561     }
1562   };
1563
1564   // Masked truncating store with unsigned saturation (X86ISD::VMTRUNCSTOREUS).
1565   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1566   public:
         // Pins the opcode to VMTRUNCSTOREUS; everything else goes to the base class.
1567     MaskedTruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1568                              SDVTList VTs, EVT MemVT,
1569                              MachineMemOperand *MMO)
1570         : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT,
                                    MMO) {}
1571
         // Matches the VMTRUNCSTOREUS opcode only.
1572     static bool classof(const SDNode *N) {
           const auto Opc = N->getOpcode();
1573       return Opc == X86ISD::VMTRUNCSTOREUS;
1574     }
1575   };
1576
1577   // Common parent of the X86-specific gather and scatter nodes.
1578   // Operands follow the same order as in MaskedGatherScatterSDNode, purely
1579   // for convenience.
1580   class X86MaskedGatherScatterSDNode : public MemSDNode {
1581   public:
         // Every argument is handed straight through to the MemSDNode constructor.
1582     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1583                                  const DebugLoc &dl, SDVTList VTs,
1584                                  EVT MemVT, MachineMemOperand *MMO)
1585         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1586
         // Accessors listed by operand number: 2 = mask, 3 = base pointer,
         // 4 = index, 5 = scale.
1587     const SDValue &getMask() const { return getOperand(2); }
1588     const SDValue &getBasePtr() const { return getOperand(3); }
1589     const SDValue &getIndex() const { return getOperand(4); }
1590     const SDValue &getScale() const { return getOperand(5); }
1591
         // Matches the MGATHER and MSCATTER opcodes only.
1592     static bool classof(const SDNode *N) {
1593       const auto Opcode = N->getOpcode();
1594       return Opcode == X86ISD::MGATHER || Opcode == X86ISD::MSCATTER;
1595     }
1596   };
1597
1598   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1599   public:
         // Gather node: pins the opcode to MGATHER and forwards the remaining
         // arguments to the shared gather/scatter base class.
1600     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl,
1601                           SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1602         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs,
1603                                        MemVT, MMO) {}
1604
         // Operand 1 is the pass-through value.
1605     const SDValue &getPassThru() const { return getOperand(1); }
1606
         // Matches the MGATHER opcode only.
1607     static bool classof(const SDNode *N) {
           const auto Opcode = N->getOpcode();
1608       return Opcode == X86ISD::MGATHER;
1609     }
1610   };
1611
1612   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1613   public:
         // Scatter node: pins the opcode to MSCATTER and forwards the remaining
         // arguments to the shared gather/scatter base class.
1614     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl,
1615                            SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1616         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs,
1617                                        MemVT, MMO) {}
1618
         // Operand 1 is the value being scattered.
1619     const SDValue &getValue() const { return getOperand(1); }
1620
         // Matches the MSCATTER opcode only.
1621     static bool classof(const SDNode *N) {
           const auto Opcode = N->getOpcode();
1622       return Opcode == X86ISD::MSCATTER;
1623     }
1624   };
1625
1626   /// Build the shuffle mask of an unpacklo (\p Lo) or unpackhi (!\p Lo)
       /// operation on \p VT and append it to \p Mask, which must start out empty.
       /// Elements are grouped into lanes of 128 / scalar-size elements. Result
       /// element i reads index (i % lane) / 2 of its own lane, moved up by half a
       /// lane when \p Lo is false. Odd result elements are additionally offset by
       /// the total element count unless \p Unary is set.
1627   template <typename T = int>
1628   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1629                                bool Unary) {
1630     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1631     const int EltCount = VT.getVectorNumElements();
1632     const int LaneElts = 128 / VT.getScalarSizeInBits();
1633     for (int Idx = 0; Idx < EltCount; ++Idx) {
1634       const unsigned LaneBase = (Idx / LaneElts) * LaneElts;
1635       int SrcIdx = (Idx % LaneElts) / 2 + LaneBase;
1636       if (!Unary)
             SrcIdx += EltCount * (Idx % 2);
1637       if (!Lo)
             SrcIdx += LaneElts / 2;
1638       Mask.push_back(SrcIdx);
1639     }
1640   }
1641
1642   /// Expand the shuffle (or target shuffle) mask \p Mask by \p Scale, writing
1643   /// Mask.size() * Scale entries to \p ScaledMask. A non-negative index M
1644   /// becomes the run Scale*M, Scale*M+1, ..., Scale*M+Scale-1, while a negative
1645   /// (sentinel) entry is copied into all \p Scale of its slots. This is the
       /// reverse process to canWidenShuffleElements, but can always succeed.
1646   template <typename T>
1647   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1648                         SmallVectorImpl<T> &ScaledMask) {
1649     assert(0 < Scale && "Unexpected scaling factor");
1650     size_t NumElts = Mask.size();
         // Size the output up front; the loop below then writes every slot.
1651     ScaledMask.assign(NumElts * Scale, -1);
1652
1653     for (int Src = 0; Src != (int)NumElts; ++Src) {
1654       const int M = Mask[Src];
1655       const int Base = Scale * Src;
1656
1657       if (M >= 0) {
1658         // Real index: widen it into Scale consecutive indices.
1659         for (int Sub = 0; Sub != Scale; ++Sub)
1660           ScaledMask[Base + Sub] = (Scale * M) + Sub;
1661       } else {
1662         // Sentinel: replicate it unchanged into every slot.
1663         for (int Sub = 0; Sub != Scale; ++Sub)
1664           ScaledMask[Base + Sub] = M;
1665       }
1666     }
1667   }
1668 } // end namespace llvm
1669
1670 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H