lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20 #include "llvm/Target/TargetOptions.h"
  21
  22 namespace llvm {
  23   class X86Subtarget;
  24   class X86TargetMachine;
  25
  26   namespace X86ISD {
  27     // X86 Specific DAG Nodes
  28     enum NodeType : unsigned {
  29       // Start the numbering where the builtin ops leave off.
  30       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  31
  32       /// Bit scan forward.
  33       BSF,
  34       /// Bit scan reverse.
  35       BSR,
  36
  37       /// Double shift instructions. These correspond to
  38       /// X86::SHLDxx and X86::SHRDxx instructions.
  39       SHLD,
  40       SHRD,
  41
  42       /// Bitwise logical AND of floating point values. This corresponds
  43       /// to X86::ANDPS or X86::ANDPD.
  44       FAND,
  45
  46       /// Bitwise logical OR of floating point values. This corresponds
  47       /// to X86::ORPS or X86::ORPD.
  48       FOR,
  49
  50       /// Bitwise logical XOR of floating point values. This corresponds
  51       /// to X86::XORPS or X86::XORPD.
  52       FXOR,
  53
  54       /// Bitwise logical ANDNOT of floating point values. This
  55       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  56       FANDN,
  57
  58       /// These operations represent an abstract X86 call
  59       /// instruction, which includes a bunch of information.  In particular the
  60       /// operands of these nodes are:
  61       ///
  62       ///     #0 - The incoming token chain
  63       ///     #1 - The callee
  64       ///     #2 - The number of arg bytes the caller pushes on the stack.
  65       ///     #3 - The number of arg bytes the callee pops off the stack.
  66       ///     #4 - The value to pass in AL/AX/EAX (optional)
  67       ///     #5 - The value to pass in DL/DX/EDX (optional)
  68       ///
  69       /// The result values of these nodes are:
  70       ///
  71       ///     #0 - The outgoing token chain
  72       ///     #1 - The first register result value (optional)
  73       ///     #2 - The second register result value (optional)
  74       ///
  75       CALL,
  76
  77       /// Same as call except it adds the NoTrack prefix.
  78       NT_CALL,
  79
  80       /// X86 compare and logical compare instructions.
  81       CMP, COMI, UCOMI,
  82
  83       /// X86 bit-test instructions.
  84       BT,
  85
  86       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  87       /// operand, usually produced by a CMP instruction.
  88       SETCC,
  89
  90       /// X86 Select
  91       SELECTS,
  92
  93       // Same as SETCC except it's materialized with a sbb and the value is all
  94       // one's or all zero's.
  95       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  96
  97       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  98       /// Operands are two FP values to compare; result is a mask of
  99       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 100       FSETCC,
 101
 102       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 103       /// a version with SAE.
 104       FSETCCM, FSETCCM_SAE,
 105
 106       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 107       /// to select from. Operand 2 is the condition code, and operand 3 is the
 108       /// flag operand produced by a CMP or TEST instruction.
 109       CMOV,
 110
 111       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 112       /// is the block to branch if condition is true, operand 2 is the
 113       /// condition code, and operand 3 is the flag operand produced by a CMP
 114       /// or TEST instruction.
 115       BRCOND,
 116
 117       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 118       /// operand 1 is the target address.
 119       NT_BRIND,
 120
 121       /// Return with a flag operand. Operand 0 is the chain operand, operand
 122       /// 1 is the number of bytes of stack to pop.
 123       RET_FLAG,
 124
 125       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 126       IRET,
 127
 128       /// Repeat fill, corresponds to X86::REP_STOSx.
 129       REP_STOS,
 130
 131       /// Repeat move, corresponds to X86::REP_MOVSx.
 132       REP_MOVS,
 133
 134       /// On Darwin, this node represents the result of the popl
 135       /// at function entry, used for PIC code.
 136       GlobalBaseReg,
 137
 138       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 139       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 140       /// MCSymbol and TargetBlockAddress.
 141       Wrapper,
 142
 143       /// Special wrapper used under X86-64 PIC mode for RIP
 144       /// relative displacements.
 145       WrapperRIP,
 146
 147       /// Copies a 64-bit value from the low word of an XMM vector
 148       /// to an MMX vector.
 149       MOVDQ2Q,
 150
 151       /// Copies a 32-bit value from the low word of a MMX
 152       /// vector to a GPR.
 153       MMX_MOVD2W,
 154
 155       /// Copies a GPR into the low 32-bit word of a MMX vector
 156       /// and zero out the high word.
 157       MMX_MOVW2D,
 158
 159       /// Extract an 8-bit value from a vector and zero extend it to
 160       /// i32, corresponds to X86::PEXTRB.
 161       PEXTRB,
 162
 163       /// Extract a 16-bit value from a vector and zero extend it to
 164       /// i32, corresponds to X86::PEXTRW.
 165       PEXTRW,
 166
 167       /// Insert any element of a 4 x float vector into any element
 168       /// of a destination 4 x float vector.
 169       INSERTPS,
 170
 171       /// Insert the lower 8-bits of a 32-bit value to a vector,
 172       /// corresponds to X86::PINSRB.
 173       PINSRB,
 174
 175       /// Insert the lower 16-bits of a 32-bit value to a vector,
 176       /// corresponds to X86::PINSRW.
 177       PINSRW,
 178
 179       /// Shuffle 16 8-bit values within a vector.
 180       PSHUFB,
 181
 182       /// Compute Sum of Absolute Differences.
 183       PSADBW,
 184       /// Compute Double Block Packed Sum-Absolute-Differences
 185       DBPSADBW,
 186
 187       /// Bitwise Logical AND NOT of Packed FP values.
 188       ANDNP,
 189
 190       /// Blend where the selector is an immediate.
 191       BLENDI,
 192
 193       /// Dynamic (non-constant condition) vector blend where only the sign bits
 194       /// of the condition elements are used. This is used to enforce that the
 195       /// condition mask is not valid for generic VSELECT optimizations. This
 196       /// is also used to implement the intrinsics.
 197       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 198       BLENDV,
 199
 200       /// Combined add and sub on an FP vector.
 201       ADDSUB,
 202
 203       //  FP vector ops with rounding mode.
 204       FADD_RND, FADDS, FADDS_RND,
 205       FSUB_RND, FSUBS, FSUBS_RND,
 206       FMUL_RND, FMULS, FMULS_RND,
 207       FDIV_RND, FDIVS, FDIVS_RND,
 208       FMAX_SAE, FMAXS_SAE,
 209       FMIN_SAE, FMINS_SAE,
 210       FSQRT_RND, FSQRTS, FSQRTS_RND,
 211
 212       // FP vector get exponent.
 213       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 214       // Extract Normalized Mantissas.
 215       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 216       // FP Scale.
 217       SCALEF, SCALEF_RND,
 218       SCALEFS, SCALEFS_RND,
 219
 220       // Unsigned Integer average.
 221       AVG,
 222
 223       /// Integer horizontal add/sub.
 224       HADD,
 225       HSUB,
 226
 227       /// Floating point horizontal add/sub.
 228       FHADD,
 229       FHSUB,
 230
 231       // Detect Conflicts Within a Vector
 232       CONFLICT,
 233
 234       /// Floating point max and min.
 235       FMAX, FMIN,
 236
 237       /// Commutative FMIN and FMAX.
 238       FMAXC, FMINC,
 239
 240       /// Scalar intrinsic floating point max and min.
 241       FMAXS, FMINS,
 242
 243       /// Floating point reciprocal-sqrt and reciprocal approximation.
 244       /// Note that these typically require refinement
 245       /// in order to obtain suitable precision.
 246       FRSQRT, FRCP,
 247
 248       // AVX-512 reciprocal approximations with a little more precision.
 249       RSQRT14, RSQRT14S, RCP14, RCP14S,
 250
 251       // Thread Local Storage.
 252       TLSADDR,
 253
 254       // Thread Local Storage. A call to get the start address
 255       // of the TLS block for the current module.
 256       TLSBASEADDR,
 257
 258       // Thread Local Storage.  When calling to an OS provided
 259       // thunk at the address from an earlier relocation.
 260       TLSCALL,
 261
 262       // Exception Handling helpers.
 263       EH_RETURN,
 264
 265       // SjLj exception handling setjmp.
 266       EH_SJLJ_SETJMP,
 267
 268       // SjLj exception handling longjmp.
 269       EH_SJLJ_LONGJMP,
 270
 271       // SjLj exception handling dispatch.
 272       EH_SJLJ_SETUP_DISPATCH,
 273
 274       /// Tail call return. See X86TargetLowering::LowerCall for
 275       /// the list of operands.
 276       TC_RETURN,
 277
 278       // Vector move to low scalar and zero higher vector elements.
 279       VZEXT_MOVL,
 280
 281       // Vector integer truncate.
 282       VTRUNC,
 283       // Vector integer truncate with unsigned/signed saturation.
 284       VTRUNCUS, VTRUNCS,
 285
 286       // Masked version of the above. Used when less than a 128-bit result is
 287       // produced since the mask only applies to the lower elements and can't
 288       // be represented by a select.
 289       // SRC, PASSTHRU, MASK
 290       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 291
 292       // Vector FP extend.
 293       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 294
 295       // Vector FP round.
 296       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 297
 298       // Masked version of above. Used for v2f64->v4f32.
 299       // SRC, PASSTHRU, MASK
 300       VMFPROUND,
 301
 302       // 128-bit vector logical left / right shift
 303       VSHLDQ, VSRLDQ,
 304
 305       // Vector shift elements
 306       VSHL, VSRL, VSRA,
 307
 308       // Vector variable shift
 309       VSHLV, VSRLV, VSRAV,
 310
 311       // Vector shift elements by immediate
 312       VSHLI, VSRLI, VSRAI,
 313
 314       // Shifts of mask registers.
 315       KSHIFTL, KSHIFTR,
 316
 317       // Bit rotate by immediate
 318       VROTLI, VROTRI,
 319
 320       // Vector packed double/float comparison.
 321       CMPP,
 322
 323       // Vector integer comparisons.
 324       PCMPEQ, PCMPGT,
 325
 326       // v8i16 Horizontal minimum and position.
 327       PHMINPOS,
 328
 329       MULTISHIFT,
 330
 331       /// Vector comparison generating mask bits for fp and
 332       /// integer signed and unsigned data types.
 333       CMPM,
 334       // Vector comparison with SAE for FP values
 335       CMPM_SAE,
 336
 337       // Arithmetic operations with FLAGS results.
 338       ADD, SUB, ADC, SBB, SMUL, UMUL,
 339       OR, XOR, AND,
 340
 341       // Bit field extract.
 342       BEXTR,
 343
 344       // Zero High Bits Starting with Specified Bit Position.
 345       BZHI,
 346
 347       // X86-specific multiply by immediate.
 348       MUL_IMM,
 349
 350       // Vector sign bit extraction.
 351       MOVMSK,
 352
 353       // Vector bitwise comparisons.
 354       PTEST,
 355
 356       // Vector packed fp sign bitwise comparisons.
 357       TESTP,
 358
 359       // OR/AND test for masks.
 360       KORTEST,
 361       KTEST,
 362
 363       // ADD for masks.
 364       KADD,
 365
 366       // Several flavors of instructions with vector shuffle behaviors.
 367       // Saturated signed/unsigned packing.
 368       PACKSS,
 369       PACKUS,
 370       // Intra-lane alignr.
 371       PALIGNR,
 372       // AVX512 inter-lane alignr.
 373       VALIGN,
 374       PSHUFD,
 375       PSHUFHW,
 376       PSHUFLW,
 377       SHUFP,
 378       // VBMI2 Concat & Shift.
 379       VSHLD,
 380       VSHRD,
 381       VSHLDV,
 382       VSHRDV,
 383       // Shuffle Packed Values at 128-bit granularity.
 384       SHUF128,
 385       MOVDDUP,
 386       MOVSHDUP,
 387       MOVSLDUP,
 388       MOVLHPS,
 389       MOVHLPS,
 390       MOVSD,
 391       MOVSS,
 392       UNPCKL,
 393       UNPCKH,
 394       VPERMILPV,
 395       VPERMILPI,
 396       VPERMI,
 397       VPERM2X128,
 398
 399       // Variable Permute (VPERM).
 400       // Res = VPERMV MaskV, V0
 401       VPERMV,
 402
 403       // 3-op Variable Permute (VPERMT2).
 404       // Res = VPERMV3 V0, MaskV, V1
 405       VPERMV3,
 406
 407       // Bitwise ternary logic.
 408       VPTERNLOG,
 409       // Fix Up Special Packed Float32/64 values.
 410       VFIXUPIMM, VFIXUPIMM_SAE,
 411       VFIXUPIMMS, VFIXUPIMMS_SAE,
 412       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 413       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 414       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 415       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 416       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 417       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 418       // scaling part of the immediate.
 419       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 420       // Tests types of FP values for packed types.
 421       VFPCLASS,
 422       // Tests types of FP values for scalar types.
 423       VFPCLASSS,
 424
 425       // Broadcast scalar to vector.
 426       VBROADCAST,
 427       // Broadcast mask to vector.
 428       VBROADCASTM,
 429       // Broadcast subvector to vector.
 430       SUBV_BROADCAST,
 431
 432       /// SSE4A Extraction and Insertion.
 433       EXTRQI, INSERTQI,
 434
 435       // XOP arithmetic/logical shifts.
 436       VPSHA, VPSHL,
 437       // XOP signed/unsigned integer comparisons.
 438       VPCOM, VPCOMU,
 439       // XOP packed permute bytes.
 440       VPPERM,
 441       // XOP two source permutation.
 442       VPERMIL2,
 443
 444       // Vector multiply packed unsigned doubleword integers.
 445       PMULUDQ,
 446       // Vector multiply packed signed doubleword integers.
 447       PMULDQ,
 448       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 449       MULHRS,
 450
 451       // Multiply and Add Packed Integers.
 452       VPMADDUBSW, VPMADDWD,
 453
 454       // AVX512IFMA multiply and add.
 455       // NOTE: These are different than the instruction and perform
 456       // op0 x op1 + op2.
 457       VPMADD52L, VPMADD52H,
 458
 459       // VNNI
 460       VPDPBUSD,
 461       VPDPBUSDS,
 462       VPDPWSSD,
 463       VPDPWSSDS,
 464
 465       // FMA nodes.
 466       // We use the target independent ISD::FMA for the non-inverted case.
 467       FNMADD,
 468       FMSUB,
 469       FNMSUB,
 470       FMADDSUB,
 471       FMSUBADD,
 472
 473       // FMA with rounding mode.
 474       FMADD_RND,
 475       FNMADD_RND,
 476       FMSUB_RND,
 477       FNMSUB_RND,
 478       FMADDSUB_RND,
 479       FMSUBADD_RND,
 480
 481       // Compress and expand.
 482       COMPRESS,
 483       EXPAND,
 484
 485       // Bits shuffle
 486       VPSHUFBITQMB,
 487
 488       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
 489       SINT_TO_FP_RND, UINT_TO_FP_RND,
 490       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 491       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 492
 493       // Vector float/double to signed/unsigned integer.
 494       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 495       // Scalar float/double to signed/unsigned integer.
 496       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 497
 498       // Vector float/double to signed/unsigned integer with truncation.
 499       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 500       // Scalar float/double to signed/unsigned integer with truncation.
 501       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 502
 503       // Vector signed/unsigned integer to float/double.
 504       CVTSI2P, CVTUI2P,
 505
 506       // Masked versions of above. Used for v2f64->v4f32.
 507       // SRC, PASSTHRU, MASK
 508       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 509       MCVTSI2P, MCVTUI2P,
 510
 511       // Vector float to bfloat16.
 512       // Convert TWO packed single data to one packed BF16 data
 513       CVTNE2PS2BF16,
 514       // Convert packed single data to packed BF16 data
 515       CVTNEPS2BF16,
 516       // Masked version of above.
 517       // SRC, PASSTHRU, MASK
 518       MCVTNEPS2BF16,
 519
 520       // Dot product of BF16 pairs accumulated into
 521       // packed single precision.
 522       DPBF16PS,
 523
 524       // Save xmm argument registers to the stack, according to %al. An operator
 525       // is needed so that this can be expanded with control flow.
 526       VASTART_SAVE_XMM_REGS,
 527
 528       // Windows's _chkstk call to do stack probing.
 529       WIN_ALLOCA,
 530
 531       // For allocating variable amounts of stack space when using
 532       // segmented stacks. Check if the current stacklet has enough space, and
 533       // falls back to heap allocation if not.
 534       SEG_ALLOCA,
 535
 536       // Memory barriers.
 537       MEMBARRIER,
 538       MFENCE,
 539
 540       // Store FP status word into i16 register.
 541       FNSTSW16r,
 542
 543       // Store contents of %ah into %eflags.
 544       SAHF,
 545
 546       // Get a random integer and indicate whether it is valid in CF.
 547       RDRAND,
 548
 549       // Get a NIST SP800-90B & C compliant random integer and
 550       // indicate whether it is valid in CF.
 551       RDSEED,
 552
 553       // Protection keys
 554       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 555       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 556       // value for ECX.
 557       RDPKRU, WRPKRU,
 558
 559       // SSE42 string comparisons.
 560       // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
 561       // will emit one or two instructions based on which results are used. If
 562       // flags and index/mask are both used, this allows us to use a single
 563       // instruction since we won't have to pick an opcode for flags. Instead we
 564       // can rely on the DAG to CSE everything and decide at isel.
 565       PCMPISTR,
 566       PCMPESTR,
 567
 568       // Test if in transactional execution.
 569       XTEST,
 570
 571       // ERI instructions.
 572       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 573       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 574
 575       // Conversions between float and half-float.
 576       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 577
 578       // Masked version of above.
 579       // SRC, RND, PASSTHRU, MASK
 580       MCVTPS2PH,
 581
 582       // Galois Field Arithmetic Instructions
 583       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 584
 585       // LWP insert record.
 586       LWPINS,
 587
 588       // User level wait
 589       UMWAIT, TPAUSE,
 590
 591       // Enqueue Stores Instructions
 592       ENQCMD, ENQCMDS,
 593
 594       // For avx512-vp2intersect
 595       VP2INTERSECT,
 596
 597       // Compare and swap.
 598       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 599       LCMPXCHG8_DAG,
 600       LCMPXCHG16_DAG,
 601       LCMPXCHG8_SAVE_EBX_DAG,
 602       LCMPXCHG16_SAVE_RBX_DAG,
 603
 604       /// LOCK-prefixed arithmetic read-modify-write instructions.
 605       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 606       LADD, LSUB, LOR, LXOR, LAND,
 607
 608       // Load, scalar_to_vector, and zero extend.
 609       VZEXT_LOAD,
 610
 611       // extract_vector_elt, store.
 612       VEXTRACT_STORE,
 613
 614       // Store FP control word into i16 memory.
 615       FNSTCW16m,
 616
 617       /// This instruction implements FP_TO_SINT with the
 618       /// integer destination in memory and a FP reg source.  This corresponds
 619       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 620       /// has two inputs (token chain and address) and two outputs (int value
 621       /// and token chain). Memory VT specifies the type to store to.
 622       FP_TO_INT_IN_MEM,
 623
 624       /// This instruction implements SINT_TO_FP with the
 625       /// integer source in memory and FP reg result.  This corresponds to the
 626       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 627       /// and two outputs (FP value and token chain). FILD_FLAG also produces a
 628       /// flag. The integer source type is specified by the memory VT.
 629       FILD,
 630       FILD_FLAG,
 631
 632       /// This instruction implements a fp->int store from FP stack
 633       /// slots. This corresponds to the fist instruction. It takes a
 634       /// chain operand, value to store, address, and glue. The memory VT
 635       /// specifies the type to store as.
 636       FIST,
 637
 638       /// This instruction implements an extending load to FP stack slots.
 639       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 640       /// operand, and ptr to load from. The memory VT specifies the type to
 641       /// load from.
 642       FLD,
 643
 644       /// This instruction implements a truncating store from FP stack
 645       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 646       /// chain operand, value to store, address, and glue. The memory VT
 647       /// specifies the type to store as.
 648       FST,
 649
 650       /// This instruction grabs the address of the next argument
 651       /// from a va_list. (reads and modifies the va_list in memory)
 652       VAARG_64,
 653
 654       // Vector truncating store with unsigned/signed saturation
 655       VTRUNCSTOREUS, VTRUNCSTORES,
 656       // Vector truncating masked store with unsigned/signed saturation
 657       VMTRUNCSTOREUS, VMTRUNCSTORES,
 658
 659       // X86 specific gather and scatter
 660       MGATHER, MSCATTER,
 661
 662       // WARNING: Do not add anything in the end unless you want the node to
 663       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 664       // opcodes will be thought as target memory ops!
 665     };
 666   } // end namespace X86ISD
 667
 668   /// Define some predicates that are used for node matching.
 669   namespace X86 {
 670     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 671     bool isZeroNode(SDValue Elt);
 672
 673     /// Returns true if the given offset can be
 674     /// fit into the displacement field of the instruction.
 675     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 676                                       bool hasSymbolicDisplacement = true);
 677
 678     /// Determines whether the callee is required to pop its
 679     /// own arguments. Callee pop is necessary to support tail calls.
 680     bool isCalleePop(CallingConv::ID CallingConv,
 681                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 682
 683   } // end namespace X86
 684
 685   //===--------------------------------------------------------------------===//
 686   //  X86 Implementation of the TargetLowering interface
 687   class X86TargetLowering final : public TargetLowering {
 688   public:
 689     explicit X86TargetLowering(const X86TargetMachine &TM,
 690                                const X86Subtarget &STI);
 691
 692     unsigned getJumpTableEncoding() const override;
 693     bool useSoftFloat() const override;
 694
 695     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 696                                ArgListTy &Args) const override;
 697
 698     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 699       return MVT::i8; // Always i8, regardless of the data layout and VT.
 700     }
 701
 702     const MCExpr *
 703     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 704                               const MachineBasicBlock *MBB, unsigned uid,
 705                               MCContext &Ctx) const override;
 706
 707     /// Returns relocation base for the given PIC jumptable.
 708     SDValue getPICJumpTableRelocBase(SDValue Table,
 709                                      SelectionDAG &DAG) const override;
 710     const MCExpr *
 711     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 712                                  unsigned JTI, MCContext &Ctx) const override;
 713
 714     /// Return the desired alignment for ByVal aggregate
 715     /// function arguments in the caller parameter area. For X86, aggregates
 716     /// that contain SSE vectors are placed at 16-byte boundaries while the rest
 717     /// are at 4-byte boundaries.
 718     unsigned getByValTypeAlignment(Type *Ty,
 719                                    const DataLayout &DL) const override;
 720
 721     /// Returns the target specific optimal type for load
 722     /// and store operations as a result of memset, memcpy, and memmove
 723     /// lowering. If DstAlign is zero that means the destination
 724     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 725     /// means there isn't a need to check it against alignment requirement,
 726     /// probably because the source does not need to be loaded. If 'IsMemset' is
 727     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 728     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 729     /// source is constant so it does not need to be loaded.
 730     /// It returns EVT::Other if the type should be determined using generic
 731     /// target-independent logic.
 732     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 733                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 734                             const AttributeList &FuncAttributes) const override;
 735
 736     /// Returns true if it's safe to use load / store of the
 737     /// specified type to expand memcpy / memset inline. This is mostly true
 738     /// for all types except for some special cases. For example, on X86
 739     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 740     /// also does type conversion. Note the specified type doesn't have to be
 741     /// legal as the hook is used before type legalization.
 742     bool isSafeMemOpType(MVT VT) const override;
 743
 744     /// Returns true if the target allows unaligned memory accesses of the
 745     /// specified type. Returns whether it is "fast" in the last argument.
 746     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 747                                         MachineMemOperand::Flags Flags,
 748                                         bool *Fast) const override;
 749
 750     /// Provide custom lowering hooks for some operations.
 751     ///
 752     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 753
 754     /// Places new result values for the node in Results (their number
 755     /// and types must exactly match those of the original return values of
 756     /// the node), or leaves Results empty, which indicates that the node is not
 757     /// to be custom lowered after all.
 758     void LowerOperationWrapper(SDNode *N,
 759                                SmallVectorImpl<SDValue> &Results,
 760                                SelectionDAG &DAG) const override;
 761
 762     /// Replace the results of node with an illegal result
 763     /// type with new values built out of custom code.
 764     ///
 765     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 766                             SelectionDAG &DAG) const override;
 767
 768     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 769
 770     // Return true if it is profitable to combine a BUILD_VECTOR with a
 771     // stride-pattern to a shuffle and a truncate.
 772     // Example of such a combine:
 773     // v4i32 build_vector((extract_elt V, 1),
 774     //                    (extract_elt V, 3),
 775     //                    (extract_elt V, 5),
 776     //                    (extract_elt V, 7))
 777     //  -->
 778     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 779     // v4i64)
 780     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 781         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 782
 783     /// Return true if the target has native support for
 784     /// the specified value type and it is 'desirable' to use the type for the
 785     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 786     /// instruction encodings are longer and some i16 instructions are slow.
 787     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 788
 789     /// Return true if the target has native support for the
 790     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 791     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 792     /// and some i16 instructions are slow.
 793     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 794
 795     MachineBasicBlock *
 796     EmitInstrWithCustomInserter(MachineInstr &MI,
 797                                 MachineBasicBlock *MBB) const override;
 798
 799     /// This method returns the name of a target specific DAG node.
 800     const char *getTargetNodeName(unsigned Opcode) const override;
 801
 802     /// Only merge non-vector stores after legalization; merging vector stores
 803     /// may conflict with x86-specific store splitting optimizations.
 804     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 805       return !MemVT.isVector();
 806     }
 807
 808     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 809                           const SelectionDAG &DAG) const override;
 810
 811     bool isCheapToSpeculateCttz() const override;
 812
 813     bool isCheapToSpeculateCtlz() const override;
 814
 815     bool isCtlzFast() const override;
 816
 817     bool hasBitPreservingFPLogic(EVT VT) const override { // f32, f64, vectors
 818       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
 819     }
 820
 821     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 822       // Storing a float/int mixture separately saves two bitwise instructions
 823       // and one float-to-int instruction at the cost of one extra store. The
 824       // benefit is potentially larger still, because the float->int domain
 825       // switch for the input value is avoided, so it is more likely a win.
 826       const bool FloatThenInt = LTy.isFloatingPoint() && HTy.isInteger();
 827       const bool IntThenFloat = LTy.isInteger() && HTy.isFloatingPoint();
 828
 829       // A pair holding only int values would save two bitwise instructions
 830       // and add one store (costing one more store buffer). That benefit is
 831       // more blurred, so such pairs are left out until a testcase proves
 832       // them to be a win: anything that is not a float/int mixture reports
 833       // false.
 834       return FloatThenInt || IntThenFloat;
 835     }
 836
 837     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 838
 839     bool hasAndNotCompare(SDValue Y) const override;
 840
 841     bool hasAndNot(SDValue Y) const override;
 842
 843     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
 844                                            CombineLevel Level) const override;
 845
 846     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 847
 848     bool
 849     shouldTransformSignedTruncationCheck(EVT XVT,
 850                                          unsigned KeptBits) const override {
 851       // For vectors, we don't have a preference..
 852       if (XVT.isVector())
 853         return false;
 854
 855       auto VTIsOk = [](EVT VT) -> bool {
 856         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 857                VT == MVT::i64;
 858       };
 859
 860       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 861       // XVT will be larger than KeptBitsVT.
 862       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 863       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 864     }
 865
 866     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
 867       if (DAG.getMachineFunction().getFunction().hasMinSize())
 868         return false;
 869       return true;
 870     }
 871
 872     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 873
 874     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 875       return VT.isScalarInteger();
 876     }
 877
 878     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 879     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 880
 881     /// Return the value type to use for ISD::SETCC.
 882     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 883                            EVT VT) const override;
 884
 885     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 886                                       TargetLoweringOpt &TLO) const override;
 887
    /// Determine which bits of Op are known to be either zero or one and
    /// return them in Known.
    /// NOTE(review): DemandedElts presumably selects the vector elements of
    /// interest and Depth the current recursion depth — confirm against the
    /// base-class documentation in TargetLowering.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;
 895
 896     /// Determine the number of bits in the operation that are sign bits.
 897     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 898                                              const APInt &DemandedElts,
 899                                              const SelectionDAG &DAG,
 900                                              unsigned Depth) const override;
 901
 902     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 903                                                  const APInt &DemandedElts,
 904                                                  APInt &KnownUndef,
 905                                                  APInt &KnownZero,
 906                                                  TargetLoweringOpt &TLO,
 907                                                  unsigned Depth) const override;
 908
 909     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 910                                            const APInt &DemandedBits,
 911                                            const APInt &DemandedElts,
 912                                            KnownBits &Known,
 913                                            TargetLoweringOpt &TLO,
 914                                            unsigned Depth) const override;
 915
 916     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 917
 918     SDValue unwrapAddress(SDValue N) const override;
 919
 920     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 921
 922     bool ExpandInlineAsm(CallInst *CI) const override;
 923
 924     ConstraintType getConstraintType(StringRef Constraint) const override;
 925
 926     /// Examine constraint string and operand type and determine a weight value.
 927     /// The operand object must already have been set up with the operand type.
 928     ConstraintWeight
 929       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 930                                      const char *constraint) const override;
 931
 932     const char *LowerXConstraint(EVT ConstraintVT) const override;
 933
    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;
 941
 942     unsigned
 943     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 944       if (ConstraintCode == "i")
 945         return InlineAsm::Constraint_i;
 946       else if (ConstraintCode == "o")
 947         return InlineAsm::Constraint_o;
 948       else if (ConstraintCode == "v")
 949         return InlineAsm::Constraint_v;
 950       else if (ConstraintCode == "X")
 951         return InlineAsm::Constraint_X;
 952       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 953     }
 954
 955     /// Handle Lowering flag assembly outputs.
 956     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
 957                                         const AsmOperandInfo &Constraint,
 958                                         SelectionDAG &DAG) const override;
 959
 960     /// Given a physical register constraint
 961     /// (e.g. {edx}), return the register number and the register class for the
 962     /// register.  This should only be used for C_Register constraints.  On
 963     /// error, this returns a register number of 0.
 964     std::pair<unsigned, const TargetRegisterClass *>
 965     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 966                                  StringRef Constraint, MVT VT) const override;
 967
 968     /// Return true if the addressing mode represented
 969     /// by AM is legal for this target, for a load/store of the specified type.
 970     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 971                                Type *Ty, unsigned AS,
 972                                Instruction *I = nullptr) const override;
 973
 974     /// Return true if the specified immediate is legal
 975     /// icmp immediate, that is the target has icmp instructions which can
 976     /// compare a register against the immediate without having to materialize
 977     /// the immediate into a register.
 978     bool isLegalICmpImmediate(int64_t Imm) const override;
 979
 980     /// Return true if the specified immediate is legal
 981     /// add immediate, that is the target has add instructions which can
 982     /// add a register and the immediate without having to materialize
 983     /// the immediate into a register.
 984     bool isLegalAddImmediate(int64_t Imm) const override;
 985
 986     bool isLegalStoreImmediate(int64_t Imm) const override;
 987
 988     /// Return the cost of the scaling factor used in the addressing
 989     /// mode represented by AM for this target, for a load/store
 990     /// of the specified type.
 991     /// If the AM is supported, the return value must be >= 0.
 992     /// If the AM is not supported, it returns a negative value.
 993     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
 994                              unsigned AS) const override;
 995
 996     bool isVectorShiftByScalarCheap(Type *Ty) const override;
 997
 998     /// Add x86-specific opcodes to the default list.
 999     bool isBinOp(unsigned Opcode) const override;
1000
1001     /// Returns true if the opcode is a commutative binary operation.
1002     bool isCommutativeBinOp(unsigned Opcode) const override;
1003
1004     /// Return true if it's free to truncate a value of
1005     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1006     /// register EAX to i16 by referencing its sub-register AX.
1007     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1008     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1009
1010     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1011
1012     /// Return true if any actual instruction that defines a
1013     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1014     /// register. This does not necessarily include registers defined in
1015     /// unknown ways, such as incoming arguments, or copies from unknown
1016     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1017     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1018     /// all instructions that define 32-bit values implicit zero-extend the
1019     /// result out to 64 bits.
1020     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1021     bool isZExtFree(EVT VT1, EVT VT2) const override;
1022     bool isZExtFree(SDValue Val, EVT VT2) const override;
1023
1024     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1025     /// extend node) is profitable.
1026     bool isVectorLoadExtDesirable(SDValue) const override;
1027
1028     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1029     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1030     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1031     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1032
1033     /// Return true if it's profitable to narrow
1034     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1035     /// from i32 to i8 but not from i32 to i16.
1036     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1037
1038     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1039     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1040     /// true and stores the intrinsic information into the IntrinsicInfo that was
1041     /// passed to the function.
1042     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1043                             MachineFunction &MF,
1044                             unsigned Intrinsic) const override;
1045
1046     /// Returns true if the target can instruction select the
1047     /// specified FP immediate natively. If false, the legalizer will
1048     /// materialize the FP immediate as a load from a constant pool.
1049     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1050                       bool ForCodeSize) const override;
1051
1052     /// Targets can use this to indicate that they only support *some*
1053     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1054     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1055     /// be legal.
1056     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1057
1058     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1059     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1060     /// constant pool entry.
1061     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1062
1063     /// Returns true if lowering to a jump table is allowed.
1064     bool areJTsAllowed(const Function *Fn) const override;
1065
1066     /// If true, then instruction selection should
1067     /// seek to shrink the FP constant of the specified type to a smaller type
1068     /// in order to save space and / or reduce runtime.
1069     bool ShouldShrinkFPConstant(EVT VT) const override {
1070       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1071       // expensive than a straight movsd. On the other hand, it's important to
1072       // shrink long double fp constant since fldt is very slow.
1073       return !X86ScalarSSEf64 || VT == MVT::f80;
1074     }
1075
1076     /// Return true if we believe it is correct and profitable to reduce the
1077     /// load node to a smaller type.
1078     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1079                                EVT NewVT) const override;
1080
1081     /// Return true if the specified scalar FP type is computed in an SSE
1082     /// register, not on the X87 floating point stack.
1083     bool isScalarFPTypeInSSEReg(EVT VT) const {
1084       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1085              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1086     }
1087
1088     /// Returns true if it is beneficial to convert a load of a constant
1089     /// to just the constant itself.
1090     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1091                                            Type *Ty) const override;
1092
1093     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1094
1095     bool convertSelectOfConstantsToMath(EVT VT) const override;
1096
1097     bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1098
1099     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1100                                   bool IsSigned) const override;
1101
1102     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1103     /// with this index.
1104     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1105                                  unsigned Index) const override;
1106
1107     /// Scalar ops always have equal or better analysis/performance/power than
1108     /// the vector equivalent, so this always makes sense if the scalar op is
1109     /// supported.
1110     bool shouldScalarizeBinop(SDValue) const override;
1111
1112     /// Extract of a scalar FP value from index 0 of a vector is free.
1113     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1114       EVT EltVT = VT.getScalarType();
1115       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1116     }
1117
1118     /// Overflow nodes should get combined/lowered to optimal instructions
1119     /// (they should allow eliminating explicit compares by getting flags from
1120     /// math ops).
1121     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1122
1123     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1124                                       unsigned AddrSpace) const override {
1125       // If we can replace more than 2 scalar stores, there will be a reduction
1126       // in instructions even after we add a vector constant load.
1127       return NumElem > 2;
1128     }
1129
1130     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1131                                  const SelectionDAG &DAG,
1132                                  const MachineMemOperand &MMO) const override;
1133
1134     /// Intel processors have a unified instruction and data cache
1135     const char * getClearCacheBuiltinName() const override {
1136       return nullptr; // nothing to do, move along.
1137     }
1138
1139     unsigned getRegisterByName(const char* RegName, EVT VT,
1140                                SelectionDAG &DAG) const override;
1141
1142     /// If a physical register, this returns the register that receives the
1143     /// exception address on entry to an EH pad.
1144     unsigned
1145     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1146
1147     /// If a physical register, this returns the register that receives the
1148     /// exception typeid on entry to a landing pad.
1149     unsigned
1150     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1151
1152     virtual bool needsFixedCatchObjects() const override;
1153
1154     /// This method returns a target specific FastISel object,
1155     /// or null if the target does not support "fast" ISel.
1156     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1157                              const TargetLibraryInfo *libInfo) const override;
1158
1159     /// If the target has a standard location for the stack protector cookie,
1160     /// returns the address of that location. Otherwise, returns nullptr.
1161     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1162
1163     bool useLoadStackGuardNode() const override;
1164     bool useStackGuardXorFP() const override;
1165     void insertSSPDeclarations(Module &M) const override;
1166     Value *getSDagStackGuard(const Module &M) const override;
1167     Function *getSSPStackGuardCheck(const Module &M) const override;
1168     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1169                                 const SDLoc &DL) const override;
1170
1171
    /// Return the location of the SafeStack pointer, for targets that store it
    /// at a fixed offset in some non-standard address space.
    /// NOTE(review): the result is a Value *, not a bool as the previous
    /// wording suggested. Presumably the address is materialized through IRB
    /// with the appropriate address space and offset — confirm against the
    /// implementation.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1176
1177     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1178                       SelectionDAG &DAG) const;
1179
1180     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1181
1182     /// Customize the preferred legalization strategy for certain types.
1183     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1184
1185     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1186                                       EVT VT) const override;
1187
1188     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1189                                            CallingConv::ID CC,
1190                                            EVT VT) const override;
1191
1192     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1193
1194     bool supportSwiftError() const override;
1195
1196     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1197
1198     bool hasVectorBlend() const override { return true; }
1199
1200     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1201
1202     /// Lower interleaved load(s) into target specific
1203     /// instructions/intrinsics.
1204     bool lowerInterleavedLoad(LoadInst *LI,
1205                               ArrayRef<ShuffleVectorInst *> Shuffles,
1206                               ArrayRef<unsigned> Indices,
1207                               unsigned Factor) const override;
1208
1209     /// Lower interleaved store(s) into target specific
1210     /// instructions/intrinsics.
1211     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1212                                unsigned Factor) const override;
1213
1214     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1215                                    SDValue Addr, SelectionDAG &DAG)
1216                                    const override;
1217
1218   protected:
1219     std::pair<const TargetRegisterClass *, uint8_t>
1220     findRepresentativeClass(const TargetRegisterInfo *TRI,
1221                             MVT VT) const override;
1222
1223   private:
1224     /// Keep a reference to the X86Subtarget around so that we can
1225     /// make the right decision when generating code for different targets.
1226     const X86Subtarget &Subtarget;
1227
1228     /// Select between SSE or x87 floating point ops.
1229     /// When SSE is available, use it for f32 operations.
1230     /// When SSE2 is available, use it for f64 operations.
1231     bool X86ScalarSSEf32;
1232     bool X86ScalarSSEf64;
1233
1234     /// A list of legal FP immediates.
1235     std::vector<APFloat> LegalFPImmediates;
1236
1237     /// Indicate that this x86 target can instruction
1238     /// select the specified FP immediate natively.
1239     void addLegalFPImmediate(const APFloat& Imm) {
1240       LegalFPImmediates.push_back(Imm);
1241     }
1242
1243     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1244                             CallingConv::ID CallConv, bool isVarArg,
1245                             const SmallVectorImpl<ISD::InputArg> &Ins,
1246                             const SDLoc &dl, SelectionDAG &DAG,
1247                             SmallVectorImpl<SDValue> &InVals,
1248                             uint32_t *RegMask) const;
1249     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1250                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1251                              const SDLoc &dl, SelectionDAG &DAG,
1252                              const CCValAssign &VA, MachineFrameInfo &MFI,
1253                              unsigned i) const;
1254     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1255                              const SDLoc &dl, SelectionDAG &DAG,
1256                              const CCValAssign &VA,
1257                              ISD::ArgFlagsTy Flags) const;
1258
1259     // Call lowering helpers.
1260
1261     /// Check whether the call is eligible for tail call optimization. Targets
1262     /// that want to do tail call optimization should implement this function.
1263     bool IsEligibleForTailCallOptimization(SDValue Callee,
1264                                            CallingConv::ID CalleeCC,
1265                                            bool isVarArg,
1266                                            bool isCalleeStructRet,
1267                                            bool isCallerStructRet,
1268                                            Type *RetTy,
1269                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1270                                     const SmallVectorImpl<SDValue> &OutVals,
1271                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1272                                            SelectionDAG& DAG) const;
1273     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1274                                     SDValue Chain, bool IsTailCall,
1275                                     bool Is64Bit, int FPDiff,
1276                                     const SDLoc &dl) const;
1277
1278     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1279                                          SelectionDAG &DAG) const;
1280
1281     unsigned getAddressSpace(void) const;
1282
1283     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1284
1285     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1286     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1287     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1288     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1289
1290     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1291                                   const unsigned char OpFlags = 0) const;
1292     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1293     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1294     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1295     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1296     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1297
1298     /// Creates target global address or external symbol nodes for calls or
1299     /// other uses.
1300     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1301                                   bool ForCall) const;
1302
1303     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1304     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1305     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1306     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1307     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1308     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1309     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1310     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1311     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1312     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1313     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1314     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1315     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1316     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1317     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1318     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1319     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1320     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1323     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1324     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1325     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1326     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1327     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1328     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1329
1330     SDValue
1331     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1332                          const SmallVectorImpl<ISD::InputArg> &Ins,
1333                          const SDLoc &dl, SelectionDAG &DAG,
1334                          SmallVectorImpl<SDValue> &InVals) const override;
1335     SDValue LowerCall(CallLoweringInfo &CLI,
1336                       SmallVectorImpl<SDValue> &InVals) const override;
1337
1338     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1339                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1340                         const SmallVectorImpl<SDValue> &OutVals,
1341                         const SDLoc &dl, SelectionDAG &DAG) const override;
1342
1343     bool supportSplitCSR(MachineFunction *MF) const override {
1344       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1345           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1346     }
1347     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1348     void insertCopiesSplitCSR(
1349       MachineBasicBlock *Entry,
1350       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1351
1352     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1353
1354     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1355
1356     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1357                             ISD::NodeType ExtendKind) const override;
1358
1359     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1360                         bool isVarArg,
1361                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1362                         LLVMContext &Context) const override;
1363
1364     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1365
1366     TargetLoweringBase::AtomicExpansionKind
1367     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1368     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1369     TargetLoweringBase::AtomicExpansionKind
1370     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1371
1372     LoadInst *
1373     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1374
1375     bool needsCmpXchgNb(Type *MemType) const;
1376
1377     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1378                                 MachineBasicBlock *DispatchBB, int FI) const;
1379
1380     // Utility function to emit the low-level va_arg code for X86-64.
1381     MachineBasicBlock *
1382     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1383                                   MachineBasicBlock *MBB) const;
1384
1385     /// Utility function to emit the xmm reg save portion of va_start.
1386     MachineBasicBlock *
1387     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1388                                              MachineBasicBlock *BB) const;
1389
1390     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1391                                                  MachineInstr &MI2,
1392                                                  MachineBasicBlock *BB) const;
1393
1394     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1395                                          MachineBasicBlock *BB) const;
1396
1397     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1398                                            MachineBasicBlock *BB) const;
1399
1400     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1401                                            MachineBasicBlock *BB) const;
1402
1403     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1404                                            MachineBasicBlock *BB) const;
1405
1406     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1407                                             MachineBasicBlock *BB) const;
1408
1409     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1410                                           MachineBasicBlock *BB) const;
1411
1412     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1413                                           MachineBasicBlock *BB) const;
1414
1415     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1416                                             MachineBasicBlock *BB) const;
1417
1418     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1419                                         MachineBasicBlock *MBB) const;
1420
1421     void emitSetJmpShadowStackFix(MachineInstr &MI,
1422                                   MachineBasicBlock *MBB) const;
1423
1424     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1425                                          MachineBasicBlock *MBB) const;
1426
1427     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1428                                                  MachineBasicBlock *MBB) const;
1429
1430     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1431                                      MachineBasicBlock *MBB) const;
1432
1433     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1434                                              MachineBasicBlock *MBB) const;
1435
1436     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1437     /// equivalent, for use with the given x86 condition code.
1438     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1439                     SelectionDAG &DAG) const;
1440
1441     /// Convert a comparison if required by the subtarget.
1442     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1443
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    /// \p Op0 and \p Op1 are the setcc operands, \p CC the generic condition
    /// code, \p dl the source location for new nodes and \p DAG the graph they
    /// are created in. \p X86CC is an output parameter: it is taken by
    /// non-const reference and receives the condition code constant.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;
1450
    /// Check if replacement of SQRT with RSQRT should be disabled.
    /// Overrides a base-class virtual. \p Operand is the value under
    /// consideration and \p DAG the current graph.
    /// NOTE(review): a true result presumably means "sqrt is cheap, keep it" -
    /// confirm against the base-class documentation.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1453
    /// Use rsqrt* to speed up sqrt calculations.
    /// Overrides a base-class virtual. \p Operand is the input value and
    /// \p DAG the graph used for new nodes. \p RefinementSteps and
    /// \p UseOneConstNR are taken by non-const reference, so the
    /// implementation may write to them.
    /// NOTE(review): the exact meaning of \p Enabled, \p RefinementSteps,
    /// \p UseOneConstNR and \p Reciprocal is defined by the base-class
    /// contract, which is not visible here - consult TargetLowering.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;
1458
    /// Use rcp* to speed up fdiv calculations.
    /// Overrides a base-class virtual. \p Operand is the input value and
    /// \p DAG the graph used for new nodes. \p RefinementSteps is taken by
    /// non-const reference, so the implementation may write to it.
    /// NOTE(review): the meaning of \p Enabled and \p RefinementSteps comes
    /// from the base-class contract, which is not visible here - consult
    /// TargetLowering.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;
1462
    /// Reassociate floating point divisions into multiply by reciprocal.
    /// Overrides a base-class virtual and takes no arguments.
    /// NOTE(review): the unsigned result is presumably a threshold (such as
    /// the minimum number of uses of a divisor that makes the transform
    /// worthwhile) rather than a count of work done - confirm against the
    /// base-class documentation.
    unsigned combineRepeatedFPDivisors() const override;
1465   };
1466
  namespace X86 {
    /// Factory for a FastISel object, going by the name and return type; only
    /// the declaration lives in this header.
    /// \p funcInfo is the per-function lowering state and \p libInfo the
    /// target library information handed to the new object.
    /// NOTE(review): ownership of the returned pointer is not stated here -
    /// confirm at the definition and call sites.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1471
1472   // Base class for all X86 non-masked store operations.
1473   class X86StoreSDNode : public MemSDNode {
1474   public:
1475     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1476                    SDVTList VTs, EVT MemVT,
1477                    MachineMemOperand *MMO)
1478       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1479     const SDValue &getValue() const { return getOperand(1); }
1480     const SDValue &getBasePtr() const { return getOperand(2); }
1481
1482     static bool classof(const SDNode *N) {
1483       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1484         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1485     }
1486   };
1487
1488   // Base class for all X86 masked store operations.
1489   // The class has the same order of operands as MaskedStoreSDNode for
1490   // convenience.
1491   class X86MaskedStoreSDNode : public MemSDNode {
1492   public:
1493     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1494                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1495                          MachineMemOperand *MMO)
1496       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1497
1498     const SDValue &getValue()   const { return getOperand(1); }
1499     const SDValue &getBasePtr() const { return getOperand(2); }
1500     const SDValue &getMask()    const { return getOperand(3); }
1501
1502     static bool classof(const SDNode *N) {
1503       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1504         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1505     }
1506   };
1507
1508   // X86 Truncating Store with Signed saturation.
1509   class TruncSStoreSDNode : public X86StoreSDNode {
1510   public:
1511     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1512                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1513       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1514
1515     static bool classof(const SDNode *N) {
1516       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1517     }
1518   };
1519
1520   // X86 Truncating Store with Unsigned saturation.
1521   class TruncUSStoreSDNode : public X86StoreSDNode {
1522   public:
1523     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1524                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1525       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1526
1527     static bool classof(const SDNode *N) {
1528       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1529     }
1530   };
1531
1532   // X86 Truncating Masked Store with Signed saturation.
1533   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1534   public:
1535     MaskedTruncSStoreSDNode(unsigned Order,
1536                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1537                          MachineMemOperand *MMO)
1538       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1539
1540     static bool classof(const SDNode *N) {
1541       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1542     }
1543   };
1544
1545   // X86 Truncating Masked Store with Unsigned saturation.
1546   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1547   public:
1548     MaskedTruncUSStoreSDNode(unsigned Order,
1549                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1550                             MachineMemOperand *MMO)
1551       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1552
1553     static bool classof(const SDNode *N) {
1554       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1555     }
1556   };
1557
1558   // X86 specific Gather/Scatter nodes.
1559   // The class has the same order of operands as MaskedGatherScatterSDNode for
1560   // convenience.
1561   class X86MaskedGatherScatterSDNode : public MemSDNode {
1562   public:
1563     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1564                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1565                                  MachineMemOperand *MMO)
1566         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1567
1568     const SDValue &getBasePtr() const { return getOperand(3); }
1569     const SDValue &getIndex()   const { return getOperand(4); }
1570     const SDValue &getMask()    const { return getOperand(2); }
1571     const SDValue &getScale()   const { return getOperand(5); }
1572
1573     static bool classof(const SDNode *N) {
1574       return N->getOpcode() == X86ISD::MGATHER ||
1575              N->getOpcode() == X86ISD::MSCATTER;
1576     }
1577   };
1578
1579   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1580   public:
1581     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1582                           EVT MemVT, MachineMemOperand *MMO)
1583         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1584                                        MMO) {}
1585
1586     const SDValue &getPassThru() const { return getOperand(1); }
1587
1588     static bool classof(const SDNode *N) {
1589       return N->getOpcode() == X86ISD::MGATHER;
1590     }
1591   };
1592
1593   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1594   public:
1595     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1596                            EVT MemVT, MachineMemOperand *MMO)
1597         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1598                                        MMO) {}
1599
1600     const SDValue &getValue() const { return getOperand(1); }
1601
1602     static bool classof(const SDNode *N) {
1603       return N->getOpcode() == X86ISD::MSCATTER;
1604     }
1605   };
1606
1607   /// Generate unpacklo/unpackhi shuffle mask.
1608   template <typename T = int>
1609   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1610                                bool Unary) {
1611     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1612     int NumElts = VT.getVectorNumElements();
1613     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1614     for (int i = 0; i < NumElts; ++i) {
1615       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1616       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1617       Pos += (Unary ? 0 : NumElts * (i % 2));
1618       Pos += (Lo ? 0 : NumEltsInLane / 2);
1619       Mask.push_back(Pos);
1620     }
1621   }
1622
1623   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1624   /// mask index with the scaled sequential indices for an equivalent narrowed
1625   /// mask. This is the reverse process to canWidenShuffleElements, but can
1626   /// always succeed.
1627   template <typename T>
1628   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1629                         SmallVectorImpl<T> &ScaledMask) {
1630     assert(0 < Scale && "Unexpected scaling factor");
1631     size_t NumElts = Mask.size();
1632     ScaledMask.assign(NumElts * Scale, -1);
1633
1634     for (int i = 0; i != (int)NumElts; ++i) {
1635       int M = Mask[i];
1636
1637       // Repeat sentinel values in every mask element.
1638       if (M < 0) {
1639         for (int s = 0; s != Scale; ++s)
1640           ScaledMask[(Scale * i) + s] = M;
1641         continue;
1642       }
1643
1644       // Scale mask element and increment across each mask element.
1645       for (int s = 0; s != Scale; ++s)
1646         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1647     }
1648   }
1649 } // end namespace llvm
1650
1651 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H