1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
11 // selection DAG.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/Target/TargetOptions.h"
23 namespace llvm {
24 class X86Subtarget;
25 class X86TargetMachine;
27 namespace X86ISD {
28 // X86 Specific DAG Nodes
29 enum NodeType : unsigned {
30 // Start the numbering where the builtin ops leave off.
31 FIRST_NUMBER = ISD::BUILTIN_OP_END,
33 /// Bit scan forward.
34 BSF,
35 /// Bit scan reverse.
36 BSR,
38 /// Double shift instructions. These correspond to
39 /// X86::SHLDxx and X86::SHRDxx instructions.
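/// (Illustrative reading of the x86 semantics: SHLD(dst, src, cnt) shifts dst
/// left by cnt bits and fills the vacated low bits with the high bits of src;
/// SHRD is the mirror image, shifting right and filling from the low bits of
/// src.)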
40 SHLD,
41 SHRD,
43 /// Bitwise logical AND of floating point values. This corresponds
44 /// to X86::ANDPS or X86::ANDPD.
45 FAND,
47 /// Bitwise logical OR of floating point values. This corresponds
48 /// to X86::ORPS or X86::ORPD.
49 FOR,
51 /// Bitwise logical XOR of floating point values. This corresponds
52 /// to X86::XORPS or X86::XORPD.
53 FXOR,
55 /// Bitwise logical ANDNOT of floating point values. This
56 /// corresponds to X86::ANDNPS or X86::ANDNPD.
57 FANDN,
59 /// These operations represent an abstract X86 call
60 /// instruction, which includes a bunch of information. In particular the
/// operands of these nodes are:
62 ///
63 /// #0 - The incoming token chain
64 /// #1 - The callee
65 /// #2 - The number of arg bytes the caller pushes on the stack.
66 /// #3 - The number of arg bytes the callee pops off the stack.
67 /// #4 - The value to pass in AL/AX/EAX (optional)
68 /// #5 - The value to pass in DL/DX/EDX (optional)
69 ///
70 /// The result values of these nodes are:
71 ///
72 /// #0 - The outgoing token chain
73 /// #1 - The first register result value (optional)
74 /// #2 - The second register result value (optional)
75 ///
76 CALL,
78 /// Same as call except it adds the NoTrack prefix.
79 NT_CALL,
81 /// This operation implements the lowering for readcyclecounter.
82 RDTSC_DAG,
84 /// X86 Read Time-Stamp Counter and Processor ID.
85 RDTSCP_DAG,
87 /// X86 Read Performance Monitoring Counters.
88 RDPMC_DAG,
90 /// X86 compare and logical compare instructions.
91 CMP, COMI, UCOMI,
93 /// X86 bit-test instructions.
94 BT,
96 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
97 /// operand, usually produced by a CMP instruction.
98 SETCC,
100 /// X86 Select
101 SELECT, SELECTS,
// Same as SETCC except it's materialized with an SBB and the value is all
// ones or all zeros.
105 SETCC_CARRY, // R = carry_bit ? ~0 : 0
107 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
108 /// Operands are two FP values to compare; result is a mask of
109 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
110 FSETCC,
112 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
113 /// with optional rounding mode.
114 FSETCCM, FSETCCM_RND,
116 /// X86 conditional moves. Operand 0 and operand 1 are the two values
117 /// to select from. Operand 2 is the condition code, and operand 3 is the
118 /// flag operand produced by a CMP or TEST instruction. It also writes a
119 /// flag result.
120 CMOV,
122 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
123 /// is the block to branch if condition is true, operand 2 is the
124 /// condition code, and operand 3 is the flag operand produced by a CMP
125 /// or TEST instruction.
126 BRCOND,
128 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
129 /// operand 1 is the target address.
130 NT_BRIND,
132 /// Return with a flag operand. Operand 0 is the chain operand, operand
133 /// 1 is the number of bytes of stack to pop.
134 RET_FLAG,
136 /// Return from interrupt. Operand 0 is the number of bytes to pop.
137 IRET,
139 /// Repeat fill, corresponds to X86::REP_STOSx.
140 REP_STOS,
142 /// Repeat move, corresponds to X86::REP_MOVSx.
143 REP_MOVS,
145 /// On Darwin, this node represents the result of the popl
146 /// at function entry, used for PIC code.
147 GlobalBaseReg,
149 /// A wrapper node for TargetConstantPool, TargetJumpTable,
150 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
151 /// MCSymbol and TargetBlockAddress.
152 Wrapper,
154 /// Special wrapper used under X86-64 PIC mode for RIP
155 /// relative displacements.
156 WrapperRIP,
158 /// Copies a 64-bit value from the low word of an XMM vector
159 /// to an MMX vector.
160 MOVDQ2Q,
/// Copies a 32-bit value from the low word of an MMX
/// vector to a GPR.
164 MMX_MOVD2W,
/// Copies a GPR into the low 32-bit word of an MMX vector
/// and zeroes out the high word.
168 MMX_MOVW2D,
170 /// Extract an 8-bit value from a vector and zero extend it to
171 /// i32, corresponds to X86::PEXTRB.
172 PEXTRB,
174 /// Extract a 16-bit value from a vector and zero extend it to
175 /// i32, corresponds to X86::PEXTRW.
176 PEXTRW,
/// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
180 INSERTPS,
182 /// Insert the lower 8-bits of a 32-bit value to a vector,
183 /// corresponds to X86::PINSRB.
184 PINSRB,
186 /// Insert the lower 16-bits of a 32-bit value to a vector,
187 /// corresponds to X86::PINSRW.
188 PINSRW,
190 /// Shuffle 16 8-bit values within a vector.
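/// (For reference: each control byte either zeroes the corresponding result
/// byte, when its high bit is set, or selects a source byte by index.)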
191 PSHUFB,
193 /// Compute Sum of Absolute Differences.
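/// (Illustrative: for v16i8 inputs, each 8-byte group is reduced to a single
/// sum of absolute byte differences, widened into a 64-bit result element.)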
194 PSADBW,
195 /// Compute Double Block Packed Sum-Absolute-Differences
196 DBPSADBW,
198 /// Bitwise Logical AND NOT of Packed FP values.
199 ANDNP,
201 /// Blend where the selector is an immediate.
202 BLENDI,
204 /// Dynamic (non-constant condition) vector blend where only the sign bits
205 /// of the condition elements are used. This is used to enforce that the
206 /// condition mask is not valid for generic VSELECT optimizations.
207 SHRUNKBLEND,
209 /// Combined add and sub on an FP vector.
210 ADDSUB,
212 // FP vector ops with rounding mode.
213 FADD_RND, FADDS_RND,
214 FSUB_RND, FSUBS_RND,
215 FMUL_RND, FMULS_RND,
216 FDIV_RND, FDIVS_RND,
217 FMAX_RND, FMAXS_RND,
218 FMIN_RND, FMINS_RND,
219 FSQRT_RND, FSQRTS_RND,
221 // FP vector get exponent.
222 FGETEXP_RND, FGETEXPS_RND,
223 // Extract Normalized Mantissas.
224 VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
225 // FP Scale.
226 SCALEF,
227 SCALEFS,
229 // Integer add/sub with unsigned saturation.
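// (Illustrative values: with i8 elements, ADDUS(250, 10) == 255 and
// SUBUS(5, 10) == 0, i.e. results clamp instead of wrapping.)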
230 ADDUS,
231 SUBUS,
233 // Integer add/sub with signed saturation.
234 ADDS,
235 SUBS,
237 // Unsigned Integer average.
238 AVG,
240 /// Integer horizontal add/sub.
241 HADD,
242 HSUB,
244 /// Floating point horizontal add/sub.
245 FHADD,
246 FHSUB,
248 // Detect Conflicts Within a Vector
249 CONFLICT,
251 /// Floating point max and min.
252 FMAX, FMIN,
254 /// Commutative FMIN and FMAX.
255 FMAXC, FMINC,
257 /// Scalar intrinsic floating point max and min.
258 FMAXS, FMINS,
260 /// Floating point reciprocal-sqrt and reciprocal approximation.
261 /// Note that these typically require refinement
262 /// in order to obtain suitable precision.
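/// (For example, one Newton-Raphson step refines an rsqrt estimate x0 of a
/// as x1 = x0 * (1.5 - 0.5 * a * x0 * x0).)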
263 FRSQRT, FRCP,
265 // AVX-512 reciprocal approximations with a little more precision.
266 RSQRT14, RSQRT14S, RCP14, RCP14S,
268 // Thread Local Storage.
269 TLSADDR,
271 // Thread Local Storage. A call to get the start address
272 // of the TLS block for the current module.
273 TLSBASEADDR,
275 // Thread Local Storage. When calling to an OS provided
276 // thunk at the address from an earlier relocation.
277 TLSCALL,
279 // Exception Handling helpers.
280 EH_RETURN,
282 // SjLj exception handling setjmp.
283 EH_SJLJ_SETJMP,
285 // SjLj exception handling longjmp.
286 EH_SJLJ_LONGJMP,
288 // SjLj exception handling dispatch.
289 EH_SJLJ_SETUP_DISPATCH,
291 /// Tail call return. See X86TargetLowering::LowerCall for
292 /// the list of operands.
293 TC_RETURN,
295 // Vector move to low scalar and zero higher vector elements.
296 VZEXT_MOVL,
298 // Vector integer zero-extend.
299 VZEXT,
300 // Vector integer signed-extend.
301 VSEXT,
303 // Vector integer truncate.
304 VTRUNC,
305 // Vector integer truncate with unsigned/signed saturation.
306 VTRUNCUS, VTRUNCS,
308 // Vector FP extend.
309 VFPEXT, VFPEXT_RND, VFPEXTS_RND,
311 // Vector FP round.
312 VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
314 // 128-bit vector logical left / right shift
315 VSHLDQ, VSRLDQ,
317 // Vector shift elements
318 VSHL, VSRL, VSRA,
// Vector variable shift right arithmetic.
// Unlike ISD::SRA, if the shift count is greater than the element size,
// the sign bit is used to fill the destination data element.
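// (For example, an i32 element shifted right by 35 becomes all copies of its
// sign bit, whereas ISD::SRA leaves an out-of-range shift amount undefined.)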
323 VSRAV,
325 // Vector shift elements by immediate
326 VSHLI, VSRLI, VSRAI,
328 // Shifts of mask registers.
329 KSHIFTL, KSHIFTR,
331 // Bit rotate by immediate
332 VROTLI, VROTRI,
334 // Vector packed double/float comparison.
335 CMPP,
337 // Vector integer comparisons.
338 PCMPEQ, PCMPGT,
340 // v8i16 Horizontal minimum and position.
341 PHMINPOS,
343 MULTISHIFT,
345 /// Vector comparison generating mask bits for fp and
346 /// integer signed and unsigned data types.
347 CMPM,
348 // Vector comparison with rounding mode for FP values
349 CMPM_RND,
351 // Arithmetic operations with FLAGS results.
352 ADD, SUB, ADC, SBB, SMUL,
353 INC, DEC, OR, XOR, AND,
355 // Bit field extract.
356 BEXTR,
358 // LOW, HI, FLAGS = umul LHS, RHS.
359 UMUL,
361 // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
362 SMUL8, UMUL8,
364 // 8-bit divrem that zero-extend the high result (AH).
365 UDIVREM8_ZEXT_HREG,
366 SDIVREM8_SEXT_HREG,
368 // X86-specific multiply by immediate.
369 MUL_IMM,
371 // Vector sign bit extraction.
372 MOVMSK,
374 // Vector bitwise comparisons.
375 PTEST,
377 // Vector packed fp sign bitwise comparisons.
378 TESTP,
380 // OR/AND test for masks.
381 KORTEST,
382 KTEST,
384 // ADD for masks.
385 KADD,
387 // Several flavors of instructions with vector shuffle behaviors.
// Saturated signed/unsigned packing.
389 PACKSS,
390 PACKUS,
391 // Intra-lane alignr.
392 PALIGNR,
393 // AVX512 inter-lane alignr.
394 VALIGN,
395 PSHUFD,
396 PSHUFHW,
397 PSHUFLW,
398 SHUFP,
399 // VBMI2 Concat & Shift.
400 VSHLD,
401 VSHRD,
402 VSHLDV,
403 VSHRDV,
// Shuffle Packed Values at 128-bit granularity.
405 SHUF128,
406 MOVDDUP,
407 MOVSHDUP,
408 MOVSLDUP,
409 MOVLHPS,
410 MOVHLPS,
411 MOVSD,
412 MOVSS,
413 UNPCKL,
414 UNPCKH,
415 VPERMILPV,
416 VPERMILPI,
417 VPERMI,
418 VPERM2X128,
420 // Variable Permute (VPERM).
421 // Res = VPERMV MaskV, V0
422 VPERMV,
424 // 3-op Variable Permute (VPERMT2).
425 // Res = VPERMV3 V0, MaskV, V1
426 VPERMV3,
428 // Bitwise ternary logic.
429 VPTERNLOG,
430 // Fix Up Special Packed Float32/64 values.
431 VFIXUPIMM,
432 VFIXUPIMMS,
433 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
434 VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
// Reduce - Perform Reduction Transformation on scalar/packed FP.
436 VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
437 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
438 // Also used by the legacy (V)ROUND intrinsics where we mask out the
439 // scaling part of the immediate.
440 VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
// Tests types of packed FP values.
442 VFPCLASS,
// Tests types of scalar FP values.
444 VFPCLASSS,
446 // Broadcast scalar to vector.
447 VBROADCAST,
448 // Broadcast mask to vector.
449 VBROADCASTM,
450 // Broadcast subvector to vector.
451 SUBV_BROADCAST,
453 /// SSE4A Extraction and Insertion.
454 EXTRQI, INSERTQI,
456 // XOP arithmetic/logical shifts.
457 VPSHA, VPSHL,
458 // XOP signed/unsigned integer comparisons.
459 VPCOM, VPCOMU,
460 // XOP packed permute bytes.
461 VPPERM,
462 // XOP two source permutation.
463 VPERMIL2,
465 // Vector multiply packed unsigned doubleword integers.
466 PMULUDQ,
467 // Vector multiply packed signed doubleword integers.
468 PMULDQ,
// Vector Multiply Packed Unsigned Integers with Round and Scale.
470 MULHRS,
472 // Multiply and Add Packed Integers.
473 VPMADDUBSW, VPMADDWD,
475 // AVX512IFMA multiply and add.
// NOTE: These are different from the instruction and perform
// op0 x op1 + op2.
478 VPMADD52L, VPMADD52H,
480 // VNNI
481 VPDPBUSD,
482 VPDPBUSDS,
483 VPDPWSSD,
484 VPDPWSSDS,
486 // FMA nodes.
487 // We use the target independent ISD::FMA for the non-inverted case.
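// (Assumed convention, for orientation only: FNMADD = -(a*b)+c,
// FMSUB = (a*b)-c, FNMSUB = -(a*b)-c; FMADDSUB/FMSUBADD alternate
// subtract and add across vector lanes.)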
488 FNMADD,
489 FMSUB,
490 FNMSUB,
491 FMADDSUB,
492 FMSUBADD,
494 // FMA with rounding mode.
495 FMADD_RND,
496 FNMADD_RND,
497 FMSUB_RND,
498 FNMSUB_RND,
499 FMADDSUB_RND,
500 FMSUBADD_RND,
502 // Compress and expand.
503 COMPRESS,
504 EXPAND,
506 // Bits shuffle
507 VPSHUFBITQMB,
// Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
510 SINT_TO_FP_RND, UINT_TO_FP_RND,
511 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
513 // Vector float/double to signed/unsigned integer.
514 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
515 // Scalar float/double to signed/unsigned integer.
516 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
518 // Vector float/double to signed/unsigned integer with truncation.
519 CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
520 // Scalar float/double to signed/unsigned integer with truncation.
521 CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
523 // Vector signed/unsigned integer to float/double.
524 CVTSI2P, CVTUI2P,
526 // Save xmm argument registers to the stack, according to %al. An operator
527 // is needed so that this can be expanded with control flow.
528 VASTART_SAVE_XMM_REGS,
530 // Windows's _chkstk call to do stack probing.
531 WIN_ALLOCA,
533 // For allocating variable amounts of stack space when using
// segmented stacks. Checks if the current stacklet has enough space, and
// falls back to heap allocation if not.
536 SEG_ALLOCA,
538 // Memory barriers.
539 MEMBARRIER,
540 MFENCE,
542 // Store FP status word into i16 register.
543 FNSTSW16r,
545 // Store contents of %ah into %eflags.
546 SAHF,
548 // Get a random integer and indicate whether it is valid in CF.
549 RDRAND,
551 // Get a NIST SP800-90B & C compliant random integer and
552 // indicate whether it is valid in CF.
553 RDSEED,
555 // SSE42 string comparisons.
// These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// both flags and index/mask are used, this allows us to use a single
// instruction since we won't have to pick an opcode for flags. Instead we
// can rely on the DAG to CSE everything and decide at isel.
561 PCMPISTR,
562 PCMPESTR,
564 // Test if in transactional execution.
565 XTEST,
567 // ERI instructions.
568 RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
570 // Conversions between float and half-float.
571 CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
573 // Galois Field Arithmetic Instructions
574 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
576 // LWP insert record.
577 LWPINS,
579 // User level wait
580 UMWAIT, TPAUSE,
582 // Compare and swap.
583 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
584 LCMPXCHG8_DAG,
585 LCMPXCHG16_DAG,
586 LCMPXCHG8_SAVE_EBX_DAG,
587 LCMPXCHG16_SAVE_RBX_DAG,
589 /// LOCK-prefixed arithmetic read-modify-write instructions.
590 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
591 LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
593 // Load, scalar_to_vector, and zero extend.
594 VZEXT_LOAD,
// Store FP control word into i16 memory.
597 FNSTCW16m,
599 /// This instruction implements FP_TO_SINT with the
600 /// integer destination in memory and a FP reg source. This corresponds
601 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
602 /// has two inputs (token chain and address) and two outputs (int value
603 /// and token chain).
604 FP_TO_INT16_IN_MEM,
605 FP_TO_INT32_IN_MEM,
606 FP_TO_INT64_IN_MEM,
608 /// This instruction implements SINT_TO_FP with the
609 /// integer source in memory and FP reg result. This corresponds to the
610 /// X86::FILD*m instructions. It has three inputs (token chain, address,
/// and source type) and two outputs (FP value and token chain). FILD_FLAG
/// also produces a flag.
613 FILD,
614 FILD_FLAG,
616 /// This instruction implements an extending load to FP stack slots.
617 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
618 /// operand, ptr to load from, and a ValueType node indicating the type
619 /// to load to.
620 FLD,
622 /// This instruction implements a truncating store to FP stack
623 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
624 /// chain operand, value to store, address, and a ValueType to store it
625 /// as.
626 FST,
628 /// This instruction grabs the address of the next argument
629 /// from a va_list. (reads and modifies the va_list in memory)
630 VAARG_64,
632 // Vector truncating store with unsigned/signed saturation
633 VTRUNCSTOREUS, VTRUNCSTORES,
634 // Vector truncating masked store with unsigned/signed saturation
635 VMTRUNCSTOREUS, VMTRUNCSTORES,
637 // X86 specific gather and scatter
638 MGATHER, MSCATTER,
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
// opcodes will be treated as target memory ops!
};
} // end namespace X86ISD
646 /// Define some predicates that are used for node matching.
647 namespace X86 {
648 /// Returns true if Elt is a constant zero or floating point constant +0.0.
649 bool isZeroNode(SDValue Elt);
/// Returns true if the given offset can
/// fit into the displacement field of the instruction.
653 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
654 bool hasSymbolicDisplacement = true);
656 /// Determines whether the callee is required to pop its
657 /// own arguments. Callee pop is necessary to support tail calls.
658 bool isCalleePop(CallingConv::ID CallingConv,
659 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
661 } // end namespace X86
663 //===--------------------------------------------------------------------===//
664 // X86 Implementation of the TargetLowering interface
665 class X86TargetLowering final : public TargetLowering {
666 public:
667 explicit X86TargetLowering(const X86TargetMachine &TM,
668 const X86Subtarget &STI);
670 unsigned getJumpTableEncoding() const override;
671 bool useSoftFloat() const override;
673 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
674 ArgListTy &Args) const override;
676 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
return MVT::i8;
}
680 const MCExpr *
681 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
682 const MachineBasicBlock *MBB, unsigned uid,
683 MCContext &Ctx) const override;
685 /// Returns relocation base for the given PIC jumptable.
686 SDValue getPICJumpTableRelocBase(SDValue Table,
687 SelectionDAG &DAG) const override;
688 const MCExpr *
689 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
690 unsigned JTI, MCContext &Ctx) const override;
692 /// Return the desired alignment for ByVal aggregate
693 /// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
695 /// 4-byte boundaries.
696 unsigned getByValTypeAlignment(Type *Ty,
697 const DataLayout &DL) const override;
699 /// Returns the target specific optimal type for load
700 /// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means the destination
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
703 /// means there isn't a need to check it against alignment requirement,
704 /// probably because the source does not need to be loaded. If 'IsMemset' is
705 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
706 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
707 /// source is constant so it does not need to be loaded.
708 /// It returns EVT::Other if the type should be determined using generic
709 /// target-independent logic.
710 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
711 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
712 MachineFunction &MF) const override;
714 /// Returns true if it's safe to use load / store of the
715 /// specified type to expand memcpy / memset inline. This is mostly true
716 /// for all types except for some special cases. For example, on X86
717 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
718 /// also does type conversion. Note the specified type doesn't have to be
719 /// legal as the hook is used before type legalization.
720 bool isSafeMemOpType(MVT VT) const override;
722 /// Returns true if the target allows unaligned memory accesses of the
723 /// specified type. Returns whether it is "fast" in the last argument.
724 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
725 bool *Fast) const override;
727 /// Provide custom lowering hooks for some operations.
729 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
731 /// Places new result values for the node in Results (their number
732 /// and types must exactly match those of the original return values of
733 /// the node), or leaves Results empty, which indicates that the node is not
734 /// to be custom lowered after all.
735 void LowerOperationWrapper(SDNode *N,
736 SmallVectorImpl<SDValue> &Results,
737 SelectionDAG &DAG) const override;
739 /// Replace the results of node with an illegal result
740 /// type with new values built out of custom code.
742 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
743 SelectionDAG &DAG) const override;
745 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
747 // Return true if it is profitable to combine a BUILD_VECTOR with a
748 // stride-pattern to a shuffle and a truncate.
749 // Example of such a combine:
750 // v4i32 build_vector((extract_elt V, 1),
751 // (extract_elt V, 3),
752 // (extract_elt V, 5),
753 // (extract_elt V, 7))
754 // -->
// v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
// v4i64)
757 bool isDesirableToCombineBuildVectorToShuffleTruncate(
758 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
760 /// Return true if the target has native support for
761 /// the specified value type and it is 'desirable' to use the type for the
762 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
763 /// instruction encodings are longer and some i16 instructions are slow.
764 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
766 /// Return true if the target has native support for the
767 /// specified value type and it is 'desirable' to use the type. e.g. On x86
768 /// i16 is legal, but undesirable since i16 instruction encodings are longer
769 /// and some i16 instructions are slow.
770 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
772 MachineBasicBlock *
773 EmitInstrWithCustomInserter(MachineInstr &MI,
774 MachineBasicBlock *MBB) const override;
776 /// This method returns the name of a target specific DAG node.
777 const char *getTargetNodeName(unsigned Opcode) const override;
779 bool mergeStoresAfterLegalization() const override { return true; }
781 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
782 const SelectionDAG &DAG) const override;
784 bool isCheapToSpeculateCttz() const override;
786 bool isCheapToSpeculateCtlz() const override;
788 bool isCtlzFast() const override;
790 bool hasBitPreservingFPLogic(EVT VT) const override {
return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
}
794 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
// add one store instruction. There is potentially a more
// significant benefit because it avoids the float->int domain switch
// for the input value. So it is more likely a win.
800 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
801 (LTy.isInteger() && HTy.isFloatingPoint()))
802 return true;
// If the pair only contains int values, we will save two bitwise
// instructions and add one store instruction (costing one more
// store buffer). Since the benefit is less clear, we leave
// such pairs out until we have a test case to prove it is a win.
return false;
}
810 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
812 bool hasAndNotCompare(SDValue Y) const override;
814 bool hasAndNot(SDValue Y) const override;
816 bool preferShiftsToClearExtremeBits(SDValue Y) const override;
818 bool
819 shouldTransformSignedTruncationCheck(EVT XVT,
820 unsigned KeptBits) const override {
// For vectors, we don't have a preference.
822 if (XVT.isVector())
823 return false;
825 auto VTIsOk = [](EVT VT) -> bool {
826 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
VT == MVT::i64;
};
830 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
831 // XVT will be larger than KeptBitsVT.
832 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
836 bool shouldSplatInsEltVarIndex(EVT VT) const override;
838 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
842 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
843 MVT hasFastEqualityCompare(unsigned NumBits) const override;
845 /// Allow multiple load pairs per block for smaller and faster code.
846 unsigned getMemcmpEqZeroLoadsPerBlock() const override {
return 2;
}
850 /// Return the value type to use for ISD::SETCC.
851 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
852 EVT VT) const override;
854 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
855 TargetLoweringOpt &TLO) const override;
857 /// Determine which of the bits specified in Mask are known to be either
858 /// zero or one and return them in the KnownZero/KnownOne bitsets.
859 void computeKnownBitsForTargetNode(const SDValue Op,
860 KnownBits &Known,
861 const APInt &DemandedElts,
862 const SelectionDAG &DAG,
863 unsigned Depth = 0) const override;
865 /// Determine the number of bits in the operation that are sign bits.
866 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
867 const APInt &DemandedElts,
868 const SelectionDAG &DAG,
869 unsigned Depth) const override;
871 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
872 const APInt &DemandedElts,
873 APInt &KnownUndef,
874 APInt &KnownZero,
875 TargetLoweringOpt &TLO,
876 unsigned Depth) const override;
878 SDValue unwrapAddress(SDValue N) const override;
880 bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
881 int64_t &Offset) const override;
883 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
885 bool ExpandInlineAsm(CallInst *CI) const override;
887 ConstraintType getConstraintType(StringRef Constraint) const override;
889 /// Examine constraint string and operand type and determine a weight value.
890 /// The operand object must already have been set up with the operand type.
891 ConstraintWeight
892 getSingleConstraintMatchWeight(AsmOperandInfo &info,
893 const char *constraint) const override;
895 const char *LowerXConstraint(EVT ConstraintVT) const override;
897 /// Lower the specified operand into the Ops vector. If it is invalid, don't
898 /// add anything to Ops. If hasMemory is true it means one of the asm
/// constraints of the inline asm instruction being processed is 'm'.
900 void LowerAsmOperandForConstraint(SDValue Op,
901 std::string &Constraint,
902 std::vector<SDValue> &Ops,
903 SelectionDAG &DAG) const override;
905 unsigned
906 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
907 if (ConstraintCode == "i")
908 return InlineAsm::Constraint_i;
909 else if (ConstraintCode == "o")
910 return InlineAsm::Constraint_o;
911 else if (ConstraintCode == "v")
912 return InlineAsm::Constraint_v;
913 else if (ConstraintCode == "X")
914 return InlineAsm::Constraint_X;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
918 /// Given a physical register constraint
919 /// (e.g. {edx}), return the register number and the register class for the
920 /// register. This should only be used for C_Register constraints. On
921 /// error, this returns a register number of 0.
922 std::pair<unsigned, const TargetRegisterClass *>
923 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
924 StringRef Constraint, MVT VT) const override;
926 /// Return true if the addressing mode represented
927 /// by AM is legal for this target, for a load/store of the specified type.
928 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
929 Type *Ty, unsigned AS,
930 Instruction *I = nullptr) const override;
/// Return true if the specified immediate is a legal
/// icmp immediate, that is, the target has icmp instructions which can
934 /// compare a register against the immediate without having to materialize
935 /// the immediate into a register.
936 bool isLegalICmpImmediate(int64_t Imm) const override;
/// Return true if the specified immediate is a legal
/// add immediate, that is, the target has add instructions which can
940 /// add a register and the immediate without having to materialize
941 /// the immediate into a register.
942 bool isLegalAddImmediate(int64_t Imm) const override;
944 /// Return the cost of the scaling factor used in the addressing
945 /// mode represented by AM for this target, for a load/store
946 /// of the specified type.
947 /// If the AM is supported, the return value must be >= 0.
948 /// If the AM is not supported, it returns a negative value.
949 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
950 unsigned AS) const override;
952 bool isVectorShiftByScalarCheap(Type *Ty) const override;
954 /// Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
956 /// register EAX to i16 by referencing its sub-register AX.
957 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
958 bool isTruncateFree(EVT VT1, EVT VT2) const override;
960 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
962 /// Return true if any actual instruction that defines a
963 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
964 /// register. This does not necessarily include registers defined in
965 /// unknown ways, such as incoming arguments, or copies from unknown
966 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
967 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
/// all instructions that define 32-bit values implicitly zero-extend the
969 /// result out to 64 bits.
970 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
971 bool isZExtFree(EVT VT1, EVT VT2) const override;
972 bool isZExtFree(SDValue Val, EVT VT2) const override;
974 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
975 /// extend node) is profitable.
976 bool isVectorLoadExtDesirable(SDValue) const override;
978 /// Return true if an FMA operation is faster than a pair of fmul and fadd
979 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
980 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
981 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
983 /// Return true if it's profitable to narrow
984 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
985 /// from i32 to i8 but not from i32 to i16.
986 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
988 /// Given an intrinsic, checks if on the target the intrinsic will need to map
989 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
990 /// true and stores the intrinsic information into the IntrinsicInfo that was
991 /// passed to the function.
992 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
993 MachineFunction &MF,
994 unsigned Intrinsic) const override;
996 /// Returns true if the target can instruction select the
997 /// specified FP immediate natively. If false, the legalizer will
998 /// materialize the FP immediate as a load from a constant pool.
999 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
1001 /// Targets can use this to indicate that they only support *some*
1002 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1003 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1004 /// be legal.
1005 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1007 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1008 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1009 /// constant pool entry.
1010 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1012 /// Returns true if lowering to a jump table is allowed.
1013 bool areJTsAllowed(const Function *Fn) const override;
1015 /// If true, then instruction selection should
1016 /// seek to shrink the FP constant of the specified type to a smaller type
1017 /// in order to save space and / or reduce runtime.
1018 bool ShouldShrinkFPConstant(EVT VT) const override {
1019 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1020 // expensive than a straight movsd. On the other hand, it's important to
1021 // shrink long double fp constant since fldt is very slow.
return !X86ScalarSSEf64 || VT == MVT::f80;
}
1025 /// Return true if we believe it is correct and profitable to reduce the
1026 /// load node to a smaller type.
1027 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1028 EVT NewVT) const override;
1030 /// Return true if the specified scalar FP type is computed in an SSE
1031 /// register, not on the X87 floating point stack.
1032 bool isScalarFPTypeInSSEReg(EVT VT) const {
1033 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
1037 /// Returns true if it is beneficial to convert a load of a constant
1038 /// to just the constant itself.
1039 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1040 Type *Ty) const override;
1042 bool convertSelectOfConstantsToMath(EVT VT) const override;
1044 bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1046 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1047 /// with this index.
1048 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1049 unsigned Index) const override;
1051 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1052 unsigned AddrSpace) const override {
1053 // If we can replace more than 2 scalar stores, there will be a reduction
1054 // in instructions even after we add a vector constant load.
return NumElem > 2;
}
1058 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1060 /// Intel processors have a unified instruction and data cache
1061 const char * getClearCacheBuiltinName() const override {
return nullptr; // nothing to do, move along.
}
1065 unsigned getRegisterByName(const char* RegName, EVT VT,
1066 SelectionDAG &DAG) const override;
1068 /// If a physical register, this returns the register that receives the
1069 /// exception address on entry to an EH pad.
1070 unsigned
1071 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1073 /// If a physical register, this returns the register that receives the
1074 /// exception typeid on entry to a landing pad.
1075 unsigned
1076 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1078 virtual bool needsFixedCatchObjects() const override;
1080 /// This method returns a target specific FastISel object,
1081 /// or null if the target does not support "fast" ISel.
1082 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1083 const TargetLibraryInfo *libInfo) const override;
1085 /// If the target has a standard location for the stack protector cookie,
1086 /// returns the address of that location. Otherwise, returns nullptr.
1087 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1089 bool useLoadStackGuardNode() const override;
1090 bool useStackGuardXorFP() const override;
1091 void insertSSPDeclarations(Module &M) const override;
1092 Value *getSDagStackGuard(const Module &M) const override;
1093 Value *getSSPStackGuardCheck(const Module &M) const override;
1094 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1095 const SDLoc &DL) const override;
1098 /// Return true if the target stores SafeStack pointer at a fixed offset in
1099 /// some non-standard address space, and populates the address space and
1100 /// offset as appropriate.
1101 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1103 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1104 SelectionDAG &DAG) const;
1106 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1108 /// Customize the preferred legalization strategy for certain types.
1109 LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
1111 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1112 EVT VT) const override;
1114 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1115 CallingConv::ID CC,
1116 EVT VT) const override;
1118 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1120 bool supportSwiftError() const override;
1122 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1124 bool hasVectorBlend() const override { return true; }
1126 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1128 /// Lower interleaved load(s) into target specific
1129 /// instructions/intrinsics.
1130 bool lowerInterleavedLoad(LoadInst *LI,
1131 ArrayRef<ShuffleVectorInst *> Shuffles,
1132 ArrayRef<unsigned> Indices,
1133 unsigned Factor) const override;
1135 /// Lower interleaved store(s) into target specific
1136 /// instructions/intrinsics.
1137 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1138 unsigned Factor) const override;
1140 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1141 SDValue Addr, SelectionDAG &DAG)
1142 const override;
1144 protected:
1145 std::pair<const TargetRegisterClass *, uint8_t>
1146 findRepresentativeClass(const TargetRegisterInfo *TRI,
1147 MVT VT) const override;
1149 private:
1150 /// Keep a reference to the X86Subtarget around so that we can
1151 /// make the right decision when generating code for different targets.
1152 const X86Subtarget &Subtarget;
1154 /// Select between SSE or x87 floating point ops.
1155 /// When SSE is available, use it for f32 operations.
1156 /// When SSE2 is available, use it for f64 operations.
1157 bool X86ScalarSSEf32;
1158 bool X86ScalarSSEf64;
1160 /// A list of legal FP immediates.
1161 std::vector<APFloat> LegalFPImmediates;
1163 /// Indicate that this x86 target can instruction
1164 /// select the specified FP immediate natively.
1165 void addLegalFPImmediate(const APFloat& Imm) {
LegalFPImmediates.push_back(Imm);
}
1169 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1170 CallingConv::ID CallConv, bool isVarArg,
1171 const SmallVectorImpl<ISD::InputArg> &Ins,
1172 const SDLoc &dl, SelectionDAG &DAG,
1173 SmallVectorImpl<SDValue> &InVals,
1174 uint32_t *RegMask) const;
1175 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1176 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1177 const SDLoc &dl, SelectionDAG &DAG,
1178 const CCValAssign &VA, MachineFrameInfo &MFI,
1179 unsigned i) const;
1180 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1181 const SDLoc &dl, SelectionDAG &DAG,
1182 const CCValAssign &VA,
1183 ISD::ArgFlagsTy Flags) const;
1185 // Call lowering helpers.
1187 /// Check whether the call is eligible for tail call optimization. Targets
1188 /// that want to do tail call optimization should implement this function.
1189 bool IsEligibleForTailCallOptimization(SDValue Callee,
1190 CallingConv::ID CalleeCC,
1191 bool isVarArg,
1192 bool isCalleeStructRet,
1193 bool isCallerStructRet,
1194 Type *RetTy,
1195 const SmallVectorImpl<ISD::OutputArg> &Outs,
1196 const SmallVectorImpl<SDValue> &OutVals,
1197 const SmallVectorImpl<ISD::InputArg> &Ins,
1198 SelectionDAG& DAG) const;
1199 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1200 SDValue Chain, bool IsTailCall,
1201 bool Is64Bit, int FPDiff,
1202 const SDLoc &dl) const;
1204 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1205 SelectionDAG &DAG) const;
1207 unsigned getAddressSpace(void) const;
1209 std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1210 bool isSigned,
1211 bool isReplace) const;
1213 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1214 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1215 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1216 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1218 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1219 const unsigned char OpFlags = 0) const;
1220 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1221 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1222 SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1223 int64_t Offset, SelectionDAG &DAG) const;
1224 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1225 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1226 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1228 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1229 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1230 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1231 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1232 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1233 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1234 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1235 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1236 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1237 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1238 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1239 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1240 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1241 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1242 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1243 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1244 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1245 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1246 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1247 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1248 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1249 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1250 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1251 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1252 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1253 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1255 SDValue
1256 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1257 const SmallVectorImpl<ISD::InputArg> &Ins,
1258 const SDLoc &dl, SelectionDAG &DAG,
1259 SmallVectorImpl<SDValue> &InVals) const override;
1260 SDValue LowerCall(CallLoweringInfo &CLI,
1261 SmallVectorImpl<SDValue> &InVals) const override;
1263 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1264 const SmallVectorImpl<ISD::OutputArg> &Outs,
1265 const SmallVectorImpl<SDValue> &OutVals,
1266 const SDLoc &dl, SelectionDAG &DAG) const override;
1268 bool supportSplitCSR(MachineFunction *MF) const override {
1269 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
}
1272 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1273 void insertCopiesSplitCSR(
1274 MachineBasicBlock *Entry,
1275 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1277 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1279 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1281 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1282 ISD::NodeType ExtendKind) const override;
1284 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1285 bool isVarArg,
1286 const SmallVectorImpl<ISD::OutputArg> &Outs,
1287 LLVMContext &Context) const override;
1289 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1291 TargetLoweringBase::AtomicExpansionKind
1292 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1293 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1294 TargetLoweringBase::AtomicExpansionKind
1295 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1297 LoadInst *
1298 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1300 bool needsCmpXchgNb(Type *MemType) const;
1302 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1303 MachineBasicBlock *DispatchBB, int FI) const;
1305 // Utility function to emit the low-level va_arg code for X86-64.
1306 MachineBasicBlock *
1307 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1308 MachineBasicBlock *MBB) const;
1310 /// Utility function to emit the xmm reg save portion of va_start.
1311 MachineBasicBlock *
1312 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1313 MachineBasicBlock *BB) const;
1315 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1316 MachineInstr &MI2,
1317 MachineBasicBlock *BB) const;
1319 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1320 MachineBasicBlock *BB) const;
1322 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1323 MachineBasicBlock *BB) const;
1325 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1326 MachineBasicBlock *BB) const;
1328 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1329 MachineBasicBlock *BB) const;
1331 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1332 MachineBasicBlock *BB) const;
1334 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1335 MachineBasicBlock *BB) const;
1337 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1338 MachineBasicBlock *BB) const;
1340 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1341 MachineBasicBlock *BB) const;
1343 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1344 MachineBasicBlock *MBB) const;
1346 void emitSetJmpShadowStackFix(MachineInstr &MI,
1347 MachineBasicBlock *MBB) const;
1349 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1350 MachineBasicBlock *MBB) const;
1352 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1353 MachineBasicBlock *MBB) const;
1355 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1356 MachineBasicBlock *MBB) const;
1358 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1359 MachineBasicBlock *MBB) const;
1361 /// Emit nodes that will be selected as "test Op0,Op0", or something
1362 /// equivalent, for use with the given x86 condition code.
1363 SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
1364 SelectionDAG &DAG) const;
1366 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1367 /// equivalent, for use with the given x86 condition code.
1368 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1369 SelectionDAG &DAG) const;
1371 /// Convert a comparison if required by the subtarget.
1372 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1374 /// Check if replacement of SQRT with RSQRT should be disabled.
1375 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1377 /// Use rsqrt* to speed up sqrt calculations.
1378 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1379 int &RefinementSteps, bool &UseOneConstNR,
1380 bool Reciprocal) const override;
1382 /// Use rcp* to speed up fdiv calculations.
1383 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1384 int &RefinementSteps) const override;
1386 /// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
};
1390 namespace X86 {
1391 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1392 const TargetLibraryInfo *libInfo);
1393 } // end namespace X86
1395 // Base class for all X86 non-masked store operations.
1396 class X86StoreSDNode : public MemSDNode {
1397 public:
1398 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1399 SDVTList VTs, EVT MemVT,
1400 MachineMemOperand *MMO)
1401 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1402 const SDValue &getValue() const { return getOperand(1); }
1403 const SDValue &getBasePtr() const { return getOperand(2); }
1405 static bool classof(const SDNode *N) {
1406 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
N->getOpcode() == X86ISD::VTRUNCSTOREUS;
}
};
1411 // Base class for all X86 masked store operations.
1412 // The class has the same order of operands as MaskedStoreSDNode for
1413 // convenience.
1414 class X86MaskedStoreSDNode : public MemSDNode {
1415 public:
1416 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1417 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1418 MachineMemOperand *MMO)
1419 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1421 const SDValue &getValue() const { return getOperand(1); }
1422 const SDValue &getBasePtr() const { return getOperand(2); }
1423 const SDValue &getMask() const { return getOperand(3); }
1425 static bool classof(const SDNode *N) {
1426 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
}
};
1431 // X86 Truncating Store with Signed saturation.
1432 class TruncSStoreSDNode : public X86StoreSDNode {
1433 public:
1434 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1435 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1436 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1438 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VTRUNCSTORES;
}
};
1443 // X86 Truncating Store with Unsigned saturation.
1444 class TruncUSStoreSDNode : public X86StoreSDNode {
1445 public:
1446 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1447 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1448 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1450 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
}
};
1455 // X86 Truncating Masked Store with Signed saturation.
1456 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1457 public:
1458 MaskedTruncSStoreSDNode(unsigned Order,
1459 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1460 MachineMemOperand *MMO)
1461 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1463 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTORES;
}
};
1468 // X86 Truncating Masked Store with Unsigned saturation.
1469 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1470 public:
1471 MaskedTruncUSStoreSDNode(unsigned Order,
1472 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1473 MachineMemOperand *MMO)
1474 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1476 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
}
};
1481 // X86 specific Gather/Scatter nodes.
1482 // The class has the same order of operands as MaskedGatherScatterSDNode for
1483 // convenience.
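// Operand order, as implied by the accessors below (operand 0 is assumed to
// be the chain): 1 - passthru/value, 2 - mask, 3 - base pointer, 4 - index,
// 5 - scale.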
1484 class X86MaskedGatherScatterSDNode : public MemSDNode {
1485 public:
1486 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1487 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1488 MachineMemOperand *MMO)
1489 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1491 const SDValue &getBasePtr() const { return getOperand(3); }
1492 const SDValue &getIndex() const { return getOperand(4); }
1493 const SDValue &getMask() const { return getOperand(2); }
1494 const SDValue &getScale() const { return getOperand(5); }
1496 static bool classof(const SDNode *N) {
1497 return N->getOpcode() == X86ISD::MGATHER ||
N->getOpcode() == X86ISD::MSCATTER;
}
};
1502 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1503 public:
1504 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1505 EVT MemVT, MachineMemOperand *MMO)
1506 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1507 MMO) {}
1509 const SDValue &getPassThru() const { return getOperand(1); }
1511 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MGATHER;
}
};
1516 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1517 public:
1518 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1519 EVT MemVT, MachineMemOperand *MMO)
1520 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1521 MMO) {}
1523 const SDValue &getValue() const { return getOperand(1); }
1525 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MSCATTER;
}
};
1530 /// Generate unpacklo/unpackhi shuffle mask.
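/// For example, for MVT::v4i32 this produces <0,4,1,5> when Lo is true and
/// <2,6,3,7> when Lo is false (with Unary == false).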
1531 template <typename T = int>
1532 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1533 bool Unary) {
1534 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1535 int NumElts = VT.getVectorNumElements();
1536 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1537 for (int i = 0; i < NumElts; ++i) {
1538 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1539 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1540 Pos += (Unary ? 0 : NumElts * (i % 2));
1541 Pos += (Lo ? 0 : NumEltsInLane / 2);
Mask.push_back(Pos);
}
}
1546 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1547 /// mask index with the scaled sequential indices for an equivalent narrowed
1548 /// mask. This is the reverse process to canWidenShuffleElements, but can
1549 /// always succeed.
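/// For example, scaling the mask <1, -1, 0> by 2 produces
/// <2, 3, -1, -1, 0, 1>.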
1550 template <typename T>
1551 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1552 SmallVectorImpl<T> &ScaledMask) {
1553 assert(0 < Scale && "Unexpected scaling factor");
1554 int NumElts = Mask.size();
1555 ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1557 for (int i = 0; i != NumElts; ++i) {
1558 int M = Mask[i];
1560 // Repeat sentinel values in every mask element.
1561 if (M < 0) {
1562 for (int s = 0; s != Scale; ++s)
1563 ScaledMask[(Scale * i) + s] = M;
continue;
}
1567 // Scale mask element and increment across each mask element.
1568 for (int s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = (Scale * M) + s;
}
}
1572 } // end namespace llvm
1574 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H