1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 #include "llvm/Target/TargetOptions.h"
22 namespace llvm {
23 class X86Subtarget;
24 class X86TargetMachine;
26 namespace X86ISD {
27 // X86 Specific DAG Nodes
28 enum NodeType : unsigned {
29 // Start the numbering where the builtin ops leave off.
30 FIRST_NUMBER = ISD::BUILTIN_OP_END,
32 /// Bit scan forward.
33 BSF,
34 /// Bit scan reverse.
35 BSR,
37 /// Double shift instructions. These correspond to
38 /// X86::SHLDxx and X86::SHRDxx instructions.
39 SHLD,
40 SHRD,
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of these nodes are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 /// This operation implements the lowering for readcyclecounter.
81 RDTSC_DAG,
83 /// X86 Read Time-Stamp Counter and Processor ID.
84 RDTSCP_DAG,
86 /// X86 Read Performance Monitoring Counters.
87 RDPMC_DAG,
89 /// X86 compare and logical compare instructions.
90 CMP, COMI, UCOMI,
92 /// X86 bit-test instructions.
93 BT,
95 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
96 /// operand, usually produced by a CMP instruction.
97 SETCC,
99 /// X86 Select
100 SELECTS,
102 // Same as SETCC except it's materialized with an SBB and the value is all
103 // ones or all zeros.
104 SETCC_CARRY, // R = carry_bit ? ~0 : 0
106 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
107 /// Operands are two FP values to compare; result is a mask of
108 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
109 FSETCC,
111 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
112 /// with optional rounding mode.
113 FSETCCM, FSETCCM_RND,
115 /// X86 conditional moves. Operand 0 and operand 1 are the two values
116 /// to select from. Operand 2 is the condition code, and operand 3 is the
117 /// flag operand produced by a CMP or TEST instruction. It also writes a
118 /// flag result.
119 CMOV,
121 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
122 /// is the block to branch if condition is true, operand 2 is the
123 /// condition code, and operand 3 is the flag operand produced by a CMP
124 /// or TEST instruction.
125 BRCOND,
127 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
128 /// operand 1 is the target address.
129 NT_BRIND,
131 /// Return with a flag operand. Operand 0 is the chain operand, operand
132 /// 1 is the number of bytes of stack to pop.
133 RET_FLAG,
135 /// Return from interrupt. Operand 0 is the number of bytes to pop.
136 IRET,
138 /// Repeat fill, corresponds to X86::REP_STOSx.
139 REP_STOS,
141 /// Repeat move, corresponds to X86::REP_MOVSx.
142 REP_MOVS,
144 /// On Darwin, this node represents the result of the popl
145 /// at function entry, used for PIC code.
146 GlobalBaseReg,
148 /// A wrapper node for TargetConstantPool, TargetJumpTable,
149 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
150 /// MCSymbol and TargetBlockAddress.
151 Wrapper,
153 /// Special wrapper used under X86-64 PIC mode for RIP
154 /// relative displacements.
155 WrapperRIP,
157 /// Copies a 64-bit value from the low word of an XMM vector
158 /// to an MMX vector.
159 MOVDQ2Q,
161 /// Copies a 32-bit value from the low word of a MMX
162 /// vector to a GPR.
163 MMX_MOVD2W,
165 /// Copies a GPR into the low 32-bit word of a MMX vector
166 /// and zero out the high word.
167 MMX_MOVW2D,
169 /// Extract an 8-bit value from a vector and zero extend it to
170 /// i32, corresponds to X86::PEXTRB.
171 PEXTRB,
173 /// Extract a 16-bit value from a vector and zero extend it to
174 /// i32, corresponds to X86::PEXTRW.
175 PEXTRW,
177 /// Insert any element of a 4 x float vector into any element
178 /// of a destination 4 x float vector.
179 INSERTPS,
181 /// Insert the lower 8-bits of a 32-bit value to a vector,
182 /// corresponds to X86::PINSRB.
183 PINSRB,
185 /// Insert the lower 16-bits of a 32-bit value to a vector,
186 /// corresponds to X86::PINSRW.
187 PINSRW,
189 /// Shuffle 16 8-bit values within a vector.
190 PSHUFB,
192 /// Compute Sum of Absolute Differences.
193 PSADBW,
194 /// Compute Double Block Packed Sum-Absolute-Differences
195 DBPSADBW,
197 /// Bitwise Logical AND NOT of Packed FP values.
198 ANDNP,
200 /// Blend where the selector is an immediate.
201 BLENDI,
203 /// Dynamic (non-constant condition) vector blend where only the sign bits
204 /// of the condition elements are used. This is used to enforce that the
205 /// condition mask is not valid for generic VSELECT optimizations. This
206 /// is also used to implement the intrinsics.
207 /// Operands are in VSELECT order: MASK, TRUE, FALSE
208 BLENDV,
210 /// Combined add and sub on an FP vector.
211 ADDSUB,
213 // FP vector ops with rounding mode.
214 FADD_RND, FADDS_RND,
215 FSUB_RND, FSUBS_RND,
216 FMUL_RND, FMULS_RND,
217 FDIV_RND, FDIVS_RND,
218 FMAX_RND, FMAXS_RND,
219 FMIN_RND, FMINS_RND,
220 FSQRT_RND, FSQRTS_RND,
222 // FP vector get exponent.
223 FGETEXP_RND, FGETEXPS_RND,
224 // Extract Normalized Mantissas.
225 VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
226 // FP Scale.
227 SCALEF,
228 SCALEFS,
230 // Unsigned Integer average.
231 AVG,
233 /// Integer horizontal add/sub.
234 HADD,
235 HSUB,
237 /// Floating point horizontal add/sub.
238 FHADD,
239 FHSUB,
241 // Detect Conflicts Within a Vector
242 CONFLICT,
244 /// Floating point max and min.
245 FMAX, FMIN,
247 /// Commutative FMIN and FMAX.
248 FMAXC, FMINC,
250 /// Scalar intrinsic floating point max and min.
251 FMAXS, FMINS,
253 /// Floating point reciprocal-sqrt and reciprocal approximation.
254 /// Note that these typically require refinement
255 /// in order to obtain suitable precision.
256 FRSQRT, FRCP,
258 // AVX-512 reciprocal approximations with a little more precision.
259 RSQRT14, RSQRT14S, RCP14, RCP14S,
261 // Thread Local Storage.
262 TLSADDR,
264 // Thread Local Storage. A call to get the start address
265 // of the TLS block for the current module.
266 TLSBASEADDR,
268 // Thread Local Storage. When calling to an OS provided
269 // thunk at the address from an earlier relocation.
270 TLSCALL,
272 // Exception Handling helpers.
273 EH_RETURN,
275 // SjLj exception handling setjmp.
276 EH_SJLJ_SETJMP,
278 // SjLj exception handling longjmp.
279 EH_SJLJ_LONGJMP,
281 // SjLj exception handling dispatch.
282 EH_SJLJ_SETUP_DISPATCH,
284 /// Tail call return. See X86TargetLowering::LowerCall for
285 /// the list of operands.
286 TC_RETURN,
288 // Vector move to low scalar and zero higher vector elements.
289 VZEXT_MOVL,
291 // Vector integer truncate.
292 VTRUNC,
293 // Vector integer truncate with unsigned/signed saturation.
294 VTRUNCUS, VTRUNCS,
296 // Masked version of the above. Used when less than a 128-bit result is
297 // produced since the mask only applies to the lower elements and can't
298 // be represented by a select.
299 // SRC, PASSTHRU, MASK
300 VMTRUNC, VMTRUNCUS, VMTRUNCS,
302 // Vector FP extend.
303 VFPEXT, VFPEXT_RND, VFPEXTS_RND,
305 // Vector FP round.
306 VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
308 // Masked version of above. Used for v2f64->v4f32.
309 // SRC, PASSTHRU, MASK
310 VMFPROUND,
312 // 128-bit vector logical left / right shift
313 VSHLDQ, VSRLDQ,
315 // Vector shift elements
316 VSHL, VSRL, VSRA,
318 // Vector variable shift
319 VSHLV, VSRLV, VSRAV,
321 // Vector shift elements by immediate
322 VSHLI, VSRLI, VSRAI,
324 // Shifts of mask registers.
325 KSHIFTL, KSHIFTR,
327 // Bit rotate by immediate
328 VROTLI, VROTRI,
330 // Vector packed double/float comparison.
331 CMPP,
333 // Vector integer comparisons.
334 PCMPEQ, PCMPGT,
336 // v8i16 Horizontal minimum and position.
337 PHMINPOS,
339 MULTISHIFT,
341 /// Vector comparison generating mask bits for fp and
342 /// integer signed and unsigned data types.
343 CMPM,
344 // Vector comparison with rounding mode for FP values
345 CMPM_RND,
347 // Arithmetic operations with FLAGS results.
348 ADD, SUB, ADC, SBB, SMUL, UMUL,
349 OR, XOR, AND,
351 // Bit field extract.
352 BEXTR,
354 // Zero High Bits Starting with Specified Bit Position.
355 BZHI,
357 // X86-specific multiply by immediate.
358 MUL_IMM,
360 // Vector sign bit extraction.
361 MOVMSK,
363 // Vector bitwise comparisons.
364 PTEST,
366 // Vector packed fp sign bitwise comparisons.
367 TESTP,
369 // OR/AND test for masks.
370 KORTEST,
371 KTEST,
373 // ADD for masks.
374 KADD,
376 // Several flavors of instructions with vector shuffle behaviors.
377 // Saturated signed/unsigned packing.
378 PACKSS,
379 PACKUS,
380 // Intra-lane alignr.
381 PALIGNR,
382 // AVX512 inter-lane alignr.
383 VALIGN,
384 PSHUFD,
385 PSHUFHW,
386 PSHUFLW,
387 SHUFP,
388 // VBMI2 Concat & Shift.
389 VSHLD,
390 VSHRD,
391 VSHLDV,
392 VSHRDV,
393 // Shuffle Packed Values at 128-bit granularity.
394 SHUF128,
395 MOVDDUP,
396 MOVSHDUP,
397 MOVSLDUP,
398 MOVLHPS,
399 MOVHLPS,
400 MOVSD,
401 MOVSS,
402 UNPCKL,
403 UNPCKH,
404 VPERMILPV,
405 VPERMILPI,
406 VPERMI,
407 VPERM2X128,
409 // Variable Permute (VPERM).
410 // Res = VPERMV MaskV, V0
411 VPERMV,
413 // 3-op Variable Permute (VPERMT2).
414 // Res = VPERMV3 V0, MaskV, V1
415 VPERMV3,
417 // Bitwise ternary logic.
418 VPTERNLOG,
419 // Fix Up Special Packed Float32/64 values.
420 VFIXUPIMM,
421 VFIXUPIMMS,
422 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
423 VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
424 // Reduce - Perform Reduction Transformation on scalar/packed FP.
425 VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
426 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
427 // Also used by the legacy (V)ROUND intrinsics where we mask out the
428 // scaling part of the immediate.
429 VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
430 // Tests the type class of packed FP values.
431 VFPCLASS,
432 // Tests the type class of a scalar FP value.
433 VFPCLASSS,
435 // Broadcast scalar to vector.
436 VBROADCAST,
437 // Broadcast mask to vector.
438 VBROADCASTM,
439 // Broadcast subvector to vector.
440 SUBV_BROADCAST,
442 /// SSE4A Extraction and Insertion.
443 EXTRQI, INSERTQI,
445 // XOP arithmetic/logical shifts.
446 VPSHA, VPSHL,
447 // XOP signed/unsigned integer comparisons.
448 VPCOM, VPCOMU,
449 // XOP packed permute bytes.
450 VPPERM,
451 // XOP two source permutation.
452 VPERMIL2,
454 // Vector multiply packed unsigned doubleword integers.
455 PMULUDQ,
456 // Vector multiply packed signed doubleword integers.
457 PMULDQ,
458 // Vector Multiply Packed Unsigned Integers with Round and Scale.
459 MULHRS,
461 // Multiply and Add Packed Integers.
462 VPMADDUBSW, VPMADDWD,
464 // AVX512IFMA multiply and add.
465 // NOTE: These are different than the instruction and perform
466 // op0 x op1 + op2.
467 VPMADD52L, VPMADD52H,
469 // VNNI
470 VPDPBUSD,
471 VPDPBUSDS,
472 VPDPWSSD,
473 VPDPWSSDS,
475 // FMA nodes.
476 // We use the target independent ISD::FMA for the non-inverted case.
477 FNMADD,
478 FMSUB,
479 FNMSUB,
480 FMADDSUB,
481 FMSUBADD,
483 // FMA with rounding mode.
484 FMADD_RND,
485 FNMADD_RND,
486 FMSUB_RND,
487 FNMSUB_RND,
488 FMADDSUB_RND,
489 FMSUBADD_RND,
491 // Compress and expand.
492 COMPRESS,
493 EXPAND,
495 // Bits shuffle
496 VPSHUFBITQMB,
498 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
499 SINT_TO_FP_RND, UINT_TO_FP_RND,
500 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
502 // Vector float/double to signed/unsigned integer.
503 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
504 // Scalar float/double to signed/unsigned integer.
505 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
507 // Vector float/double to signed/unsigned integer with truncation.
508 CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
509 // Scalar float/double to signed/unsigned integer with truncation.
510 CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
512 // Vector signed/unsigned integer to float/double.
513 CVTSI2P, CVTUI2P,
515 // Masked versions of above. Used for v2f64->v4f32.
516 // SRC, PASSTHRU, MASK
517 MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
518 MCVTSI2P, MCVTUI2P,
520 // Save xmm argument registers to the stack, according to %al. An operator
521 // is needed so that this can be expanded with control flow.
522 VASTART_SAVE_XMM_REGS,
524 // Windows's _chkstk call to do stack probing.
525 WIN_ALLOCA,
527 // For allocating variable amounts of stack space when using
528 // segmented stacks. Check if the current stacklet has enough space, and
529 // fall back to heap allocation if not.
530 SEG_ALLOCA,
532 // Memory barriers.
533 MEMBARRIER,
534 MFENCE,
536 // Store FP status word into i16 register.
537 FNSTSW16r,
539 // Store contents of %ah into %eflags.
540 SAHF,
542 // Get a random integer and indicate whether it is valid in CF.
543 RDRAND,
545 // Get a NIST SP800-90B & C compliant random integer and
546 // indicate whether it is valid in CF.
547 RDSEED,
549 // SSE42 string comparisons.
550 // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
551 // will emit one or two instructions based on which results are used. If
552 // both flags and index/mask are used, this allows us to use a single
553 // instruction since we won't have to pick an opcode for flags. Instead we
554 // can rely on the DAG to CSE everything and decide at isel.
555 PCMPISTR,
556 PCMPESTR,
558 // Test if in transactional execution.
559 XTEST,
561 // ERI instructions.
562 RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
564 // Conversions between float and half-float.
565 CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
567 // Masked version of above.
568 // SRC, RND, PASSTHRU, MASK
569 MCVTPS2PH,
571 // Galois Field Arithmetic Instructions
572 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
574 // LWP insert record.
575 LWPINS,
577 // User level wait
578 UMWAIT, TPAUSE,
580 // Compare and swap.
581 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
582 LCMPXCHG8_DAG,
583 LCMPXCHG16_DAG,
584 LCMPXCHG8_SAVE_EBX_DAG,
585 LCMPXCHG16_SAVE_RBX_DAG,
587 /// LOCK-prefixed arithmetic read-modify-write instructions.
588 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
589 LADD, LSUB, LOR, LXOR, LAND,
591 // Load, scalar_to_vector, and zero extend.
592 VZEXT_LOAD,
594 // Store FP control word into i16 memory.
595 FNSTCW16m,
597 /// This instruction implements FP_TO_SINT with the
598 /// integer destination in memory and a FP reg source. This corresponds
599 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
600 /// has two inputs (token chain and address) and two outputs (int value
601 /// and token chain). Memory VT specifies the type to store to.
602 FP_TO_INT_IN_MEM,
604 /// This instruction implements SINT_TO_FP with the
605 /// integer source in memory and FP reg result. This corresponds to the
606 /// X86::FILD*m instructions. It has two inputs (token chain and address)
607 /// and two outputs (FP value and token chain). FILD_FLAG also produces a
608 /// flag. The integer source type is specified by the memory VT.
609 FILD,
610 FILD_FLAG,
612 /// This instruction implements an extending load to FP stack slots.
613 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
614 /// operand, and ptr to load from. The memory VT specifies the type to
615 /// load from.
616 FLD,
618 /// This instruction implements a truncating store to FP stack
619 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
620 /// chain operand, value to store, and address. The memory VT specifies
621 /// the type to store as.
622 FST,
624 /// This instruction grabs the address of the next argument
625 /// from a va_list. (reads and modifies the va_list in memory)
626 VAARG_64,
628 // Vector truncating store with unsigned/signed saturation
629 VTRUNCSTOREUS, VTRUNCSTORES,
630 // Vector truncating masked store with unsigned/signed saturation
631 VMTRUNCSTOREUS, VMTRUNCSTORES,
633 // X86 specific gather and scatter
634 MGATHER, MSCATTER,
636 // WARNING: Do not add anything at the end unless you want the node to
637 // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
638 // opcodes will be treated as target memory ops!
639 };
640 } // end namespace X86ISD
642 /// Define some predicates that are used for node matching.
643 namespace X86 {
644 /// Returns true if Elt is a constant zero or floating point constant +0.0.
645 bool isZeroNode(SDValue Elt);
647 /// Returns true if the given offset can be
648 /// fit into the displacement field of the instruction.
649 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
650 bool hasSymbolicDisplacement = true);
652 /// Determines whether the callee is required to pop its
653 /// own arguments. Callee pop is necessary to support tail calls.
654 bool isCalleePop(CallingConv::ID CallingConv,
655 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
657 } // end namespace X86
659 //===--------------------------------------------------------------------===//
660 // X86 Implementation of the TargetLowering interface
661 class X86TargetLowering final : public TargetLowering {
662 public:
663 explicit X86TargetLowering(const X86TargetMachine &TM,
664 const X86Subtarget &STI);
666 unsigned getJumpTableEncoding() const override;
667 bool useSoftFloat() const override;
669 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
670 ArgListTy &Args) const override;
672 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
673 return MVT::i8;
674 }
676 const MCExpr *
677 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
678 const MachineBasicBlock *MBB, unsigned uid,
679 MCContext &Ctx) const override;
681 /// Returns relocation base for the given PIC jumptable.
682 SDValue getPICJumpTableRelocBase(SDValue Table,
683 SelectionDAG &DAG) const override;
684 const MCExpr *
685 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
686 unsigned JTI, MCContext &Ctx) const override;
688 /// Return the desired alignment for ByVal aggregate
689 /// function arguments in the caller parameter area. For X86, aggregates
690 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
691 /// 4-byte boundaries.
692 unsigned getByValTypeAlignment(Type *Ty,
693 const DataLayout &DL) const override;
695 /// Returns the target specific optimal type for load
696 /// and store operations as a result of memset, memcpy, and memmove
697 /// lowering. If DstAlign is zero that means any destination
698 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
699 /// means there isn't a need to check it against the alignment requirement,
700 /// probably because the source does not need to be loaded. If 'IsMemset' is
701 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
702 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
703 /// source is constant so it does not need to be loaded.
704 /// It returns EVT::Other if the type should be determined using generic
705 /// target-independent logic.
706 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
707 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
708 MachineFunction &MF) const override;
710 /// Returns true if it's safe to use load / store of the
711 /// specified type to expand memcpy / memset inline. This is mostly true
712 /// for all types except for some special cases. For example, on X86
713 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
714 /// also does type conversion. Note the specified type doesn't have to be
715 /// legal as the hook is used before type legalization.
716 bool isSafeMemOpType(MVT VT) const override;
718 /// Returns true if the target allows unaligned memory accesses of the
719 /// specified type. Returns whether it is "fast" in the last argument.
720 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
721 bool *Fast) const override;
723 /// Provide custom lowering hooks for some operations.
725 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
727 /// Places new result values for the node in Results (their number
728 /// and types must exactly match those of the original return values of
729 /// the node), or leaves Results empty, which indicates that the node is not
730 /// to be custom lowered after all.
731 void LowerOperationWrapper(SDNode *N,
732 SmallVectorImpl<SDValue> &Results,
733 SelectionDAG &DAG) const override;
735 /// Replace the results of node with an illegal result
736 /// type with new values built out of custom code.
738 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
739 SelectionDAG &DAG) const override;
741 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
743 // Return true if it is profitable to combine a BUILD_VECTOR with a
744 // stride-pattern to a shuffle and a truncate.
745 // Example of such a combine:
746 // v4i32 build_vector((extract_elt V, 1),
747 // (extract_elt V, 3),
748 // (extract_elt V, 5),
749 // (extract_elt V, 7))
750 // -->
751 // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
752 // v4i64)
753 bool isDesirableToCombineBuildVectorToShuffleTruncate(
754 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
756 /// Return true if the target has native support for
757 /// the specified value type and it is 'desirable' to use the type for the
758 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
759 /// instruction encodings are longer and some i16 instructions are slow.
760 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
762 /// Return true if the target has native support for the
763 /// specified value type and it is 'desirable' to use the type. e.g. On x86
764 /// i16 is legal, but undesirable since i16 instruction encodings are longer
765 /// and some i16 instructions are slow.
766 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
768 MachineBasicBlock *
769 EmitInstrWithCustomInserter(MachineInstr &MI,
770 MachineBasicBlock *MBB) const override;
772 /// This method returns the name of a target specific DAG node.
773 const char *getTargetNodeName(unsigned Opcode) const override;
775 bool mergeStoresAfterLegalization() const override { return true; }
777 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
778 const SelectionDAG &DAG) const override;
780 bool isCheapToSpeculateCttz() const override;
782 bool isCheapToSpeculateCtlz() const override;
784 bool isCtlzFast() const override;
786 bool hasBitPreservingFPLogic(EVT VT) const override {
787 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
788 }
790 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
791 // If the pair to store is a mixture of float and int values, we will
792 // save two bitwise instructions and one float-to-int instruction and
793 // increase one store instruction. There is potentially a more
794 // significant benefit because it avoids the float->int domain switch
795 // for the input value. So it is more likely a win.
796 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
797 (LTy.isInteger() && HTy.isFloatingPoint()))
798 return true;
799 // If the pair only contains int values, we will save two bitwise
800 // instructions and increase one store instruction (costing one more
801 // store buffer). Since the benefit is more blurred, we leave
802 // such pairs out until we get a testcase to prove it is a win.
803 return false;
804 }
806 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
808 bool hasAndNotCompare(SDValue Y) const override;
810 bool hasAndNot(SDValue Y) const override;
812 bool preferShiftsToClearExtremeBits(SDValue Y) const override;
814 bool
815 shouldTransformSignedTruncationCheck(EVT XVT,
816 unsigned KeptBits) const override {
817 // For vectors, we don't have a preference.
818 if (XVT.isVector())
819 return false;
821 auto VTIsOk = [](EVT VT) -> bool {
822 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
823 VT == MVT::i64;
824 };
826 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
827 // XVT will be larger than KeptBitsVT.
828 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
829 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
830 }
832 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
833 if (DAG.getMachineFunction().getFunction().optForMinSize())
834 return false;
835 return true;
836 }
838 bool shouldSplatInsEltVarIndex(EVT VT) const override;
840 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
841 return VT.isScalarInteger();
842 }
844 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
845 MVT hasFastEqualityCompare(unsigned NumBits) const override;
847 /// Allow multiple load pairs per block for smaller and faster code.
848 unsigned getMemcmpEqZeroLoadsPerBlock() const override {
849 return 2;
850 }
852 /// Return the value type to use for ISD::SETCC.
853 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
854 EVT VT) const override;
856 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
857 TargetLoweringOpt &TLO) const override;
859 /// Determine which of the bits specified in Mask are known to be either
860 /// zero or one and return them in the KnownZero/KnownOne bitsets.
861 void computeKnownBitsForTargetNode(const SDValue Op,
862 KnownBits &Known,
863 const APInt &DemandedElts,
864 const SelectionDAG &DAG,
865 unsigned Depth = 0) const override;
867 /// Determine the number of bits in the operation that are sign bits.
868 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
869 const APInt &DemandedElts,
870 const SelectionDAG &DAG,
871 unsigned Depth) const override;
873 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
874 const APInt &DemandedElts,
875 APInt &KnownUndef,
876 APInt &KnownZero,
877 TargetLoweringOpt &TLO,
878 unsigned Depth) const override;
880 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
881 const APInt &DemandedBits,
882 const APInt &DemandedElts,
883 KnownBits &Known,
884 TargetLoweringOpt &TLO,
885 unsigned Depth) const override;
887 SDValue unwrapAddress(SDValue N) const override;
889 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
891 bool ExpandInlineAsm(CallInst *CI) const override;
893 ConstraintType getConstraintType(StringRef Constraint) const override;
895 /// Examine constraint string and operand type and determine a weight value.
896 /// The operand object must already have been set up with the operand type.
897 ConstraintWeight
898 getSingleConstraintMatchWeight(AsmOperandInfo &info,
899 const char *constraint) const override;
901 const char *LowerXConstraint(EVT ConstraintVT) const override;
903 /// Lower the specified operand into the Ops vector. If it is invalid, don't
904 /// add anything to Ops. If hasMemory is true it means one of the asm
905 /// constraints of the inline asm instruction being processed is 'm'.
906 void LowerAsmOperandForConstraint(SDValue Op,
907 std::string &Constraint,
908 std::vector<SDValue> &Ops,
909 SelectionDAG &DAG) const override;
911 unsigned
912 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
913 if (ConstraintCode == "i")
914 return InlineAsm::Constraint_i;
915 else if (ConstraintCode == "o")
916 return InlineAsm::Constraint_o;
917 else if (ConstraintCode == "v")
918 return InlineAsm::Constraint_v;
919 else if (ConstraintCode == "X")
920 return InlineAsm::Constraint_X;
921 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
922 }
924 /// Handle Lowering flag assembly outputs.
925 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
926 const AsmOperandInfo &Constraint,
927 SelectionDAG &DAG) const override;
929 /// Given a physical register constraint
930 /// (e.g. {edx}), return the register number and the register class for the
931 /// register. This should only be used for C_Register constraints. On
932 /// error, this returns a register number of 0.
933 std::pair<unsigned, const TargetRegisterClass *>
934 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
935 StringRef Constraint, MVT VT) const override;
937 /// Return true if the addressing mode represented
938 /// by AM is legal for this target, for a load/store of the specified type.
939 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
940 Type *Ty, unsigned AS,
941 Instruction *I = nullptr) const override;
943 /// Return true if the specified immediate is legal
944 /// icmp immediate, that is the target has icmp instructions which can
945 /// compare a register against the immediate without having to materialize
946 /// the immediate into a register.
947 bool isLegalICmpImmediate(int64_t Imm) const override;
949 /// Return true if the specified immediate is legal
950 /// add immediate, that is the target has add instructions which can
951 /// add a register and the immediate without having to materialize
952 /// the immediate into a register.
953 bool isLegalAddImmediate(int64_t Imm) const override;
955 bool isLegalStoreImmediate(int64_t Imm) const override;
957 /// Return the cost of the scaling factor used in the addressing
958 /// mode represented by AM for this target, for a load/store
959 /// of the specified type.
960 /// If the AM is supported, the return value must be >= 0.
961 /// If the AM is not supported, it returns a negative value.
962 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
963 unsigned AS) const override;
965 bool isVectorShiftByScalarCheap(Type *Ty) const override;
967 /// Return true if it's free to truncate a value of
968 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
969 /// register EAX to i16 by referencing its sub-register AX.
970 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
971 bool isTruncateFree(EVT VT1, EVT VT2) const override;
973 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
975 /// Return true if any actual instruction that defines a
976 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
977 /// register. This does not necessarily include registers defined in
978 /// unknown ways, such as incoming arguments, or copies from unknown
979 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
980 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
981 /// all instructions that define 32-bit values implicit zero-extend the
982 /// result out to 64 bits.
983 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
984 bool isZExtFree(EVT VT1, EVT VT2) const override;
985 bool isZExtFree(SDValue Val, EVT VT2) const override;
987 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
988 /// extend node) is profitable.
989 bool isVectorLoadExtDesirable(SDValue) const override;
991 /// Return true if an FMA operation is faster than a pair of fmul and fadd
992 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
993 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
994 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
996 /// Return true if it's profitable to narrow
997 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
998 /// from i32 to i8 but not from i32 to i16.
999 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1001 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1002 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1003 /// true and stores the intrinsic information into the IntrinsicInfo that was
1004 /// passed to the function.
1005 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1006 MachineFunction &MF,
1007 unsigned Intrinsic) const override;
1009 /// Returns true if the target can instruction select the
1010 /// specified FP immediate natively. If false, the legalizer will
1011 /// materialize the FP immediate as a load from a constant pool.
1012 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
1014 /// Targets can use this to indicate that they only support *some*
1015 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1016 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1017 /// be legal.
1018 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1020 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1021 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1022 /// constant pool entry.
1023 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1025 /// Returns true if lowering to a jump table is allowed.
1026 bool areJTsAllowed(const Function *Fn) const override;
1028 /// If true, then instruction selection should
1029 /// seek to shrink the FP constant of the specified type to a smaller type
1030 /// in order to save space and / or reduce runtime.
1031 bool ShouldShrinkFPConstant(EVT VT) const override {
1032 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1033 // expensive than a straight movsd. On the other hand, it's important to
1034 // shrink long double fp constant since fldt is very slow.
1035 return !X86ScalarSSEf64 || VT == MVT::f80;
1036 }
1038 /// Return true if we believe it is correct and profitable to reduce the
1039 /// load node to a smaller type.
1040 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1041 EVT NewVT) const override;
1043 /// Return true if the specified scalar FP type is computed in an SSE
1044 /// register, not on the X87 floating point stack.
1045 bool isScalarFPTypeInSSEReg(EVT VT) const {
1046 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1047 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
1048 }
1050 /// Returns true if it is beneficial to convert a load of a constant
1051 /// to just the constant itself.
1052 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1053 Type *Ty) const override;
1055 bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1057 bool convertSelectOfConstantsToMath(EVT VT) const override;
1059 bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1061 bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1062 bool IsSigned) const override;
1064 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1065 /// with this index.
1066 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1067 unsigned Index) const override;
1069 /// Scalar ops always have equal or better analysis/performance/power than
1070 /// the vector equivalent, so this always makes sense if the scalar op is
1071 /// supported.
1072 bool shouldScalarizeBinop(SDValue) const override;
1074 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1075 unsigned AddrSpace) const override {
1076 // If we can replace more than 2 scalar stores, there will be a reduction
1077 // in instructions even after we add a vector constant load.
1078 return NumElem > 2;
1079 }
1081 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1083 /// Intel processors have a unified instruction and data cache
1084 const char * getClearCacheBuiltinName() const override {
1085 return nullptr; // nothing to do, move along.
1086 }
1088 unsigned getRegisterByName(const char* RegName, EVT VT,
1089 SelectionDAG &DAG) const override;
1091 /// If a physical register, this returns the register that receives the
1092 /// exception address on entry to an EH pad.
1093 unsigned
1094 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1096 /// If a physical register, this returns the register that receives the
1097 /// exception typeid on entry to a landing pad.
1098 unsigned
1099 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1101 virtual bool needsFixedCatchObjects() const override;
1103 /// This method returns a target specific FastISel object,
1104 /// or null if the target does not support "fast" ISel.
1105 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1106 const TargetLibraryInfo *libInfo) const override;
1108 /// If the target has a standard location for the stack protector cookie,
1109 /// returns the address of that location. Otherwise, returns nullptr.
1110 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1112 bool useLoadStackGuardNode() const override;
1113 bool useStackGuardXorFP() const override;
1114 void insertSSPDeclarations(Module &M) const override;
1115 Value *getSDagStackGuard(const Module &M) const override;
1116 Function *getSSPStackGuardCheck(const Module &M) const override;
1117 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1118 const SDLoc &DL) const override;
1121 /// Return true if the target stores SafeStack pointer at a fixed offset in
1122 /// some non-standard address space, and populates the address space and
1123 /// offset as appropriate.
1124 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1126 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1127 SelectionDAG &DAG) const;
1129 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1131 /// Customize the preferred legalization strategy for certain types.
1132 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1134 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1135 EVT VT) const override;
1137 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1138 CallingConv::ID CC,
1139 EVT VT) const override;
1141 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1143 bool supportSwiftError() const override;
1145 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1147 bool hasVectorBlend() const override { return true; }
1149 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1151 /// Lower interleaved load(s) into target specific
1152 /// instructions/intrinsics.
1153 bool lowerInterleavedLoad(LoadInst *LI,
1154 ArrayRef<ShuffleVectorInst *> Shuffles,
1155 ArrayRef<unsigned> Indices,
1156 unsigned Factor) const override;
1158 /// Lower interleaved store(s) into target specific
1159 /// instructions/intrinsics.
1160 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1161 unsigned Factor) const override;
1163 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1164 SDValue Addr, SelectionDAG &DAG)
1165 const override;
1167 protected:
1168 std::pair<const TargetRegisterClass *, uint8_t>
1169 findRepresentativeClass(const TargetRegisterInfo *TRI,
1170 MVT VT) const override;
1172 private:
1173 /// Keep a reference to the X86Subtarget around so that we can
1174 /// make the right decision when generating code for different targets.
1175 const X86Subtarget &Subtarget;
1177 /// Select between SSE or x87 floating point ops.
1178 /// When SSE is available, use it for f32 operations.
1179 /// When SSE2 is available, use it for f64 operations.
1180 bool X86ScalarSSEf32;
1181 bool X86ScalarSSEf64;
1183 /// A list of legal FP immediates.
1184 std::vector<APFloat> LegalFPImmediates;
1186 /// Indicate that this x86 target can instruction
1187 /// select the specified FP immediate natively.
1188 void addLegalFPImmediate(const APFloat& Imm) {
1189 LegalFPImmediates.push_back(Imm);
1190 }
1192 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1193 CallingConv::ID CallConv, bool isVarArg,
1194 const SmallVectorImpl<ISD::InputArg> &Ins,
1195 const SDLoc &dl, SelectionDAG &DAG,
1196 SmallVectorImpl<SDValue> &InVals,
1197 uint32_t *RegMask) const;
1198 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1199 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1200 const SDLoc &dl, SelectionDAG &DAG,
1201 const CCValAssign &VA, MachineFrameInfo &MFI,
1202 unsigned i) const;
1203 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1204 const SDLoc &dl, SelectionDAG &DAG,
1205 const CCValAssign &VA,
1206 ISD::ArgFlagsTy Flags) const;
1208 // Call lowering helpers.
1210 /// Check whether the call is eligible for tail call optimization. Targets
1211 /// that want to do tail call optimization should implement this function.
1212 bool IsEligibleForTailCallOptimization(SDValue Callee,
1213 CallingConv::ID CalleeCC,
1214 bool isVarArg,
1215 bool isCalleeStructRet,
1216 bool isCallerStructRet,
1217 Type *RetTy,
1218 const SmallVectorImpl<ISD::OutputArg> &Outs,
1219 const SmallVectorImpl<SDValue> &OutVals,
1220 const SmallVectorImpl<ISD::InputArg> &Ins,
1221 SelectionDAG& DAG) const;
1222 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1223 SDValue Chain, bool IsTailCall,
1224 bool Is64Bit, int FPDiff,
1225 const SDLoc &dl) const;
1227 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1228 SelectionDAG &DAG) const;
1230 unsigned getAddressSpace(void) const;
1232 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1234 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1235 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1236 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1237 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1239 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1240 const unsigned char OpFlags = 0) const;
1241 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1242 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1243 SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1244 int64_t Offset, SelectionDAG &DAG) const;
1245 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1246 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1247 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1249 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1250 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1251 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1252 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1253 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1254 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1255 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1256 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1257 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1258 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1259 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1260 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1261 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1262 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1263 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1264 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1265 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1266 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1267 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1268 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1269 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1270 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1271 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1272 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1273 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1274 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1276 SDValue
1277 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1278 const SmallVectorImpl<ISD::InputArg> &Ins,
1279 const SDLoc &dl, SelectionDAG &DAG,
1280 SmallVectorImpl<SDValue> &InVals) const override;
1281 SDValue LowerCall(CallLoweringInfo &CLI,
1282 SmallVectorImpl<SDValue> &InVals) const override;
1284 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1285 const SmallVectorImpl<ISD::OutputArg> &Outs,
1286 const SmallVectorImpl<SDValue> &OutVals,
1287 const SDLoc &dl, SelectionDAG &DAG) const override;
1289 bool supportSplitCSR(MachineFunction *MF) const override {
1290 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1291 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1292 }
1293 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1294 void insertCopiesSplitCSR(
1295 MachineBasicBlock *Entry,
1296 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1298 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1300 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1302 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1303 ISD::NodeType ExtendKind) const override;
1305 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1306 bool isVarArg,
1307 const SmallVectorImpl<ISD::OutputArg> &Outs,
1308 LLVMContext &Context) const override;
1310 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1312 TargetLoweringBase::AtomicExpansionKind
1313 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1314 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1315 TargetLoweringBase::AtomicExpansionKind
1316 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1318 LoadInst *
1319 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1321 bool needsCmpXchgNb(Type *MemType) const;
1323 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1324 MachineBasicBlock *DispatchBB, int FI) const;
1326 // Utility function to emit the low-level va_arg code for X86-64.
1327 MachineBasicBlock *
1328 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1329 MachineBasicBlock *MBB) const;
1331 /// Utility function to emit the xmm reg save portion of va_start.
1332 MachineBasicBlock *
1333 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1334 MachineBasicBlock *BB) const;
1336 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1337 MachineInstr &MI2,
1338 MachineBasicBlock *BB) const;
1340 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1341 MachineBasicBlock *BB) const;
1343 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1344 MachineBasicBlock *BB) const;
1346 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1347 MachineBasicBlock *BB) const;
1349 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1350 MachineBasicBlock *BB) const;
1352 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1353 MachineBasicBlock *BB) const;
1355 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1356 MachineBasicBlock *BB) const;
1358 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1359 MachineBasicBlock *BB) const;
1361 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1362 MachineBasicBlock *BB) const;
1364 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1365 MachineBasicBlock *MBB) const;
1367 void emitSetJmpShadowStackFix(MachineInstr &MI,
1368 MachineBasicBlock *MBB) const;
1370 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1371 MachineBasicBlock *MBB) const;
1373 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1374 MachineBasicBlock *MBB) const;
1376 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1377 MachineBasicBlock *MBB) const;
1379 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1380 MachineBasicBlock *MBB) const;
1382 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1383 /// equivalent, for use with the given x86 condition code.
1384 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1385 SelectionDAG &DAG) const;
1387 /// Convert a comparison if required by the subtarget.
1388 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1390 /// Emit flags for the given setcc condition and operands. Also returns the
1391 /// corresponding X86 condition code constant in X86CC.
1392 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1393 ISD::CondCode CC, const SDLoc &dl,
1394 SelectionDAG &DAG,
1395 SDValue &X86CC) const;
1397 /// Check if replacement of SQRT with RSQRT should be disabled.
1398 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1400 /// Use rsqrt* to speed up sqrt calculations.
1401 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1402 int &RefinementSteps, bool &UseOneConstNR,
1403 bool Reciprocal) const override;
1405 /// Use rcp* to speed up fdiv calculations.
1406 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1407 int &RefinementSteps) const override;
1409 /// Reassociate floating point divisions into multiply by reciprocal.
1410 unsigned combineRepeatedFPDivisors() const override;
1411 };
1413 namespace X86 {
1414 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1415 const TargetLibraryInfo *libInfo);
1416 } // end namespace X86
1418 // Base class for all X86 non-masked store operations.
1419 class X86StoreSDNode : public MemSDNode {
1420 public:
1421 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1422 SDVTList VTs, EVT MemVT,
1423 MachineMemOperand *MMO)
1424 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1425 const SDValue &getValue() const { return getOperand(1); }
1426 const SDValue &getBasePtr() const { return getOperand(2); }
1428 static bool classof(const SDNode *N) {
1429 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1430 N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1431 }
1432 };
1434 // Base class for all X86 masked store operations.
1435 // The class has the same order of operands as MaskedStoreSDNode for
1436 // convenience.
1437 class X86MaskedStoreSDNode : public MemSDNode {
1438 public:
1439 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1440 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1441 MachineMemOperand *MMO)
1442 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1444 const SDValue &getValue() const { return getOperand(1); }
1445 const SDValue &getBasePtr() const { return getOperand(2); }
1446 const SDValue &getMask() const { return getOperand(3); }
1448 static bool classof(const SDNode *N) {
1449 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1450 N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1451 }
1452 };
1454 // X86 Truncating Store with Signed saturation.
1455 class TruncSStoreSDNode : public X86StoreSDNode {
1456 public:
1457 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1458 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1459 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1461 static bool classof(const SDNode *N) {
1462 return N->getOpcode() == X86ISD::VTRUNCSTORES;
1463 }
1464 };
1466 // X86 Truncating Store with Unsigned saturation.
1467 class TruncUSStoreSDNode : public X86StoreSDNode {
1468 public:
1469 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1470 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1471 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1473 static bool classof(const SDNode *N) {
1474 return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1475 }
1476 };
1478 // X86 Truncating Masked Store with Signed saturation.
1479 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1480 public:
1481 MaskedTruncSStoreSDNode(unsigned Order,
1482 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1483 MachineMemOperand *MMO)
1484 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1486 static bool classof(const SDNode *N) {
1487 return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1488 }
1489 };
1491 // X86 Truncating Masked Store with Unsigned saturation.
1492 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1493 public:
1494 MaskedTruncUSStoreSDNode(unsigned Order,
1495 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1496 MachineMemOperand *MMO)
1497 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1499 static bool classof(const SDNode *N) {
1500 return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1501 }
1502 };
1504 // X86 specific Gather/Scatter nodes.
1505 // The class has the same order of operands as MaskedGatherScatterSDNode for
1506 // convenience.
1507 class X86MaskedGatherScatterSDNode : public MemSDNode {
1508 public:
1509 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1510 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1511 MachineMemOperand *MMO)
1512 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1514 const SDValue &getBasePtr() const { return getOperand(3); }
1515 const SDValue &getIndex() const { return getOperand(4); }
1516 const SDValue &getMask() const { return getOperand(2); }
1517 const SDValue &getScale() const { return getOperand(5); }
1519 static bool classof(const SDNode *N) {
1520 return N->getOpcode() == X86ISD::MGATHER ||
1521 N->getOpcode() == X86ISD::MSCATTER;
1522 }
1523 };
1525 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1526 public:
1527 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1528 EVT MemVT, MachineMemOperand *MMO)
1529 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1530 MMO) {}
1532 const SDValue &getPassThru() const { return getOperand(1); }
1534 static bool classof(const SDNode *N) {
1535 return N->getOpcode() == X86ISD::MGATHER;
1536 }
1537 };
1539 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1540 public:
1541 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1542 EVT MemVT, MachineMemOperand *MMO)
1543 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1544 MMO) {}
1546 const SDValue &getValue() const { return getOperand(1); }
1548 static bool classof(const SDNode *N) {
1549 return N->getOpcode() == X86ISD::MSCATTER;
1550 }
1551 };
1553 /// Generate unpacklo/unpackhi shuffle mask.
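/// For illustration (derived from the loop below): for a v4i32 type with
/// Lo = true and Unary = false this produces the mask <0, 4, 1, 5>, matching
/// the UNPCKLPS/PUNPCKLDQ interleave order.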
1554 template <typename T = int>
1555 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1556 bool Unary) {
1557 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1558 int NumElts = VT.getVectorNumElements();
1559 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1560 for (int i = 0; i < NumElts; ++i) {
1561 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1562 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1563 Pos += (Unary ? 0 : NumElts * (i % 2));
1564 Pos += (Lo ? 0 : NumEltsInLane / 2);
1565 Mask.push_back(Pos);
1566 }
1567 }
1569 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1570 /// mask index with the scaled sequential indices for an equivalent narrowed
1571 /// mask. This is the reverse process to canWidenShuffleElements, but can
1572 /// always succeed.
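/// For illustration (derived from the loop below): with Scale = 2 and
/// Mask = <0, -1> the scaled mask is <0, 1, -1, -1>; sentinel (negative)
/// entries are simply repeated.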
1573 template <typename T>
1574 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1575 SmallVectorImpl<T> &ScaledMask) {
1576 assert(0 < Scale && "Unexpected scaling factor");
1577 int NumElts = Mask.size();
1578 ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1580 for (int i = 0; i != NumElts; ++i) {
1581 int M = Mask[i];
1583 // Repeat sentinel values in every mask element.
1584 if (M < 0) {
1585 for (int s = 0; s != Scale; ++s)
1586 ScaledMask[(Scale * i) + s] = M;
1587 continue;
1588 }
1590 // Scale mask element and increment across each mask element.
1591 for (int s = 0; s != Scale; ++s)
1592 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1593 }
1594 }
1595 } // end namespace llvm
1597 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H