1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
20 namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
35 /// X86 funnel/double shift i16 instructions. These correspond to
36 /// X86::SHLDW and X86::SHRDW instructions which have different amt
37 /// modulo rules to generic funnel shifts.
38 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39 FSHL,
40 FSHR,
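// (Illustrative note added for clarity, not wording from the node definition:
//  as with ISD::FSHL, FSHL(X, Y, Z) conceptually shifts the double-width value
//  X:Y left by Z and returns the upper half, while FSHR shifts right and
//  returns the lower half; only the shift-amount modulo behavior differs for
//  these i16 forms.)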
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of these node are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
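// Illustrative sketch (an assumption about typical use, not code from this
// header): call lowering generally builds such a node roughly as
//   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
//   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
// where Ops begins with the incoming token chain and the callee as described
// above.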
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 // Pseudo for an ObjC call that gets emitted together with a special
81 // marker instruction.
82 CALL_RVMARKER,
84 /// X86 compare and logical compare instructions.
85 CMP,
86 FCMP,
87 COMI,
88 UCOMI,
90 /// X86 bit-test instructions.
91 BT,
93 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94 /// operand, usually produced by a CMP instruction.
95 SETCC,
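// Illustrative sketch (assumed typical construction, not code from this
// header):
//   SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
//                               DAG.getTargetConstant(Cond, DL, MVT::i8),
//                               EFLAGS);
// i.e. an i8 result computed from a condition-code constant and EFLAGS.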
97 /// X86 Select
98 SELECTS,
100 // Same as SETCC except it's materialized with an sbb and the value is all
101 // ones or all zeros.
102 SETCC_CARRY, // R = carry_bit ? ~0 : 0
104 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105 /// Operands are two FP values to compare; result is a mask of
106 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
107 FSETCC,
109 /// X86 FP SETCC, similar to above, but with output as an i1 mask
110 /// and a version with SAE.
111 FSETCCM,
112 FSETCCM_SAE,
114 /// X86 conditional moves. Operand 0 and operand 1 are the two values
115 /// to select from. Operand 2 is the condition code, and operand 3 is the
116 /// flag operand produced by a CMP or TEST instruction.
117 CMOV,
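// Illustrative sketch (restating the operand order above; an assumed typical
// construction, not code from this header):
//   SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, Val0, Val1,
//                              DAG.getTargetConstant(CC, DL, MVT::i8), EFLAGS);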
119 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120 /// is the block to branch if condition is true, operand 2 is the
121 /// condition code, and operand 3 is the flag operand produced by a CMP
122 /// or TEST instruction.
123 BRCOND,
125 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126 /// operand 1 is the target address.
127 NT_BRIND,
129 /// Return with a flag operand. Operand 0 is the chain operand, operand
130 /// 1 is the number of bytes of stack to pop.
131 RET_FLAG,
133 /// Return from interrupt. Operand 0 is the number of bytes to pop.
134 IRET,
136 /// Repeat fill, corresponds to X86::REP_STOSx.
137 REP_STOS,
139 /// Repeat move, corresponds to X86::REP_MOVSx.
140 REP_MOVS,
142 /// On Darwin, this node represents the result of the popl
143 /// at function entry, used for PIC code.
144 GlobalBaseReg,
146 /// A wrapper node for TargetConstantPool, TargetJumpTable,
147 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148 /// MCSymbol and TargetBlockAddress.
149 Wrapper,
151 /// Special wrapper used under X86-64 PIC mode for RIP
152 /// relative displacements.
153 WrapperRIP,
155 /// Copies a 64-bit value from an MMX vector to the low word
156 /// of an XMM vector, with the high word zero filled.
157 MOVQ2DQ,
159 /// Copies a 64-bit value from the low word of an XMM vector
160 /// to an MMX vector.
161 MOVDQ2Q,
163 /// Copies a 32-bit value from the low word of an MMX
164 /// vector to a GPR.
165 MMX_MOVD2W,
167 /// Copies a GPR into the low 32-bit word of an MMX vector
168 /// and zeroes out the high word.
169 MMX_MOVW2D,
171 /// Extract an 8-bit value from a vector and zero extend it to
172 /// i32, corresponds to X86::PEXTRB.
173 PEXTRB,
175 /// Extract a 16-bit value from a vector and zero extend it to
176 /// i32, corresponds to X86::PEXTRW.
177 PEXTRW,
179 /// Insert any element of a 4 x float vector into any element
180 /// of a destination 4 x float vector.
181 INSERTPS,
183 /// Insert the lower 8 bits of a 32-bit value into a vector,
184 /// corresponds to X86::PINSRB.
185 PINSRB,
187 /// Insert the lower 16 bits of a 32-bit value into a vector,
188 /// corresponds to X86::PINSRW.
189 PINSRW,
191 /// Shuffle 16 8-bit values within a vector.
192 PSHUFB,
194 /// Compute Sum of Absolute Differences.
195 PSADBW,
196 /// Compute Double Block Packed Sum-Absolute-Differences
197 DBPSADBW,
199 /// Bitwise Logical AND NOT of Packed FP values.
200 ANDNP,
202 /// Blend where the selector is an immediate.
203 BLENDI,
205 /// Dynamic (non-constant condition) vector blend where only the sign bits
206 /// of the condition elements are used. This is used to enforce that the
207 /// condition mask is not valid for generic VSELECT optimizations. This
208 /// is also used to implement the intrinsics.
209 /// Operands are in VSELECT order: MASK, TRUE, FALSE
210 BLENDV,
212 /// Combined add and sub on an FP vector.
213 ADDSUB,
215 // FP vector ops with rounding mode.
216 FADD_RND,
217 FADDS,
218 FADDS_RND,
219 FSUB_RND,
220 FSUBS,
221 FSUBS_RND,
222 FMUL_RND,
223 FMULS,
224 FMULS_RND,
225 FDIV_RND,
226 FDIVS,
227 FDIVS_RND,
228 FMAX_SAE,
229 FMAXS_SAE,
230 FMIN_SAE,
231 FMINS_SAE,
232 FSQRT_RND,
233 FSQRTS,
234 FSQRTS_RND,
236 // FP vector get exponent.
237 FGETEXP,
238 FGETEXP_SAE,
239 FGETEXPS,
240 FGETEXPS_SAE,
241 // Extract Normalized Mantissas.
242 VGETMANT,
243 VGETMANT_SAE,
244 VGETMANTS,
245 VGETMANTS_SAE,
246 // FP Scale.
247 SCALEF,
248 SCALEF_RND,
249 SCALEFS,
250 SCALEFS_RND,
252 // Unsigned Integer average.
253 AVG,
255 /// Integer horizontal add/sub.
256 HADD,
257 HSUB,
259 /// Floating point horizontal add/sub.
260 FHADD,
261 FHSUB,
263 // Detect Conflicts Within a Vector
264 CONFLICT,
266 /// Floating point max and min.
267 FMAX,
268 FMIN,
270 /// Commutative FMIN and FMAX.
271 FMAXC,
272 FMINC,
274 /// Scalar intrinsic floating point max and min.
275 FMAXS,
276 FMINS,
278 /// Floating point reciprocal-sqrt and reciprocal approximation.
279 /// Note that these typically require refinement
280 /// in order to obtain suitable precision.
281 FRSQRT,
282 FRCP,
284 // AVX-512 reciprocal approximations with a little more precision.
285 RSQRT14,
286 RSQRT14S,
287 RCP14,
288 RCP14S,
290 // Thread Local Storage.
291 TLSADDR,
293 // Thread Local Storage. A call to get the start address
294 // of the TLS block for the current module.
295 TLSBASEADDR,
297 // Thread Local Storage. When calling to an OS provided
298 // thunk at the address from an earlier relocation.
299 TLSCALL,
301 // Exception Handling helpers.
302 EH_RETURN,
304 // SjLj exception handling setjmp.
305 EH_SJLJ_SETJMP,
307 // SjLj exception handling longjmp.
308 EH_SJLJ_LONGJMP,
310 // SjLj exception handling dispatch.
311 EH_SJLJ_SETUP_DISPATCH,
313 /// Tail call return. See X86TargetLowering::LowerCall for
314 /// the list of operands.
315 TC_RETURN,
317 // Vector move to low scalar and zero higher vector elements.
318 VZEXT_MOVL,
320 // Vector integer truncate.
321 VTRUNC,
322 // Vector integer truncate with unsigned/signed saturation.
323 VTRUNCUS,
324 VTRUNCS,
326 // Masked version of the above. Used when less than a 128-bit result is
327 // produced since the mask only applies to the lower elements and can't
328 // be represented by a select.
329 // SRC, PASSTHRU, MASK
330 VMTRUNC,
331 VMTRUNCUS,
332 VMTRUNCS,
334 // Vector FP extend.
335 VFPEXT,
336 VFPEXT_SAE,
337 VFPEXTS,
338 VFPEXTS_SAE,
340 // Vector FP round.
341 VFPROUND,
342 VFPROUND_RND,
343 VFPROUNDS,
344 VFPROUNDS_RND,
346 // Masked version of above. Used for v2f64->v4f32.
347 // SRC, PASSTHRU, MASK
348 VMFPROUND,
350 // 128-bit vector logical left / right shift
351 VSHLDQ,
352 VSRLDQ,
354 // Vector shift elements
355 VSHL,
356 VSRL,
357 VSRA,
359 // Vector variable shift
360 VSHLV,
361 VSRLV,
362 VSRAV,
364 // Vector shift elements by immediate
365 VSHLI,
366 VSRLI,
367 VSRAI,
369 // Shifts of mask registers.
370 KSHIFTL,
371 KSHIFTR,
373 // Bit rotate by immediate
374 VROTLI,
375 VROTRI,
377 // Vector packed double/float comparison.
378 CMPP,
380 // Vector integer comparisons.
381 PCMPEQ,
382 PCMPGT,
384 // v8i16 Horizontal minimum and position.
385 PHMINPOS,
387 MULTISHIFT,
389 /// Vector comparison generating mask bits for fp and
390 /// integer signed and unsigned data types.
391 CMPM,
392 // Vector mask comparison generating mask bits for FP values.
393 CMPMM,
394 // Vector mask comparison with SAE for FP values.
395 CMPMM_SAE,
397 // Arithmetic operations with FLAGS results.
398 ADD,
399 SUB,
400 ADC,
401 SBB,
402 SMUL,
403 UMUL,
405 XOR,
406 AND,
408 // Bit field extract.
409 BEXTR,
410 BEXTRI,
412 // Zero High Bits Starting with Specified Bit Position.
413 BZHI,
415 // Parallel extract and deposit.
416 PDEP,
417 PEXT,
419 // X86-specific multiply by immediate.
420 MUL_IMM,
422 // Vector sign bit extraction.
423 MOVMSK,
425 // Vector bitwise comparisons.
426 PTEST,
428 // Vector packed fp sign bitwise comparisons.
429 TESTP,
431 // OR/AND test for masks.
432 KORTEST,
433 KTEST,
435 // ADD for masks.
436 KADD,
438 // Several flavors of instructions with vector shuffle behaviors.
439 // Saturated signed/unsigned packing.
440 PACKSS,
441 PACKUS,
442 // Intra-lane alignr.
443 PALIGNR,
444 // AVX512 inter-lane alignr.
445 VALIGN,
446 PSHUFD,
447 PSHUFHW,
448 PSHUFLW,
449 SHUFP,
450 // VBMI2 Concat & Shift.
451 VSHLD,
452 VSHRD,
453 VSHLDV,
454 VSHRDV,
455 // Shuffle Packed Values at 128-bit granularity.
456 SHUF128,
457 MOVDDUP,
458 MOVSHDUP,
459 MOVSLDUP,
460 MOVLHPS,
461 MOVHLPS,
462 MOVSD,
463 MOVSS,
464 MOVSH,
465 UNPCKL,
466 UNPCKH,
467 VPERMILPV,
468 VPERMILPI,
469 VPERMI,
470 VPERM2X128,
472 // Variable Permute (VPERM).
473 // Res = VPERMV MaskV, V0
474 VPERMV,
476 // 3-op Variable Permute (VPERMT2).
477 // Res = VPERMV3 V0, MaskV, V1
478 VPERMV3,
480 // Bitwise ternary logic.
481 VPTERNLOG,
482 // Fix Up Special Packed Float32/64 values.
483 VFIXUPIMM,
484 VFIXUPIMM_SAE,
485 VFIXUPIMMS,
486 VFIXUPIMMS_SAE,
487 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
488 VRANGE,
489 VRANGE_SAE,
490 VRANGES,
491 VRANGES_SAE,
492 // Reduce - Perform Reduction Transformation on scalar/packed FP.
493 VREDUCE,
494 VREDUCE_SAE,
495 VREDUCES,
496 VREDUCES_SAE,
497 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
498 // Also used by the legacy (V)ROUND intrinsics where we mask out the
499 // scaling part of the immediate.
500 VRNDSCALE,
501 VRNDSCALE_SAE,
502 VRNDSCALES,
503 VRNDSCALES_SAE,
504 // Tests types of FP values for packed types.
505 VFPCLASS,
506 // Tests types of FP values for scalar types.
507 VFPCLASSS,
509 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
510 // a vector, this node may change the vector length as part of the splat.
511 VBROADCAST,
512 // Broadcast mask to vector.
513 VBROADCASTM,
515 /// SSE4A Extraction and Insertion.
516 EXTRQI,
517 INSERTQI,
519 // XOP arithmetic/logical shifts.
520 VPSHA,
521 VPSHL,
522 // XOP signed/unsigned integer comparisons.
523 VPCOM,
524 VPCOMU,
525 // XOP packed permute bytes.
526 VPPERM,
527 // XOP two source permutation.
528 VPERMIL2,
530 // Vector multiply packed unsigned doubleword integers.
531 PMULUDQ,
532 // Vector multiply packed signed doubleword integers.
533 PMULDQ,
534 // Vector Multiply Packed Unsigned Integers with Round and Scale.
535 MULHRS,
537 // Multiply and Add Packed Integers.
538 VPMADDUBSW,
539 VPMADDWD,
541 // AVX512IFMA multiply and add.
542 // NOTE: These are different from the instruction and perform
543 // op0 x op1 + op2.
544 VPMADD52L,
545 VPMADD52H,
547 // VNNI
548 VPDPBUSD,
549 VPDPBUSDS,
550 VPDPWSSD,
551 VPDPWSSDS,
553 // FMA nodes.
554 // We use the target independent ISD::FMA for the non-inverted case.
555 FNMADD,
556 FMSUB,
557 FNMSUB,
558 FMADDSUB,
559 FMSUBADD,
561 // FMA with rounding mode.
562 FMADD_RND,
563 FNMADD_RND,
564 FMSUB_RND,
565 FNMSUB_RND,
566 FMADDSUB_RND,
567 FMSUBADD_RND,
569 // Compress and expand.
570 COMPRESS,
571 EXPAND,
573 // Bits shuffle
574 VPSHUFBITQMB,
576 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
577 SINT_TO_FP_RND,
578 UINT_TO_FP_RND,
579 SCALAR_SINT_TO_FP,
580 SCALAR_UINT_TO_FP,
581 SCALAR_SINT_TO_FP_RND,
582 SCALAR_UINT_TO_FP_RND,
584 // Vector float/double to signed/unsigned integer.
585 CVTP2SI,
586 CVTP2UI,
587 CVTP2SI_RND,
588 CVTP2UI_RND,
589 // Scalar float/double to signed/unsigned integer.
590 CVTS2SI,
591 CVTS2UI,
592 CVTS2SI_RND,
593 CVTS2UI_RND,
595 // Vector float/double to signed/unsigned integer with truncation.
596 CVTTP2SI,
597 CVTTP2UI,
598 CVTTP2SI_SAE,
599 CVTTP2UI_SAE,
600 // Scalar float/double to signed/unsigned integer with truncation.
601 CVTTS2SI,
602 CVTTS2UI,
603 CVTTS2SI_SAE,
604 CVTTS2UI_SAE,
606 // Vector signed/unsigned integer to float/double.
607 CVTSI2P,
608 CVTUI2P,
610 // Masked versions of above. Used for v2f64->v4f32.
611 // SRC, PASSTHRU, MASK
612 MCVTP2SI,
613 MCVTP2UI,
614 MCVTTP2SI,
615 MCVTTP2UI,
616 MCVTSI2P,
617 MCVTUI2P,
619 // Vector float to bfloat16.
620 // Convert TWO packed single data to one packed BF16 data
621 CVTNE2PS2BF16,
622 // Convert packed single data to packed BF16 data
623 CVTNEPS2BF16,
624 // Masked version of above.
625 // SRC, PASSTHRU, MASK
626 MCVTNEPS2BF16,
628 // Dot product of BF16 pairs accumulated into
629 // packed single precision.
630 DPBF16PS,
632 // Save xmm argument registers to the stack, according to %al. An operator
633 // is needed so that this can be expanded with control flow.
634 VASTART_SAVE_XMM_REGS,
636 // Windows's _chkstk call to do stack probing.
637 WIN_ALLOCA,
639 // For allocating variable amounts of stack space when using
640 // segmented stacks. Checks if the current stacklet has enough space, and
641 // falls back to heap allocation if not.
642 SEG_ALLOCA,
644 // For allocating stack space when using stack clash protector.
645 // Allocation is performed by block, and each block is probed.
646 PROBED_ALLOCA,
648 // Memory barriers.
649 MEMBARRIER,
650 MFENCE,
652 // Get a random integer and indicate whether it is valid in CF.
653 RDRAND,
655 // Get a NIST SP800-90B & C compliant random integer and
656 // indicate whether it is valid in CF.
657 RDSEED,
659 // Protection keys
660 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
661 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
662 // value for ECX.
663 RDPKRU,
664 WRPKRU,
666 // SSE42 string comparisons.
667 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
668 // will emit one or two instructions based on which results are used. If
669 // flags and index/mask are both used, this allows us to use a single
670 // instruction since we won't have to pick an opcode for flags. Instead we
671 // can rely on the DAG to CSE everything and decide at isel.
672 PCMPISTR,
673 PCMPESTR,
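// Illustrative note (a sketch, not a guarantee from this header): a PCMPISTR
// node can therefore carry a result list along the lines of
//   (i32 Index, v16i8 Mask, i32 Flags)
// and instruction selection picks the PCMPISTRI or PCMPISTRM form depending on
// which of those results actually have uses.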
675 // Test if in transactional execution.
676 XTEST,
678 // ERI instructions.
679 RSQRT28,
680 RSQRT28_SAE,
681 RSQRT28S,
682 RSQRT28S_SAE,
683 RCP28,
684 RCP28_SAE,
685 RCP28S,
686 RCP28S_SAE,
687 EXP2,
688 EXP2_SAE,
690 // Conversions between float and half-float.
691 CVTPS2PH,
692 CVTPH2PS,
693 CVTPH2PS_SAE,
695 // Masked version of above.
696 // SRC, RND, PASSTHRU, MASK
697 MCVTPS2PH,
699 // Galois Field Arithmetic Instructions
700 GF2P8AFFINEINVQB,
701 GF2P8AFFINEQB,
702 GF2P8MULB,
704 // LWP insert record.
705 LWPINS,
707 // User level wait
708 UMWAIT,
709 TPAUSE,
711 // Enqueue Stores Instructions
712 ENQCMD,
713 ENQCMDS,
715 // For avx512-vp2intersect
716 VP2INTERSECT,
718 // User level interrupts - testui
719 TESTUI,
721 /// X86 strict FP compare instructions.
722 STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
723 STRICT_FCMPS,
725 // Vector packed double/float comparison.
726 STRICT_CMPP,
728 /// Vector comparison generating mask bits for fp and
729 /// integer signed and unsigned data types.
730 STRICT_CMPM,
732 // Vector float/double to signed/unsigned integer with truncation.
733 STRICT_CVTTP2SI,
734 STRICT_CVTTP2UI,
736 // Vector FP extend.
737 STRICT_VFPEXT,
739 // Vector FP round.
740 STRICT_VFPROUND,
742 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
743 // Also used by the legacy (V)ROUND intrinsics where we mask out the
744 // scaling part of the immediate.
745 STRICT_VRNDSCALE,
747 // Vector signed/unsigned integer to float/double.
748 STRICT_CVTSI2P,
749 STRICT_CVTUI2P,
751 // Strict FMA nodes.
752 STRICT_FNMADD,
753 STRICT_FMSUB,
754 STRICT_FNMSUB,
756 // Conversions between float and half-float.
757 STRICT_CVTPS2PH,
758 STRICT_CVTPH2PS,
760 // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
761 // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
763 // Compare and swap.
764 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
765 LCMPXCHG8_DAG,
766 LCMPXCHG16_DAG,
767 LCMPXCHG16_SAVE_RBX_DAG,
769 /// LOCK-prefixed arithmetic read-modify-write instructions.
770 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
771 LADD,
772 LSUB,
773 LOR,
774 LXOR,
775 LAND,
777 // Load, scalar_to_vector, and zero extend.
778 VZEXT_LOAD,
780 // extract_vector_elt, store.
781 VEXTRACT_STORE,
783 // scalar broadcast from memory.
784 VBROADCAST_LOAD,
786 // subvector broadcast from memory.
787 SUBV_BROADCAST_LOAD,
789 // Store FP control word into i16 memory.
790 FNSTCW16m,
792 // Load FP control word from i16 memory.
793 FLDCW16m,
795 /// This instruction implements FP_TO_SINT with the
796 /// integer destination in memory and a FP reg source. This corresponds
797 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
798 /// has two inputs (token chain and address) and two outputs (int value
799 /// and token chain). Memory VT specifies the type to store to.
800 FP_TO_INT_IN_MEM,
802 /// This instruction implements SINT_TO_FP with the
803 /// integer source in memory and FP reg result. This corresponds to the
804 /// X86::FILD*m instructions. It has two inputs (token chain and address)
805 /// and two outputs (FP value and token chain). The integer source type is
806 /// specified by the memory VT.
807 FILD,
809 /// This instruction implements a fp->int store from FP stack
810 /// slots. This corresponds to the fist instruction. It takes a
811 /// chain operand, value to store, address, and glue. The memory VT
812 /// specifies the type to store as.
813 FIST,
815 /// This instruction implements an extending load to FP stack slots.
816 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
817 /// operand, and ptr to load from. The memory VT specifies the type to
818 /// load from.
819 FLD,
821 /// This instruction implements a truncating store from FP stack
822 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
823 /// chain operand, value to store, address, and glue. The memory VT
824 /// specifies the type to store as.
825 FST,
827 /// These instructions grab the address of the next argument
828 /// from a va_list. (reads and modifies the va_list in memory)
829 VAARG_64,
830 VAARG_X32,
832 // Vector truncating store with unsigned/signed saturation
833 VTRUNCSTOREUS,
834 VTRUNCSTORES,
835 // Vector truncating masked store with unsigned/signed saturation
836 VMTRUNCSTOREUS,
837 VMTRUNCSTORES,
839 // X86 specific gather and scatter
840 MGATHER,
841 MSCATTER,
843 // Key locker nodes that produce flags.
844 AESENC128KL,
845 AESDEC128KL,
846 AESENC256KL,
847 AESDEC256KL,
848 AESENCWIDE128KL,
849 AESDECWIDE128KL,
850 AESENCWIDE256KL,
851 AESDECWIDE256KL,
853 // WARNING: Do not add anything at the end unless you want the node to
854 // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
855 // opcodes will be treated as target memory ops!
856 };
857 } // end namespace X86ISD
859 namespace X86 {
860 /// The current rounding mode is represented in bits 11:10 of the FP control
861 /// word. These values are the same as the corresponding constants for the
862 /// rounding mode used in glibc.
863 enum RoundingMode {
864 rmToNearest = 0, // FE_TONEAREST
865 rmDownward = 1 << 10, // FE_DOWNWARD
866 rmUpward = 2 << 10, // FE_UPWARD
867 rmTowardZero = 3 << 10, // FE_TOWARDZERO
868 rmMask = 3 << 10 // Bit mask selecting rounding mode
869 };
870 }
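// Usage sketch (illustrative assumption, not code from this file): given an
// integer holding the FP control word, the rounding-control field can be
// decoded with the constants above, e.g.
//   unsigned RC = ControlWord & X86::rmMask;
//   bool RoundsToNearest = (RC == X86::rmToNearest);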
872 /// Define some predicates that are used for node matching.
873 namespace X86 {
874 /// Returns true if Elt is a constant zero or floating point constant +0.0.
875 bool isZeroNode(SDValue Elt);
877 /// Returns true if the given offset can
878 /// fit into the displacement field of the instruction.
879 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
880 bool hasSymbolicDisplacement);
882 /// Determines whether the callee is required to pop its
883 /// own arguments. Callee pop is necessary to support tail calls.
884 bool isCalleePop(CallingConv::ID CallingConv,
885 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
887 /// If Op is a constant whose elements are all the same constant or
888 /// undefined, return true and return the constant value in \p SplatVal.
889 /// If we have undef bits that don't cover an entire element, we treat these
890 /// as zero if AllowPartialUndefs is set, else we fail and return false.
891 bool isConstantSplat(SDValue Op, APInt &SplatVal,
892 bool AllowPartialUndefs = true);
893 } // end namespace X86
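// Usage sketch (illustrative): these predicates are intended to be called on
// SDValues during lowering and combining, e.g.
//   if (X86::isZeroNode(N->getOperand(0)))
//     ...; // operand 0 is a constant integer zero or FP +0.0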
895 //===--------------------------------------------------------------------===//
896 // X86 Implementation of the TargetLowering interface
897 class X86TargetLowering final : public TargetLowering {
898 public:
899 explicit X86TargetLowering(const X86TargetMachine &TM,
900 const X86Subtarget &STI);
902 unsigned getJumpTableEncoding() const override;
903 bool useSoftFloat() const override;
905 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
906 ArgListTy &Args) const override;
908 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
909 return MVT::i8;
910 }
912 const MCExpr *
913 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
914 const MachineBasicBlock *MBB, unsigned uid,
915 MCContext &Ctx) const override;
917 /// Returns relocation base for the given PIC jumptable.
918 SDValue getPICJumpTableRelocBase(SDValue Table,
919 SelectionDAG &DAG) const override;
920 const MCExpr *
921 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
922 unsigned JTI, MCContext &Ctx) const override;
924 /// Return the desired alignment for ByVal aggregate
925 /// function arguments in the caller parameter area. For X86, aggregates
926 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
927 /// 4-byte boundaries.
928 unsigned getByValTypeAlignment(Type *Ty,
929 const DataLayout &DL) const override;
931 EVT getOptimalMemOpType(const MemOp &Op,
932 const AttributeList &FuncAttributes) const override;
934 /// Returns true if it's safe to use load / store of the
935 /// specified type to expand memcpy / memset inline. This is mostly true
936 /// for all types except for some special cases. For example, on X86
937 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
938 /// also does type conversion. Note the specified type doesn't have to be
939 /// legal as the hook is used before type legalization.
940 bool isSafeMemOpType(MVT VT) const override;
942 /// Returns true if the target allows unaligned memory accesses of the
943 /// specified type. Returns whether it is "fast" in the last argument.
944 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
945 MachineMemOperand::Flags Flags,
946 bool *Fast) const override;
948 /// Provide custom lowering hooks for some operations.
950 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
952 /// Replace the results of node with an illegal result
953 /// type with new values built out of custom code.
955 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
956 SelectionDAG &DAG) const override;
958 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
960 /// Return true if the target has native support for
961 /// the specified value type and it is 'desirable' to use the type for the
962 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
963 /// instruction encodings are longer and some i16 instructions are slow.
964 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
966 /// Return true if the target has native support for the
967 /// specified value type and it is 'desirable' to use the type. e.g. On x86
968 /// i16 is legal, but undesirable since i16 instruction encodings are longer
969 /// and some i16 instructions are slow.
970 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
972 /// Return the newly negated expression if the cost is not expensive and
973 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
974 /// do the negation.
975 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
976 bool LegalOperations, bool ForCodeSize,
977 NegatibleCost &Cost,
978 unsigned Depth) const override;
980 MachineBasicBlock *
981 EmitInstrWithCustomInserter(MachineInstr &MI,
982 MachineBasicBlock *MBB) const override;
984 /// This method returns the name of a target specific DAG node.
985 const char *getTargetNodeName(unsigned Opcode) const override;
987 /// Do not merge vector stores after legalization because that may conflict
988 /// with x86-specific store splitting optimizations.
989 bool mergeStoresAfterLegalization(EVT MemVT) const override {
990 return !MemVT.isVector();
991 }
993 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
994 const MachineFunction &MF) const override;
996 bool isCheapToSpeculateCttz() const override;
998 bool isCheapToSpeculateCtlz() const override;
1000 bool isCtlzFast() const override;
1002 bool hasBitPreservingFPLogic(EVT VT) const override {
1003 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
1004 (VT == MVT::f16 && X86ScalarSSEf16);
1005 }
1007 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1008 // If the pair to store is a mixture of float and int values, we will
1009 // save two bitwise instructions and one float-to-int instruction and
1010 // increase one store instruction. There is potentially a more
1011 // significant benefit because it avoids the float->int domain switch
1012 // for the input value, so it is more likely a win.
1013 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1014 (LTy.isInteger() && HTy.isFloatingPoint()))
1015 return true;
1016 // If the pair only contains int values, we will save two bitwise
1017 // instructions and increase one store instruction (costing one more
1018 // store buffer). Since the benefit is less clear, we leave
1019 // such pairs out until we get a testcase to prove it is a win.
1020 return false;
1021 }
1023 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1025 bool hasAndNotCompare(SDValue Y) const override;
1027 bool hasAndNot(SDValue Y) const override;
1029 bool hasBitTest(SDValue X, SDValue Y) const override;
1031 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1032 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1033 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1034 SelectionDAG &DAG) const override;
1036 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1037 CombineLevel Level) const override;
1039 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1041 bool
1042 shouldTransformSignedTruncationCheck(EVT XVT,
1043 unsigned KeptBits) const override {
1044 // For vectors, we don't have a preference.
1045 if (XVT.isVector())
1046 return false;
1048 auto VTIsOk = [](EVT VT) -> bool {
1049 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1050 VT == MVT::i64;
1051 };
1053 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS supports.
1054 // XVT will be larger than KeptBitsVT.
1055 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1056 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1057 }
1059 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1061 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1063 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1064 return VT.isScalarInteger();
1065 }
1067 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1068 MVT hasFastEqualityCompare(unsigned NumBits) const override;
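// For illustration only (a sketch of the underlying idea using SSE intrinsics,
// not code belonging to this interface): a 128-bit equality test can be done as
//   __m128i E = _mm_cmpeq_epi8(A, B);
//   bool Equal = _mm_movemask_epi8(E) == 0xFFFF;
// which is why reporting a wide type here makes memcmp expansion profitable.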
1070 /// Return the value type to use for ISD::SETCC.
1071 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1072 EVT VT) const override;
1074 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1075 const APInt &DemandedElts,
1076 TargetLoweringOpt &TLO) const override;
1078 /// Determine which of the bits specified in Mask are known to be either
1079 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1080 void computeKnownBitsForTargetNode(const SDValue Op,
1081 KnownBits &Known,
1082 const APInt &DemandedElts,
1083 const SelectionDAG &DAG,
1084 unsigned Depth = 0) const override;
1086 /// Determine the number of bits in the operation that are sign bits.
1087 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1088 const APInt &DemandedElts,
1089 const SelectionDAG &DAG,
1090 unsigned Depth) const override;
1092 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1093 const APInt &DemandedElts,
1094 APInt &KnownUndef,
1095 APInt &KnownZero,
1096 TargetLoweringOpt &TLO,
1097 unsigned Depth) const override;
1099 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1100 const APInt &DemandedElts,
1101 unsigned MaskIndex,
1102 TargetLoweringOpt &TLO,
1103 unsigned Depth) const;
1105 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1106 const APInt &DemandedBits,
1107 const APInt &DemandedElts,
1108 KnownBits &Known,
1109 TargetLoweringOpt &TLO,
1110 unsigned Depth) const override;
1112 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1113 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1114 SelectionDAG &DAG, unsigned Depth) const override;
1116 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1118 SDValue unwrapAddress(SDValue N) const override;
1120 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1122 bool ExpandInlineAsm(CallInst *CI) const override;
1124 ConstraintType getConstraintType(StringRef Constraint) const override;
1126 /// Examine constraint string and operand type and determine a weight value.
1127 /// The operand object must already have been set up with the operand type.
1128 ConstraintWeight
1129 getSingleConstraintMatchWeight(AsmOperandInfo &info,
1130 const char *constraint) const override;
1132 const char *LowerXConstraint(EVT ConstraintVT) const override;
1134 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1135 /// add anything to Ops. If hasMemory is true it means one of the asm
1136 /// constraint of the inline asm instruction being processed is 'm'.
1137 void LowerAsmOperandForConstraint(SDValue Op,
1138 std::string &Constraint,
1139 std::vector<SDValue> &Ops,
1140 SelectionDAG &DAG) const override;
1142 unsigned
1143 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1144 if (ConstraintCode == "v")
1145 return InlineAsm::Constraint_v;
1146 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1147 }
1149 /// Handle lowering of flag assembly outputs.
1150 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1151 const SDLoc &DL,
1152 const AsmOperandInfo &Constraint,
1153 SelectionDAG &DAG) const override;
1155 /// Given a physical register constraint
1156 /// (e.g. {edx}), return the register number and the register class for the
1157 /// register. This should only be used for C_Register constraints. On
1158 /// error, this returns a register number of 0.
1159 std::pair<unsigned, const TargetRegisterClass *>
1160 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1161 StringRef Constraint, MVT VT) const override;
1163 /// Return true if the addressing mode represented
1164 /// by AM is legal for this target, for a load/store of the specified type.
1165 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1166 Type *Ty, unsigned AS,
1167 Instruction *I = nullptr) const override;
1169 /// Return true if the specified immediate is legal
1170 /// icmp immediate, that is the target has icmp instructions which can
1171 /// compare a register against the immediate without having to materialize
1172 /// the immediate into a register.
1173 bool isLegalICmpImmediate(int64_t Imm) const override;
1175 /// Return true if the specified immediate is legal
1176 /// add immediate, that is the target has add instructions which can
1177 /// add a register and the immediate without having to materialize
1178 /// the immediate into a register.
1179 bool isLegalAddImmediate(int64_t Imm) const override;
1181 bool isLegalStoreImmediate(int64_t Imm) const override;
1183 /// Return the cost of the scaling factor used in the addressing
1184 /// mode represented by AM for this target, for a load/store
1185 /// of the specified type.
1186 /// If the AM is supported, the return value must be >= 0.
1187 /// If the AM is not supported, it returns a negative value.
1188 InstructionCost getScalingFactorCost(const DataLayout &DL,
1189 const AddrMode &AM, Type *Ty,
1190 unsigned AS) const override;
1192 /// This is used to enable splatted operand transforms for vector shifts
1193 /// and vector funnel shifts.
1194 bool isVectorShiftByScalarCheap(Type *Ty) const override;
1196 /// Add x86-specific opcodes to the default list.
1197 bool isBinOp(unsigned Opcode) const override;
1199 /// Returns true if the opcode is a commutative binary operation.
1200 bool isCommutativeBinOp(unsigned Opcode) const override;
1202 /// Return true if it's free to truncate a value of
1203 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1204 /// register EAX to i16 by referencing its sub-register AX.
1205 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1206 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1208 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1210 /// Return true if any actual instruction that defines a
1211 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1212 /// register. This does not necessarily include registers defined in
1213 /// unknown ways, such as incoming arguments, or copies from unknown
1214 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1215 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1216 /// all instructions that define 32-bit values implicit zero-extend the
1217 /// result out to 64 bits.
1218 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1219 bool isZExtFree(EVT VT1, EVT VT2) const override;
1220 bool isZExtFree(SDValue Val, EVT VT2) const override;
1222 bool shouldSinkOperands(Instruction *I,
1223 SmallVectorImpl<Use *> &Ops) const override;
1224 bool shouldConvertPhiType(Type *From, Type *To) const override;
1226 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1227 /// extend node) is profitable.
1228 bool isVectorLoadExtDesirable(SDValue) const override;
1230 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1231 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1232 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1233 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1234 EVT VT) const override;
1236 /// Return true if it's profitable to narrow
1237 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1238 /// from i32 to i8 but not from i32 to i16.
1239 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1241 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1242 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1243 /// true and stores the intrinsic information into the IntrinsicInfo that was
1244 /// passed to the function.
1245 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1246 MachineFunction &MF,
1247 unsigned Intrinsic) const override;
1249 /// Returns true if the target can instruction select the
1250 /// specified FP immediate natively. If false, the legalizer will
1251 /// materialize the FP immediate as a load from a constant pool.
1252 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1253 bool ForCodeSize) const override;
1255 /// Targets can use this to indicate that they only support *some*
1256 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1257 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1258 /// be legal.
1259 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1261 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1262 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1263 /// constant pool entry.
1264 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1266 /// Returns true if lowering to a jump table is allowed.
1267 bool areJTsAllowed(const Function *Fn) const override;
1269 /// If true, then instruction selection should
1270 /// seek to shrink the FP constant of the specified type to a smaller type
1271 /// in order to save space and / or reduce runtime.
1272 bool ShouldShrinkFPConstant(EVT VT) const override {
1273 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1274 // expensive than a straight movsd. On the other hand, it's important to
1275 // shrink long double fp constant since fldt is very slow.
1276 return !X86ScalarSSEf64 || VT == MVT::f80;
1277 }
1279 /// Return true if we believe it is correct and profitable to reduce the
1280 /// load node to a smaller type.
1281 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1282 EVT NewVT) const override;
1284 /// Return true if the specified scalar FP type is computed in an SSE
1285 /// register, not on the X87 floating point stack.
1286 bool isScalarFPTypeInSSEReg(EVT VT) const {
1287 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1288 (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
1289 (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
1290 }
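// e.g. (illustrative) with only SSE1 available this is true for MVT::f32,
// while f64 additionally needs SSE2 and f16 needs AVX512-FP16, matching the
// flags tested above.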
1292 /// Returns true if it is beneficial to convert a load of a constant
1293 /// to just the constant itself.
1294 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1295 Type *Ty) const override;
1297 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1299 bool convertSelectOfConstantsToMath(EVT VT) const override;
1301 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1302 SDValue C) const override;
1304 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1305 /// with this index.
1306 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1307 unsigned Index) const override;
1309 /// Scalar ops always have equal or better analysis/performance/power than
1310 /// the vector equivalent, so this always makes sense if the scalar op is
1311 /// supported.
1312 bool shouldScalarizeBinop(SDValue) const override;
1314 /// Extract of a scalar FP value from index 0 of a vector is free.
1315 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1316 EVT EltVT = VT.getScalarType();
1317 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1318 }
1320 /// Overflow nodes should get combined/lowered to optimal instructions
1321 /// (they should allow eliminating explicit compares by getting flags from
1322 /// math ops).
1323 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1324 bool MathUsed) const override;
1326 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1327 unsigned AddrSpace) const override {
1328 // If we can replace more than 2 scalar stores, there will be a reduction
1329 // in instructions even after we add a vector constant load.
1330 return NumElem > 2;
1331 }
1333 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1334 const SelectionDAG &DAG,
1335 const MachineMemOperand &MMO) const override;
1337 /// Intel processors have a unified instruction and data cache.
1338 const char *getClearCacheBuiltinName() const override {
1339 return nullptr; // nothing to do, move along.
1340 }
1342 Register getRegisterByName(const char* RegName, LLT VT,
1343 const MachineFunction &MF) const override;
1345 /// If a physical register, this returns the register that receives the
1346 /// exception address on entry to an EH pad.
1347 Register
1348 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1350 /// If a physical register, this returns the register that receives the
1351 /// exception typeid on entry to a landing pad.
1352 Register
1353 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1355 virtual bool needsFixedCatchObjects() const override;
1357 /// This method returns a target specific FastISel object,
1358 /// or null if the target does not support "fast" ISel.
1359 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1360 const TargetLibraryInfo *libInfo) const override;
1362 /// If the target has a standard location for the stack protector cookie,
1363 /// returns the address of that location. Otherwise, returns nullptr.
1364 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1366 bool useLoadStackGuardNode() const override;
1367 bool useStackGuardXorFP() const override;
1368 void insertSSPDeclarations(Module &M) const override;
1369 Value *getSDagStackGuard(const Module &M) const override;
1370 Function *getSSPStackGuardCheck(const Module &M) const override;
1371 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1372 const SDLoc &DL) const override;
1375 /// Return true if the target stores SafeStack pointer at a fixed offset in
1376 /// some non-standard address space, and populates the address space and
1377 /// offset as appropriate.
1378 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1380 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1381 SDValue Chain, SDValue Pointer,
1382 MachinePointerInfo PtrInfo,
1383 Align Alignment,
1384 SelectionDAG &DAG) const;
1386 /// Customize the preferred legalization strategy for certain types.
1387 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1389 bool softPromoteHalfType() const override { return true; }
1391 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1392 EVT VT) const override;
1394 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1395 CallingConv::ID CC,
1396 EVT VT) const override;
1398 unsigned getVectorTypeBreakdownForCallingConv(
1399 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1400 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1402 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1404 bool supportSwiftError() const override;
1406 bool hasStackProbeSymbol(MachineFunction &MF) const override;
1407 bool hasInlineStackProbe(MachineFunction &MF) const override;
1408 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1410 unsigned getStackProbeSize(MachineFunction &MF) const;
1412 bool hasVectorBlend() const override { return true; }
1414 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1416 /// Lower interleaved load(s) into target specific
1417 /// instructions/intrinsics.
1418 bool lowerInterleavedLoad(LoadInst *LI,
1419 ArrayRef<ShuffleVectorInst *> Shuffles,
1420 ArrayRef<unsigned> Indices,
1421 unsigned Factor) const override;
1423 /// Lower interleaved store(s) into target specific
1424 /// instructions/intrinsics.
1425 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1426 unsigned Factor) const override;
1428 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1429 SDValue Addr, SelectionDAG &DAG)
1430 const override;
1432 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1434 protected:
1435 std::pair<const TargetRegisterClass *, uint8_t>
1436 findRepresentativeClass(const TargetRegisterInfo *TRI,
1437 MVT VT) const override;
1439 private:
1440 /// Keep a reference to the X86Subtarget around so that we can
1441 /// make the right decision when generating code for different targets.
1442 const X86Subtarget &Subtarget;
1444 /// Select between SSE and x87 floating point ops.
1445 /// When SSE is available, use it for f32 operations.
1446 /// When SSE2 is available, use it for f64 operations.
1447 bool X86ScalarSSEf32;
1448 bool X86ScalarSSEf64;
1449 bool X86ScalarSSEf16;
1451 /// A list of legal FP immediates.
1452 std::vector<APFloat> LegalFPImmediates;
1454 /// Indicate that this x86 target can instruction
1455 /// select the specified FP immediate natively.
1456 void addLegalFPImmediate(const APFloat& Imm) {
1457 LegalFPImmediates.push_back(Imm);
1458 }
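// Usage sketch (illustrative): the constructor registers the immediates the
// target can materialize directly, e.g.
//   addLegalFPImmediate(APFloat(+0.0));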
1460 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1461 CallingConv::ID CallConv, bool isVarArg,
1462 const SmallVectorImpl<ISD::InputArg> &Ins,
1463 const SDLoc &dl, SelectionDAG &DAG,
1464 SmallVectorImpl<SDValue> &InVals,
1465 uint32_t *RegMask) const;
1466 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1467 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1468 const SDLoc &dl, SelectionDAG &DAG,
1469 const CCValAssign &VA, MachineFrameInfo &MFI,
1470 unsigned i) const;
1471 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1472 const SDLoc &dl, SelectionDAG &DAG,
1473 const CCValAssign &VA,
1474 ISD::ArgFlagsTy Flags, bool isByval) const;
1476 // Call lowering helpers.
1478 /// Check whether the call is eligible for tail call optimization. Targets
1479 /// that want to do tail call optimization should implement this function.
1480 bool IsEligibleForTailCallOptimization(SDValue Callee,
1481 CallingConv::ID CalleeCC,
1482 bool isVarArg,
1483 bool isCalleeStructRet,
1484 bool isCallerStructRet,
1485 Type *RetTy,
1486 const SmallVectorImpl<ISD::OutputArg> &Outs,
1487 const SmallVectorImpl<SDValue> &OutVals,
1488 const SmallVectorImpl<ISD::InputArg> &Ins,
1489 SelectionDAG& DAG) const;
1490 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1491 SDValue Chain, bool IsTailCall,
1492 bool Is64Bit, int FPDiff,
1493 const SDLoc &dl) const;
1495 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1496 SelectionDAG &DAG) const;
1498 unsigned getAddressSpace(void) const;
1500 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1501 SDValue &Chain) const;
1502 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1504 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1505 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1506 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1507 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1509 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1510 const unsigned char OpFlags = 0) const;
1511 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1512 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1513 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1514 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1515 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1517 /// Creates target global address or external symbol nodes for calls or
1518 /// other uses.
1519 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1520 bool ForCall) const;
1522 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1523 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1524 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1525 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1526 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1527 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1528 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1529 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1530 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1531 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1532 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1533 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1534 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1535 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1536 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1537 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1538 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1539 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1540 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1541 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1542 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1543 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1544 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1545 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1546 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1547 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1548 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1549 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1550 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1551 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1552 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1554 SDValue
1555 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1556 const SmallVectorImpl<ISD::InputArg> &Ins,
1557 const SDLoc &dl, SelectionDAG &DAG,
1558 SmallVectorImpl<SDValue> &InVals) const override;
1559 SDValue LowerCall(CallLoweringInfo &CLI,
1560 SmallVectorImpl<SDValue> &InVals) const override;
1562 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1563 const SmallVectorImpl<ISD::OutputArg> &Outs,
1564 const SmallVectorImpl<SDValue> &OutVals,
1565 const SDLoc &dl, SelectionDAG &DAG) const override;
1567 bool supportSplitCSR(MachineFunction *MF) const override {
1568 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1569 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1570 }
1571 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1572 void insertCopiesSplitCSR(
1573 MachineBasicBlock *Entry,
1574 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1576 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1578 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1580 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1581 ISD::NodeType ExtendKind) const override;
1583 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1584 bool isVarArg,
1585 const SmallVectorImpl<ISD::OutputArg> &Outs,
1586 LLVMContext &Context) const override;
1588 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1590 TargetLoweringBase::AtomicExpansionKind
1591 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1592 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1593 TargetLoweringBase::AtomicExpansionKind
1594 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1596 LoadInst *
1597 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1599 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1600 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1602 bool needsCmpXchgNb(Type *MemType) const;
1604 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1605 MachineBasicBlock *DispatchBB, int FI) const;
1607 // Utility function to emit the low-level va_arg code for X86-64.
1608 MachineBasicBlock *
1609 EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1611 /// Utility function to emit the xmm reg save portion of va_start.
1612 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1613 MachineInstr &MI2,
1614 MachineBasicBlock *BB) const;
1616 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1617 MachineBasicBlock *BB) const;
1619 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1620 MachineBasicBlock *BB) const;
1622 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1623 MachineBasicBlock *BB) const;
1625 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1626 MachineBasicBlock *BB) const;
1628 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1629 MachineBasicBlock *BB) const;
1631 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1632 MachineBasicBlock *BB) const;
1634 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1635 MachineBasicBlock *BB) const;
1637 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1638 MachineBasicBlock *MBB) const;
1640 void emitSetJmpShadowStackFix(MachineInstr &MI,
1641 MachineBasicBlock *MBB) const;
1643 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1644 MachineBasicBlock *MBB) const;
1646 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1647 MachineBasicBlock *MBB) const;
1649 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1650 MachineBasicBlock *MBB) const;
1652 /// Emit flags for the given setcc condition and operands. Also returns the
1653 /// corresponding X86 condition code constant in X86CC.
1654 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1655 const SDLoc &dl, SelectionDAG &DAG,
1656 SDValue &X86CC) const;
1658 /// Check if replacement of SQRT with RSQRT should be disabled.
1659 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1661 /// Use rsqrt* to speed up sqrt calculations.
1662 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1663 int &RefinementSteps, bool &UseOneConstNR,
1664 bool Reciprocal) const override;
1666 /// Use rcp* to speed up fdiv calculations.
1667 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1668 int &RefinementSteps) const override;
1670 /// Reassociate floating point divisions into multiply by reciprocal.
1671 unsigned combineRepeatedFPDivisors() const override;
1673 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1674 SmallVectorImpl<SDNode *> &Created) const override;
1675 };
1677 namespace X86 {
1678 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1679 const TargetLibraryInfo *libInfo);
1680 } // end namespace X86
1682 // X86 specific Gather/Scatter nodes.
1683 // The class has the same order of operands as MaskedGatherScatterSDNode for
1684 // convenience.
1685 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1686 public:
1687 // This is intended as a utility and should never be directly created.
1688 X86MaskedGatherScatterSDNode() = delete;
1689 ~X86MaskedGatherScatterSDNode() = delete;
1691 const SDValue &getBasePtr() const { return getOperand(3); }
1692 const SDValue &getIndex() const { return getOperand(4); }
1693 const SDValue &getMask() const { return getOperand(2); }
1694 const SDValue &getScale() const { return getOperand(5); }
1696 static bool classof(const SDNode *N) {
1697 return N->getOpcode() == X86ISD::MGATHER ||
1698 N->getOpcode() == X86ISD::MSCATTER;
1699 }
1700 };
1702 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1703 public:
1704 const SDValue &getPassThru() const { return getOperand(1); }
1706 static bool classof(const SDNode *N) {
1707 return N->getOpcode() == X86ISD::MGATHER;
1708 }
1709 };
1711 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1712 public:
1713 const SDValue &getValue() const { return getOperand(1); }
1715 static bool classof(const SDNode *N) {
1716 return N->getOpcode() == X86ISD::MSCATTER;
1717 }
1718 };
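// Usage sketch (illustrative): the classof hooks above let DAG code use the
// usual LLVM casting utilities on these nodes, e.g.
//   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N))
//     SDValue Index = Gather->getIndex();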
1720 /// Generate unpacklo/unpackhi shuffle mask.
1721 void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1722 bool Unary);
1724 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1725 /// imposed by AVX and specific to the unary pattern. Example:
1726 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1727 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1728 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
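// Usage sketch (illustrative), matching the example above:
//   SmallVector<int, 8> Mask;
//   createSplat2ShuffleMask(MVT::v8i16, Mask, /*Lo=*/true);
//   // Mask now holds {0, 0, 1, 1, 2, 2, 3, 3}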
1730 } // end namespace llvm
1732 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H