1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
20 namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
35 /// X86 funnel/double shift i16 instructions. These correspond to
36 /// X86::SHLDW and X86::SHRDW instructions, which have different
37 /// shift-amount modulo rules from generic funnel shifts.
38 /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
39 FSHL,
40 FSHR,
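// Illustration (an assumption based on the generic ISD::FSHL semantics, not
// generated output): for an in-range amount 0 < Amt < 16, FSHL(A, B, Amt)
// on i16 yields (A << Amt) | (B >> (16 - Amt)); the X86 nodes differ from
// the generic ones only in how out-of-range amounts are reduced.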
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of this node are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
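// Illustrative sketch, restating only the operand list documented above for
// a hypothetical direct call to @f that passes 16 bytes on the stack and
// pops none (the node actually built by LowerCall may carry additional
// register and register-mask operands):
//   X86ISD::CALL Chain, TargetGlobalAddress<@f>, Constant<16>, Constant<0>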
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 // Pseudo for an Objective-C call that gets emitted together with a
81 // special marker instruction.
82 CALL_RVMARKER,
84 /// X86 compare and logical compare instructions.
85 CMP,
86 FCMP,
87 COMI,
88 UCOMI,
90 // X86 compare with Intrinsics similar to COMI.
91 COMX,
92 UCOMX,
94 /// X86 bit-test instructions.
95 BT,
97 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
98 /// operand, usually produced by a CMP instruction.
99 SETCC,
101 /// X86 Select
102 SELECTS,
104 // Same as SETCC except it's materialized with an SBB and the value is
105 // all ones or all zeros.
106 SETCC_CARRY, // R = carry_bit ? ~0 : 0
108 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
109 /// Operands are two FP values to compare; result is a mask of
110 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
111 FSETCC,
113 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
114 /// with a version that takes SAE.
115 FSETCCM,
116 FSETCCM_SAE,
118 /// X86 conditional moves. Operand 0 and operand 1 are the two values
119 /// to select from. Operand 2 is the condition code, and operand 3 is the
120 /// flag operand produced by a CMP or TEST instruction.
121 CMOV,
123 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
124 /// is the block to branch if condition is true, operand 2 is the
125 /// condition code, and operand 3 is the flag operand produced by a CMP
126 /// or TEST instruction.
127 BRCOND,
129 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
130 /// operand 1 is the target address.
131 NT_BRIND,
133 /// Return with a glue operand. Operand 0 is the chain operand, operand
134 /// 1 is the number of bytes of stack to pop.
135 RET_GLUE,
137 /// Return from interrupt. Operand 0 is the number of bytes to pop.
138 IRET,
140 /// Repeat fill, corresponds to X86::REP_STOSx.
141 REP_STOS,
143 /// Repeat move, corresponds to X86::REP_MOVSx.
144 REP_MOVS,
146 /// On Darwin, this node represents the result of the popl
147 /// at function entry, used for PIC code.
148 GlobalBaseReg,
150 /// A wrapper node for TargetConstantPool, TargetJumpTable,
151 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
152 /// MCSymbol and TargetBlockAddress.
153 Wrapper,
155 /// Special wrapper used under X86-64 PIC mode for RIP
156 /// relative displacements.
157 WrapperRIP,
159 /// Copies a 64-bit value from an MMX vector to the low word
160 /// of an XMM vector, with the high word zero filled.
161 MOVQ2DQ,
163 /// Copies a 64-bit value from the low word of an XMM vector
164 /// to an MMX vector.
165 MOVDQ2Q,
167 /// Copies a 32-bit value from the low word of a MMX
168 /// vector to a GPR.
169 MMX_MOVD2W,
171 /// Copies a GPR into the low 32-bit word of a MMX vector
172 /// and zero out the high word.
173 MMX_MOVW2D,
175 /// Extract an 8-bit value from a vector and zero extend it to
176 /// i32, corresponds to X86::PEXTRB.
177 PEXTRB,
179 /// Extract a 16-bit value from a vector and zero extend it to
180 /// i32, corresponds to X86::PEXTRW.
181 PEXTRW,
183 /// Insert any element of a 4 x float vector into any element
184 /// of a destination 4 x float vector.
185 INSERTPS,
187 /// Insert the lower 8-bits of a 32-bit value to a vector,
188 /// corresponds to X86::PINSRB.
189 PINSRB,
191 /// Insert the lower 16-bits of a 32-bit value to a vector,
192 /// corresponds to X86::PINSRW.
193 PINSRW,
195 /// Shuffle 16 8-bit values within a vector.
196 PSHUFB,
198 /// Compute Sum of Absolute Differences.
199 PSADBW,
200 /// Compute Double Block Packed Sum-Absolute-Differences
201 DBPSADBW,
203 /// Bitwise Logical AND NOT of Packed FP values.
204 ANDNP,
206 /// Blend where the selector is an immediate.
207 BLENDI,
209 /// Dynamic (non-constant condition) vector blend where only the sign bits
210 /// of the condition elements are used. This is used to enforce that the
211 /// condition mask is not valid for generic VSELECT optimizations. This
212 /// is also used to implement the intrinsics.
213 /// Operands are in VSELECT order: MASK, TRUE, FALSE
214 BLENDV,
216 /// Combined add and sub on an FP vector.
217 ADDSUB,
219 // FP vector ops with rounding mode.
220 FADD_RND,
221 FADDS,
222 FADDS_RND,
223 FSUB_RND,
224 FSUBS,
225 FSUBS_RND,
226 FMUL_RND,
227 FMULS,
228 FMULS_RND,
229 FDIV_RND,
230 FDIVS,
231 FDIVS_RND,
232 FMAX_SAE,
233 FMAXS_SAE,
234 FMIN_SAE,
235 FMINS_SAE,
236 FSQRT_RND,
237 FSQRTS,
238 FSQRTS_RND,
240 // FP vector get exponent.
241 FGETEXP,
242 FGETEXP_SAE,
243 FGETEXPS,
244 FGETEXPS_SAE,
245 // Extract Normalized Mantissas.
246 VGETMANT,
247 VGETMANT_SAE,
248 VGETMANTS,
249 VGETMANTS_SAE,
250 // FP Scale.
251 SCALEF,
252 SCALEF_RND,
253 SCALEFS,
254 SCALEFS_RND,
256 /// Integer horizontal add/sub.
257 HADD,
258 HSUB,
260 /// Floating point horizontal add/sub.
261 FHADD,
262 FHSUB,
264 // Detect Conflicts Within a Vector
265 CONFLICT,
267 /// Floating point max and min.
268 FMAX,
269 FMIN,
271 /// Commutative FMIN and FMAX.
272 FMAXC,
273 FMINC,
275 /// Scalar intrinsic floating point max and min.
276 FMAXS,
277 FMINS,
279 /// Floating point reciprocal-sqrt and reciprocal approximation.
280 /// Note that these typically require refinement
281 /// in order to obtain suitable precision.
282 FRSQRT,
283 FRCP,
285 // AVX-512 reciprocal approximations with a little more precision.
286 RSQRT14,
287 RSQRT14S,
288 RCP14,
289 RCP14S,
291 // Thread Local Storage.
292 TLSADDR,
294 // Thread Local Storage. A call to get the start address
295 // of the TLS block for the current module.
296 TLSBASEADDR,
298 // Thread Local Storage. When calling to an OS provided
299 // thunk at the address from an earlier relocation.
300 TLSCALL,
302 // Thread Local Storage. A descriptor containing pointer to
303 // code and to argument to get the TLS offset for the symbol.
304 TLSDESC,
306 // Exception Handling helpers.
307 EH_RETURN,
309 // SjLj exception handling setjmp.
310 EH_SJLJ_SETJMP,
312 // SjLj exception handling longjmp.
313 EH_SJLJ_LONGJMP,
315 // SjLj exception handling dispatch.
316 EH_SJLJ_SETUP_DISPATCH,
318 /// Tail call return. See X86TargetLowering::LowerCall for
319 /// the list of operands.
320 TC_RETURN,
322 // Vector move to low scalar and zero higher vector elements.
323 VZEXT_MOVL,
325 // Vector integer truncate.
326 VTRUNC,
327 // Vector integer truncate with unsigned/signed saturation.
328 VTRUNCUS,
329 VTRUNCS,
331 // Masked version of the above. Used when less than a 128-bit result is
332 // produced since the mask only applies to the lower elements and can't
333 // be represented by a select.
334 // SRC, PASSTHRU, MASK
335 VMTRUNC,
336 VMTRUNCUS,
337 VMTRUNCS,
339 // Vector FP extend.
340 VFPEXT,
341 VFPEXT_SAE,
342 VFPEXTS,
343 VFPEXTS_SAE,
345 // Vector FP round.
346 VFPROUND,
347 // Convert TWO packed single data to one packed data
348 VFPROUND2,
349 VFPROUND2_RND,
350 VFPROUND_RND,
351 VFPROUNDS,
352 VFPROUNDS_RND,
354 // Masked version of above. Used for v2f64->v4f32.
355 // SRC, PASSTHRU, MASK
356 VMFPROUND,
358 // 128-bit vector logical left / right shift
359 VSHLDQ,
360 VSRLDQ,
362 // Vector shift elements
363 VSHL,
364 VSRL,
365 VSRA,
367 // Vector variable shift
368 VSHLV,
369 VSRLV,
370 VSRAV,
372 // Vector shift elements by immediate
373 VSHLI,
374 VSRLI,
375 VSRAI,
377 // Shifts of mask registers.
378 KSHIFTL,
379 KSHIFTR,
381 // Bit rotate by immediate
382 VROTLI,
383 VROTRI,
385 // Vector packed double/float comparison.
386 CMPP,
388 // Vector integer comparisons.
389 PCMPEQ,
390 PCMPGT,
392 // v8i16 Horizontal minimum and position.
393 PHMINPOS,
395 MULTISHIFT,
397 /// Vector comparison generating mask bits for fp and
398 /// integer signed and unsigned data types.
399 CMPM,
400 // Vector mask comparison generating mask bits for FP values.
401 CMPMM,
402 // Vector mask comparison with SAE for FP values.
403 CMPMM_SAE,
405 // Arithmetic operations with FLAGS results.
406 ADD,
407 SUB,
408 ADC,
409 SBB,
410 SMUL,
411 UMUL,
413 XOR,
414 AND,
416 // Bit field extract.
417 BEXTR,
418 BEXTRI,
420 // Zero High Bits Starting with Specified Bit Position.
421 BZHI,
423 // Parallel extract and deposit.
424 PDEP,
425 PEXT,
427 // X86-specific multiply by immediate.
428 MUL_IMM,
430 // Vector sign bit extraction.
431 MOVMSK,
433 // Vector bitwise comparisons.
434 PTEST,
436 // Vector packed fp sign bitwise comparisons.
437 TESTP,
439 // OR/AND test for masks.
440 KORTEST,
441 KTEST,
443 // ADD for masks.
444 KADD,
446 // Several flavors of instructions with vector shuffle behaviors.
447 // Saturated signed/unsigned packing.
448 PACKSS,
449 PACKUS,
450 // Intra-lane alignr.
451 PALIGNR,
452 // AVX512 inter-lane alignr.
453 VALIGN,
454 PSHUFD,
455 PSHUFHW,
456 PSHUFLW,
457 SHUFP,
458 // VBMI2 Concat & Shift.
459 VSHLD,
460 VSHRD,
461 VSHLDV,
462 VSHRDV,
463 // Shuffle Packed Values at 128-bit granularity.
464 SHUF128,
465 MOVDDUP,
466 MOVSHDUP,
467 MOVSLDUP,
468 MOVLHPS,
469 MOVHLPS,
470 MOVSD,
471 MOVSS,
472 MOVSH,
473 UNPCKL,
474 UNPCKH,
475 VPERMILPV,
476 VPERMILPI,
477 VPERMI,
478 VPERM2X128,
480 // Variable Permute (VPERM).
481 // Res = VPERMV MaskV, V0
482 VPERMV,
484 // 3-op Variable Permute (VPERMT2).
485 // Res = VPERMV3 V0, MaskV, V1
486 VPERMV3,
488 // Bitwise ternary logic.
489 VPTERNLOG,
490 // Fix Up Special Packed Float32/64 values.
491 VFIXUPIMM,
492 VFIXUPIMM_SAE,
493 VFIXUPIMMS,
494 VFIXUPIMMS_SAE,
495 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
496 VRANGE,
497 VRANGE_SAE,
498 VRANGES,
499 VRANGES_SAE,
500 // Reduce - Perform Reduction Transformation on scalar/packed FP.
501 VREDUCE,
502 VREDUCE_SAE,
503 VREDUCES,
504 VREDUCES_SAE,
505 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
506 // Also used by the legacy (V)ROUND intrinsics where we mask out the
507 // scaling part of the immediate.
508 VRNDSCALE,
509 VRNDSCALE_SAE,
510 VRNDSCALES,
511 VRNDSCALES_SAE,
512 // Tests types of FP values, packed form.
513 VFPCLASS,
514 // Tests types of FP values, scalar form.
515 VFPCLASSS,
517 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
518 // a vector, this node may change the vector length as part of the splat.
519 VBROADCAST,
520 // Broadcast mask to vector.
521 VBROADCASTM,
523 /// SSE4A Extraction and Insertion.
524 EXTRQI,
525 INSERTQI,
527 // XOP arithmetic/logical shifts.
528 VPSHA,
529 VPSHL,
530 // XOP signed/unsigned integer comparisons.
531 VPCOM,
532 VPCOMU,
533 // XOP packed permute bytes.
534 VPPERM,
535 // XOP two source permutation.
536 VPERMIL2,
538 // Vector multiply packed unsigned doubleword integers.
539 PMULUDQ,
540 // Vector multiply packed signed doubleword integers.
541 PMULDQ,
542 // Vector Multiply Packed Unsigned Integers with Round and Scale.
543 MULHRS,
545 // Multiply and Add Packed Integers.
546 VPMADDUBSW,
547 VPMADDWD,
549 // AVX512IFMA multiply and add.
550 // NOTE: These are different from the instruction and perform
551 // op0 x op1 + op2.
552 VPMADD52L,
553 VPMADD52H,
555 // VNNI
556 VPDPBUSD,
557 VPDPBUSDS,
558 VPDPWSSD,
559 VPDPWSSDS,
561 // FMA nodes.
562 // We use the target independent ISD::FMA for the non-inverted case.
563 FNMADD,
564 FMSUB,
565 FNMSUB,
566 FMADDSUB,
567 FMSUBADD,
569 // FMA with rounding mode.
570 FMADD_RND,
571 FNMADD_RND,
572 FMSUB_RND,
573 FNMSUB_RND,
574 FMADDSUB_RND,
575 FMSUBADD_RND,
577 // AVX512-FP16 complex addition and multiplication.
578 VFMADDC,
579 VFMADDC_RND,
580 VFCMADDC,
581 VFCMADDC_RND,
583 VFMULC,
584 VFMULC_RND,
585 VFCMULC,
586 VFCMULC_RND,
588 VFMADDCSH,
589 VFMADDCSH_RND,
590 VFCMADDCSH,
591 VFCMADDCSH_RND,
593 VFMULCSH,
594 VFMULCSH_RND,
595 VFCMULCSH,
596 VFCMULCSH_RND,
598 VPDPBSUD,
599 VPDPBSUDS,
600 VPDPBUUD,
601 VPDPBUUDS,
602 VPDPBSSD,
603 VPDPBSSDS,
605 VPDPWSUD,
606 VPDPWSUDS,
607 VPDPWUSD,
608 VPDPWUSDS,
609 VPDPWUUD,
610 VPDPWUUDS,
612 VMINMAX,
613 VMINMAX_SAE,
614 VMINMAXS,
615 VMINMAXS_SAE,
617 CVTP2IBS,
618 CVTP2IUBS,
619 CVTP2IBS_RND,
620 CVTP2IUBS_RND,
621 CVTTP2IBS,
622 CVTTP2IUBS,
623 CVTTP2IBS_SAE,
624 CVTTP2IUBS_SAE,
626 MPSADBW,
628 VCVTNE2PH2BF8,
629 VCVTNE2PH2BF8S,
630 VCVTNE2PH2HF8,
631 VCVTNE2PH2HF8S,
632 VCVTBIASPH2BF8,
633 VCVTBIASPH2BF8S,
634 VCVTBIASPH2HF8,
635 VCVTBIASPH2HF8S,
636 VCVTNEPH2BF8,
637 VCVTNEPH2BF8S,
638 VCVTNEPH2HF8,
639 VCVTNEPH2HF8S,
640 VMCVTBIASPH2BF8,
641 VMCVTBIASPH2BF8S,
642 VMCVTBIASPH2HF8,
643 VMCVTBIASPH2HF8S,
644 VMCVTNEPH2BF8,
645 VMCVTNEPH2BF8S,
646 VMCVTNEPH2HF8,
647 VMCVTNEPH2HF8S,
648 VCVTHF82PH,
650 // Compress and expand.
651 COMPRESS,
652 EXPAND,
654 // Bits shuffle
655 VPSHUFBITQMB,
657 // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode.
658 SINT_TO_FP_RND,
659 UINT_TO_FP_RND,
660 SCALAR_SINT_TO_FP,
661 SCALAR_UINT_TO_FP,
662 SCALAR_SINT_TO_FP_RND,
663 SCALAR_UINT_TO_FP_RND,
665 // Vector float/double to signed/unsigned integer.
666 CVTP2SI,
667 CVTP2UI,
668 CVTP2SI_RND,
669 CVTP2UI_RND,
670 // Scalar float/double to signed/unsigned integer.
671 CVTS2SI,
672 CVTS2UI,
673 CVTS2SI_RND,
674 CVTS2UI_RND,
676 // Vector float/double to signed/unsigned integer with truncation.
677 CVTTP2SI,
678 CVTTP2UI,
679 CVTTP2SI_SAE,
680 CVTTP2UI_SAE,
682 // Saturation enabled Vector float/double to signed/unsigned
683 // integer with truncation.
684 CVTTP2SIS,
685 CVTTP2UIS,
686 CVTTP2SIS_SAE,
687 CVTTP2UIS_SAE,
688 // Masked versions of above. Used for v2f64 to v4i32.
689 // SRC, PASSTHRU, MASK
690 MCVTTP2SIS,
691 MCVTTP2UIS,
693 // Scalar float/double to signed/unsigned integer with truncation.
694 CVTTS2SI,
695 CVTTS2UI,
696 CVTTS2SI_SAE,
697 CVTTS2UI_SAE,
699 // Vector signed/unsigned integer to float/double.
700 CVTSI2P,
701 CVTUI2P,
703 // Scalar float/double to signed/unsigned integer with saturation.
704 CVTTS2SIS,
705 CVTTS2UIS,
706 CVTTS2SIS_SAE,
707 CVTTS2UIS_SAE,
709 // Masked versions of above. Used for v2f64->v4f32.
710 // SRC, PASSTHRU, MASK
711 MCVTP2SI,
712 MCVTP2UI,
713 MCVTTP2SI,
714 MCVTTP2UI,
715 MCVTSI2P,
716 MCVTUI2P,
718 // Vector float to bfloat16.
719 // Convert packed single data to packed BF16 data
720 CVTNEPS2BF16,
721 // Masked version of above.
722 // SRC, PASSTHRU, MASK
723 MCVTNEPS2BF16,
725 // Dot product of BF16/FP16 pairs, accumulated into
726 // packed single precision.
727 DPBF16PS,
728 DPFP16PS,
730 // A stack checking function call. On Windows it's the _chkstk call.
731 DYN_ALLOCA,
733 // For allocating variable amounts of stack space when using
734 // segmented stacks. Checks if the current stacklet has enough space,
735 // and falls back to heap allocation if not.
736 SEG_ALLOCA,
738 // For allocating stack space when using stack clash protector.
739 // Allocation is performed by block, and each block is probed.
740 PROBED_ALLOCA,
742 // Memory barriers.
743 MFENCE,
745 // Get a random integer and indicate whether it is valid in CF.
746 RDRAND,
748 // Get a NIST SP800-90B & C compliant random integer and
749 // indicate whether it is valid in CF.
750 RDSEED,
752 // Protection keys
753 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
754 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
755 // value for ECX.
756 RDPKRU,
757 WRPKRU,
759 // SSE42 string comparisons.
760 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
761 // will emit one or two instructions based on which results are used. If
762 // flags and index/mask are both used, this allows us to use a single
763 // instruction since we won't have to pick an opcode for flags. Instead we
764 // can rely on the DAG to CSE everything and decide at isel.
765 PCMPISTR,
766 PCMPESTR,
768 // Test if in transactional execution.
769 XTEST,
771 // Conversions between float and half-float.
772 CVTPS2PH,
773 CVTPS2PH_SAE,
774 CVTPH2PS,
775 CVTPH2PS_SAE,
777 // Masked version of above.
778 // SRC, RND, PASSTHRU, MASK
779 MCVTPS2PH,
780 MCVTPS2PH_SAE,
782 // Galois Field Arithmetic Instructions
783 GF2P8AFFINEINVQB,
784 GF2P8AFFINEQB,
785 GF2P8MULB,
787 // LWP insert record.
788 LWPINS,
790 // User level wait
791 UMWAIT,
792 TPAUSE,
794 // Enqueue Stores Instructions
795 ENQCMD,
796 ENQCMDS,
798 // For avx512-vp2intersect
799 VP2INTERSECT,
801 // User level interrupts - testui
802 TESTUI,
804 // Perform an FP80 add after changing precision control in FPCW.
805 FP80_ADD,
807 // Conditional compare instructions
808 CCMP,
809 CTEST,
811 /// X86 strict FP compare instructions.
812 FIRST_STRICTFP_OPCODE,
813 STRICT_FCMP = FIRST_STRICTFP_OPCODE,
814 STRICT_FCMPS,
816 // Vector packed double/float comparison.
817 STRICT_CMPP,
819 /// Vector comparison generating mask bits for fp and
820 /// integer signed and unsigned data types.
821 STRICT_CMPM,
823 // Vector float/double to signed/unsigned integer with truncation.
824 STRICT_CVTTP2SI,
825 STRICT_CVTTP2UI,
827 // Vector FP extend.
828 STRICT_VFPEXT,
830 // Vector FP round.
831 STRICT_VFPROUND,
833 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
834 // Also used by the legacy (V)ROUND intrinsics where we mask out the
835 // scaling part of the immediate.
836 STRICT_VRNDSCALE,
838 // Vector signed/unsigned integer to float/double.
839 STRICT_CVTSI2P,
840 STRICT_CVTUI2P,
842 // Strict FMA nodes.
843 STRICT_FNMADD,
844 STRICT_FMSUB,
845 STRICT_FNMSUB,
847 // Conversions between float and half-float.
848 STRICT_CVTPS2PH,
849 STRICT_CVTPH2PS,
851 // Perform an FP80 add after changing precision control in FPCW.
852 STRICT_FP80_ADD,
854 /// Floating point max and min.
855 STRICT_FMAX,
856 STRICT_FMIN,
857 LAST_STRICTFP_OPCODE = STRICT_FMIN,
859 // Compare and swap.
860 FIRST_MEMORY_OPCODE,
861 LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
862 LCMPXCHG8_DAG,
863 LCMPXCHG16_DAG,
864 LCMPXCHG16_SAVE_RBX_DAG,
866 /// LOCK-prefixed arithmetic read-modify-write instructions.
867 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
868 LADD,
869 LSUB,
870 LOR,
871 LXOR,
872 LAND,
873 LBTS,
874 LBTC,
875 LBTR,
876 LBTS_RM,
877 LBTC_RM,
878 LBTR_RM,
880 /// RAO arithmetic instructions.
881 /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
882 AADD,
883 AOR,
884 AXOR,
885 AAND,
887 // Load, scalar_to_vector, and zero extend.
888 VZEXT_LOAD,
890 // extract_vector_elt, store.
891 VEXTRACT_STORE,
893 // scalar broadcast from memory.
894 VBROADCAST_LOAD,
896 // subvector broadcast from memory.
897 SUBV_BROADCAST_LOAD,
899 // Store FP control word into i16 memory.
900 FNSTCW16m,
902 // Load FP control word from i16 memory.
903 FLDCW16m,
905 // Store x87 FPU environment into memory.
906 FNSTENVm,
908 // Load x87 FPU environment from memory.
909 FLDENVm,
911 // Custom handling for FP_TO_xINT_SAT
912 FP_TO_SINT_SAT,
913 FP_TO_UINT_SAT,
915 /// This instruction implements FP_TO_SINT with the
916 /// integer destination in memory and a FP reg source. This corresponds
917 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
918 /// has two inputs (token chain and address) and two outputs (int value
919 /// and token chain). Memory VT specifies the type to store to.
920 FP_TO_INT_IN_MEM,
922 /// This instruction implements SINT_TO_FP with the
923 /// integer source in memory and FP reg result. This corresponds to the
924 /// X86::FILD*m instructions. It has two inputs (token chain and address)
925 /// and two outputs (FP value and token chain). The integer source type is
926 /// specified by the memory VT.
927 FILD,
929 /// This instruction implements a fp->int store from FP stack
930 /// slots. This corresponds to the fist instruction. It takes a
931 /// chain operand, value to store, address, and glue. The memory VT
932 /// specifies the type to store as.
933 FIST,
935 /// This instruction implements an extending load to FP stack slots.
936 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
937 /// operand, and ptr to load from. The memory VT specifies the type to
938 /// load from.
939 FLD,
941 /// This instruction implements a truncating store from FP stack
942 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
943 /// chain operand, value to store, address, and glue. The memory VT
944 /// specifies the type to store as.
945 FST,
947 /// These instructions grab the address of the next argument
948 /// from a va_list. (reads and modifies the va_list in memory)
949 VAARG_64,
950 VAARG_X32,
952 // Vector truncating store with unsigned/signed saturation
953 VTRUNCSTOREUS,
954 VTRUNCSTORES,
955 // Vector truncating masked store with unsigned/signed saturation
956 VMTRUNCSTOREUS,
957 VMTRUNCSTORES,
959 // X86 specific gather and scatter
960 MGATHER,
961 MSCATTER,
963 // Key locker nodes that produce flags.
964 AESENC128KL,
965 AESDEC128KL,
966 AESENC256KL,
967 AESDEC256KL,
968 AESENCWIDE128KL,
969 AESDECWIDE128KL,
970 AESENCWIDE256KL,
971 AESDECWIDE256KL,
973 /// Compare and Add if Condition is Met. Compares the value in operand 2
974 /// with the value in memory at operand 1. If the condition in operand 4 is
975 /// met, adds value operand 3 to m32 and writes the new value to operand 1.
976 /// Operand 2 is always updated with the original value from operand 1.
977 CMPCCXADD,
979 // Save xmm argument registers to the stack, according to %al. An operator
980 // is needed so that this can be expanded with control flow.
981 VASTART_SAVE_XMM_REGS,
983 // Conditional load/store instructions
984 CLOAD,
985 CSTORE,
986 LAST_MEMORY_OPCODE = CSTORE,
987 };
988 } // end namespace X86ISD
990 namespace X86 {
991 /// The current rounding mode is represented in bits 11:10 of the FP
992 /// control word (FPCW). These values are the same as the corresponding
993 /// constants used for rounding modes in glibc.
994 enum RoundingMode {
995 rmToNearest = 0, // FE_TONEAREST
996 rmDownward = 1 << 10, // FE_DOWNWARD
997 rmUpward = 2 << 10, // FE_UPWARD
998 rmTowardZero = 3 << 10, // FE_TOWARDZERO
999 rmMask = 3 << 10 // Bit mask selecting rounding mode
1000 };
1001 } // end namespace X86
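// Illustrative use (a sketch based only on the enum values above): given a
// 16-bit x87 control word CW, the active mode can be tested with
// (CW & X86::rmMask) == X86::rmTowardZero, and similarly for the other modes.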
1003 /// Define some predicates that are used for node matching.
1004 namespace X86 {
1005 /// Returns true if Elt is a constant zero or floating point constant +0.0.
1006 bool isZeroNode(SDValue Elt);
1008 /// Returns true if the given offset can
1009 /// fit into the displacement field of the instruction.
1010 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
1011 bool hasSymbolicDisplacement);
1013 /// Determines whether the callee is required to pop its
1014 /// own arguments. Callee pop is necessary to support tail calls.
1015 bool isCalleePop(CallingConv::ID CallingConv,
1016 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
1018 /// If Op is a constant whose elements are all the same constant or
1019 /// undefined, return true and return the constant value in \p SplatVal.
1020 /// If we have undef bits that don't cover an entire element, we treat these
1021 /// as zero if AllowPartialUndefs is set, else we fail and return false.
1022 bool isConstantSplat(SDValue Op, APInt &SplatVal,
1023 bool AllowPartialUndefs = true);
1025 /// Check if Op is a load operation that could be folded into some other x86
1026 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
1027 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
1028 bool AssumeSingleUse = false);
1030 /// Check if Op is a load operation that could be folded into a vector splat
1031 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
1032 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
1033 const X86Subtarget &Subtarget,
1034 bool AssumeSingleUse = false);
1036 /// Check if Op is a value that could be used to fold a store into some
1037 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
1038 bool mayFoldIntoStore(SDValue Op);
1040 /// Check if Op is an operation that could be folded into a zero extend x86
1041 /// instruction.
1042 bool mayFoldIntoZeroExtend(SDValue Op);
1044 /// True if the target supports the extended frame for async Swift
1045 /// functions.
1046 bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
1047 const MachineFunction &MF);
1048 } // end namespace X86
1050 //===--------------------------------------------------------------------===//
1051 // X86 Implementation of the TargetLowering interface
1052 class X86TargetLowering final : public TargetLowering {
1053 public:
1054 explicit X86TargetLowering(const X86TargetMachine &TM,
1055 const X86Subtarget &STI);
1057 unsigned getJumpTableEncoding() const override;
1058 bool useSoftFloat() const override;
1060 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
1061 ArgListTy &Args) const override;
1063 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
1064 return MVT::i8;
1065 }
1067 const MCExpr *
1068 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1069 const MachineBasicBlock *MBB, unsigned uid,
1070 MCContext &Ctx) const override;
1072 /// Returns relocation base for the given PIC jumptable.
1073 SDValue getPICJumpTableRelocBase(SDValue Table,
1074 SelectionDAG &DAG) const override;
1075 const MCExpr *
1076 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1077 unsigned JTI, MCContext &Ctx) const override;
1079 /// Return the desired alignment for ByVal aggregate
1080 /// function arguments in the caller parameter area. For X86, aggregates
1081 /// that contain SSE vectors are placed at 16-byte boundaries while the
1082 /// rest are at 4-byte boundaries.
1083 Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
1085 EVT getOptimalMemOpType(const MemOp &Op,
1086 const AttributeList &FuncAttributes) const override;
1088 /// Returns true if it's safe to use load / store of the
1089 /// specified type to expand memcpy / memset inline. This is mostly true
1090 /// for all types except for some special cases. For example, on X86
1091 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1092 /// also does type conversion. Note the specified type doesn't have to be
1093 /// legal as the hook is used before type legalization.
1094 bool isSafeMemOpType(MVT VT) const override;
1096 bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1098 /// Returns true if the target allows unaligned memory accesses of the
1099 /// specified type. Returns whether it is "fast" in the last argument.
1100 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1101 MachineMemOperand::Flags Flags,
1102 unsigned *Fast) const override;
1104 /// This function returns true if the memory access is aligned or if the
1105 /// target allows this specific unaligned memory access. If the access is
1106 /// allowed, the optional final parameter returns a relative speed of the
1107 /// access (as defined by the target).
1108 bool allowsMemoryAccess(
1109 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1110 Align Alignment,
1111 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1112 unsigned *Fast = nullptr) const override;
1114 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1115 const MachineMemOperand &MMO,
1116 unsigned *Fast) const {
1117 return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1118 MMO.getAlign(), MMO.getFlags(), Fast);
1119 }
1121 /// Provide custom lowering hooks for some operations.
1123 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1125 /// Replace the results of a node with an illegal result
1126 /// type with new values built out of custom code.
1128 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1129 SelectionDAG &DAG) const override;
1131 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1133 bool preferABDSToABSWithNSW(EVT VT) const override;
1135 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1136 EVT ExtVT) const override;
1138 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1139 EVT VT) const override;
1141 /// Return true if the target has native support for
1142 /// the specified value type and it is 'desirable' to use the type for the
1143 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1144 /// instruction encodings are longer and some i16 instructions are slow.
1145 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1147 /// Return true if the target has native support for the
1148 /// specified value type and it is 'desirable' to use the type. e.g. On x86
1149 /// i16 is legal, but undesirable since i16 instruction encodings are longer
1150 /// and some i16 instructions are slow.
1151 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1153 /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
1154 /// an integer, None otherwise.
1155 TargetLowering::AndOrSETCCFoldKind
1156 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1157 const SDNode *SETCC0,
1158 const SDNode *SETCC1) const override;
1160 /// Return the newly negated expression if the cost is not expensive, and
1161 /// set \p Cost to indicate whether it is cheaper or neutral to
1162 /// do the negation.
1163 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1164 bool LegalOperations, bool ForCodeSize,
1165 NegatibleCost &Cost,
1166 unsigned Depth) const override;
1168 MachineBasicBlock *
1169 EmitInstrWithCustomInserter(MachineInstr &MI,
1170 MachineBasicBlock *MBB) const override;
1172 /// This method returns the name of a target specific DAG node.
1173 const char *getTargetNodeName(unsigned Opcode) const override;
1175 /// Do not merge vector stores after legalization because that may conflict
1176 /// with x86-specific store splitting optimizations.
1177 bool mergeStoresAfterLegalization(EVT MemVT) const override {
1178 return !MemVT.isVector();
1179 }
1181 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1182 const MachineFunction &MF) const override;
1184 bool isCheapToSpeculateCttz(Type *Ty) const override;
1186 bool isCheapToSpeculateCtlz(Type *Ty) const override;
1188 bool isCtlzFast() const override;
1190 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1191 // If the pair to store is a mixture of float and int values, we will
1192 // save two bitwise instructions and one float-to-int instruction and
1193 // add one store instruction. There is potentially a more
1194 // significant benefit because it avoids the float->int domain switch
1195 // for the input value. So it is more likely a win.
1196 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1197 (LTy.isInteger() && HTy.isFloatingPoint()))
1198 return true;
1199 // If the pair only contains int values, we will save two bitwise
1200 // instructions and add one store instruction (costing one more
1201 // store buffer). Since the benefit is less clear, we leave
1202 // such pairs out until we get a test case proving it is a win.
1203 return false;
1204 }
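// Illustrative examples (restating the logic above): an {f32, i32} pair
// returns true, since merging avoids the float->int domain switch; an
// {i32, i32} pair returns false.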
1206 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1208 bool hasAndNotCompare(SDValue Y) const override;
1210 bool hasAndNot(SDValue Y) const override;
1212 bool hasBitTest(SDValue X, SDValue Y) const override;
1214 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1215 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1216 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1217 SelectionDAG &DAG) const override;
1219 unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1220 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1221 const APInt &ShiftOrRotateAmt,
1222 const std::optional<APInt> &AndMask) const override;
1224 bool preferScalarizeSplat(SDNode *N) const override;
1226 CondMergingParams
1227 getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1228 const Value *Rhs) const override;
1230 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1231 CombineLevel Level) const override;
1233 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1235 bool
1236 shouldTransformSignedTruncationCheck(EVT XVT,
1237 unsigned KeptBits) const override {
1238 // For vectors, we don't have a preference.
1239 if (XVT.isVector())
1240 return false;
1242 auto VTIsOk = [](EVT VT) -> bool {
1243 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1244 VT == MVT::i64;
1245 };
1247 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS supports.
1248 // XVT will be larger than KeptBitsVT.
1249 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1250 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1251 }
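// Example (illustrative): for XVT = i32 and KeptBits = 8 this returns true
// (both i32 and i8 pass VTIsOk), while any vector XVT returns false.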
1253 ShiftLegalizationStrategy
1254 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1255 unsigned ExpansionFactor) const override;
1257 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1259 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1260 // Converting to sat variants holds little benefit on X86 as we will just
1261 // need to saturate the value back using fp arithmetic.
1262 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1263 }
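// Example (illustrative): ISD::FP_TO_SINT_SAT is allowed when the operation
// is legal or custom for VT, whereas ISD::FP_TO_UINT_SAT is always rejected
// by the check above.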
1265 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1266 return VT.isScalarInteger();
1267 }
1269 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1270 MVT hasFastEqualityCompare(unsigned NumBits) const override;
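// Illustrative lowering (a sketch, not generated output): a 16-byte memcmp
// equality test can become PCMPEQB %xmm1, %xmm0; PMOVMSKB %xmm0, %eax;
// CMP $0xffff, %eax, avoiding a chain of scalar compares.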
1272 /// Return the value type to use for ISD::SETCC.
1273 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1274 EVT VT) const override;
1276 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1277 const APInt &DemandedElts,
1278 TargetLoweringOpt &TLO) const override;
1280 /// Determine which of the bits specified in Mask are known to be either
1281 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1282 void computeKnownBitsForTargetNode(const SDValue Op,
1283 KnownBits &Known,
1284 const APInt &DemandedElts,
1285 const SelectionDAG &DAG,
1286 unsigned Depth = 0) const override;
1288 /// Determine the number of bits in the operation that are sign bits.
1289 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1290 const APInt &DemandedElts,
1291 const SelectionDAG &DAG,
1292 unsigned Depth) const override;
1294 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1295 const APInt &DemandedElts,
1296 APInt &KnownUndef,
1297 APInt &KnownZero,
1298 TargetLoweringOpt &TLO,
1299 unsigned Depth) const override;
1301 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1302 const APInt &DemandedElts,
1303 unsigned MaskIndex,
1304 TargetLoweringOpt &TLO,
1305 unsigned Depth) const;
1307 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1308 const APInt &DemandedBits,
1309 const APInt &DemandedElts,
1310 KnownBits &Known,
1311 TargetLoweringOpt &TLO,
1312 unsigned Depth) const override;
1314 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1315 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1316 SelectionDAG &DAG, unsigned Depth) const override;
1318 bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1319 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1320 bool PoisonOnly, unsigned Depth) const override;
1322 bool canCreateUndefOrPoisonForTargetNode(
1323 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1324 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1326 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1327 APInt &UndefElts, const SelectionDAG &DAG,
1328 unsigned Depth) const override;
1330 bool isTargetCanonicalConstantNode(SDValue Op) const override {
1331 // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1332 // vector from memory.
1333 while (Op.getOpcode() == ISD::BITCAST ||
1334 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1335 (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1336 Op.getOperand(0).isUndef()))
1337 Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1339 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1340 TargetLowering::isTargetCanonicalConstantNode(Op);
1341 }
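// Example (illustrative): a pattern like
//   (bitcast (extract_subvector (X86ISD::VBROADCAST_LOAD ...), 0))
// is peeled back to the broadcast load and treated as canonical.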
1343 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1345 SDValue unwrapAddress(SDValue N) const override;
1347 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1349 bool ExpandInlineAsm(CallInst *CI) const override;
1351 ConstraintType getConstraintType(StringRef Constraint) const override;
1353 /// Examine constraint string and operand type and determine a weight value.
1354 /// The operand object must already have been set up with the operand type.
1355 ConstraintWeight
1356 getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1357 const char *Constraint) const override;
1359 const char *LowerXConstraint(EVT ConstraintVT) const override;
1361 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1362 /// add anything to Ops. If hasMemory is true it means one of the asm
1363 /// constraints of the inline asm instruction being processed is 'm'.
1364 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1365 std::vector<SDValue> &Ops,
1366 SelectionDAG &DAG) const override;
1368 InlineAsm::ConstraintCode
1369 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1370 if (ConstraintCode == "v")
1371 return InlineAsm::ConstraintCode::v;
1372 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1373 }
1375 /// Handle Lowering flag assembly outputs.
1376 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1377 const SDLoc &DL,
1378 const AsmOperandInfo &Constraint,
1379 SelectionDAG &DAG) const override;
1381 /// Given a physical register constraint
1382 /// (e.g. {edx}), return the register number and the register class for the
1383 /// register. This should only be used for C_Register constraints. On
1384 /// error, this returns a register number of 0.
1385 std::pair<unsigned, const TargetRegisterClass *>
1386 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1387 StringRef Constraint, MVT VT) const override;
1389 /// Return true if the addressing mode represented
1390 /// by AM is legal for this target, for a load/store of the specified type.
1391 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1392 Type *Ty, unsigned AS,
1393 Instruction *I = nullptr) const override;
1395 bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1397 /// Return true if the specified immediate is a legal
1398 /// icmp immediate, that is, the target has icmp instructions which can
1399 /// compare a register against the immediate without having to materialize
1400 /// the immediate into a register.
1401 bool isLegalICmpImmediate(int64_t Imm) const override;
1403 /// Return true if the specified immediate is a legal
1404 /// add immediate, that is, the target has add instructions which can
1405 /// add a register and the immediate without having to materialize
1406 /// the immediate into a register.
1407 bool isLegalAddImmediate(int64_t Imm) const override;
1409 bool isLegalStoreImmediate(int64_t Imm) const override;
1411 /// Add x86-specific opcodes to the default list.
1412 bool isBinOp(unsigned Opcode) const override;
1414 /// Returns true if the opcode is a commutative binary operation.
1415 bool isCommutativeBinOp(unsigned Opcode) const override;
1417 /// Return true if it's free to truncate a value of
1418 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1419 /// register EAX to i16 by referencing its sub-register AX.
1420 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1421 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1423 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1425 /// Return true if any actual instruction that defines a
1426 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1427 /// register. This does not necessarily include registers defined in
1428 /// unknown ways, such as incoming arguments, or copies from unknown
1429 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1430 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1431 /// all instructions that define 32-bit values implicit zero-extend the
1432 /// result out to 64 bits.
1433 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1434 bool isZExtFree(EVT VT1, EVT VT2) const override;
1435 bool isZExtFree(SDValue Val, EVT VT2) const override;
1437 bool shouldConvertPhiType(Type *From, Type *To) const override;
1439 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1440 /// extend node) is profitable.
1441 bool isVectorLoadExtDesirable(SDValue) const override;
1443 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1444 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1445 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1446 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1447 EVT VT) const override;
1449 /// Return true if it's profitable to narrow operations of type SrcVT to
1450 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1451 /// from i32 to i16.
1452 bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;
1454 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1455 EVT VT) const override;
1457 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1458 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1459 /// true and stores the intrinsic information into the IntrinsicInfo that was
1460 /// passed to the function.
1461 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1462 MachineFunction &MF,
1463 unsigned Intrinsic) const override;
1465 /// Returns true if the target can instruction select the
1466 /// specified FP immediate natively. If false, the legalizer will
1467 /// materialize the FP immediate as a load from a constant pool.
1468 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1469 bool ForCodeSize) const override;
1471 /// Targets can use this to indicate that they only support *some*
1472 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1473 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1474 /// be legal.
1475 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1477 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1478 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1479 /// constant pool entry.
1480 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1482 /// Returns true if lowering to a jump table is allowed.
1483 bool areJTsAllowed(const Function *Fn) const override;
1485 MVT getPreferredSwitchConditionType(LLVMContext &Context,
1486 EVT ConditionVT) const override;
1488 /// If true, then instruction selection should
1489 /// seek to shrink the FP constant of the specified type to a smaller type
1490 /// in order to save space and / or reduce runtime.
1491 bool ShouldShrinkFPConstant(EVT VT) const override;
1493 /// Return true if we believe it is correct and profitable to reduce the
1494 /// load node to a smaller type.
1495 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1496 EVT NewVT) const override;
1498 /// Return true if the specified scalar FP type is computed in an SSE
1499 /// register, not on the X87 floating point stack.
1500 bool isScalarFPTypeInSSEReg(EVT VT) const;
1502 /// Returns true if it is beneficial to convert a load of a constant
1503 /// to just the constant itself.
1504 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1505 Type *Ty) const override;
1507 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1509 bool convertSelectOfConstantsToMath(EVT VT) const override;
1511 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1512 SDValue C) const override;
1514 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1515 /// with this index.
1516 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1517 unsigned Index) const override;
1519 /// Scalar ops always have equal or better analysis/performance/power than
1520 /// the vector equivalent, so this always makes sense if the scalar op is
1521 /// supported.
1522 bool shouldScalarizeBinop(SDValue) const override;
1524 /// Extract of a scalar FP value from index 0 of a vector is free.
1525 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1526 EVT EltVT = VT.getScalarType();
1527 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1528 }
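// Example (illustrative): extracting element 0 from a v4f32 is treated as
// free (the scalar already lives in the low lane), while extracting
// element 1, or element 0 of an integer vector, is not.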
1530 /// Overflow nodes should get combined/lowered to optimal instructions
1531 /// (they should allow eliminating explicit compares by getting flags from
1532 /// math ops).
1533 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1534 bool MathUsed) const override;
1536 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1537 unsigned AddrSpace) const override {
1538 // If we can replace more than 2 scalar stores, there will be a reduction
1539 // in instructions even after we add a vector constant load.
1540 return IsZero || NumElem > 2;
1541 }
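// Example (illustrative): storing a 4-element non-zero vector constant is
// considered cheap (4 > 2), a 2-element non-zero one is not, and an
// all-zero constant is always cheap.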
1543 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1544 const SelectionDAG &DAG,
1545 const MachineMemOperand &MMO) const override;
1547 Register getRegisterByName(const char* RegName, LLT VT,
1548 const MachineFunction &MF) const override;
1550 /// If a physical register, this returns the register that receives the
1551 /// exception address on entry to an EH pad.
1552 Register
1553 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1555 /// If a physical register, this returns the register that receives the
1556 /// exception typeid on entry to a landing pad.
1557 Register
1558 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1560 bool needsFixedCatchObjects() const override;
1562 /// This method returns a target specific FastISel object,
1563 /// or null if the target does not support "fast" ISel.
1564 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1565 const TargetLibraryInfo *libInfo) const override;
1567 /// If the target has a standard location for the stack protector cookie,
1568 /// returns the address of that location. Otherwise, returns nullptr.
1569 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1571 bool useLoadStackGuardNode(const Module &M) const override;
1572 bool useStackGuardXorFP() const override;
1573 void insertSSPDeclarations(Module &M) const override;
1574 Value *getSDagStackGuard(const Module &M) const override;
1575 Function *getSSPStackGuardCheck(const Module &M) const override;
1576 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1577 const SDLoc &DL) const override;
1580 /// Return true if the target stores SafeStack pointer at a fixed offset in
1581 /// some non-standard address space, and populates the address space and
1582 /// offset as appropriate.
1583 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1585 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1586 SDValue Chain, SDValue Pointer,
1587 MachinePointerInfo PtrInfo,
1588 Align Alignment,
1589 SelectionDAG &DAG) const;
1591 /// Customize the preferred legalization strategy for certain types.
1592 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1594 bool softPromoteHalfType() const override { return true; }
1596 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1597 EVT VT) const override;
1599 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1600 CallingConv::ID CC,
1601 EVT VT) const override;
1603 unsigned getVectorTypeBreakdownForCallingConv(
1604 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1605 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1607 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1609 bool supportSwiftError() const override;
1611 bool supportKCFIBundles() const override { return true; }
1613 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1614 MachineBasicBlock::instr_iterator &MBBI,
1615 const TargetInstrInfo *TII) const override;
1617 bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1618 bool hasInlineStackProbe(const MachineFunction &MF) const override;
1619 StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1621 unsigned getStackProbeSize(const MachineFunction &MF) const;
1623 bool hasVectorBlend() const override { return true; }
1625 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1627 bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1628 unsigned OpNo) const override;
1630 SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1631 MachineMemOperand *MMO, SDValue &NewLoad,
1632 SDValue Ptr, SDValue PassThru,
1633 SDValue Mask) const override;
1634 SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1635 MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1636 SDValue Mask) const override;
1638 /// Lower interleaved load(s) into target specific
1639 /// instructions/intrinsics.
1640 bool lowerInterleavedLoad(LoadInst *LI,
1641 ArrayRef<ShuffleVectorInst *> Shuffles,
1642 ArrayRef<unsigned> Indices,
1643 unsigned Factor) const override;
1645 /// Lower interleaved store(s) into target specific
1646 /// instructions/intrinsics.
1647 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1648 unsigned Factor) const override;
1650 SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1651 int JTI, SelectionDAG &DAG) const override;
1653 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1655 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1656 if (VT == MVT::f80)
1657 return EVT::getIntegerVT(Context, 96);
1658 return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1659 }
1661 protected:
1662 std::pair<const TargetRegisterClass *, uint8_t>
1663 findRepresentativeClass(const TargetRegisterInfo *TRI,
1664 MVT VT) const override;
1666 private:
1667 /// Keep a reference to the X86Subtarget around so that we can
1668 /// make the right decision when generating code for different targets.
1669 const X86Subtarget &Subtarget;
1671 /// A list of legal FP immediates.
1672 std::vector<APFloat> LegalFPImmediates;
1674 /// Indicate that this x86 target can instruction
1675 /// select the specified FP immediate natively.
1676 void addLegalFPImmediate(const APFloat& Imm) {
1677 LegalFPImmediates.push_back(Imm);
1678 }
1680 SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1681 CallingConv::ID CallConv, bool isVarArg,
1682 const SmallVectorImpl<ISD::InputArg> &Ins,
1683 const SDLoc &dl, SelectionDAG &DAG,
1684 SmallVectorImpl<SDValue> &InVals,
1685 uint32_t *RegMask) const;
1686 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1687 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1688 const SDLoc &dl, SelectionDAG &DAG,
1689 const CCValAssign &VA, MachineFrameInfo &MFI,
1690 unsigned i) const;
1691 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1692 const SDLoc &dl, SelectionDAG &DAG,
1693 const CCValAssign &VA,
1694 ISD::ArgFlagsTy Flags, bool isByval) const;
1696 // Call lowering helpers.
1698 /// Check whether the call is eligible for tail call optimization. Targets
1699 /// that want to do tail call optimization should implement this function.
1700 bool IsEligibleForTailCallOptimization(
1701 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1702 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1703 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1704 SDValue Chain, bool IsTailCall,
1705 bool Is64Bit, int FPDiff,
1706 const SDLoc &dl) const;
1708 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1709 SelectionDAG &DAG) const;
1711 unsigned getAddressSpace() const;
1713 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1714 SDValue &Chain) const;
1715 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1717 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1718 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1719 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1720 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1722 unsigned getGlobalWrapperKind(const GlobalValue *GV,
1723 const unsigned char OpFlags) const;
1724 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1725 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1726 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1727 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1728 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1730 /// Creates target global address or external symbol nodes for calls or
1731 /// other uses.
1732 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1733 bool ForCall) const;
1735 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1736 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1737 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1738 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1739 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1740 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1741 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1742 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1743 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1744 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1745 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1746 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1747 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1748 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1749 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1750 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1751 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1752 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1753 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1754 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1755 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1756 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1757 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1758 SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1759 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1760 SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1761 SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1762 SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
1763 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1764 SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1765 SDValue &Chain) const;
1766 SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1767 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1768 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1769 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1770 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1771 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1772 SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1774 SDValue
1775 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1776 const SmallVectorImpl<ISD::InputArg> &Ins,
1777 const SDLoc &dl, SelectionDAG &DAG,
1778 SmallVectorImpl<SDValue> &InVals) const override;
1779 SDValue LowerCall(CallLoweringInfo &CLI,
1780 SmallVectorImpl<SDValue> &InVals) const override;
1782 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1783 const SmallVectorImpl<ISD::OutputArg> &Outs,
1784 const SmallVectorImpl<SDValue> &OutVals,
1785 const SDLoc &dl, SelectionDAG &DAG) const override;
1787 bool supportSplitCSR(MachineFunction *MF) const override {
1788 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1789 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1790 }
1791 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1792 void insertCopiesSplitCSR(
1793 MachineBasicBlock *Entry,
1794 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1796 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1798 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1800 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1801 ISD::NodeType ExtendKind) const override;
1803 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1804 bool isVarArg,
1805 const SmallVectorImpl<ISD::OutputArg> &Outs,
1806 LLVMContext &Context) const override;
1808 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1809 ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1811 TargetLoweringBase::AtomicExpansionKind
1812 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1813 TargetLoweringBase::AtomicExpansionKind
1814 shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1815 TargetLoweringBase::AtomicExpansionKind
1816 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1817 TargetLoweringBase::AtomicExpansionKind
1818 shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1819 void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1820 void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
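// A brief sketch of the two rewrites declared above (illustrating the general
// lock-prefixed patterns; assumed background, not restated from this header):
//   * bit test: `%old = atomicrmw or ptr %p, i32 8 seq_cst` whose old value is
//     only used as `and i32 %old, 8` can be emitted as a single `lock bts`,
//     reading the prior bit from CF instead of looping on cmpxchg.
//   * cmp+arith: `%old = atomicrmw sub ptr %p, i32 1 seq_cst` followed only by
//     `icmp eq i32 %old, 1` can become `lock sub` plus a read of ZF, since ZF
//     is set exactly when the decremented value reaches zero.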
1822 LoadInst *
1823 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
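// Hedged example of an "idempotent" RMW (illustrative only): an atomicrmw that
// cannot change memory, such as
//   %v = atomicrmw or ptr %p, i32 0 seq_cst
// may be replaced by an appropriate fence followed by a plain atomic load; the
// hook returns that replacement load when the transform applies, else null.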
1825 bool needsCmpXchgNb(Type *MemType) const;
1827 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1828 MachineBasicBlock *DispatchBB, int FI) const;
1830 // Utility function to emit the low-level va_arg code for X86-64.
1831 MachineBasicBlock *
1832 EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
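// Background for the custom inserter above (System V x86-64 ABI va_list
// layout; general ABI knowledge, not restated from this file):
//   struct {
//     unsigned gp_offset;       // next general-purpose register slot
//     unsigned fp_offset;       // next XMM register slot
//     void *overflow_arg_area;  // next stack-passed argument
//     void *reg_save_area;      // spilled incoming register arguments
//   };
// The emitted control flow picks between the register save area and the
// overflow area and bumps the matching field.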
1834 /// Utility function to lower a cascaded pair of CMOV (select) pseudo-instructions.
1835 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1836 MachineInstr &MI2,
1837 MachineBasicBlock *BB) const;
1839 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1840 MachineBasicBlock *BB) const;
1842 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1843 MachineBasicBlock *BB) const;
1845 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1846 MachineBasicBlock *BB) const;
1848 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1849 MachineBasicBlock *BB) const;
1851 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1852 MachineBasicBlock *BB) const;
1854 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1855 MachineBasicBlock *BB) const;
1857 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1858 MachineBasicBlock *MBB) const;
1860 void emitSetJmpShadowStackFix(MachineInstr &MI,
1861 MachineBasicBlock *MBB) const;
1863 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1864 MachineBasicBlock *MBB) const;
1866 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1867 MachineBasicBlock *MBB) const;
1869 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1870 MachineBasicBlock *MBB) const;
1872 MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
1873 MachineBasicBlock *MBB) const;
1875 /// Emit flags for the given setcc condition and operands. Also returns the
1876 /// corresponding X86 condition code constant in X86CC.
1877 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1878 const SDLoc &dl, SelectionDAG &DAG,
1879 SDValue &X86CC) const;
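// Sketch of a typical call site (hypothetical values, for illustration only):
//   SDValue X86CC;
//   SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETLT, dl, DAG, X86CC);
//   SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
// i.e. the returned node feeds the EFLAGS operand and X86CC the condition-code
// operand of an X86ISD::SETCC (or CMOV/BRCOND) node.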
1881 bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1882 SDValue IntPow2) const override;
1884 /// Check if replacement of SQRT with RSQRT should be disabled.
1885 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1887 /// Use rsqrt* to speed up sqrt calculations.
1888 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1889 int &RefinementSteps, bool &UseOneConstNR,
1890 bool Reciprocal) const override;
1892 /// Use rcp* to speed up fdiv calculations.
1893 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1894 int &RefinementSteps) const override;
1896 /// Reassociate floating point divisions into multiply by reciprocal.
1897 unsigned combineRepeatedFPDivisors() const override;
1899 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1900 SmallVectorImpl<SDNode *> &Created) const override;
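// Worked example of the generic sdiv-by-power-of-two expansion this hook may
// override (illustrative arithmetic only, not necessarily the exact X86
// sequence): for i32 `x / 4`
//   t   = x >>s 31        ; all-ones if x is negative, else 0
//   adj = t >>u 30        ; 3 if x is negative, else 0
//   res = (x + adj) >>s 2 ; rounds toward zero, as sdiv requires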
1902 SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1903 SDValue V2) const;
1904 };
1906 namespace X86 {
1907 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1908 const TargetLibraryInfo *libInfo);
1909 } // end namespace X86
1911 // X86-specific Gather/Scatter nodes.
1912 // The class has the same order of operands as MaskedGatherScatterSDNode for
1913 // convenience.
1914 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1915 public:
1916 // This is intended as a utility and should never be directly created.
1917 X86MaskedGatherScatterSDNode() = delete;
1918 ~X86MaskedGatherScatterSDNode() = delete;
1920 const SDValue &getBasePtr() const { return getOperand(3); }
1921 const SDValue &getIndex() const { return getOperand(4); }
1922 const SDValue &getMask() const { return getOperand(2); }
1923 const SDValue &getScale() const { return getOperand(5); }
1925 static bool classof(const SDNode *N) {
1926 return N->getOpcode() == X86ISD::MGATHER ||
1927 N->getOpcode() == X86ISD::MSCATTER;
1928 }
1929 };
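// Operand layout implied by the accessors above and the subclasses below
// (a reading aid under that assumption, not a normative statement):
//   gather:  (Chain, PassThru, Mask, BasePtr, Index, Scale)
//   scatter: (Chain, Value,    Mask, BasePtr, Index, Scale)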
1931 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1932 public:
1933 const SDValue &getPassThru() const { return getOperand(1); }
1935 static bool classof(const SDNode *N) {
1936 return N->getOpcode() == X86ISD::MGATHER;
1937 }
1938 };
1940 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1941 public:
1942 const SDValue &getValue() const { return getOperand(1); }
1944 static bool classof(const SDNode *N) {
1945 return N->getOpcode() == X86ISD::MSCATTER;
1946 }
1947 };
1949 /// Generate unpacklo/unpackhi shuffle mask.
1950 void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1951 bool Unary);
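// Example masks for a v4i32 type (illustrative, mirroring the unpcklps /
// unpckhps interleave; indices >= 4 select from the second operand):
//   Lo, binary: <0, 4, 1, 5>    Hi, binary: <2, 6, 3, 7>
//   Lo, unary:  <0, 0, 1, 1>    Hi, unary:  <2, 2, 3, 3>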
1953 /// Similar to a unary unpacklo/unpackhi, but without the 128-bit lane
1954 /// limitation imposed by AVX. Example:
1955 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1956 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1957 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
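// Possible caller sketch (hypothetical, based only on the signature and the
// documented output above):
//   SmallVector<int, 8> Mask;
//   createSplat2ShuffleMask(MVT::v8i16, Mask, /*Lo=*/true);
//   // Mask now holds {0, 0, 1, 1, 2, 2, 3, 3}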
1959 } // end namespace llvm
1961 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H