//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {

class InstructionCost;
struct RISCVRegisterInfo;
class RVVArgDispatcher;

namespace RISCVISD {

enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
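  /// For example (illustrative, restating the operand layout above): with
  /// operands (X, Y, SETLT, T, F) the node computes (X s< Y) ? T : F, i.e.
  /// the compare and the select are fused into a single node.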
  // Add the Lo 12 bits from an address. Selected to ADDI.
  // Get the Hi 20 bits from an address. Selected to LUI.
  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.

  // Multiply high for signed x unsigned.
  // Represents (ADD (SHL a, b), c) with the arguments appearing in the order
  // a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba
  // or addsl with XTheadBa.
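  // Illustrative selection sketch (assuming Zba is available): with a constant
  // shift amount of 2, the node representing (ADD (SHL a, 2), c) can be
  // selected to a single `sh2add rd, ra, rc`, folding the shift into the add.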
  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.

  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.

  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.

  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of the FMV.H.X.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
  // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.

  // Rounds an FP value to its corresponding integer in the same FP format.
  // First operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  // Floating point fmax and fmin matching the RISC-V instruction semantics.

  // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
  // It takes a chain operand and another two target constant operands (the
  // CSR numbers of the low and high parts of the counter).

  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // the passthru operand.
  VMV_V_V_VL = FIRST_VL_VECTOR_OP,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // the passthru operand.
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // the passthru operand.
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is the passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
  // Truncates an RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the XLenVT
  // index (either constant or non-constant), the fourth is the mask, the fifth
  // is the VL and the sixth is the policy.
  // Matches the semantics of vslide1up/vslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is the
  // XLenVT scalar value. The fourth and fifth operands are the mask and VL
  // operands.
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is a
  // scalar value whose type matches the element type of the vectors. The
  // fourth and fifth operands are the mask and VL operands.
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  // Matches the semantics of the vfncvt.rod function (Convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions but
  // similarly it reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_SEQ_FADD_VL,
  // Vector binary ops with a merge as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.

  // Averaging adds of unsigned integers.
  // Rounding averaging adds of unsigned integers.

  // Vector unary ops with a mask as a second operand and VL as a third operand.
  FCOPYSIGN_VL, // Has a merge operand
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.

  // Widening instructions with a merge value as a third operand, a mask as a
  // fourth operand, and VL as a fifth operand.

  // Widening ternary operations with a mask as the fourth operand and VL as the
  // fifth operand.

  // Narrowing logical shift right.
  // Operands are (source, shift, passthru, mask, vl)

  // Vector compare producing a mask. Fourth operand is the input mask. Fifth
  // operand is VL.

  // General vmerge node with mask, true, false, passthru, and vl operands.
  // Tail agnostic vselect can be implemented by setting passthru to undef.

  // Mask binary operators.

  // Set mask vector to all zeros or ones.

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).

  // Vector sign/zero extend with additional mask & VL operands.

  // vcpop.m with additional mask and VL operands.

  // vfirst.m with additional mask and VL operands.
  LAST_VL_VECTOR_OP = VFIRST_VL,
  // Reads the value of a CSR.
  // The first operand is a chain pointer. The second specifies the address of
  // the required CSR. Two results are produced, the read value and the new
  // chain pointer.
  // Writes a value to a CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. The result is the
  // new chain pointer.
  // Reads and writes the value of a CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. Two results are
  // produced, the value read before the modification and the new chain
  // pointer.

  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
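  // Illustrative lowering sketch (assuming Zicond): a scalar integer select
  //   (select cond, a, b)
  // can be expanded to (OR (CZERO_NEZ b, cond), (CZERO_EQZ a, cond)), since
  // exactly one of the two operands is zeroed out depending on cond.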

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_VFROUND_NOEXCEPT_VL,
  LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
  // WARNING: Do not add anything at the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be treated as target memory ops!
  TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
};
} // namespace RISCVISD
class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  const RISCVSubtarget &getSubtarget() const { return Subtarget; }
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
  bool signExtendConstant(const ConstantInt *CI) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue Y) const override;
  bool hasBitTest(SDValue X, SDValue Y) const override;
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  /// Return true if the (vector) instruction I will be lowered to an
  /// instruction with a scalar splat operand for the given Operand number.
  bool canSplatOperand(Instruction *I, int Operand) const;
  /// Return true if a vector instruction will lower to a target instruction
  /// able to splat the given operand.
  bool canSplatOperand(unsigned Opcode, int Operand) const;
  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldScalarizeBinop(SDValue VecOp) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool softPromoteHalfType() const override { return true; }
  /// Return the register type for a given MVT, ensuring vectors are treated
  /// as a series of gpr sized integers.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  /// Return the number of registers for a given MVT, ensuring vectors are
  /// treated as a series of gpr sized integers.
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;
  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for input value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;

    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is more blurred we leave such a pair
    // out until we get testcase to prove it is a win.
    return false;
  }

  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;
  /// Return the cost of LMUL for linear operations.
  InstructionCost getLMULCost(MVT VT) const;

  InstructionCost getVRGatherVVCost(MVT VT) const;
  InstructionCost getVRGatherVICost(MVT VT) const;
  InstructionCost getVSlideVXCost(MVT VT) const;
  InstructionCost getVSlideVICost(MVT VT) const;
  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;
  bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
  // This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const override;

  bool
  areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                      const MemSDNode &NodeY) const override;
  ConstraintType getConstraintType(StringRef Constraint) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;
  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    if (VT == MVT::i8 || VT == MVT::i16)
      return false;

    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
  }
  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace 4 or more scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem >= 4;
  }
  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

  bool isCtpopFast(EVT VT) const override;

  unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;

  bool preferZeroCompareBranch() const override { return true; }
  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return isa<LoadInst>(I) || isa<StoreInst>(I);
  }
  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }

  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override;
  TargetLowering::ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override {
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return ShiftLegalizationStrategy::LowerToLibcall;
    return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
                                                              ExpansionFactor);
  }

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;
  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
  bool shouldExtendTypeInLibCall(EVT Type) const override;
  bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;

  /// Returns the register with the specified architectural or ABI name. This
  /// method is necessary to lower the llvm.read_register.* and
  /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
  /// with the clang -ffixed-xX flag for access to be allowed.
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;
  // Lower incoming arguments, copy physregs into vregs
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
                                      Value *AlignedAddr, Value *Incr,
                                      Value *Mask, Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;
  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;
  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT,
      std::optional<CallingConv::ID> CC) const override;

  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;
  // Return the value of VLMax for the given vector type (i.e. SEW and LMUL)
  SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;

  static RISCVII::VLMUL getLMUL(MVT VT);
  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
                                      unsigned MinSize) {
    // Original equation:
    //   VLMAX = (VectorBits / EltSize) * LMUL
    //   where LMUL = MinSize / RISCV::RVVBitsPerBlock
    // The following equations have been reordered to prevent loss of precision
    // when calculating fractional LMUL.
    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
  }
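  // Worked example for computeVLMAX (illustrative): with VLEN = 128
  // (VectorBits = 128), an nxv8i32 container has MinSize = 256 and
  // EltSize = 32, so
  //   VLMAX = ((128 / 32) * 256) / RISCV::RVVBitsPerBlock = 1024 / 64 = 16,
  // which matches (VectorBits / EltSize) * LMUL = 4 * 4 for LMUL = 4.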
  // Return inclusive (low, high) bounds on the value of VLMAX for the
  // given scalable container type given known bounds on VLEN.
  static std::pair<unsigned, unsigned>
  computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);

  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  static unsigned getRegClassIDForVecVT(MVT VT);
  static std::pair<unsigned, unsigned>
  decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                           unsigned InsertExtractIdx,
                                           const RISCVRegisterInfo *TRI);
  MVT getContainerForFixedLengthVector(MVT VT) const;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;

  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;
  bool isVScaleKnownToBeAPowerOfTwo() const override;

  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing not supported on indexed load/stores
    return Scale == 1;
  }
  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic for
  /// this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a strided load/store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
  bool fallBackToDAGISel(const Instruction &Inst) const override;

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;
  /// RISCVCCAssignFn - This target-specific function extends the default
  /// CCValAssign with additional information used to lower RISC-V calling
  /// conventions.
  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
                               unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State,
                               bool IsFixed, bool IsRet, Type *OrigTy,
                               const RISCVTargetLowering &TLI,
                               RVVArgDispatcher &RVVDispatcher);
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;
  /// Generate error diagnostics if any register used by CC has been marked
  /// as reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;
  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;

  bool shouldExpandCttzElements(EVT VT) const override;
  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;
  /// Disable normalizing
  ///   select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  ///   select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }
  /// For available scheduling models FDIV + two independent FMULs are much
  /// faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  bool shouldFoldSelectWithSingleBitTest(EVT VT,
                                         const APInt &AndMask) const override;

  unsigned getMinimumJumpTableEntries() const override;
};
/// As per the spec, the rules for passing vector arguments are as follows:
///
/// 1. For the first vector mask argument, use v0 to pass it.
/// 2. For vector data arguments or the remaining vector mask arguments,
/// starting from the v8 register, if a vector register group between v8-v23
/// that has not been allocated can be found and the first register number is
/// a multiple of LMUL, then allocate this vector register group to the
/// argument and mark these registers as allocated. Otherwise, the argument is
/// passed by reference and is replaced in the argument list with the address.
/// 3. For tuple vector data arguments, starting from the v8 register, if
/// NFIELDS consecutive vector register groups between v8-v23 that have not
/// been allocated can be found and the first register number is a multiple of
/// LMUL, then allocate these vector register groups to the argument and mark
/// these registers as allocated. Otherwise, the argument is passed by
/// reference and is replaced in the argument list with the address.
/// An example of this allocation is sketched below.
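///
/// For example (illustrative only): for the argument list
/// (vbool1_t mask, vint32m2_t a, vint32m4_t b), the mask is passed in v0
/// (rule 1), a is allocated to the LMUL=2 group v8-v9, and b skips v10-v11
/// (10 is not a multiple of 4) and is allocated to the LMUL=4 group v12-v15.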
class RVVArgDispatcher {
  static constexpr unsigned NumArgVRs = 16;

  bool FirstVMask = false;

  template <typename Arg>
  RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
                   ArrayRef<Arg> ArgList)
      : MF(MF), TLI(TLI) {
    constructArgInfos(ArgList);
  }

  RVVArgDispatcher() = default;

  MCPhysReg getNextPhysReg();

  SmallVector<RVVArgInfo, 4> RVVArgInfos;
  SmallVector<MCPhysReg, 4> AllocatedPhysRegs;

  const MachineFunction *MF = nullptr;
  const RISCVTargetLowering *TLI = nullptr;

  unsigned CurIdx = 0;

  template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);

  void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
                       unsigned StartReg = 0);
};

namespace RISCV {
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
              RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State);

ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);
} // end namespace RISCV
namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;
  uint8_t VLOperand;

  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable
} // end namespace llvm

#endif // LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H