//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {

class InstructionCost;
struct RISCVRegisterInfo;
class RVVArgDispatcher;

namespace RISCVISD {

enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
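  /// For example (illustrative, restating the operand layout above): with
  /// operands (X, Y, SETLT, T, F) the node computes (X s< Y) ? T : F, i.e.
  /// the compare and the select are fused into a single node.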
  // Add the Lo 12 bits from an address. Selected to ADDI.
  // Get the Hi 20 bits from an address. Selected to LUI.
  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.

  // Multiply high for signed x unsigned.
  // Represents (ADD (SHL a, b), c) with the arguments appearing in the order
  // a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba
  // or addsl with XTheadBa.
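  // Illustrative selection sketch (assuming Zba is available): with a constant
  // shift amount of 2, the node representing (ADD (SHL a, 2), c) can be
  // selected to a single `sh2add rd, ra, rc`, folding the shift into the add.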
  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.

  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.

  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.

  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of the FMV.H.X.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
  // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.

  // Rounds an FP value to its corresponding integer in the same FP format.
  // First operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  // Floating point fmax and fmin matching the RISC-V instruction semantics.

  // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
  // It takes a chain operand and another two target constant operands (the
  // CSR numbers of the low and high parts of the counter).

  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // the passthru operand.
  VMV_V_V_VL = FIRST_VL_VECTOR_OP,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // the passthru operand.
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // the passthru operand.
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is the passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
  // Truncates an RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the XLenVT
  // index (either constant or non-constant), the fourth is the mask, the fifth
  // is the VL and the sixth is the policy.
  // Matches the semantics of vslide1up/vslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is the
  // XLenVT scalar value. The fourth and fifth operands are the mask and VL
  // operands.
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is a
  // scalar value whose type matches the element type of the vectors. The
  // fourth and fifth operands are the mask and VL operands.
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  // Matches the semantics of the vfncvt.rod function (Convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions but
  // similarly it reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_SEQ_FADD_VL,
  // Vector binary ops with a merge as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.

  // Averaging adds of unsigned integers.
  // Rounding averaging adds of unsigned integers.

  // Vector unary ops with a mask as a second operand and VL as a third operand.
  FCOPYSIGN_VL, // Has a merge operand
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.

  // Widening instructions with a merge value as a third operand, a mask as a
  // fourth operand, and VL as a fifth operand.

  // Widening ternary operations with a mask as the fourth operand and VL as the
  // fifth operand.

  // Narrowing logical shift right.
  // Operands are (source, shift, passthru, mask, vl)

  // Vector compare producing a mask. Fourth operand is the input mask. Fifth
  // operand is VL.

  // General vmerge node with mask, true, false, passthru, and vl operands.
  // Tail agnostic vselect can be implemented by setting passthru to undef.

  // Mask binary operators.

  // Set mask vector to all zeros or ones.

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).

  // Vector sign/zero extend with additional mask & VL operands.

  // vcpop.m with additional mask and VL operands.

  // vfirst.m with additional mask and VL operands.
  LAST_VL_VECTOR_OP = VFIRST_VL,
  // Reads the value of a CSR.
  // The first operand is a chain pointer. The second specifies the address of
  // the required CSR. Two results are produced, the read value and the new
  // chain pointer.
  // Writes a value to a CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. The result is the
  // new chain pointer.
  // Reads and writes the value of a CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. Two results are
  // produced, the value read before the modification and the new chain
  // pointer.

  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
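  // Illustrative lowering sketch (assuming Zicond): a scalar integer select
  //   (select cond, a, b)
  // can be expanded to (OR (CZERO_NEZ b, cond), (CZERO_EQZ a, cond)), since
  // exactly one of the two operands is zeroed out depending on cond.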

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_VFROUND_NOEXCEPT_VL,
  LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
  // WARNING: Do not add anything at the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be treated as target memory ops!
  TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
};
} // namespace RISCVISD
class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  const RISCVSubtarget &getSubtarget() const { return Subtarget; }
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
  bool signExtendConstant(const ConstantInt *CI) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue Y) const override;
  bool hasBitTest(SDValue X, SDValue Y) const override;
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  /// Return true if the (vector) instruction I will be lowered to an
  /// instruction with a scalar splat operand for the given Operand number.
  bool canSplatOperand(Instruction *I, int Operand) const;
  /// Return true if a vector instruction will lower to a target instruction
  /// able to splat the given operand.
  bool canSplatOperand(unsigned Opcode, int Operand) const;
  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldScalarizeBinop(SDValue VecOp) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool softPromoteHalfType() const override { return true; }
  /// Return the register type for a given MVT, ensuring vectors are treated
  /// as a series of gpr sized integers.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  /// Return the number of registers for a given MVT, ensuring vectors are
  /// treated as a series of gpr sized integers.
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;
  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for input value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;

    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is more blurred we leave such a pair
    // out until we get testcase to prove it is a win.
    return false;
  }

  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;
  /// Return the cost of LMUL for linear operations.
  InstructionCost getLMULCost(MVT VT) const;

  InstructionCost getVRGatherVVCost(MVT VT) const;
  InstructionCost getVRGatherVICost(MVT VT) const;
  InstructionCost getVSlideVXCost(MVT VT) const;
  InstructionCost getVSlideVICost(MVT VT) const;
  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;
  bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
  // This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const override;

  bool
  areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                      const MemSDNode &NodeY) const override;
  ConstraintType getConstraintType(StringRef Constraint) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;
  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    if (VT == MVT::i8 || VT == MVT::i16)
      return false;

    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
  }
  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace 4 or more scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem >= 4;
  }
  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

  bool isCtpopFast(EVT VT) const override;

  unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;

  bool preferZeroCompareBranch() const override { return true; }
  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return isa<LoadInst>(I) || isa<StoreInst>(I);
  }
  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }

  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override;
  TargetLowering::ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override {
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return ShiftLegalizationStrategy::LowerToLibcall;
    return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
                                                              ExpansionFactor);
  }

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;
  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
  bool shouldExtendTypeInLibCall(EVT Type) const override;
  bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;

  /// Returns the register with the specified architectural or ABI name. This
  /// method is necessary to lower the llvm.read_register.* and
  /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
  /// with the clang -ffixed-xX flag for access to be allowed.
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;
  // Lower incoming arguments, copy physregs into vregs
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
                                      Value *AlignedAddr, Value *Incr,
                                      Value *Mask, Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;
  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;
  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT,
      std::optional<CallingConv::ID> CC) const override;

  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;
  // Return the value of VLMax for the given vector type (i.e. SEW and LMUL)
  SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;

  static RISCVII::VLMUL getLMUL(MVT VT);
  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
                                      unsigned MinSize) {
    // Original equation:
    //   VLMAX = (VectorBits / EltSize) * LMUL
    //   where LMUL = MinSize / RISCV::RVVBitsPerBlock
    // The following equations have been reordered to prevent loss of precision
    // when calculating fractional LMUL.
    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
  }
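  // Worked example for computeVLMAX (illustrative): with VLEN = 128
  // (VectorBits = 128), an nxv8i32 container has MinSize = 256 and
  // EltSize = 32, so
  //   VLMAX = ((128 / 32) * 256) / RISCV::RVVBitsPerBlock = 1024 / 64 = 16,
  // which matches (VectorBits / EltSize) * LMUL = 4 * 4 for LMUL = 4.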
  // Return inclusive (low, high) bounds on the value of VLMAX for the
  // given scalable container type given known bounds on VLEN.
  static std::pair<unsigned, unsigned>
  computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);

  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  static unsigned getRegClassIDForVecVT(MVT VT);
  static std::pair<unsigned, unsigned>
  decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                           unsigned InsertExtractIdx,
                                           const RISCVRegisterInfo *TRI);
  MVT getContainerForFixedLengthVector(MVT VT) const;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;

  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;
  bool isVScaleKnownToBeAPowerOfTwo() const override;

  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing not supported on indexed load/stores
    return Scale == 1;
  }
  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic for
  /// this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a strided load/store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
  bool fallBackToDAGISel(const Instruction &Inst) const override;

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;
  /// RISCVCCAssignFn - This target-specific function extends the default
  /// CCValAssign with additional information used to lower RISC-V calling
  /// conventions.
  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
                               unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State,
                               bool IsFixed, bool IsRet, Type *OrigTy,
                               const RISCVTargetLowering &TLI,
                               RVVArgDispatcher &RVVDispatcher);
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;
  /// Generate error diagnostics if any register used by CC has been marked
  /// as reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;
  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;

  bool shouldExpandCttzElements(EVT VT) const override;
  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;
  /// Disable normalizing
  ///   select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  ///   select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }
  /// For available scheduling models FDIV + two independent FMULs are much
  /// faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  bool shouldFoldSelectWithSingleBitTest(EVT VT,
                                         const APInt &AndMask) const override;

  unsigned getMinimumJumpTableEntries() const override;
};
/// As per the spec, the rules for passing vector arguments are as follows:
///
/// 1. For the first vector mask argument, use v0 to pass it.
/// 2. For vector data arguments or the remaining vector mask arguments,
/// starting from the v8 register, if a vector register group between v8-v23
/// that has not been allocated can be found and the first register number is
/// a multiple of LMUL, then allocate this vector register group to the
/// argument and mark these registers as allocated. Otherwise, the argument is
/// passed by reference and is replaced in the argument list with the address.
/// 3. For tuple vector data arguments, starting from the v8 register, if
/// NFIELDS consecutive vector register groups between v8-v23 that have not
/// been allocated can be found and the first register number is a multiple of
/// LMUL, then allocate these vector register groups to the argument and mark
/// these registers as allocated. Otherwise, the argument is passed by
/// reference and is replaced in the argument list with the address.
/// An example of this allocation is sketched below.
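///
/// For example (illustrative only): for the argument list
/// (vbool1_t mask, vint32m2_t a, vint32m4_t b), the mask is passed in v0
/// (rule 1), a is allocated to the LMUL=2 group v8-v9, and b skips v10-v11
/// (10 is not a multiple of 4) and is allocated to the LMUL=4 group v12-v15.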
class RVVArgDispatcher {
  static constexpr unsigned NumArgVRs = 16;

  bool FirstVMask = false;

  template <typename Arg>
  RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
                   ArrayRef<Arg> ArgList)
      : MF(MF), TLI(TLI) {
    constructArgInfos(ArgList);
  }

  RVVArgDispatcher() = default;

  MCPhysReg getNextPhysReg();

  SmallVector<RVVArgInfo, 4> RVVArgInfos;
  SmallVector<MCPhysReg, 4> AllocatedPhysRegs;

  const MachineFunction *MF = nullptr;
  const RISCVTargetLowering *TLI = nullptr;

  unsigned CurIdx = 0;

  template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);

  void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
                       unsigned StartReg = 0);
};

namespace RISCV {
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
              RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State);

ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);
} // end namespace RISCV
namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;
  uint8_t VLOperand;

  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable
} // end namespace llvm

#endif // LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H