//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "RISCV.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include <optional>
namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;

namespace RISCVISD {
// clang-format off
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  RET_GLUE,
  SRET_GLUE,
  MRET_GLUE,
  CALL,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
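  ///
  /// Restating the documented operand order, for illustration:
  ///   (SELECT_CC lhs, rhs, cc, trueval, falseval)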
  SELECT_CC,
  BR_CC,
  BuildPairF64,
  SplitF64,
  TAIL,

  // Add the Lo 12 bits from an address. Selected to ADDI.
  ADD_LO,
  // Get the Hi 20 bits from an address. Selected to LUI.
  HI,
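  // Together these materialize symbol addresses. As a sketch, for a local
  // symbol `sym` the pair (ADD_LO (HI sym), sym) selects to:
  //   lui  a0, %hi(sym)
  //   addi a0, a0, %lo(sym)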
  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  LLA,
  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
  ADD_TPREL,

  // Multiply high for signed x unsigned.
  MULHSU,
  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.
  SLLW,
  SRAW,
  SRLW,
  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.
  DIVW,
  DIVUW,
  REMUW,
  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.
  ROLW,
  RORW,
  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.
  CLZW,
  CTZW,

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  ABSW,
  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of the FMV.H.X instruction.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of the FMV.W.X instruction.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  FMV_H_X,
  FMV_X_ANYEXTH,
  FMV_X_SIGNEXTH,
  FMV_W_X_RV64,
  FMV_X_ANYEXTW_RV64,
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out-of-range
  // inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_X,
  FCVT_XU,
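  // As an illustrative sketch, FP_TO_SINT_SAT of a float on RV64 can become
  // (FCVT_X f, RISCVFPRndMode::RTZ), i.e. an fcvt.l.s using the rtz rounding
  // mode.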
  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out-of-range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_W_RV64,
  FCVT_WU_RV64,

  FP_ROUND_BF16,
  FP_EXTEND_BF16,

  // Rounds an FP value to its corresponding integer in the same FP format.
  // First operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  FROUND,

  FCLASS,
  // Floating point fmax and fmin matching the RISC-V instruction semantics.
  FMAX, FMIN,

  // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
  // (returns (Lo, Hi)). It takes a chain operand.
  READ_CYCLE_WIDE,
  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  BREV8,
  ORC_B,
  ZIP,
  UNZIP,

  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  SM4KS, SM4ED,
  SM3P0, SM3P1,
  // Vector Extension
  FIRST_VL_VECTOR_OP,
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_V_VL = FIRST_VL_VECTOR_OP,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_X_VL,
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VFMV_V_F_VL,
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  VMV_X_S,
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  VMV_S_X_VL,
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  VFMV_S_F_VL,
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is the passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
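  // A rough sketch of that late expansion (a stack slot is assumed purely for
  // illustration):
  //   sw lo, 0(sp); sw hi, 4(sp); vlse64.v vd, (sp), zero  ; stride-0 load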
  // Truncates an RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  TRUNCATE_VECTOR_VL,
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the XLenVT
  // index (either constant or non-constant), the fourth is the mask, the fifth
  // is the VL and the sixth is the policy.
  VSLIDEUP_VL,
  VSLIDEDOWN_VL,
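  // Restating that documented operand order, for illustration:
  //   (VSLIDEUP_VL passthru, src, idx, mask, vl, policy)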
  // Matches the semantics of vslide1up/vslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is the
  // XLenVT scalar value. The fourth and fifth operands are the mask and VL
  // operands.
  VSLIDE1UP_VL,
  VSLIDE1DOWN_VL,
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is a
  // scalar value whose type matches the element type of the vectors. The
  // fourth and fifth operands are the mask and VL operands.
  VFSLIDE1UP_VL,
  VFSLIDE1DOWN_VL,
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  VID_VL,
  // Matches the semantics of the vfncvt.rod instruction (convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  VFNCVT_ROD_VL,
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions, but
  // similarly it reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_ADD_VL,
  VECREDUCE_UMAX_VL,
  VECREDUCE_SMAX_VL,
  VECREDUCE_UMIN_VL,
  VECREDUCE_SMIN_VL,
  VECREDUCE_AND_VL,
  VECREDUCE_OR_VL,
  VECREDUCE_XOR_VL,
  VECREDUCE_FADD_VL,
  VECREDUCE_SEQ_FADD_VL,
  VECREDUCE_FMIN_VL,
  VECREDUCE_FMAX_VL,
  // Vector binary ops with a merge as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.
  ADD_VL,
  AND_VL,
  MUL_VL,
  OR_VL,
  SDIV_VL,
  SHL_VL,
  SREM_VL,
  SRA_VL,
  SRL_VL,
  ROTL_VL,
  ROTR_VL,
  SUB_VL,
  UDIV_VL,
  UREM_VL,
  XOR_VL,
  SMIN_VL,
  SMAX_VL,
  UMIN_VL,
  UMAX_VL,

  BITREVERSE_VL,
  BSWAP_VL,
  CTLZ_VL,
  CTTZ_VL,
  CTPOP_VL,

  SADDSAT_VL,
  UADDSAT_VL,
  SSUBSAT_VL,
  USUBSAT_VL,

  // Averaging adds of unsigned integers.
  AVGFLOORU_VL,
  // Rounding averaging adds of unsigned integers.
  AVGCEILU_VL,

  MULHS_VL,
  MULHU_VL,
  FADD_VL,
  FSUB_VL,
  FMUL_VL,
  FDIV_VL,
  VFMIN_VL,
  VFMAX_VL,

  // Vector unary ops with a mask as a second operand and VL as a third operand.
  FNEG_VL,
  FABS_VL,
  FSQRT_VL,
  FCLASS_VL,
  FCOPYSIGN_VL, // Has a merge operand
  VFCVT_RTZ_X_F_VL,
  VFCVT_RTZ_XU_F_VL,
  VFCVT_X_F_VL,
  VFCVT_XU_F_VL,
  VFROUND_NOEXCEPT_VL,
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  SINT_TO_FP_VL,
  UINT_TO_FP_VL,
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  FP_ROUND_VL,
  FP_EXTEND_VL,

  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
  VFMADD_VL,
  VFNMADD_VL,
  VFMSUB_VL,
  VFNMSUB_VL,

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.
  VFWMADD_VL,
  VFWNMADD_VL,
  VFWMSUB_VL,
  VFWNMSUB_VL,
  // Widening instructions with a merge value as a third operand, a mask as a
  // fourth operand, and VL as a fifth operand.
  VWMUL_VL,
  VWMULU_VL,
  VWMULSU_VL,
  VWADD_VL,
  VWADDU_VL,
  VWSUB_VL,
  VWSUBU_VL,
  VWADD_W_VL,
  VWADDU_W_VL,
  VWSUB_W_VL,
  VWSUBU_W_VL,
  VWSLL_VL,

  VFWMUL_VL,
  VFWADD_VL,
  VFWSUB_VL,
  VFWADD_W_VL,
  VFWSUB_W_VL,
  // Widening ternary operations with a mask as the fourth operand and VL as the
  // fifth operand.
  VWMACC_VL,
  VWMACCU_VL,
  VWMACCSU_VL,

  // Narrowing logical shift right.
  // Operands are (source, shift, passthru, mask, vl).
  VNSRL_VL,

  // Vector compare producing a mask. Fourth operand is input mask. Fifth
  // operand is VL.
  SETCC_VL,

  // General vmerge node with mask, true, false, passthru, and vl operands.
  // Tail agnostic vselect can be implemented by setting passthru to undef.
  VMERGE_VL,

  // Mask binary operators.
  VMAND_VL,
  VMOR_VL,
  VMXOR_VL,

  // Set mask vector to all zeros or ones.
  VMCLR_VL,
  VMSET_VL,

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).
  VRGATHER_VX_VL,
  VRGATHER_VV_VL,
  VRGATHEREI16_VV_VL,

  // Vector sign/zero extend with additional mask & VL operands.
  VSEXT_VL,
  VZEXT_VL,

  // vcpop.m with additional mask and VL operands.
  VCPOP_VL,

  // vfirst.m with additional mask and VL operands.
  VFIRST_VL,

  LAST_VL_VECTOR_OP = VFIRST_VL,
  // Read VLENB CSR.
  READ_VLENB,
  // Reads value of CSR.
  // The first operand is a chain pointer. The second specifies the address of
  // the required CSR. Two results are produced, the read value and the new
  // chain pointer.
  READ_CSR,
  // Write value to CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. The result is the
  // new chain pointer.
  WRITE_CSR,
  // Read and write value of CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. Two results are
  // produced, the value read before the modification and the new chain
  // pointer.
  SWAP_CSR,
  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out-of-range inputs.
  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCVT_WU_RV64,
  STRICT_FADD_VL,
  STRICT_FSUB_VL,
  STRICT_FMUL_VL,
  STRICT_FDIV_VL,
  STRICT_FSQRT_VL,
  STRICT_VFMADD_VL,
  STRICT_VFNMADD_VL,
  STRICT_VFMSUB_VL,
  STRICT_VFNMSUB_VL,
  STRICT_FP_ROUND_VL,
  STRICT_FP_EXTEND_VL,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_FSETCC_VL,
  STRICT_FSETCCS_VL,
  STRICT_VFROUND_NOEXCEPT_VL,
  LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
  // WARNING: Do not add anything at the end unless you want the node to
  // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be treated as target memory ops!

  TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
  TH_LWUD,
  TH_LDD,
  TH_SWD,
  TH_SDD,
};
// clang-format on
} // namespace RISCVISD
class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

public:
  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  const RISCVSubtarget &getSubtarget() const { return Subtarget; }

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
  bool signExtendConstant(const ConstantInt *CI) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue Y) const override;
  bool hasBitTest(SDValue X, SDValue Y) const override;
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  /// Return true if the (vector) instruction I will be lowered to an
  /// instruction with a scalar splat operand for the given Operand number.
  bool canSplatOperand(Instruction *I, int Operand) const;
  /// Return true if a vector instruction will lower to a target instruction
  /// able to splat the given operand.
  bool canSplatOperand(unsigned Opcode, int Operand) const;
  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldScalarizeBinop(SDValue VecOp) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool softPromoteHalfType() const override { return true; }

  /// Return the register type for a given MVT, ensuring vectors are treated
  /// as a series of gpr sized integers.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  /// Return the number of registers for a given MVT, ensuring vectors are
  /// treated as a series of gpr sized integers.
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for the input value, so it is more likely a win.
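    // Illustrative sketch: merging a {float, int} pair into a single i64
    // store would need an fmv.x.w plus two bitwise ops (e.g. slli + or)
    // before one sd; two scalar stores avoid the FPR->GPR move entirely.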
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is more blurred, we leave such a pair
    // out until we get a testcase to prove it is a win.
    return false;
  }
  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;

  /// Return the cost of LMUL for linear operations.
  InstructionCost getLMULCost(MVT VT) const;

  InstructionCost getVRGatherVVCost(MVT VT) const;
  InstructionCost getVRGatherVICost(MVT VT) const;
  InstructionCost getVSlideVXCost(MVT VT) const;
  InstructionCost getVSlideVICost(MVT VT) const;

  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  // This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const override;

  bool
  areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                      const MemSDNode &NodeY) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    if (VT == MVT::i8 || VT == MVT::i16)
      return false;

    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
  }

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace 4 or more scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem >= 4;
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

  bool isCtpopFast(EVT VT) const override;

  unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;

  bool preferZeroCompareBranch() const override { return true; }

  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return isa<LoadInst>(I) || isa<StoreInst>(I);
  }
  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }

  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override;

  TargetLowering::ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override {
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return ShiftLegalizationStrategy::LowerToLibcall;
    return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
                                                              ExpansionFactor);
  }
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool shouldExtendTypeInLibCall(EVT Type) const override;
  bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;

  /// Returns the register with the specified architectural or ABI name. This
  /// method is necessary to lower the llvm.read_register.* and
  /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
  /// with the clang -ffixed-xX flag for access to be allowed.
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  // Lower incoming arguments, copy physregs into vregs.
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
                                      Value *AlignedAddr, Value *Incr,
                                      Value *Mask, Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT,
      std::optional<CallingConv::ID> CC) const override;

  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;

  // Return the value of VLMax for the given vector type (i.e. SEW and LMUL).
  SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;
  static RISCVII::VLMUL getLMUL(MVT VT);
  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
                                      unsigned MinSize) {
    // Original equation:
    //   VLMAX = (VectorBits / EltSize) * LMUL
    // where LMUL = MinSize / RISCV::RVVBitsPerBlock
    // The following equations have been reordered to prevent loss of precision
    // when calculating fractional LMUL.
    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
  }
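  // Worked example, assuming RISCV::RVVBitsPerBlock == 64: for nxv4i32
  // (MinSize = 4 * 32 = 128) with VectorBits = 128 and EltSize = 32,
  // VLMAX = ((128 / 32) * 128) / 64 = 8.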
  // Return inclusive (low, high) bounds on the value of VLMAX for the
  // given scalable container type given known bounds on VLEN.
  static std::pair<unsigned, unsigned>
  computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);

  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  static unsigned getRegClassIDForVecVT(MVT VT);
  static std::pair<unsigned, unsigned>
  decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                           unsigned InsertExtractIdx,
                                           const RISCVRegisterInfo *TRI);
  MVT getContainerForFixedLengthVector(MVT VT) const;

  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;

  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;

  bool isVScaleKnownToBeAPowerOfTwo() const override;

  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing is not supported on indexed loads/stores.
    return Scale == 1;
  }
  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic for
  /// this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a strided load/store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;
  /// RISCVCCAssignFn - This target-specific function extends the default
  /// CCValAssign with additional information used to lower RISC-V calling
  /// conventions.
  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
                               unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State,
                               bool IsFixed, bool IsRet, Type *OrigTy,
                               const RISCVTargetLowering &TLI,
                               std::optional<unsigned> FirstMaskArgument);

private:
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;

  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;

  /// Generate error diagnostics if any register used by CC has been marked
  /// reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;

  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;
  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  /// Disable normalizing
  ///   select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  ///   select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }

  /// For available scheduling models FDIV + two independent FMULs are much
  /// faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;
  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  bool shouldFoldSelectWithSingleBitTest(EVT VT,
                                         const APInt &AndMask) const override;

  unsigned getMinimumJumpTableEntries() const override;
};
namespace RISCV {

bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
              std::optional<unsigned> FirstMaskArgument);

bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     std::optional<unsigned> FirstMaskArgument);

bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State);

ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);

} // end namespace RISCV
namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;
  uint8_t VLOperand;
  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable
} // end namespace llvm

#endif