//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),

    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),

    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),

    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRVE())
    report_fatal_error("Codegen not yet implemented for RVE");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;

    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
      addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
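      // (For example, a type whose known-minimum size is 2 * RVVBitsPerBlock
      // needs an LMUL=2 register group and is therefore placed in VRM2; the
      // 4x and 8x cases map to VRM4 and VRM8 in the same way.)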
    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
      addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));

      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SETCC, MVT::i32, Promote);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    setOperationAction(ISD::LOAD, MVT::i32, Custom);
    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
    setOperationAction(ISD::SADDO, MVT::i32, Custom);
    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},

        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
    setLibcallName(RTLIB::MULO_I64, nullptr);

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::MUL, MVT::i32, Promote);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::MUL, MVT::i128, Custom);
    setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
  } else if (Subtarget.is64Bit()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);

  if (RV64LegalI32 && Subtarget.is64Bit()) {
    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,

      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (!RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BSWAP, MVT::i32,
                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                        Subtarget.hasVendorXTHeadBb())

  if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
    // Zbkb can use rev8+brev8 to implement bitreverse.
    setOperationAction(ISD::BITREVERSE, XLenVT,
                       Subtarget.hasStdExtZbkb() ? Custom : Expand);

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTTZ, MVT::i32, Legal);
      setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
  } else if (!Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      Subtarget.hasVendorXCVbitmanip()) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTLZ, MVT::i32,
                         Subtarget.hasStdExtZbb() ? Legal : Promote);
      if (!Subtarget.hasStdExtZbb())
        setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  if (!RV64LegalI32 && Subtarget.is64Bit() &&
      !Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  // We can use PseudoCCSUB to implement ABS.
  if (Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, XLenVT, Legal);

  if (!Subtarget.hasVendorXTHeadCondMov())
    setOperationAction(ISD::SELECT, XLenVT, Custom);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT, MVT::i32, Promote);

  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
      ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
      ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
      ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,

  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  static const unsigned ZfhminZfbfminPromoteOps[] = {
      ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
      ISD::FSUB, ISD::FMUL, ISD::FMA,
      ISD::FDIV, ISD::FSQRT, ISD::FABS,
      ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
      ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
      ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
      ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN, ISD::SELECT};

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
    setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
    setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
    setOperationAction(ISD::FREM, MVT::bf16, Promote);
    // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
    // DAGCombiner::visitFP_ROUND probably needs improvements first.
    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);

  if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
      setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    if (!Subtarget.hasStdExtZfa())
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa())
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);

    if (Subtarget.is64Bit())
      setOperationAction(FPRndMode, MVT::f64, Custom);

    setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
    setOperationAction(ISD::LROUND, MVT::i32, Custom);

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMaxAtomicSizeInBitsSupported(0);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::VSCALE, MVT::i32, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
        ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
        ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
        ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
        ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
        ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
        ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
        ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
        ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
        ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
        ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the vector
      // element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,

      setOperationAction(ISD::SELECT, VT, Custom);
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
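      // (Illustrative note, not the exact code path: e.g. an i8 -> f64
      // conversion has no single native instruction, so it is split into
      // intermediate conversions whose element sizes differ by at most one
      // power of two, such as going via an i32 step.)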
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
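      // (E.g. truncating nxv2i64 down to nxv2i8 emits the chain
      // i64 -> i32 -> i16 -> i8 of TRUNCATE_VECTOR_VL nodes.)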
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,

      // Custom-lower reduction operations to set up the corresponding custom
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},

          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvkb()) {
        setOperationAction(ISD::BSWAP, VT, Legal);
        setOperationAction(ISD::VP_BSWAP, VT, Custom);
        setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction(ISD::BITREVERSE, VT, Legal);
        setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
        setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
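        // (Illustrative note: these lowerings typically convert the element to
        // f32 and read the biased exponent, which is why a legal f32 vector
        // type with the same element count is checked for below.)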
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
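    // (For instance, a SETOGT comparison is rewritten as SETOLT with the
    // operands swapped and then matched back during isel, so both the
    // vector-scalar "vf" and scalar-vector "fv" instruction forms are covered.)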
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,

    // TODO: support more ops.
    static const unsigned ZvfhminPromoteOps[] = {
        ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
        ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
        ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
        ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
        ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
        ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
        ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};

    // TODO: support more vp ops.
    static const unsigned ZvfhminPromoteVPOps[] = {
        ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
        ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
        ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
        ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
        ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
        ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
        ISD::VP_FNEARBYINT, ISD::VP_SETCC};

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},

          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},

    // Sets common extload/truncstore actions on RVV floating-point vector
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
        SetCommonVFPActions(VT);
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
        setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16 since nxv32f32 is not legal.
        if (VT == MVT::nxv32f16) {
          setOperationAction(ZvfhminPromoteOps, VT, Custom);
          setOperationAction(ZvfhminPromoteVPOps, VT, Custom);

        // Add more promote ops.
        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
        setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,

            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

                            ISD::STRICT_SINT_TO_FP,
                            ISD::STRICT_UINT_TO_FP,
                            ISD::STRICT_FP_TO_SINT,
                            ISD::STRICT_FP_TO_UINT,

        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},

          setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
          setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},

            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

            {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},

        setOperationAction(IntegerVPOps, VT, Custom);

        if (Subtarget.hasStdExtZvkb())
          setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);

        if (Subtarget.hasStdExtZvbb()) {
          setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},

          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
          EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
          if (isTypeLegal(FloatVT))
              {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        // There are no extending loads or truncating stores.
        for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
          setTruncStoreAction(VT, InnerVT, Expand);

        if (!useRVVForFixedLengthVectorVT(VT))

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        if (VT.getVectorElementType() == MVT::f16 &&
            !Subtarget.hasVInstructionsF16()) {
          setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
          setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
          setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
              {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
          setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                              ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
          setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                              ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
          setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
          MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
          // Don't promote f16 vector operations to f32 if f32 vector type is
          // TODO: could split the f16 vector into two vectors and do promotion.
          if (!isTypeLegal(F32VecVT))
          setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
          setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
                            ISD::EXTRACT_VECTOR_ELT},

        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                            ISD::MGATHER, ISD::MSCATTER},

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
                            ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},

        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);

        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                            ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},

        setCondCodeAction(VFPCCToExpand, VT, Expand);

        setOperationAction(ISD::SETCC, VT, Custom);
        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(FloatingPointVecReduceOps, VT, Custom);

        setOperationAction(FloatingPointVPOps, VT, Custom);

        setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
            {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
             ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
             ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
             ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
             ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
      if (Subtarget.hasStdExtZfhminOrZhinxmin())
        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      if (Subtarget.hasStdExtFOrZfinx())
        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      if (Subtarget.hasStdExtDOrZdinx())
        setOperationAction(ISD::BITCAST, MVT::f64, Custom);

  if (Subtarget.hasStdExtA()) {
    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);

  if (Subtarget.hasForcedAtomics()) {
    // Force __sync libcalls to be emitted for atomic rmw/cas operations.
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},

  if (Subtarget.hasVendorXTHeadMemIdx()) {
    for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);

      if (Subtarget.is64Bit()) {
        setIndexedLoadAction(im, MVT::i64, Legal);
        setIndexedStoreAction(im, MVT::i64, Legal);

  // Function alignments.
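  // (With the compressed C/Zca extension, instructions may be 16 bits wide, so
  // a 2-byte minimum function alignment is sufficient; otherwise all
  // instructions are 32 bits and functions need 4-byte alignment.)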
  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
                       ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
    setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                         ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
                         ISD::INSERT_VECTOR_ELT});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Disable strict node mutation.
  IsStrictFPEnabled = true;

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
    return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      bool IsScalable) const {
  if (!Subtarget.hasVInstructions())

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())

  // Don't allow VF=1 if those types aren't legal.
  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())

  // VLEN=32 support is incomplete.
  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.
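  // (Concretely, with RVVBitsPerBlock = 64 this works out to
  // MaxVF = (64 / 8) * 8 = 64.)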
  unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
  return VF > MaxVF || !isPowerOf2_32(VF);

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();

  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided) {
    Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(PtrOp);
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
      // Use return type. If it's segment load, return type is a struct.
      MemTy = I.getType();
      if (MemTy->isStructTy())
        MemTy = MemTy->getStructElementType(0);
      MemTy = MemTy->getScalarType();

    Info.memVT = getValueType(DL, MemTy);
    Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
        IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;

  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    Info.flags |= MachineMemOperand::MONonTemporal;

  Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
  switch (Intrinsic) {
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
  case Intrinsic::riscv_masked_strided_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_masked_strided_store:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
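    // (i.e. the pointer is the second-to-last operand, hence the
    // arg_size() - 2 below.)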
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
:
1642 case Intrinsic::riscv_vluxseg2_mask
:
1643 case Intrinsic::riscv_vluxseg3_mask
:
1644 case Intrinsic::riscv_vluxseg4_mask
:
1645 case Intrinsic::riscv_vluxseg5_mask
:
1646 case Intrinsic::riscv_vluxseg6_mask
:
1647 case Intrinsic::riscv_vluxseg7_mask
:
1648 case Intrinsic::riscv_vluxseg8_mask
:
1649 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 5,
1651 /*IsUnitStrided*/ false);
1652 case Intrinsic::riscv_vsseg2
:
1653 case Intrinsic::riscv_vsseg3
:
1654 case Intrinsic::riscv_vsseg4
:
1655 case Intrinsic::riscv_vsseg5
:
1656 case Intrinsic::riscv_vsseg6
:
1657 case Intrinsic::riscv_vsseg7
:
1658 case Intrinsic::riscv_vsseg8
:
1659 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 2,
1661 /*IsUnitStrided*/ false);
1662 case Intrinsic::riscv_vsseg2_mask
:
1663 case Intrinsic::riscv_vsseg3_mask
:
1664 case Intrinsic::riscv_vsseg4_mask
:
1665 case Intrinsic::riscv_vsseg5_mask
:
1666 case Intrinsic::riscv_vsseg6_mask
:
1667 case Intrinsic::riscv_vsseg7_mask
:
1668 case Intrinsic::riscv_vsseg8_mask
:
1669 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 3,
1671 /*IsUnitStrided*/ false);
1672 case Intrinsic::riscv_vssseg2
:
1673 case Intrinsic::riscv_vssseg3
:
1674 case Intrinsic::riscv_vssseg4
:
1675 case Intrinsic::riscv_vssseg5
:
1676 case Intrinsic::riscv_vssseg6
:
1677 case Intrinsic::riscv_vssseg7
:
1678 case Intrinsic::riscv_vssseg8
:
1679 case Intrinsic::riscv_vsoxseg2
:
1680 case Intrinsic::riscv_vsoxseg3
:
1681 case Intrinsic::riscv_vsoxseg4
:
1682 case Intrinsic::riscv_vsoxseg5
:
1683 case Intrinsic::riscv_vsoxseg6
:
1684 case Intrinsic::riscv_vsoxseg7
:
1685 case Intrinsic::riscv_vsoxseg8
:
1686 case Intrinsic::riscv_vsuxseg2
:
1687 case Intrinsic::riscv_vsuxseg3
:
1688 case Intrinsic::riscv_vsuxseg4
:
1689 case Intrinsic::riscv_vsuxseg5
:
1690 case Intrinsic::riscv_vsuxseg6
:
1691 case Intrinsic::riscv_vsuxseg7
:
1692 case Intrinsic::riscv_vsuxseg8
:
1693 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 3,
1695 /*IsUnitStrided*/ false);
1696 case Intrinsic::riscv_vssseg2_mask
:
1697 case Intrinsic::riscv_vssseg3_mask
:
1698 case Intrinsic::riscv_vssseg4_mask
:
1699 case Intrinsic::riscv_vssseg5_mask
:
1700 case Intrinsic::riscv_vssseg6_mask
:
1701 case Intrinsic::riscv_vssseg7_mask
:
1702 case Intrinsic::riscv_vssseg8_mask
:
1703 case Intrinsic::riscv_vsoxseg2_mask
:
1704 case Intrinsic::riscv_vsoxseg3_mask
:
1705 case Intrinsic::riscv_vsoxseg4_mask
:
1706 case Intrinsic::riscv_vsoxseg5_mask
:
1707 case Intrinsic::riscv_vsoxseg6_mask
:
1708 case Intrinsic::riscv_vsoxseg7_mask
:
1709 case Intrinsic::riscv_vsoxseg8_mask
:
1710 case Intrinsic::riscv_vsuxseg2_mask
:
1711 case Intrinsic::riscv_vsuxseg3_mask
:
1712 case Intrinsic::riscv_vsuxseg4_mask
:
1713 case Intrinsic::riscv_vsuxseg5_mask
:
1714 case Intrinsic::riscv_vsuxseg6_mask
:
1715 case Intrinsic::riscv_vsuxseg7_mask
:
1716 case Intrinsic::riscv_vsuxseg8_mask
:
1717 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 4,
1719 /*IsUnitStrided*/ false);
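// Illustration of the PtrOp arithmetic above (derived from the offsets used in
// the switch): an unmasked riscv_vsseg2 call has operands (vec0, vec1, ptr, vl),
// so I.arg_size() == 4 and the pointer is operand arg_size() - 2 == 2, while
// the masked riscv_vsseg2_mask adds a mask before the VL, giving arg_size() - 3.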
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // RVV instructions only support register addressing.
  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false; // disallow anything else with a scale.
  }

  return true;
}
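// e.g. a GPR base plus an offset of 2047 is selectable as a single load/store,
// while an offset of 2048 falls outside the signed 12-bit immediate range and
// is rejected above, so the address must be materialized separately.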
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
// isTruncateFree?
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  // We consider i64->i32 free on RV64 since we have good selection of W
  // instructions that make promoting operations back to i64 free in many cases.
  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
      !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}
bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
         Subtarget.hasVendorXCVbitmanip();
}

bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
    return false;
  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!Mask)
    return false;
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
}
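// For example, with Zbs a mask of 0x1000 (bit 12) is not a signed 12-bit
// immediate and is a power of two, so the hook fires and the AND is later
// matched as BEXTI; a mask of 0x400 still fits ANDI and is left alone.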
bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.
  if (VT.isVector())
    return false;

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);
}

bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant.
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  return C && C->getAPIntValue().ule(10);
}
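// The ule(10) bound reflects that testing bit 10 needs ANDI with 1 << 10 ==
// 1024, which still fits a signed 12-bit immediate, whereas 1 << 11 == 2048
// does not.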
bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
                                                               EVT VT) const {
  // Only enable for rvv.
  if (!VT.isVector() || !Subtarget.hasVInstructions())
    return false;

  if (VT.isFixedLengthVector() && !isTypeLegal(VT))
    return false;

  return true;
}
bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                            Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.hasFastUnalignedAccess())
    return true;

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a
  // constant pool?

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}
bool RISCVTargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.
  return !XC;
}
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
  switch (Opcode) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;
  default:
    return false;
  }
}

bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  if (canSplatOperand(I->getOpcode(), Operand))
    return true;

  auto *II = dyn_cast<IntrinsicInst>(I);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  // These intrinsics are commutative.
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  // These intrinsics have 'vr' versions.
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  default:
    return false;
  }
}
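// For instance, vp.sub can splat either operand: a splatted RHS maps to
// vsub.vx and a splatted LHS to vrsub.vx, which is what the 'vr' comment above
// refers to.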
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  for (auto OpIdx : enumerate(I->operands())) {
    if (!canSplatOperand(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand.
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // Don't sink i1 splats.
    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers.
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!canSplatOperand(Insn, U.getOperandNo()))
        return false;
    }

    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}
bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  // Permit a vector binary operation to be converted to a scalar binary
  // operation which is custom lowered with an illegal type.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
         isOperationCustom(Opc, ScalarVT);
}

bool RISCVTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}
// Return one of the following:
// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
// positive counterpart, which will be materialized from the first returned
// element. The second returned element indicates that there should be a FNEG
// followed.
// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
                                                           EVT VT) const {
  if (!Subtarget.hasStdExtZfa())
    return std::make_pair(-1, false);

  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
  }

  if (!IsSupportedVT)
    return std::make_pair(-1, false);

  int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
  if (Index < 0 && Imm.isNegative())
    // Try the combination of its positive counterpart + FNEG.
    return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
  else
    return std::make_pair(Index, false);
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
  if (VT == MVT::f16)
    IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
  else if (VT == MVT::f32)
    IsLegalVT = Subtarget.hasStdExtFOrZfinx();
  else if (VT == MVT::f64)
    IsLegalVT = Subtarget.hasStdExtDOrZdinx();
  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  if (!IsLegalVT)
    return false;

  if (getLegalZfaFPImm(Imm, VT).first >= 0)
    return true;

  // Cannot create a 64 bit floating-point immediate value for rv32.
  if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
    // td can handle +0.0 or -0.0 already.
    // -0.0 can be created by fmv + fneg.
    return Imm.isZero();
  }

  // Special case: fmv + fneg
  if (Imm.isNegZero())
    return true;

  // Building an integer and then converting requires a fmv at the end of
  // the integer sequence.
  const int Cost =
      1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
                                     Subtarget);
  return Cost <= FPImmCost;
}
// TODO: This is very conservative.
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Only support extracting a fixed from a fixed vector for now.
  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
    return false;

  unsigned ResElts = ResVT.getVectorNumElements();
  unsigned SrcElts = SrcVT.getVectorNumElements();

  // Conservatively only handle extracting half of a vector.
  // TODO: Relax this.
  if ((ResElts * 2) != SrcElts)
    return false;

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (ResVT.getVectorElementType() == MVT::i1)
    return false;

  // Slide can support arbitrary index, but we only treat vslidedown.vi as
  // cheap.
  if (Index >= 32)
    return false;

  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  return Index == 0 || Index == ResElts;
}
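// e.g. extracting v4i32 from v8i32 is considered cheap at Index 0 (the low
// half) or Index 4 (a single vslidedown of the upper half), but not at Index 2.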
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhminOrZhinxmin())
    return MVT::f32;

  MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);

  if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
    return MVT::i64;

  return PartVT;
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                            CallingConv::ID CC,
                                                            EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhminOrZhinxmin())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);

  if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
    IntermediateVT = MVT::i64;

  if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
    RegisterVT = MVT::i64;

  return NumRegs;
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default: break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}
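// Worked example for the single-bit test above: on RV64, (X & 0x10000) == 0
// uses a mask that is a power of two but not a valid 12-bit immediate, so it
// becomes (X << 47) >= 0, i.e. shift bit 16 up to the sign bit and branch on
// the sign instead of materializing the mask.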
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVII::VLMUL::LMUL_F8;
  case 16:
    return RISCVII::VLMUL::LMUL_F4;
  case 32:
    return RISCVII::VLMUL::LMUL_F2;
  case 64:
    return RISCVII::VLMUL::LMUL_1;
  case 128:
    return RISCVII::VLMUL::LMUL_2;
  case 256:
    return RISCVII::VLMUL::LMUL_4;
  case 512:
    return RISCVII::VLMUL::LMUL_8;
  }
}
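// e.g. nxv4i32 has a known minimum size of 128 bits (two 64-bit RVV blocks)
// and so maps to LMUL_2, while nxv1i8 (8 bits) maps to LMUL_F8.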
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVII::VLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVII::VLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
      LMUL == RISCVII::VLMUL::LMUL_F4 ||
      LMUL == RISCVII::VLMUL::LMUL_F2 ||
      LMUL == RISCVII::VLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
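// Following the nxv16i32@12 example in the comment above: the first halving
// step selects the high nxv8i32 half (sub_vrm4_1, index becomes 4), the second
// selects the high nxv4i32 half (sub_vrm2_1, index becomes 0), and the last
// picks nxv2i32 number 0 (sub_vrm1_0), leaving a leftover index of 0.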
// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
  return !Subtarget.useRVVForFixedLengthVectors() ||
         (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}

bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
  if (!ScalarTy.isSimple())
    return false;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::iPTR:
    return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  case MVT::i64:
    return Subtarget.hasVInstructionsI64();
  case MVT::f16:
    return Subtarget.hasVInstructionsF16();
  case MVT::f32:
    return Subtarget.hasVInstructionsF32();
  case MVT::f64:
    return Subtarget.hasVInstructionsF64();
  default:
    return false;
  }
}

unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
  return NumRepeatedDivisors;
}
static SDValue getVLOperand(SDValue Op) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasVLOperand())
    return SDValue();
  return Op.getOperand(II->VLOperand + 1 + HasChain);
}
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getRealMinVLen();

  MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (EltVT.SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  case MVT::i64:
    if (!Subtarget.hasVInstructionsI64())
      return false;
    break;
  case MVT::f16:
    if (!Subtarget.hasVInstructionsF16Minimal())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasVInstructionsF32())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasVInstructionsF64())
      return false;
    break;
  }

  // Reject elements larger than ELEN.
  if (EltVT.getSizeInBits() > Subtarget.getELen())
    return false;

  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getRealMinVLen();
  unsigned MaxELen = Subtarget.getELen();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
    unsigned NumElts =
        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}

static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
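// For example, with MinVLen == 128 and ELEN == 64: v4i32 gives
// NumElts = (4 * 64) / 128 = 2, i.e. the LMUL=1 container nxv2i32, while
// v2i32 gives NumElts = max(1, 64 / 64) = 1, i.e. the fractional (mf2)
// container nxv1i32.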
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
/// Return the type of the mask type suitable for masking the provided
/// vector type. This is simply an i1 element type vector of the same
/// (possibly scalable) length.
static MVT getMaskTypeFor(MVT VecVT) {
  assert(VecVT.isVector());
  ElementCount EC = VecVT.getVectorElementCount();
  return MVT::getVectorVT(MVT::i1, EC);
}

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
                              SelectionDAG &DAG) {
  MVT MaskVT = getMaskTypeFor(VecVT);
  return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
}
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  // If we know the exact VLEN, our VL is exactly equal to VLMAX, and
  // we can't encode the AVL as an immediate, use the VLMAX encoding.
  const auto [MinVLMAX, MaxVLMAX] =
      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
  if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
    return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());

  return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
}
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
  SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
  return {Mask, VL};
}

static std::pair<SDValue, SDValue>
getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
                SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
  return {Mask, VL};
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that the fixed-length vector is contained in. Otherwise if
// VecVT is scalable, then ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  if (VecVT.isFixedLengthVector())
    return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                           Subtarget);
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
                                          SelectionDAG &DAG) const {
  assert(VecVT.isScalableVector() && "Expected scalable vector");
  return DAG.getElementCount(DL, Subtarget.getXLenVT(),
                             VecVT.getVectorElementCount());
}

std::pair<unsigned, unsigned>
RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
                                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expected scalable vector");

  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned VectorBitsMin = Subtarget.getRealMinVLen();
  unsigned MinVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);

  return std::make_pair(MinVLMAX, MaxVLMAX);
}
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// and so on.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
  // implementation-defined.
  if (!VT.isVector())
    return InstructionCost::getInvalid();
  unsigned DLenFactor = Subtarget.getDLenFactor();
  unsigned Cost;
  if (VT.isScalableVector()) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) =
        RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
    if (Fractional)
      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
    else
      Cost = (LMul * DLenFactor);
  } else {
    Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
  }
  return Cost;
}
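// e.g. on a core with DLEN == VLEN/2 (DLenFactor == 2), an LMUL=4 scalable
// type costs 4 * 2 = 8, while a fractional mf2 type (LMul == 2, Fractional)
// costs 2 / 2 = 1.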
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vreg implied by LMUL. Note that
/// operand (index and possibly mask) are handled separately.
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
  return getLMULCost(VT) * getLMULCost(VT);
}

/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
  return getLMULCost(VT);
}

/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
  return getLMULCost(VT);
}
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // RISC-V FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan. We can use a conversion instruction and fix the
  // nan case with a compare and a select.
  SDValue Src = Op.getOperand(0);

  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;

  if (!DstVT.isVector()) {
    // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
    // the result.
    if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
        Src.getValueType() == MVT::bf16) {
      Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
    }

    unsigned Opc;
    if (SatVT == DstVT)
      Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
    else if (DstVT == MVT::i64 && SatVT == MVT::i32)
      Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
    else
      return SDValue();
    // FIXME: Support other SatVTs by clamping before or after the conversion.

    SDLoc DL(Op);
    SDValue FpToInt = DAG.getNode(
        Opc, DL, DstVT, Src,
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));

    if (Opc == RISCVISD::FCVT_WU_RV64)
      FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

    SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
    return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  // Vectors.

  MVT DstEltVT = DstVT.getVectorElementType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();
  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
  unsigned DstEltSize = DstEltVT.getSizeInBits();

  // Only handle saturating to the destination type.
  if (SatVT != DstEltVT)
    return SDValue();

  // FIXME: Don't support narrowing by more than 1 steps for now.
  if (SrcEltSize > (2 * DstEltSize))
    return SDValue();

  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;
  if (DstVT.isFixedLengthVector()) {
    DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
    SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
    assert(DstContainerVT.getVectorElementCount() ==
               SrcContainerVT.getVectorElementCount() &&
           "Expected same element count");
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  SDLoc DL(Op);

  auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);

  SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                              {Src, Src, DAG.getCondCode(ISD::SETNE),
                               DAG.getUNDEF(Mask.getValueType()), Mask, VL});

  // Need to widen by more than 1 step, promote the FP type, then do a widening
  // convert.
  if (DstEltSize > (2 * SrcEltSize)) {
    assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
    MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
  }

  unsigned RVVOpc =
      IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
  SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);

  SDValue SplatZero = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
      DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
  Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
                    Res, VL);

  if (DstVT.isFixedLengthVector())
    Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);

  return Res;
}
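// For example, for an fptosi.sat of a NaN input the converted value is
// discarded: the unordered self-compare (SETUO / SETNE above) selects the
// zero operand instead, matching the llvm.fptosi.sat semantics of returning 0
// for NaN.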
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
  switch (Opc) {
  case ISD::FROUNDEVEN:
  case ISD::STRICT_FROUNDEVEN:
  case ISD::VP_FROUNDEVEN:
    return RISCVFPRndMode::RNE;
  case ISD::FTRUNC:
  case ISD::STRICT_FTRUNC:
  case ISD::VP_FROUNDTOZERO:
    return RISCVFPRndMode::RTZ;
  case ISD::FFLOOR:
  case ISD::STRICT_FFLOOR:
  case ISD::VP_FFLOOR:
    return RISCVFPRndMode::RDN;
  case ISD::FCEIL:
  case ISD::STRICT_FCEIL:
  case ISD::VP_FCEIL:
    return RISCVFPRndMode::RUP;
  case ISD::FROUND:
  case ISD::STRICT_FROUND:
  case ISD::VP_FROUND:
    return RISCVFPRndMode::RMM;
  case ISD::FRINT:
    return RISCVFPRndMode::DYN;
  }

  return RISCVFPRndMode::Invalid;
}
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back. Taking care to avoid converting values that are
// nan or already correct.
static SDValue
lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);

  SDValue Src = Op.getOperand(0);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  } else {
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  }

  // Freeze the source since we are increasing the number of uses.
  Src = DAG.getFreeze(Src);

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode =
      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.
  MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
                     {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
                      Mask, Mask, VL});

  // Truncate to integer and convert back to FP.
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Truncated;

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::FCEIL:
  case ISD::VP_FCEIL:
  case ISD::FFLOOR:
  case ISD::VP_FFLOOR:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    break;
  }
  case ISD::FTRUNC:
    Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
                            Mask, VL);
    break;
  case ISD::FRINT:
  case ISD::VP_FRINT:
    Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
    break;
  case ISD::FNEARBYINT:
  case ISD::VP_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
                            Mask, VL);
    break;
  }

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
    Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
                            Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

  if (!VT.isFixedLengthVector())
    return Truncated;

  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
}
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
// qNan and converting the new source to integer and back to FP.
static SDValue
lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Freeze the source since we are increasing the number of uses.
  Src = DAG.getFreeze(Src);

  // Convert sNan to qNan by executing x + x for all unordered elements x in Src.
  MVT MaskVT = Mask.getSimpleValueType();
  SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
                                DAG.getVTList(MaskVT, MVT::Other),
                                {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
                                 DAG.getUNDEF(MaskVT), Mask, VL});
  Chain = Unorder.getValue(1);
  Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
                    DAG.getVTList(ContainerVT, MVT::Other),
                    {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
  Chain = Src.getValue(1);

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode =
      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.
  Mask = DAG.getNode(
      RISCVISD::SETCC_VL, DL, MaskVT,
      {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});

  // Truncate to integer and convert back to FP.
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Truncated;

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::STRICT_FCEIL:
  case ISD::STRICT_FFLOOR:
  case ISD::STRICT_FROUND:
  case ISD::STRICT_FROUNDEVEN: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(
        RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
        {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
    break;
  }
  case ISD::STRICT_FTRUNC:
    Truncated =
        DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
                    DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
    break;
  case ISD::STRICT_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
                            DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
                            Mask, VL);
    break;
  }
  Chain = Truncated.getValue(1);

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
    Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
                            DAG.getVTList(ContainerVT, MVT::Other), Chain,
                            Truncated, Mask, VL);
    Chain = Truncated.getValue(1);
  }

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

  if (VT.isFixedLengthVector())
    Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
  return DAG.getMergeValues({Truncated, Chain}, DL);
}
static SDValue
lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  if (VT.isVector())
    return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);

  if (DAG.shouldOptForSize())
    return SDValue();

  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);

  // Create an integer the size of the mantissa with the MSB set. This and all
  // values larger than it don't have any fractional bits so don't need to be
  // converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
  return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
                     DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
}
// Expand vector LRINT and LLRINT by converting to the integer domain.
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  MVT ContainerVT = VT;

  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  SDValue Truncated =
      DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);

  if (!VT.isFixedLengthVector())
    return Truncated;

  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
}
static SDValue
getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
              const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
              SDValue Offset, SDValue Mask, SDValue VL,
              unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
  if (Merge.isUndef())
    Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
  SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
  SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
  return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
}

static SDValue
getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
            EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
            SDValue VL,
            unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
  if (Merge.isUndef())
    Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
  SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
  SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
  return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
}
static MVT getLMUL1VT(MVT VT) {
  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
         "Unexpected vector MVT");
  return MVT::getScalableVectorVT(
      VT.getVectorElementType(),
      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
}
struct VIDSequence {
  int64_t StepNumerator;
  unsigned StepDenominator;
  int64_t Addend;
};

static std::optional<uint64_t> getExactInteger(const APFloat &APF,
                                               uint32_t BitWidth) {
  APSInt ValInt(BitWidth, !APF.isNegative());
  // We use an arbitrary rounding mode here. If a floating-point is an exact
  // integer (e.g., 1.0), the rounding mode does not affect the output value. If
  // the rounding mode changes the output value, then it is not an exact
  // integer.
  RoundingMode ArbitraryRM = RoundingMode::TowardZero;
  bool IsExact;
  // If it is out of signed integer range, it will return an invalid operation.
  // If it is not an exact integer, IsExact is false.
  if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
       APFloatBase::opInvalidOp) ||
      !IsExact)
    return std::nullopt;
  return ValInt.extractBitsAsZExtValue(BitWidth, 0);
}
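// e.g. getExactInteger(3.0, 32) yields 3, while getExactInteger(2.5, 32)
// yields std::nullopt because the conversion is inexact.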
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
  unsigned NumElts = Op.getNumOperands();
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.
    if (Op.getOperand(Idx).isUndef())
      continue;

    uint64_t Val;
    if (IsInteger) {
      // The BUILD_VECTOR must be all constants.
      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
    } else {
      // The BUILD_VECTOR must be all constants.
      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      if (auto ExactInteger = getExactInteger(
              cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
              EltSizeInBits))
        Val = *ExactInteger;
      else
        return std::nullopt;
    }

    if (PrevElt) {
      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.
      if (ValDiff == 0)
        continue;

      int64_t Remainder = ValDiff % IdxDiff;
      // Normalize the step if it's greater than 1.
      if (Remainder != ValDiff) {
        // The difference must cleanly divide the element span.
        if (Remainder != 0)
          return std::nullopt;
        ValDiff /= IdxDiff;
        IdxDiff = 1;
      }

      if (!SeqStepNum)
        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return std::nullopt;

      if (!SeqStepDenom)
        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return std::nullopt;
    }

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }

  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)
    return std::nullopt;

  // Loop back through the sequence and validate elements we might have skipped
  // while waiting for a valid step. While doing this, log any sequence addend.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      continue;
    uint64_t Val;
    if (IsInteger) {
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
    } else {
      Val = *getExactInteger(
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
          EltSizeInBits);
    }
    uint64_t ExpectedVal =
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    if (!SeqAddend)
      SeqAddend = Addend;
    else if (Addend != SeqAddend)
      return std::nullopt;
  }

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
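
// Worked examples for isSimpleVIDSequence: <i32 1, i32 3, i32 5, i32 7> is
// matched as {StepNumerator = 2, StepDenominator = 1, Addend = 1}, i.e.
// (VID * 2) + 1, and the fractional sequence <i16 0, i16 0, i16 1, i16 1> is
// matched as {1, 2, 0}, i.e. VID >> 1. A sequence with no consistent step,
// such as <i32 0, i32 3, i32 5>, yields std::nullopt.
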
// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
// and lower it as a VRGATHER_VX_VL from the source vector.
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = SplatVal.getOperand(0);
  // Only perform this optimization on vectors of the same size for simplicity.
  // Don't perform this optimization for i1 vectors.
  // FIXME: Support i1 vectors, maybe by promoting to i8?
  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
    return SDValue();
  SDValue Idx = SplatVal.getOperand(1);
  // The index must be a legal type.
  if (Idx.getValueType() != Subtarget.getXLenVT())
    return SDValue();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
                               Idx, DAG.getUNDEF(ContainerVT), Mask, VL);

  if (!VT.isFixedLengthVector())
    return Gather;

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
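
// For example, a v4i32 build_vector whose every operand is
// (extractelt v4i32 %src, i64 %i) is lowered by matchSplatAsGather as a single
// vrgather.vx of %src with scalar index %i, rather than a scalar extract
// followed by a scalar splat.
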
/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
/// INSERT_VECTOR_ELTs. Returns SDValue() if not profitable.
/// Note that this includes vectors of 2 elements by association. The
/// upper-most element is the "dominant" one, allowing us to use a splat to
/// "insert" the upper element, and an insert of the lower element at position
/// 0, which improves codegen.
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
                                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  SDValue DominantValue;
  unsigned MostCommonCount = 0;
  DenseMap<SDValue, unsigned> ValueCounts;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  // Track the number of scalar loads we know we'd be inserting, estimated as
  // any non-zero floating-point constant. Other kinds of element are either
  // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materializations and
  // vector-insertion instructions is not known.
  unsigned NumScalarLoads = 0;

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];
    if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
      NumScalarLoads += !CFP->isExactlyValue(+0.0);

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    // end.
    if (++Count >= MostCommonCount) {
      DominantValue = V;
      MostCommonCount = Count;
    }
  }

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
      (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
      ((MostCommonCount > DominantValueCountThreshold) ||
       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
    // Start by splatting the most common element.
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    DenseSet<SDValue> Processed{DominantValue};

    // We can handle an insert into the last element (of a splat) via
    // v(f)slide1down. This is slightly better than the vslideup insert
    // lowering as it avoids the need for a vector group temporary. It
    // is also better than using vmerge.vx as it avoids the need to
    // materialize the mask in a vector register.
    if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
        !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
        LastOp != DominantValue) {
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      auto OpCode =
          VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
      if (!VT.isFloatingPoint())
        LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
      Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                        LastOp, Mask, VL);
      Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
      Processed.insert(LastOp);
    }

    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
        continue;
      if (ValueCounts[V] == 1) {
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
      } else {
        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
        // we're inserting.
        SmallVector<SDValue> Ops;
        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
        });
        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
                          DAG.getBuildVector(SelMaskTy, DL, Ops),
                          DAG.getSplatBuildVector(VT, DL, V), Vec);
      }
    }

    return Vec;
  }

  return SDValue();
}
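
// For example, a non-constant v4f32 build_vector <%x, %x, %x, %y> has dominant
// value %x: the lowering splats %x and then inserts %y into the last lane via
// the v(f)slide1down path above, i.e. roughly "vfmv.v.f v8, fa0" followed by
// "vfslide1down.vf v8, v8, fa1".
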
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // bits.
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
    // If we have to use more than one INSERT_VECTOR_ELT then this
    // optimization is likely to increase code size; avoid performing it in
    // such a case. We can use a load from a constant pool in this case.
    if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
      return SDValue();
    // Now we can create our integer vector type. Note that it may be larger
    // than the resulting mask type: v4i1 would use v1i8 as its integer type.
    unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
    MVT IntegerViaVecVT =
        MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                         IntegerViaVecElts);

    uint64_t Bits = 0;
    unsigned BitPos = 0, IntegerEltIdx = 0;
    SmallVector<SDValue, 8> Elts(IntegerViaVecElts);

    for (unsigned I = 0; I < NumElts;) {
      SDValue V = Op.getOperand(I);
      bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
      Bits |= ((uint64_t)BitValue << BitPos);
      ++BitPos;
      ++I;

      // Once we accumulate enough bits to fill our scalar type or process the
      // last element, insert into our vector and clear our accumulated data.
      if (I % NumViaIntegerBits == 0 || I == NumElts) {
        if (NumViaIntegerBits <= 32)
          Bits = SignExtend64<32>(Bits);
        SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
        Elts[IntegerEltIdx] = Elt;
        Bits = 0;
        BitPos = 0;
        IntegerEltIdx++;
      }
    }

    SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);

    if (NumElts < NumViaIntegerBits) {
      // If we're producing a smaller vector than our minimum legal integer
      // type, bitcast to the equivalent (known-legal) mask type, and extract
      // our final mask.
      assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
      Vec = DAG.getBitcast(MVT::v8i1, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    } else {
      // Else we must have produced an integer type with the same size as the
      // mask type; bitcast for the final result.
      assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
      Vec = DAG.getBitcast(VT, Vec);
    }

    return Vec;
  }
  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match index sequences, which we can lower to the vid instruction
  // with optional modifications. An all-undef vector is matched by
  // getSplatValue, above.
  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;

    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;
    unsigned StepOpcode = ISD::MUL;
    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
    // anyway as the shift of 63 won't fit in uimm5.
    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
        isPowerOf2_64(std::abs(StepNumerator))) {
      Negate = StepNumerator < 0;
      StepOpcode = ISD::SHL;
      SplatStepVal = Log2_64(std::abs(StepNumerator));
    }

    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
    // threshold since it's the immediate value many RVV instructions accept.
    // There is no vmul.vi instruction so ensure multiply constant can fit in
    // a single addi instruction.
    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        isPowerOf2_32(StepDenominator) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
      MVT VIDVT =
          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
      MVT VIDContainerVT =
          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
      // Convert right out of the scalable type so we can use standard ISD
      // nodes for the rest of the computation. If we used scalable types with
      // these, we'd lose the fixed-length vector info and generate worse
      // vsetvli code.
      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
        SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
      }
      if (StepDenominator != 1) {
        SDValue SplatStep =
            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
      }
      if (Addend != 0 || Negate) {
        SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
                          VID);
      }
      if (VT.isFloatingPoint()) {
        // TODO: Use vfwcvt to reduce register pressure.
        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
      }
      return VID;
    }
  }
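
  // For example, v4i16 <1, 3, 5, 7> matches {2, 1, 1}: the step 2 is a power
  // of two, so the VID lowering above emits roughly "vid.v v8; vsll.vi v8, v8,
  // 1; vadd.vi v8, v8, 1". A negative step such as <6, 4, 2, 0> is handled by
  // subtracting the scaled VID from the splatted addend instead.
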
  // For very small build_vectors, use a single scalar insert of a constant.
  // TODO: Base this on constant rematerialization cost, not size.
  const unsigned EltBitSize = VT.getScalarSizeInBits();
  if (VT.getSizeInBits() <= 32 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
           "Unexpected sequence type");
    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts
                                                                     : 1;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value at this larger vector type.
    for (const auto &OpIdx : enumerate(Op->op_values())) {
      const auto &SeqV = OpIdx.value();
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (OpIdx.index() * EltBitSize));
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
                              DAG.getUNDEF(ViaVecVT),
                              DAG.getConstant(SplatValue, DL, XLenVT),
                              DAG.getConstant(0, DL, XLenVT));
    if (ViaVecLen != 1)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                        MVT::getVectorVT(ViaIntVT, 1), Vec,
                        DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(VT, Vec);
  }
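
  // For example, a constant v4i8 <1, 2, 3, 4> packs into the single 32-bit
  // constant 0x04030201, which is materialized in a GPR, inserted into lane 0
  // of a one-element i32 vector, and bitcast back to v4i8, replacing four
  // element inserts with one scalar insert.
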
  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could be instead splat as
  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    const unsigned RequiredVL = NumElts / SeqLen;
    const unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts)
            ? NumElts
            : RequiredVL;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value. That
    // way we can use RVV instructions to splat.
    assert((ViaIntVT.bitsLE(XLenVT) ||
            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      SDValue ViaVL =
          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
      MVT ViaContainerVT =
          getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
      SDValue Splat =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
                      DAG.getUNDEF(ViaContainerVT),
                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
      if (ViaVecLen != RequiredVL)
        Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                            MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
                            DAG.getConstant(0, DL, XLenVT));
      return DAG.getBitcast(VT, Splat);
    }
  }
  // If the number of signbits allows, see if we can lower as a <N x i8>.
  // Our main goal here is to reduce LMUL (and thus work) required to
  // build the constant, but we will also narrow if the resulting
  // narrow vector is known to materialize cheaply.
  // TODO: We really should be costing the smaller vector. There are
  // profitable cases this misses.
  if (EltBitSize > 8 && VT.isInteger() &&
      (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
    unsigned SignBits = DAG.ComputeNumSignBits(Op);
    if (EltBitSize - SignBits < 8) {
      SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
                                          DL, Op->ops());
      Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
                                       Source, DAG, Subtarget);
      SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
      return convertFromScalableVector(VT, Res, DAG, Subtarget);
    }
  }

  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
    return Res;

  // For constant vectors, use generic constant pool lowering. Otherwise,
  // we'd have to materialize constants in GPRs just to move them into the
  // vector.
  return SDValue();
}
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();

  if (VT.getVectorElementType() == MVT::i1) {
    // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
    // vector type, we have a legal equivalently-sized i8 type, so we can use
    // that.
    MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
    SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);

    SDValue WideVec;
    if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
      // For a splat, perform a scalar truncate before creating the wider
      // vector.
      Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
                          DAG.getConstant(1, DL, Splat.getValueType()));
      WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
    } else {
      SmallVector<SDValue, 8> Ops(Op->op_values());
      WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
      SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
      WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
    }

    return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
      return Gather;
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }
  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
    return Res;

  // If we're compiling for an exact VLEN value, we can split our work per
  // register in the register group.
  const unsigned MinVLen = Subtarget.getRealMinVLen();
  const unsigned MaxVLen = Subtarget.getRealMaxVLen();
  if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
    MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
    assert(M1VT == getLMUL1VT(M1VT));

    // The following semantically builds up a fixed length concat_vector
    // of the component build_vectors. We eagerly lower to scalable and
    // insert_subvector here to avoid DAG combining it back to a large
    // build_vector.
    SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
    unsigned NumOpElts = M1VT.getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(ContainerVT);
    for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
      auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
      SDValue SubBV =
          DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
      SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
      unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
                        DAG.getVectorIdxConstant(InsertIdx, DL));
    }
    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
  }
  // Cap the cost at a value linear to the number of elements in the vector.
  // The default lowering is to use the stack. The vector store + scalar loads
  // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
  // being (at least) linear in LMUL. As a result, using the vslidedown
  // lowering for every element ends up being VL*LMUL.
  // TODO: Should we be directly costing the stack alternative? Doing so might
  // give us a more accurate upper bound.
  InstructionCost LinearBudget = VT.getVectorNumElements() * 2;

  // TODO: unify with TTI getSlideCost.
  InstructionCost PerSlideCost = 1;
  switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
  default: break;
  case RISCVII::VLMUL::LMUL_2:
    PerSlideCost = 2;
    break;
  case RISCVII::VLMUL::LMUL_4:
    PerSlideCost = 4;
    break;
  case RISCVII::VLMUL::LMUL_8:
    PerSlideCost = 8;
    break;
  }

  // TODO: Should we be using the build instseq then cost + evaluate scheme
  // we use for integer constants here?
  unsigned UndefCount = 0;
  for (const SDValue &V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }
    if (UndefCount) {
      LinearBudget -= PerSlideCost;
      UndefCount = 0;
    }
    LinearBudget -= PerSlideCost;
  }
  if (UndefCount) {
    LinearBudget -= PerSlideCost;
  }

  if (LinearBudget < 0)
    return SDValue();

  assert((!VT.isFloatingPoint() ||
          VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
         "Illegal type which will result in reserved encoding");

  const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;

  SDValue Vec;
  UndefCount = 0;
  for (SDValue V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }

    // Start our sequence with a TA splat in the hopes that hardware is able to
    // recognize there's no dependency on the prior value of our temporary
    // register.
    if (!Vec) {
      Vec = DAG.getSplatVector(VT, DL, V);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      UndefCount = 0;
      continue;
    }

    if (UndefCount) {
      const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                          Vec, Offset, Mask, VL, Policy);
      UndefCount = 0;
    }
    auto OpCode =
        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
    if (!VT.isFloatingPoint())
      V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
    Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                      V, Mask, VL);
  }
  if (UndefCount) {
    const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                        Vec, Offset, Mask, VL, Policy);
  }
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
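
// For example, a v4f32 build_vector of four live scalar values is emitted by
// the loop above as a TA splat of the first element followed by three
// vfslide1down.vf steps, while a run of trailing undef elements is skipped
// with a single vslidedown rather than one slide per element.
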
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Lo, SDValue Hi, SDValue VL,
                                   SelectionDAG &DAG) {
  if (!Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

    // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
    // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
    // vlmax vsetvli or vsetivli to change the VL.
    // FIXME: Support larger constants?
    // FIXME: Support non-constant VLs by saturating?
    if (LoC == HiC && Passthru.isUndef()) {
      SDValue NewVL;
      if (isAllOnesConstant(VL) ||
          (isa<RegisterSDNode>(VL) &&
           cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
      else if (isa<ConstantSDNode>(VL) &&
               isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);

      if (NewVL) {
        MVT InterVT =
            MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
        auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
                                    DAG.getUNDEF(InterVT), Lo,
                                    DAG.getRegister(RISCV::X0, MVT::i32));
        return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
      }
    }
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
  // even if it might be sign extended.
  if (Hi.isUndef())
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // Fall back to a stack store and stride x0 vector load.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
                     Hi, VL);
}
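
// For example, splatting the i64 constant 0x00000000_00000005 on RV32 has
// HiC == 0 == (LoC >> 31), so it takes the first path above and becomes a
// single SEW=64 vmv.v.x of 5, whereas a constant whose halves disagree in
// sign, such as 0x00000000_80000000, falls through to the later paths.
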
// Called by type legalization to handle splat of i64 on RV32.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Scalar, SDValue VL,
                                   SelectionDAG &DAG) {
  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
}
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
                                MVT VT, const SDLoc &DL, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);

  MVT XLenVT = Subtarget.getXLenVT();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (Scalar.getValueType().bitsLE(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
  }

  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
         "Unexpected scalar for splat lowering!");

  if (isOneConstant(VL) && isNullConstant(Scalar))
    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
                       DAG.getConstant(0, DL, XLenVT), VL);

  // Otherwise use the more complicated splatting algorithm.
  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
// This function lowers an insert of a scalar operand Scalar into lane
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
// to be non-zero.
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                                 const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() && "Expect VT is scalable vector type.");

  const MVT XLenVT = Subtarget.getXLenVT();
  SDValue Passthru = DAG.getUNDEF(VT);

  if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isNullConstant(Scalar.getOperand(1))) {
    SDValue ExtractedVal = Scalar.getOperand(0);
    MVT ExtractedVT = ExtractedVal.getSimpleValueType();
    MVT ExtractedContainerVT = ExtractedVT;
    if (ExtractedContainerVT.isFixedLengthVector()) {
      ExtractedContainerVT = getContainerForFixedLengthVector(
          DAG, ExtractedContainerVT, Subtarget);
      ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
                                             DAG, Subtarget);
    }
    if (ExtractedContainerVT.bitsLE(VT))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
                         DAG.getConstant(0, DL, XLenVT));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
                       DAG.getConstant(0, DL, XLenVT));
  }

  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
                       DAG.getUNDEF(VT), Scalar, VL);

  // Avoid the tricky legalization cases by falling back to using the
  // splat code which already handles it gracefully.
  if (!Scalar.getValueType().bitsLE(XLenVT))
    return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
                            DAG.getConstant(1, DL, XLenVT),
                            VT, DL, DAG, Subtarget);

  // If the operand is a constant, sign extend to increase our chances
  // of being able to use a .vi instruction. ANY_EXTEND would become a
  // zero extend and the simm5 check in isel would fail.
  // FIXME: Should we ignore the upper bits in isel instead?
  unsigned ExtOpc =
      isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
                     DAG.getUNDEF(VT), Scalar, VL);
}
// Is this a shuffle that extracts either the even or odd elements of a vector?
// That is, specifically, either (a) or (b) below.
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns {Src Vector, Even Elements} on success
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const RISCVSubtarget &Subtarget) {
  // Need to be able to widen the vector.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  // Both inputs must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return false;

  // Src needs to have twice the number of elements.
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
    return false;

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != Mask.size())
    return false;

  // First index must be the first even or odd element from V1.
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;

  // The others must increase by 2 each time.
  // TODO: Support undef elements?
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;

  return true;
}
/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements. \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element. These can be the first element in a source or the element half
/// way through the source.
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                int &OddSrc, const RISCVSubtarget &Subtarget) {
  // We need to be able to widen elements to the next larger integer type.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  int Size = Mask.size();
  int NumElts = VT.getVectorNumElements();
  assert(Size == (int)NumElts && "Unexpected mask size");

  SmallVector<unsigned, 2> StartIndexes;
  if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
    return false;

  EvenSrc = StartIndexes[0];
  OddSrc = StartIndexes[1];

  // One source should be low half of first vector.
  if (EvenSrc != 0 && OddSrc != 0)
    return false;

  // Subvectors will be extracted from either the start of the two input
  // vectors, or at the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts will be extracted.
  // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
  // we'll create an illegal extract_subvector.
  // FIXME: We could support other values using a slidedown first.
  int HalfNumElts = NumElts / 2;
  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}
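
// For example, the v8i8 mask <0,8,1,9,2,10,3,11> interleaves the low halves of
// the two sources and yields EvenSrc = 0, OddSrc = 8, while the unary mask
// <0,4,1,5,2,6,3,7>, which interleaves the two halves of a single source,
// yields EvenSrc = 0, OddSrc = 4.
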
/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.
/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
/// returned rotation amount is for a rotate right, where elements move from
/// higher elements to lower elements. \p LoSrc indicates the first source
/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
/// 0 or 1 if a rotation is found.
///
/// NOTE: We talk about rotate to the right which matches how bit shift and
/// rotate instructions are described where LSBs are on the right, but LLVM IR
/// and the table below write vectors with the lowest elements on the left.
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
  int Size = Mask.size();

  // We need to detect various ways of spelling a rotation:
  //   [11, 12, 13, 14, 15,  0,  1,  2]
  //   [-1, 12, 13, 14, -1, -1,  1, -1]
  //   [-1, -1, -1, -1, -1, -1,  1,  2]
  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
  //   [-1,  4,  5,  6, -1, -1,  9, -1]
  //   [-1,  4,  5,  6, -1, -1, -1, -1]
  int Rotation = 0;
  LoSrc = -1;
  HiSrc = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;

    // Determine where a rotate vector would have started.
    int StartIdx = i - (M % Size);
    // The identity rotation isn't interesting, stop.
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      // The rotations don't match, so we can't match this mask.
      return -1;

    // Compute which value this mask is pointing at.
    int MaskSrc = M < Size ? 0 : 1;

    // Compute which of the two target values this index should be assigned to.
    // This reflects whether the high elements are remaining or the low elements
    // are remaining.
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (TargetSrc < 0)
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
      // This may be a rotation, but it pulls from the inputs in some
      // unsupported interleaving.
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((LoSrc >= 0 || HiSrc >= 0) &&
         "Failed to find a rotated input vector!");

  return Rotation;
}
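
// For example, the v8i8 mask <3,4,5,6,7,8,9,10> matches with a rotation amount
// of 3: HiSrc = 0 (elements 3..7 of the first source fill the low lanes) and
// LoSrc = 1 (elements 0..2 of the second source fill the high lanes), which
// the caller can typically lower as a vslidedown/vslideup pair.
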
// Lower a deinterleave shuffle to vnsrl.
// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
//                          -> [p, q, r, s] (EvenElts == false)
// VT is the type of the vector to return, <[vscale x ]n x ty>
// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
                                       bool EvenElts,
                                       const RISCVSubtarget &Subtarget,
                                       SelectionDAG &DAG) {
  // The result is a vector of type <m x n x ty>
  MVT ContainerVT = VT;
  // Convert fixed vectors to scalable if needed
  if (ContainerVT.isFixedLengthVector()) {
    assert(Src.getSimpleValueType().isFixedLengthVector());
    ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);

    // The source is a vector of type <m x n*2 x ty>
    MVT SrcContainerVT =
        MVT::getVectorVT(ContainerVT.getVectorElementType(),
                         ContainerVT.getVectorElementCount() * 2);
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
  // This also converts FP to int.
  unsigned EltBits = ContainerVT.getScalarSizeInBits();
  MVT WideSrcContainerVT = MVT::getVectorVT(
      MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
  Src = DAG.getBitcast(WideSrcContainerVT, Src);

  // The integer version of the container type.
  MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();

  // If we want even elements, then the shift amount is 0. Otherwise, shift by
  // the original element size.
  unsigned Shift = EvenElts ? 0 : EltBits;
  SDValue SplatShift = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
      DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
  SDValue Res =
      DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
                  DAG.getUNDEF(IntContainerVT), TrueMask, VL);
  // Cast back to FP if needed.
  Res = DAG.getBitcast(ContainerVT, Res);

  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}
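
// For example, deinterleaving a v8i8 source into a v4i8 result bitcasts the
// source to v4i16 and narrows with vnsrl: a shift of 0 keeps the even bytes
// and a shift of 8 keeps the odd bytes, typically selected as vnsrl.wi with
// immediates 0 and 8 respectively.
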
// Lower the following shuffle to vslidedown.
// a)
// t49: v8i8 = extract_subvector t13, Constant:i64<0>
// t109: v8i8 = extract_subvector t13, Constant:i64<8>
// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
// b)
// t69: v16i16 = extract_subvector t68, Constant:i64<0>
// t23: v8i16 = extract_subvector t69, Constant:i64<0>
// t29: v4i16 = extract_subvector t23, Constant:i64<4>
// t26: v8i16 = extract_subvector t69, Constant:i64<8>
// t30: v4i16 = extract_subvector t26, Constant:i64<0>
// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
                                               SDValue V1, SDValue V2,
                                               ArrayRef<int> Mask,
                                               const RISCVSubtarget &Subtarget,
                                               SelectionDAG &DAG) {
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
    uint64_t Offset = 0;
    while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector. But we don't want to match the case.
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask must also be continuous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}
// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e. inserting right
  // up to the very end of it, then we don't actually care about the tail.
  if (NumSubElts + Index >= (int)NumElts)
    Policy |= RISCVII::TAIL_AGNOSTIC;

  InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
  ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
  SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);

  SDValue Res;
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (Index == 0)
    Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
                      VL);
  else
    Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
                      DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
/// Match v(f)slide1up/down idioms. These operations involve sliding
/// N-1 elements to make room for an inserted scalar at one end.
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
                                            SDValue V1, SDValue V2,
                                            ArrayRef<int> Mask,
                                            const RISCVSubtarget &Subtarget,
                                            SelectionDAG &DAG) {
  bool OpsSwapped = false;
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))
      return SDValue();
    std::swap(V1, V2);
    OpsSwapped = true;
  }
  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
  if (!Splat)
    return SDValue();

  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
    const unsigned S = (Offset > 0) ? 0 : -Offset;
    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
        return false;
    return true;
  };

  const unsigned NumElts = VT.getVectorNumElements();
  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
    return SDValue();

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // Inserted lane must come from splat, undef scalar is legal but not profitable.
  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
    return SDValue();

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  auto OpCode = IsVSlidedown ?
    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
  if (!VT.isFloatingPoint())
    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT),
                         convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
                         Splat, TrueMask, VL);
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
                                     const SDLoc &DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  MVT VecVT = EvenV.getSimpleValueType();
  MVT VecContainerVT = VecVT; // <vscale x n x ty>
  // Convert fixed vectors to scalable if needed
  if (VecContainerVT.isFixedLengthVector()) {
    VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
    OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
  }

  assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());

  // We're working with a vector of the same size as the resulting
  // interleaved vector, but with half the number of elements and
  // twice the SEW (Hence the restriction on not using the maximum
  // ELEN)
  MVT WideVT =
      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
                       VecVT.getVectorElementCount());
  MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
  if (WideContainerVT.isFixedLengthVector())
    WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);

  // Bitcast the input vectors to integers in case they are FP
  VecContainerVT = VecContainerVT.changeTypeToInteger();
  EvenV = DAG.getBitcast(VecContainerVT, EvenV);
  OddV = DAG.getBitcast(VecContainerVT, OddV);

  auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(WideContainerVT);

  SDValue Interleaved;
  if (Subtarget.hasStdExtZvbb()) {
    // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
    SDValue OffsetVec =
        DAG.getSplatVector(VecContainerVT, DL,
                           DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
                                           Subtarget.getXLenVT()));
    Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                              OffsetVec, Passthru, Mask, VL);
    Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
                              Interleaved, EvenV, Passthru, Mask, VL);
  } else {
    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
    // vwaddu.vv
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, i.e. all-ones
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    // = (OddV * 0x100...00) + EvenV
    // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use original type
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed
  MVT ResultVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}
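
// For example, interleaving the i8 vectors [1, 2] and [3, 4] produces the i16
// wide elements 0x0301 and 0x0402 (odd << 8 | even), which bitcast back to
// v4i8 as [1, 3, 2, 4]; without Zvbb this is typically selected as vwaddu.vv
// followed by vwmaccu.vx with an all-ones scalar.
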
// If we have a vector of bits that we want to reverse, we can use a vbrev on a
// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
                                      SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  assert(VT.getVectorElementType() == MVT::i1);

  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
                                        SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have zvbb or the larger element type > ELEN, the operation will
  // be expanded.
  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
                                                               ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we need
  // to insert it into the larger vector and then shift up the reversed bits
  // afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
                    V, DAG.getVectorIdxConstant(0, DL));

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift up the reversed bits if the vector didn't exactly fit into the larger
  // element type.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
                      DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));

  Res = DAG.getBitcast(ViaBitVT, Res);

  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                      DAG.getVectorIdxConstant(0, DL));
  return Res;
}
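
// For example, reversing a v8i1 mask becomes a BITREVERSE of a v1i8 bitcast of
// the mask, which with Zvbb is typically a single vbrev.v, instead of a
// vrgather-based shuffle on the promoted i8 vector.
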
// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
                                           SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);

  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned NumSubElts, RotateAmt;
  if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
                                          NumElts, NumSubElts, RotateAmt))
    return SDValue();
  MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
                                  NumElts / NumSubElts);

  // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
  if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
    return SDValue();

  SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));

  SDValue Rotate;
  // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
  // so canonicalize to vrev8.
  if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
    Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
  else
    Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
                         DAG.getConstant(RotateAmt, DL, RotateVT));

  return DAG.getBitcast(VT, Rotate);
}
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (VT.getVectorElementType() == MVT::i1) {
    // Lower to a vror.vi of a larger element type if possible before we
    // promote i1s to i8s.
    if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
      return V;
    if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
      return V;

    // Promote i1 shuffle to i8 shuffle.
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
    V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
    V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
                      : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
    SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
    return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
                        ISD::SETNE);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
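      // For example, splatting lane 5 of (concat_vectors v4i8:A, v4i8:B) is
      // rewritten here as splatting lane 5 % 4 == 1 of B, so the strided or
      // scalar load rewrite below can look directly at B.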
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(
            Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          SDValue Ops[] = {Ld->getChain(),
                           IntID,
                           DAG.getUNDEF(ContainerVT),
                           NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT),
                           VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it into
        // the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat =
            DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
                                   V1, DAG.getConstant(Lane, DL, XLenVT),
                                   DAG.getUNDEF(ContainerVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  ArrayRef<int> Mask = SVN->getMask();

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
  // available.
  if (Subtarget.hasStdExtZvkb())
    if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
      return V;

  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
  // be undef which can be handled with a single SLIDEDOWN/UP.
  int LoSrc, HiSrc;
  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
  if (Rotation > 0) {
    SDValue LoV, HiV;
    if (LoSrc >= 0) {
      LoV = LoSrc == 0 ? V1 : V2;
      LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
    }
    if (HiSrc >= 0) {
      HiV = HiSrc == 0 ? V1 : V2;
      HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
    }

    // We found a rotation. We need to slide HiV down by Rotation. Then we need
    // to slide LoV up by (NumElts - Rotation).
    unsigned InvRotate = NumElts - Rotation;

    SDValue Res = DAG.getUNDEF(ContainerVT);
    if (HiV) {
      // Even though we could use a smaller VL, don't to avoid a vsetivli
      // toggle.
      Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
                          DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
    }
    if (LoV)
      Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
                        DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
                        RISCVII::TAIL_AGNOSTIC);

    return convertFromScalableVector(VT, Res, DAG, Subtarget);
  }

  // If this is a deinterleave and we can widen the vector, then we can use
  // vnsrl to deinterleave.
  if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
    return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
                                   Subtarget, DAG);
  }

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Detect an interleave shuffle and lower to
  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
  int EvenSrc, OddSrc;
  if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
    // Extract the halves of the vectors.
    MVT HalfVT = VT.getHalfNumVectorElementsVT();

    int Size = Mask.size();
    SDValue EvenV, OddV;
    assert(EvenSrc >= 0 && "Undef source?");
    EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
    EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
                        DAG.getConstant(EvenSrc % Size, DL, XLenVT));

    assert(OddSrc >= 0 && "Undef source?");
    OddV = (OddSrc / Size) == 0 ? V1 : V2;
    OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
                       DAG.getConstant(OddSrc % Size, DL, XLenVT));

    return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
  }

  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an element
  // at the corresponding index in either source vector.
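  // For example, the v4i32 mask <0, 5, 2, 7> takes elements 0 and 2 from the
  // first source and elements 1 and 3 from the second, each at its own
  // position, so (assuming neither operand is a splat, which would swap the
  // operands) it lowers to a vselect with the mask <1, 0, 1, 0>.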
  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  SmallVector<SDValue> MaskVals;
  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
  bool InvertMask = IsSelect == SwapOps;

  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
  // half.
  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;

  // Now construct the mask that will be used by the vselect or blended
  // vrgather operation. For vrgathers, construct the appropriate indices into
  // each vector.
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    if (!IsSelect) {
      bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
      GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                                     ? DAG.getConstant(MaskIndex, DL, XLenVT)
                                     : DAG.getUNDEF(XLenVT));
      GatherIndicesRHS.push_back(
          IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
                            : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
      if (IsLHSOrUndefIndex && MaskIndex >= 0)
        ++LHSIndexCounts[MaskIndex];
      if (!IsLHSOrUndefIndex)
        ++RHSIndexCounts[MaskIndex - NumElts];
    }
  }

  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  if (IsSelect)
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);

  // We might be able to express the shuffle as a bitrotate. But even if we
  // don't have Zvkb and have to expand, the expanded sequence of approx. 2
  // shifts and a vor will have a higher throughput than a vrgather.
  if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
    return V;

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  // If the mask allows, we can do all the index computation in 16 bits. This
  // requires less work and less register pressure at high LMUL, and creates
  // smaller constants which may be cheaper to materialize.
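  // For example, the natural index type for a v8i64 shuffle is v8i64 itself
  // (512 bits, i.e. several registers at small VLEN); since the largest index
  // 7 fits in 16 bits, the same indices can instead be carried in a v8i16 and
  // used with vrgatherei16.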
  if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
      (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
                              Subtarget);
  } else {
    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                           DAG.getConstant(SplatIndex, DL, XLenVT),
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    } else {
      SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
      LHSIndices =
          convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    }
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);

    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
                           DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
                           SelectMask, VL);
    } else {
      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
      RHSIndices =
          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
                           SelectMask, VL);
    }
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Support splats for any type. These should type legalize well.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  // Only support legal VTs for other shuffles for now.
  if (!isTypeLegal(VT))
    return false;

  MVT SVT = VT.getSimpleVT();

  // Not for i1 vectors.
  if (SVT.getScalarType() == MVT::i1)
    return false;

  int Dummy1, Dummy2;
  return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
         isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
}
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
// the exponent.
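// For example, for an i32 input of 16, uint_to_fp produces 2.0^4, whose f32
// exponent field is 4 + 127 = 131; shifting the float bits right by 23 yields
// 131, so CTTZ is 131 - 127 = 4 and CTLZ is (127 + 31) - 131 = 27.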
SDValue
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
                                               SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  SDValue Src = Op.getOperand(0);
  SDLoc DL(Op);
  MVT ContainerVT = VT;

  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  }

  // We choose FP type that can represent the value if possible. Otherwise, we
  // use rounding to zero conversion for correct exponent of the result.
  // TODO: Use f16 for i8 when possible?
  MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
  if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
    FloatEltVT = MVT::f32;
  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

  // Legal types should have been checked in the RISCVTargetLowering
  // constructor.
  // TODO: Splitting may make sense in some cases.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
         "Expected legal float type!");

  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
  // The trailing zero count is equal to log2 of this single bit value.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNegative(Src, DL, VT);
    Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
                              Src, Mask, VL);
    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
  }

  // We have a legal FP type, convert to it.
  SDValue FloatVal;
  if (FloatVT.bitsGT(VT)) {
    if (Op->isVPOpcode())
      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
    else
      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
  } else {
    // Use RTZ to avoid rounding influencing exponent of FloatVal.
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }
    if (!Op->isVPOpcode())
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
    SDValue RTZRM =
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
    MVT ContainerFloatVT =
        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
                           Src, Mask, RTZRM, VL);
    if (VT.isFixedLengthVector())
      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
  }
  // Bitcast to integer and shift the exponent to the LSB.
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  SDValue Exp;
  // Restore back to original type. Truncation after SRL is to generate vnsrl.
  if (Op->isVPOpcode()) {
    Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
  } else {
    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT));
    if (IntVT.bitsLT(VT))
      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
    else if (IntVT.bitsGT(VT))
      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
  }

  // The exponent contains log2 of the value in biased form.
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  // For trailing zeros, we just need to subtract the bias.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT));
  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);

  // For leading zeros, we need to remove the bias and convert from log2 to
  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
  unsigned Adjust = ExponentBias + (EltSize - 1);
  SDValue Res;
  if (Op->isVPOpcode())
    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
                      Mask, VL);
  else
    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);

  // The above result with zero input equals to Adjust which is greater than
  // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
  if (Op.getOpcode() == ISD::CTLZ)
    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
  else if (Op.getOpcode() == ISD::VP_CTLZ)
    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
  return Res;
}
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
// the load is already correctly-aligned, it returns SDValue().
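// For example, a v4i32 load with alignment 1 is re-expressed as a v16i8 load,
// which has no alignment requirement beyond 1, followed by a bitcast back to
// v4i32.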
SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Load = cast<LoadSDNode>(Op);
  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");

  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                     Load->getMemoryVT(),
                                     *Load->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
                          Load->getPointerInfo(), Load->getOriginalAlign(),
                          Load->getMemOperand()->getFlags());
  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
}
// While RVV has alignment restrictions, we should always be able to store as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
// returns SDValue() if the store is already correctly aligned.
SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);
  assert(Store && Store->getValue().getValueType().isVector() &&
         "Expected vector store");

  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                     Store->getMemoryVT(),
                                     *Store->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  SDValue StoredVal = Store->getValue();
  MVT VT = StoredVal.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV store type");
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  StoredVal = DAG.getBitcast(NewVT, StoredVal);
  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
                      Store->getPointerInfo(), Store->getOriginalAlign(),
                      Store->getMemOperand()->getFlags());
}
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");

  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();

  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return Op;

  // We only need to cost the immediate if constant pool lowering is enabled.
  if (!Subtarget.useConstantPoolForLargeInts())
    return Op;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
    return Op;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (DAG.shouldOptForSize())
    return SDValue();
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
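  // For example, 0x1234567812345678 can be built by materializing
  // X = 0x12345678 and computing (ADD (SLLI X, 32), X), avoiding a constant
  // pool load at the cost of the extra temporary.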
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
  if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
    return Op;

  return SDValue();
}
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDLoc dl(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  if (Subtarget.hasStdExtZtso()) {
    // The only fence that needs an instruction is a sequentially-consistent
    // cross-thread fence.
    if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
        FenceSSID == SyncScope::System)
      return Op;

    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
  }

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));

  return Op;
}
SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned Check = Op.getConstantOperandVal(1);
  unsigned TDCMask = 0;
  if (Check & fcSNan)
    TDCMask |= RISCV::FPMASK_Signaling_NaN;
  if (Check & fcQNan)
    TDCMask |= RISCV::FPMASK_Quiet_NaN;
  if (Check & fcPosInf)
    TDCMask |= RISCV::FPMASK_Positive_Infinity;
  if (Check & fcNegInf)
    TDCMask |= RISCV::FPMASK_Negative_Infinity;
  if (Check & fcPosNormal)
    TDCMask |= RISCV::FPMASK_Positive_Normal;
  if (Check & fcNegNormal)
    TDCMask |= RISCV::FPMASK_Negative_Normal;
  if (Check & fcPosSubnormal)
    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
  if (Check & fcNegSubnormal)
    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
  if (Check & fcPosZero)
    TDCMask |= RISCV::FPMASK_Positive_Zero;
  if (Check & fcNegZero)
    TDCMask |= RISCV::FPMASK_Negative_Zero;
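  // TDCMask now mirrors the class bits produced by the fclass/vfclass
  // instructions, so the lowering below reduces to classifying the input and
  // testing whether any of the requested class bits are set.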
  bool IsOneBitMask = isPowerOf2_32(TDCMask);

  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);

  if (VT.isVector()) {
    SDValue Op0 = Op.getOperand(0);
    MVT VT0 = Op.getOperand(0).getSimpleValueType();

    if (VT.isScalableVector()) {
      MVT DstVT = VT0.changeVectorElementTypeToInteger();
      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
      if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
        Mask = Op.getOperand(2);
        VL = Op.getOperand(3);
      }
      SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
                                    VL, Op->getFlags());
      if (IsOneBitMask)
        return DAG.getSetCC(DL, VT, FPCLASS,
                            DAG.getConstant(TDCMask, DL, DstVT),
                            ISD::CondCode::SETEQ);
      SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
                                DAG.getConstant(TDCMask, DL, DstVT));
      return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
                          ISD::CondCode::SETNE);
    }

    MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);
    MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
    auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
    if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
      Mask = Op.getOperand(2);
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
      VL = Op.getOperand(3);
    }

    Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);

    SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
                                  Mask, VL, Op->getFlags());

    TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                           DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
    if (IsOneBitMask) {
      SDValue VMSEQ =
          DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                      {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
                       DAG.getUNDEF(ContainerVT), Mask, VL});
      return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
    }
    SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
                              TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);

    SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
    SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                            DAG.getUNDEF(ContainerDstVT), SplatZero, VL);

    SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                                {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
                                 DAG.getUNDEF(ContainerVT), Mask, VL});
    return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
  }

  SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
  SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
                             ISD::CondCode::SETNE);
  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
// operations propagate nans.
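// For example, fmaximum(NaN, 1.0) must return NaN, whereas fmax would return
// 1.0; the selects below first replace the non-NaN operand with the NaN one so
// that the following min/max propagates it.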
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);

  if (!VT.isVector()) {
    MVT XLenVT = Subtarget.getXLenVT();

    // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
    // ensures that when one input is a nan, the other will also be a nan
    // allowing the nan to propagate. If both inputs are nan, this will swap the
    // inputs which is harmless.

    SDValue NewY = Y;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
      SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
      NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
    }

    SDValue NewX = X;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
      SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
      NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
    }

    unsigned Opc =
        Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
    return DAG.getNode(Opc, DL, VT, NewX, NewY);
  }

  // Check no NaNs before converting to fixed vector scalable.
  bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
  bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
    Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue NewY = Y;
  if (!XIsNeverNan) {
    SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {X, X, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewY =
        DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
  }

  SDValue NewX = X;
  if (!YIsNeverNan) {
    SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {Y, Y, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewX =
        DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
  }

  unsigned Opc =
      Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
                            DAG.getUNDEF(ContainerVT), Mask, VL);
  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}
/// Get a RISC-V target-specific VL op for a given SDNode.
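/// For example, VP_CASE(FADD) below expands to
///   case ISD::VP_FADD: return RISCVISD::FADD_VL;
/// A handful of opcodes (the shifts, FMA, the i1 logical ops) need explicit
/// cases because their VP names or their mask forms differ.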
static unsigned getRISCVVLOp(SDValue Op) {
#define OP_CASE(NODE)                                                          \
  case ISD::NODE:                                                              \
    return RISCVISD::NODE##_VL;
#define VP_CASE(NODE)                                                          \
  case ISD::VP_##NODE:                                                         \
    return RISCVISD::NODE##_VL;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
  OP_CASE(STRICT_FADD)
  OP_CASE(STRICT_FSUB)
  OP_CASE(STRICT_FMUL)
  OP_CASE(STRICT_FDIV)
  OP_CASE(STRICT_FSQRT)
  VP_CASE(ADD)        // VP_ADD
  VP_CASE(SUB)        // VP_SUB
  VP_CASE(MUL)        // VP_MUL
  VP_CASE(SDIV)       // VP_SDIV
  VP_CASE(SREM)       // VP_SREM
  VP_CASE(UDIV)       // VP_UDIV
  VP_CASE(UREM)       // VP_UREM
  VP_CASE(SHL)        // VP_SHL
  VP_CASE(FADD)       // VP_FADD
  VP_CASE(FSUB)       // VP_FSUB
  VP_CASE(FMUL)       // VP_FMUL
  VP_CASE(FDIV)       // VP_FDIV
  VP_CASE(FNEG)       // VP_FNEG
  VP_CASE(FABS)       // VP_FABS
  VP_CASE(SMIN)       // VP_SMIN
  VP_CASE(SMAX)       // VP_SMAX
  VP_CASE(UMIN)       // VP_UMIN
  VP_CASE(UMAX)       // VP_UMAX
  VP_CASE(FCOPYSIGN)  // VP_FCOPYSIGN
  VP_CASE(SETCC)      // VP_SETCC
  VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
  VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
  VP_CASE(BITREVERSE) // VP_BITREVERSE
  VP_CASE(BSWAP)      // VP_BSWAP
  VP_CASE(CTLZ)       // VP_CTLZ
  VP_CASE(CTTZ)       // VP_CTTZ
  VP_CASE(CTPOP)      // VP_CTPOP
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    return RISCVISD::CTLZ_VL;
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    return RISCVISD::CTTZ_VL;
  case ISD::FMA:
  case ISD::VP_FMA:
    return RISCVISD::VFMADD_VL;
  case ISD::STRICT_FMA:
    return RISCVISD::STRICT_VFMADD_VL;
  case ISD::AND:
  case ISD::VP_AND:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMAND_VL;
    return RISCVISD::AND_VL;
  case ISD::OR:
  case ISD::VP_OR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMOR_VL;
    return RISCVISD::OR_VL;
  case ISD::XOR:
  case ISD::VP_XOR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMXOR_VL;
    return RISCVISD::XOR_VL;
  case ISD::VP_SELECT:
    return RISCVISD::VSELECT_VL;
  case ISD::VP_MERGE:
    return RISCVISD::VMERGE_VL;
  case ISD::VP_ASHR:
    return RISCVISD::SRA_VL;
  case ISD::VP_LSHR:
    return RISCVISD::SRL_VL;
  case ISD::VP_SQRT:
    return RISCVISD::FSQRT_VL;
  case ISD::VP_SIGN_EXTEND:
    return RISCVISD::VSEXT_VL;
  case ISD::VP_ZERO_EXTEND:
    return RISCVISD::VZEXT_VL;
  case ISD::VP_FP_TO_SINT:
    return RISCVISD::VFCVT_RTZ_X_F_VL;
  case ISD::VP_FP_TO_UINT:
    return RISCVISD::VFCVT_RTZ_XU_F_VL;
  case ISD::FMINNUM:
  case ISD::VP_FMINNUM:
    return RISCVISD::VFMIN_VL;
  case ISD::FMAXNUM:
  case ISD::VP_FMAXNUM:
    return RISCVISD::VFMAX_VL;
  }
}
/// Return true if a RISC-V target-specific op has a merge operand.
static bool hasMergeOp(unsigned Opcode) {
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
         "not a RISC-V target specific op");
  static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
                RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                        ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                "adding target specific op should update this function");
  if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
    return true;
  if (Opcode == RISCVISD::FCOPYSIGN_VL)
    return true;
  if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
    return true;
  if (Opcode == RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
    return true;
  if (Opcode == RISCVISD::VMERGE_VL)
    return true;
  return false;
}
/// Return true if a RISC-V target-specific op has a mask operand.
static bool hasMaskOp(unsigned Opcode) {
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
         "not a RISC-V target specific op");
  static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
                RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                        ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                "adding target specific op should update this function");
  if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL &&
      Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
    return true;
  return false;
}
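// SplitVectorOp splits a vector operation into two operations on the low and
// high halves of its operands (scalar operands are reused as-is) and
// concatenates the results. It is used below to split nxv32f16 operations
// when full f16 vector instruction support is unavailable.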
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);
  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);
  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
      std::tie(LoOperands[j], HiOperands[j]) =
          DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
      continue;
    }
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
  auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
  auto [EVLLo, EVLHi] =
      DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);

  SDValue ResLo =
      DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                  {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
  return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                     {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
}
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
  assert(Op->isStrictFPOpcode());

  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));

  SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
  SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));

  SDLoc DL(Op);
  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
  HiOperands[0] = LoRes.getValue(1);
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());

  SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
                          LoRes.getValue(0), HiRes.getValue(0));
  return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
}
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::ATOMIC_FENCE:
    return LowerATOMIC_FENCE(Op, DAG, Subtarget);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::Constant:
    return lowerConstant(Op, DAG, Subtarget);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ROTL:
  case ISD::ROTR:
    if (Op.getValueType().isFixedLengthVector()) {
      assert(Subtarget.hasStdExtZvkb());
      return lowerToScalableOp(Op, DAG);
    }
    assert(Subtarget.hasVendorXTHeadBb() &&
           !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
           "Unexpected custom legalization");
    // XTHeadBb only supports rotate by constant.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    return Op;
  case ISD::BITCAST: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::f16 && Op0VT == MVT::i16 &&
        Subtarget.hasStdExtZfhminOrZhinxmin()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
        Subtarget.hasStdExtZfbfmin()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
        Subtarget.hasStdExtFOrZfinx()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
        Subtarget.hasStdExtZfa()) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
      return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
    }

    // Consider other scalar<->scalar casts as legal if the types are legal.
    // Otherwise expand them.
    if (!VT.isVector() && !Op0VT.isVector()) {
      if (isTypeLegal(VT) && isTypeLegal(Op0VT))
        return Op;
      return SDValue();
    }

    assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
           "Unexpected types");

    if (VT.isFixedLengthVector()) {
      // We can handle fixed length vector bitcasts with a simple replacement
      // in isel.
      if (Op0VT.isFixedLengthVector())
        return Op;
      // When bitcasting from scalar to fixed-length vector, insert the scalar
      // into a one-element vector of the result type, and perform a vector
      // bitcast.
      if (!Op0VT.isVector()) {
        EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
        if (!isTypeLegal(BVT))
          return SDValue();
        return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
                                              DAG.getUNDEF(BVT), Op0,
                                              DAG.getConstant(0, DL, XLenVT)));
      }
      return SDValue();
    }
    // Custom-legalize bitcasts from fixed-length vector types to scalar types
    // thus: bitcast the vector to a one-element vector type whose element type
    // is the same as the result type, and extract the first element.
    if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (!isTypeLegal(BVT))
        return SDValue();
      SDValue BVec = DAG.getBitcast(BVT, Op0);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                         DAG.getConstant(0, DL, XLenVT));
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::BITREVERSE: {
    MVT VT = Op.getSimpleValueType();
    if (VT.isFixedLengthVector()) {
      assert(Subtarget.hasStdExtZvbb());
      return lowerToScalableOp(Op, DAG);
    }
    SDLoc DL(Op);
    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
    assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
    // Expand bitreverse to a bswap(rev8) followed by brev8.
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
    return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
  }
  case ISD::TRUNCATE:
    // Only custom-lower vector truncates
    if (!Op.getSimpleValueType().isVector())
      return Op;
    return lowerVectorTruncLike(Op, DAG);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue Scalar = Op.getOperand(0);
    if (VT.getVectorElementType() == MVT::i1) {
      MVT WideVT = VT.changeVectorElementType(MVT::i8);
      SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
    }
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
    SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
                            DAG.getUNDEF(ContainerVT), Scalar, VL);
    if (VT.isFixedLengthVector())
      V = convertFromScalableVector(VT, V, DAG, Subtarget);
    return V;
  }
  case ISD::VSCALE: {
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
    if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
      report_fatal_error("Support for VLEN==32 is incomplete.");
    // We assume VLENB is a multiple of 8. We manually choose the best shift
    // here because SimplifyDemandedBits isn't always able to simplify it.
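    // For example, vscale * 4 becomes (VLENB >> 1): Val == 4 gives Log2 == 2,
    // which is less than 3, so we emit a single SRL by 3 - 2 == 1 instead of
    // first computing vscale = VLENB >> 3 and then shifting back up.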
    uint64_t Val = Op.getConstantOperandVal(0);
    if (isPowerOf2_64(Val)) {
      uint64_t Log2 = Log2_64(Val);
      if (Log2 < 3)
        Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
                          DAG.getConstant(3 - Log2, DL, VT));
      else if (Log2 > 3)
        Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
                          DAG.getConstant(Log2 - 3, DL, XLenVT));
    } else if ((Val % 8) == 0) {
      // If the multiplier is a multiple of 8, scale it down to avoid needing
      // to shift the VLENB value.
      Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
                        DAG.getConstant(Val / 8, DL, XLenVT));
    } else {
      SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
                                   DAG.getConstant(3, DL, XLenVT));
      Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
                        DAG.getConstant(Val, DL, XLenVT));
    }
    return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
  }
  case ISD::FPOWI: {
    // Custom promote f16 powi with illegal i32 integer type on RV64. Once
    // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
    if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
        Op.getOperand(1).getValueType() == MVT::i32) {
      SDLoc DL(Op);
      SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
      SDValue Powi =
          DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
      return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    return SDValue();
  }
  case ISD::FMAXIMUM:
  case ISD::FMINIMUM:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
  case ISD::FP_EXTEND: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
      return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
    if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
      SDValue FloatVal =
          DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
      return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
    }

    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  }
  case ISD::FP_ROUND: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
      return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
    if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
        Subtarget.hasStdExtDOrZdinx()) {
      SDValue FloatVal =
          DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
                      DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
      return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
    }

    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  }
  case ISD::STRICT_FP_ROUND:
  case ISD::STRICT_FP_EXTEND:
    return lowerStrictFPExtendOrRoundLike(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        Op.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      SDLoc DL(Op);
      MVT NVT =
          MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
      SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
      return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    [[fallthrough]];
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    if (SDValue Op1 = Op.getOperand(0);
        Op1.getValueType().isVector() &&
        Op1.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op1.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      SDLoc DL(Op);
      MVT NVT = MVT::getVectorVT(MVT::f32,
                                 Op1.getValueType().getVectorElementCount());
      SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
      return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
    }
    [[fallthrough]];
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // two steps.
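    // For example, a sitofp from nxv2i8 to nxv2f32 first sign-extends to
    // nxv2i16 and then converts i16 -> f32, so that each hop only doubles the
    // element size.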
6038 MVT VT
= Op
.getSimpleValueType();
6042 bool IsStrict
= Op
->isStrictFPOpcode();
6043 SDValue Src
= Op
.getOperand(0 + IsStrict
);
6044 MVT EltVT
= VT
.getVectorElementType();
6045 MVT SrcVT
= Src
.getSimpleValueType();
6046 MVT SrcEltVT
= SrcVT
.getVectorElementType();
6047 unsigned EltSize
= EltVT
.getSizeInBits();
6048 unsigned SrcEltSize
= SrcEltVT
.getSizeInBits();
6049 assert(isPowerOf2_32(EltSize
) && isPowerOf2_32(SrcEltSize
) &&
6050 "Unexpected vector element types");
6052 bool IsInt2FP
= SrcEltVT
.isInteger();
6053 // Widening conversions
6054 if (EltSize
> (2 * SrcEltSize
)) {
6056 // Do a regular integer sign/zero extension then convert to float.
6057 MVT IVecVT
= MVT::getVectorVT(MVT::getIntegerVT(EltSize
/ 2),
6058 VT
.getVectorElementCount());
6059 unsigned ExtOpcode
= (Op
.getOpcode() == ISD::UINT_TO_FP
||
6060 Op
.getOpcode() == ISD::STRICT_UINT_TO_FP
)
6063 SDValue Ext
= DAG
.getNode(ExtOpcode
, DL
, IVecVT
, Src
);
6065 return DAG
.getNode(Op
.getOpcode(), DL
, Op
->getVTList(),
6066 Op
.getOperand(0), Ext
);
6067 return DAG
.getNode(Op
.getOpcode(), DL
, VT
, Ext
);
6070 assert(SrcEltVT
== MVT::f16
&& "Unexpected FP_TO_[US]INT lowering");
6071 // Do one doubling fp_extend then complete the operation by converting
6073 MVT InterimFVT
= MVT::getVectorVT(MVT::f32
, VT
.getVectorElementCount());
6075 auto [FExt
, Chain
] =
6076 DAG
.getStrictFPExtendOrRound(Src
, Op
.getOperand(0), DL
, InterimFVT
);
6077 return DAG
.getNode(Op
.getOpcode(), DL
, Op
->getVTList(), Chain
, FExt
);
6079 SDValue FExt
= DAG
.getFPExtendOrRound(Src
, DL
, InterimFVT
);
6080 return DAG
.getNode(Op
.getOpcode(), DL
, VT
, FExt
);
6083 // Narrowing conversions
6084 if (SrcEltSize
> (2 * EltSize
)) {
6086 // One narrowing int_to_fp, then an fp_round.
6087 assert(EltVT
== MVT::f16
&& "Unexpected [US]_TO_FP lowering");
6088 MVT InterimFVT
= MVT::getVectorVT(MVT::f32
, VT
.getVectorElementCount());
6090 SDValue Int2FP
= DAG
.getNode(Op
.getOpcode(), DL
,
6091 DAG
.getVTList(InterimFVT
, MVT::Other
),
6092 Op
.getOperand(0), Src
);
6093 SDValue Chain
= Int2FP
.getValue(1);
6094 return DAG
.getStrictFPExtendOrRound(Int2FP
, Chain
, DL
, VT
).first
;
6096 SDValue Int2FP
= DAG
.getNode(Op
.getOpcode(), DL
, InterimFVT
, Src
);
6097 return DAG
.getFPExtendOrRound(Int2FP
, DL
, VT
);
6100 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6101 // representable by the integer, the result is poison.
6102 MVT IVecVT
= MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize
/ 2),
6103 VT
.getVectorElementCount());
6106 DAG
.getNode(Op
.getOpcode(), DL
, DAG
.getVTList(IVecVT
, MVT::Other
),
6107 Op
.getOperand(0), Src
);
6108 SDValue Res
= DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, FP2Int
);
6109 return DAG
.getMergeValues({Res
, FP2Int
.getValue(1)}, DL
);
6111 SDValue FP2Int
= DAG
.getNode(Op
.getOpcode(), DL
, IVecVT
, Src
);
6112 return DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, FP2Int
);
6115 // Scalable vectors can exit here. Patterns will handle equally-sized
6116 // conversions halving/doubling ones.
6117 if (!VT
.isFixedLengthVector())
6120 // For fixed-length vectors we lower to a custom "VL" node.
6121 unsigned RVVOpc
= 0;
6122 switch (Op
.getOpcode()) {
6124 llvm_unreachable("Impossible opcode");
6125 case ISD::FP_TO_SINT
:
6126 RVVOpc
= RISCVISD::VFCVT_RTZ_X_F_VL
;
6128 case ISD::FP_TO_UINT
:
6129 RVVOpc
= RISCVISD::VFCVT_RTZ_XU_F_VL
;
6131 case ISD::SINT_TO_FP
:
6132 RVVOpc
= RISCVISD::SINT_TO_FP_VL
;
6134 case ISD::UINT_TO_FP
:
6135 RVVOpc
= RISCVISD::UINT_TO_FP_VL
;
6137 case ISD::STRICT_FP_TO_SINT
:
6138 RVVOpc
= RISCVISD::STRICT_VFCVT_RTZ_X_F_VL
;
6140 case ISD::STRICT_FP_TO_UINT
:
6141 RVVOpc
= RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL
;
6143 case ISD::STRICT_SINT_TO_FP
:
6144 RVVOpc
= RISCVISD::STRICT_SINT_TO_FP_VL
;
6146 case ISD::STRICT_UINT_TO_FP
:
6147 RVVOpc
= RISCVISD::STRICT_UINT_TO_FP_VL
;
6151 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
6152 MVT SrcContainerVT
= getContainerForFixedLengthVector(SrcVT
);
6153 assert(ContainerVT
.getVectorElementCount() == SrcContainerVT
.getVectorElementCount() &&
6154 "Expected same element count");
6156 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
6158 Src
= convertToScalableVector(SrcContainerVT
, Src
, DAG
, Subtarget
);
6160 Src
= DAG
.getNode(RVVOpc
, DL
, DAG
.getVTList(ContainerVT
, MVT::Other
),
6161 Op
.getOperand(0), Src
, Mask
, VL
);
6162 SDValue SubVec
= convertFromScalableVector(VT
, Src
, DAG
, Subtarget
);
6163 return DAG
.getMergeValues({SubVec
, Src
.getValue(1)}, DL
);
6165 Src
= DAG
.getNode(RVVOpc
, DL
, ContainerVT
, Src
, Mask
, VL
);
6166 return convertFromScalableVector(VT
, Src
, DAG
, Subtarget
);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
  case ISD::FP_TO_BF16: {
    // Custom lower to ensure the libcall return is passed in an FPR on hard
    // float ABIs.
    assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
    SDLoc DL(Op);
    MakeLibCallOptions CallOptions;
    RTLIB::Libcall LC =
        RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
    SDValue Res =
        makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
    if (Subtarget.is64Bit() && !RV64LegalI32)
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
    return DAG.getBitcast(MVT::i32, Res);
  }
  case ISD::BF16_TO_FP: {
    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    Op = DAG.getNode(
        ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
        DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
    SDValue Res = Subtarget.is64Bit()
                      ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
                      : DAG.getBitcast(MVT::f32, Op);
    // fp_extend if the target VT is bigger than f32.
    if (VT != MVT::f32)
      return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
    return Res;
  }
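  // A note on the BF16_TO_FP expansion above: bf16 shares f32's exponent
  // layout and simply drops the low 16 mantissa bits, so shifting the raw
  // bf16 bits left by 16 and moving them into an FPR reinterprets the value
  // as the corresponding f32 (informal explanation).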
  case ISD::FP_TO_FP16: {
    // Custom lower to ensure the libcall return is passed in an FPR on hard
    // float ABIs.
    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
    SDLoc DL(Op);
    MakeLibCallOptions CallOptions;
    RTLIB::Libcall LC =
        RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
    SDValue Res =
        makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
    if (Subtarget.is64Bit() && !RV64LegalI32)
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
    return DAG.getBitcast(MVT::i32, Res);
  }
  case ISD::FP16_TO_FP: {
    // Custom lower to ensure the libcall argument is passed in an FPR on hard
    // float ABIs.
    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
    SDLoc DL(Op);
    MakeLibCallOptions CallOptions;
    SDValue Arg = Subtarget.is64Bit()
                      ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
                                    Op.getOperand(0))
                      : DAG.getBitcast(MVT::f32, Op.getOperand(0));
    SDValue Res =
        makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
            .first;
    return Res;
  }
  case ISD::FTRUNC:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
    return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
  case ISD::LRINT:
  case ISD::LLRINT:
    return lowerVectorXRINT(Op, DAG, Subtarget);
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAX:
    if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorReductionOp(Op, DAG);
    return lowerVPREDUCE(Op, DAG);
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
    if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
    return lowerVPREDUCE(Op, DAG);
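  // The nxv32f16 special cases here (and in several cases below) exist because
  // with Zvfhmin but without Zvfh these f16 vector operations are handled by
  // promoting to f32, and nxv32f16 has no legal f32 counterpart (nxv32f32
  // would exceed LMUL=8), so the operation is split in half first (informal
  // summary of the checks above).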
  case ISD::UNDEF: {
    MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
    return convertFromScalableVector(Op.getSimpleValueType(),
                                     DAG.getUNDEF(ContainerVT), DAG, Subtarget);
  }
  case ISD::INSERT_SUBVECTOR:
    return lowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::VECTOR_DEINTERLEAVE:
    return lowerVECTOR_DEINTERLEAVE(Op, DAG);
  case ISD::VECTOR_INTERLEAVE:
    return lowerVECTOR_INTERLEAVE(Op, DAG);
  case ISD::STEP_VECTOR:
    return lowerSTEP_VECTOR(Op, DAG);
  case ISD::VECTOR_REVERSE:
    return lowerVECTOR_REVERSE(Op, DAG);
  case ISD::VECTOR_SPLICE:
    return lowerVECTOR_SPLICE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::SPLAT_VECTOR:
    if (Op.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      SDLoc DL(Op);
      SDValue NewScalar =
          DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
      SDValue NewSplat = DAG.getNode(
          ISD::SPLAT_VECTOR, DL,
          MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
          NewScalar);
      return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    if (Op.getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskSplat(Op, DAG);
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::CONCAT_VECTORS: {
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      SDValue SubVec = OpIdx.value();
      // Don't insert undef subvectors.
      if (SubVec.isUndef())
        continue;
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    }
    return Vec;
  }
  case ISD::LOAD:
    if (auto V = expandUnalignedRVVLoad(Op, DAG))
      return V;
    if (Op.getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorLoadToRVV(Op, DAG);
    return Op;
  case ISD::STORE:
    if (auto V = expandUnalignedRVVStore(Op, DAG))
      return V;
    if (Op.getOperand(1).getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return Op;
  case ISD::MLOAD:
  case ISD::VP_LOAD:
    return lowerMaskedLoad(Op, DAG);
  case ISD::MSTORE:
  case ISD::VP_STORE:
    return lowerMaskedStore(Op, DAG);
  case ISD::SELECT_CC: {
    // This occurs because we custom legalize SETGT and SETUGT for setcc. That
    // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
    // into separate SETCC+SELECT just like LegalizeDAG.
    SDValue Tmp1 = Op.getOperand(0);
    SDValue Tmp2 = Op.getOperand(1);
    SDValue True = Op.getOperand(2);
    SDValue False = Op.getOperand(3);
    EVT VT = Op.getValueType();
    SDValue CC = Op.getOperand(4);
    EVT CmpVT = Tmp1.getValueType();
    EVT CCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
    SDLoc DL(Op);
    SDValue Cond =
        DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
    return DAG.getSelect(DL, VT, Cond, True, False);
  }
  case ISD::SETCC: {
    MVT OpVT = Op.getOperand(0).getSimpleValueType();
    if (OpVT.isScalarInteger()) {
      MVT VT = Op.getSimpleValueType();
      SDValue LHS = Op.getOperand(0);
      SDValue RHS = Op.getOperand(1);
      ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
      assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
             "Unexpected CondCode");

      SDLoc DL(Op);

      // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
      // convert this to the equivalent of (set(u)ge X, C+1) by using
      // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
      // into a register.
      if (isa<ConstantSDNode>(RHS)) {
        int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
        if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
          // If this is an unsigned compare and the constant is -1, incrementing
          // the constant would change behavior. The result should be false.
          if (CCVal == ISD::SETUGT && Imm == -1)
            return DAG.getConstant(0, DL, VT);
          // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
          CCVal = ISD::getSetCCSwappedOperands(CCVal);
          SDValue SetCC = DAG.getSetCC(
              DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
          return DAG.getLogicalNOT(DL, SetCC, VT);
        }
      }
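      // Informal example of the constant transform just above: (setgt X, 5)
      // becomes (xori (slti X, 6), 1), since X > 5 holds exactly when
      // !(X < 6); slti's immediate form avoids putting the constant into a
      // register.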
      // Not a constant we could handle, swap the operands and condition code to
      // SETLT/SETULT.
      CCVal = ISD::getSetCCSwappedOperands(CCVal);
      return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
    }

    if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);

    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  }
    return lowerToScalableOp(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    if (Op.getSimpleValueType().isFixedLengthVector())
      return lowerToScalableOp(Op, DAG);
    // This can be called for an i32 shift amount that needs to be promoted.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    return SDValue();
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerToScalableOp(Op, DAG);
  case ISD::ABS:
  case ISD::VP_ABS:
    return lowerABS(Op, DAG);
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerToScalableOp(Op, DAG);
    assert(Op.getOpcode() != ISD::CTTZ);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::STRICT_FADD:
  case ISD::STRICT_FSUB:
  case ISD::STRICT_FMUL:
  case ISD::STRICT_FDIV:
  case ISD::STRICT_FSQRT:
  case ISD::STRICT_FMA:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitStrictFPVectorOp(Op, DAG);
    return lowerToScalableOp(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
    return lowerVectorStrictFSetcc(Op, DAG);
  case ISD::STRICT_FCEIL:
  case ISD::STRICT_FRINT:
  case ISD::STRICT_FFLOOR:
  case ISD::STRICT_FTRUNC:
  case ISD::STRICT_FNEARBYINT:
  case ISD::STRICT_FROUND:
  case ISD::STRICT_FROUNDEVEN:
    return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    return lowerMaskedGather(Op, DAG);
  case ISD::MSCATTER:
  case ISD::VP_SCATTER:
    return lowerMaskedScatter(Op, DAG);
  case ISD::GET_ROUNDING:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return lowerSET_ROUNDING(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::VP_SELECT:
    return lowerVPOp(Op, DAG);
  case ISD::VP_AND:
  case ISD::VP_OR:
  case ISD::VP_XOR:
    return lowerLogicVPOp(Op, DAG);
  case ISD::VP_FMINNUM:
  case ISD::VP_FMAXNUM:
  case ISD::VP_FCOPYSIGN:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVPOp(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::VP_IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPExtMaskOp(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::VP_TRUNCATE:
    return lowerVectorTruncLike(Op, DAG);
  case ISD::VP_FP_EXTEND:
  case ISD::VP_FP_ROUND:
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  case ISD::VP_SINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        Op.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op.getValueType() == MVT::nxv32f16)
        return SplitVPOp(Op, DAG);
      SDLoc DL(Op);
      MVT NVT =
          MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
      auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
      return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    return lowerVPFPIntConvOp(Op, DAG);
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
    if (SDValue Op1 = Op.getOperand(0);
        Op1.getValueType().isVector() &&
        Op1.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op1.getValueType() == MVT::nxv32f16)
        return SplitVPOp(Op, DAG);
      SDLoc DL(Op);
      MVT NVT = MVT::getVectorVT(MVT::f32,
                                 Op1.getValueType().getVectorElementCount());
      SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
      return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                         {WidenVec, Op.getOperand(1), Op.getOperand(2)});
    }
    return lowerVPFPIntConvOp(Op, DAG);
  case ISD::VP_SETCC:
    if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVPOp(Op, DAG);
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPSetCCMaskOp(Op, DAG);
  case ISD::VP_BITREVERSE:
    return lowerVPOp(Op, DAG);
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VP_CTPOP:
    return lowerVPOp(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    return lowerVPStridedLoad(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    return lowerVPStridedStore(Op, DAG);
  case ISD::VP_FCEIL:
  case ISD::VP_FFLOOR:
  case ISD::VP_FRINT:
  case ISD::VP_FNEARBYINT:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVPOp(Op, DAG);
    return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
  case ISD::EXPERIMENTAL_VP_SPLICE:
    return lowerVPSpliceExperimental(Op, DAG);
  case ISD::EXPERIMENTAL_VP_REVERSE:
    return lowerVPReverseExperimental(Op, DAG);
  }
}
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal, bool IsExternWeak) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
  // mode.
  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
    return Load;
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
    return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
  }
  case CodeModel::Medium: {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsExternWeak) {
      // An extern weak symbol may be undefined, i.e. have value 0, which may
      // not be within 2GiB of PC, so use GOT-indirect addressing to access the
      // symbol. This generates the pattern (PseudoLGA sym), which expands to
      // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
      SDValue Load =
          SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *MemOp = MF.getMachineMemOperand(
          MachinePointerInfo::getGOT(MF),
          MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
              MachineMemOperand::MOInvariant,
          LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
      DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
      return Load;
    }

    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
  }
  }
}
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  const GlobalValue *GV = N->getGlobal();
  return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
                                            SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd =
      DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
}
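// As an informal illustration of the local-exec sequence built above, for a
// thread-local variable `x` the emitted code is expected to look roughly like:
//   lui   a0, %tprel_hi(x)
//   add   a0, a0, tp, %tprel_add(x)
//   addi  a0, a0, %tprel_lo(x)
// i.e. a tp-relative address formed with the TPREL relocations named in the
// comments (the register choice here is illustrative only).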
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(N, DAG);

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  return Addr;
}

// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  if (!Subtarget.hasShortForwardBranchOpt()) {
    // (select c, -1, y) -> -c | y
    if (isAllOnesConstant(TrueV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
    }
    // (select c, y, -1) -> (c-1) | y
    if (isAllOnesConstant(FalseV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
    }

    // (select c, 0, y) -> (c-1) & y
    if (isNullConstant(TrueV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
    }
    // (select c, y, 0) -> -c & y
    if (isNullConstant(FalseV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         FalseV);
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
                         FalseV);
    }
  }

  return SDValue();
}
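// Informal sketch of why the folds in combineSelectToBinOp are valid: with a
// boolean condition c in {0, 1}, -c is either 0 or all-ones and c-1 is either
// all-ones or 0, so (select c, -1, y) == (-c | y), (select c, y, 0) == (-c & y),
// and similarly for the (c-1) forms. Each select therefore becomes a single
// or/and plus a cheap negate or add of -1.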
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
// being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c1`?
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  if (Subtarget.hasShortForwardBranchOpt())
    return SDValue();

  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt =
      cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}
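// Informal example of the fold above: (add (select c, x, -1), 1) has
// binOp(c0, c1) == 0, so it becomes (select c, (add x, 1), 0), which later
// lowering can turn into a branchless and/czero sequence.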
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Lower vector SELECTs to VSELECTs by splatting the condition.
  if (VT.isVector()) {
    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
    SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
  }

  // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
  // nodes to implement the SELECT. Performing the lowering here allows for
  // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
  // sequence or RISCVISD::SELECT_CC node (branch-based select).
  if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
      VT.isScalarInteger()) {
    // (select c, t, 0) -> (czero_eqz t, c)
    if (isNullConstant(FalseV))
      return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
    // (select c, 0, f) -> (czero_nez f, c)
    if (isNullConstant(TrueV))
      return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);

    // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
    if (TrueV.getOpcode() == ISD::AND &&
        (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
      return DAG.getNode(
          ISD::OR, DL, VT, TrueV,
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
    // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
    if (FalseV.getOpcode() == ISD::AND &&
        (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
      return DAG.getNode(
          ISD::OR, DL, VT, FalseV,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));

    // Try some other optimizations before falling back to generic lowering.
    if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
      return V;

    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    // Unless we have the short forward branch optimization.
    if (!Subtarget.hasShortForwardBranchOpt())
      return DAG.getNode(
          ISD::OR, DL, VT,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
  }

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->use_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->use_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        return lowerSELECT(NewSel, DAG);
      }
    }
  }

  // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
  // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
  const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
  const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
  if (FPTV && FPFV) {
    if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
    if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
      SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
                                DAG.getConstant(1, DL, XLenVT));
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
    }
  }

  // If the condition is not an integer SETCC which operates on XLenVT, we need
  // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != XLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, XLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
  // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
  // advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
    const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }
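  // For example, (select (setlt a, b), 6, 5) matches the TrueVal - 1 ==
  // FalseVal case above and becomes (add (setlt a, b), 5), with no branch or
  // conditional-move sequence needed (informal sketch).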
  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV)
    RHS = DAG.getConstant(0, DL, VT);

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
}
SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  if (CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                       LHS, RHS, TargetCC, Op.getOperand(2));
  }

  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                     CondV, DAG.getConstant(0, DL, XLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
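// Informal worked example for the expansion above with XLEN = 32: for
// Shamt = 4, Shamt - XLEN < 0, so Lo = Lo << 4 and Hi picks up the top four
// bits of Lo through the ((Lo >>u 1) >>u 27) term; for Shamt = 40,
// Shamt - XLEN = 8, so Lo = 0 and Hi = Lo << 8.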
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
                         DAG.getConstant(1, DL, SplatVal.getValueType()));
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}
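// For example, splatting a non-constant i1 %b to nxv4i1 goes through nxv4i8
// above: (%b & 1) is splatted as an i8 vector and compared against zero with
// SETNE, which instruction selection can then turn into a mask-producing
// integer compare (informal sketch).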
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to VMV_V_X_VL.
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR_PARTS lowering");

  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VecVT);

  auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

  SDValue Res =
      splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);

  if (VecVT.isFixedLengthVector())
    Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);

  return Res;
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  if (VecVT.isScalableVector()) {
    SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
    SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}

// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                  SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);

  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  // Truncates to mask types are handled differently
  if (VT.getVectorElementType() == MVT::i1)
    return lowerVectorMaskTruncLike(Op, DAG);

  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (SrcVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  LLVMContext &Context = *DAG.getContext();
  const ElementCount Count = ContainerVT.getVectorElementCount();
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                         Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}

SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       SrcVT.getVectorElementType() == MVT::f16) ||
      (VT.getVectorElementType() == MVT::f16 &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                                     SrcVT.getVectorElementType() != MVT::f16);
  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
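// Note on the indirect f64 <-> f16 path above: narrowing f64 to f16 via f32
// with two ordinary roundings could round twice and differ from a single
// correctly-rounded conversion, so the first narrowing step uses the
// round-to-odd conversion (the VFNCVT_ROD node) to preserve enough
// information for the final rounding to come out correct.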
7725 // Given a scalable vector type and an index into it, returns the type for the
7726 // smallest subvector that the index fits in. This can be used to reduce LMUL
7727 // for operations like vslidedown.
7729 // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
7730 static std::optional
<MVT
>
7731 getSmallestVTForIndex(MVT VecVT
, unsigned MaxIdx
, SDLoc DL
, SelectionDAG
&DAG
,
7732 const RISCVSubtarget
&Subtarget
) {
7733 assert(VecVT
.isScalableVector());
7734 const unsigned EltSize
= VecVT
.getScalarSizeInBits();
7735 const unsigned VectorBitsMin
= Subtarget
.getRealMinVLen();
7736 const unsigned MinVLMAX
= VectorBitsMin
/ EltSize
;
7738 if (MaxIdx
< MinVLMAX
)
7739 SmallerVT
= getLMUL1VT(VecVT
);
7740 else if (MaxIdx
< MinVLMAX
* 2)
7741 SmallerVT
= getLMUL1VT(VecVT
).getDoubleNumVectorElementsVT();
7742 else if (MaxIdx
< MinVLMAX
* 4)
7743 SmallerVT
= getLMUL1VT(VecVT
)
7744 .getDoubleNumVectorElementsVT()
7745 .getDoubleNumVectorElementsVT();
7746 if (!SmallerVT
.isValid() || !VecVT
.bitsGT(SmallerVT
))
7747 return std::nullopt
;
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  MVT OrigContainerVT = ContainerVT;
  SDValue OrigVec = Vec;
  SDValue AlignedIdx;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
    const unsigned OrigIdx = IdxC->getZExtValue();
    // Do we know an upper bound on LMUL?
    if (auto ShrunkVT =
            getSmallestVTForIndex(ContainerVT, OrigIdx, DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      AlignedIdx = DAG.getVectorIdxConstant(0, DL);
    }

    // If we're compiling for an exact VLEN value, we can always perform
    // the insert in m1 as we can determine the register corresponding to
    // the index in the register group.
    const unsigned MinVLen = Subtarget.getRealMinVLen();
    const unsigned MaxVLen = Subtarget.getRealMaxVLen();
    const MVT M1VT = getLMUL1VT(ContainerVT);
    if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) {
      EVT ElemVT = VecVT.getVectorElementType();
      unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
      unsigned RemIdx = OrigIdx % ElemsPerVReg;
      unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
      unsigned ExtractIdx =
          SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
      AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
      Idx = DAG.getVectorIdxConstant(RemIdx, DL);
      ContainerVT = M1VT;
    }

    if (AlignedIdx)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        AlignedIdx);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      if (!VecVT.isFloatingPoint())
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);

      if (AlignedIdx)
        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Vec, AlignedIdx);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1down instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this purpose.
    SDValue ValLo, ValHi;
    std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // If the Idx is 0 we can insert directly into the vector.
    if (isNullConstant(Idx)) {
      // First slide in the lo value, then the hi value above it. We use
      // slide1down to avoid the register group overlap constraint of
      // vslide1up.
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
      // If the source vector is undef don't pass along the tail elements from
      // the previous slide1down.
      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
      // Bitcast back to the right container type.
      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);

      if (AlignedIdx)
        ValInVec =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                        ValInVec, AlignedIdx);
      if (!VecVT.isFixedLengthVector())
        return ValInVec;
      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
    }

    // First slide in the lo value, then the hi value above it. We use
    // slide1down to avoid the register group overlap constraint of vslide1up.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT),
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));

  // Use tail agnostic policy if Idx is the last index of Vec.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
      cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
          VecVT.getVectorNumElements())
    Policy = RISCVII::TAIL_AGNOSTIC;
  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                Idx, Mask, InsertVL, Policy);

  if (AlignedIdx)
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Slideup, AlignedIdx);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
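// Rough shape of the code this typically produces for a constant index on an
// integer vector (illustrative only; exact vtype, registers and vsetvli
// placement are decided later, and large indices use vslideup.vx):
//   vsetivli zero, idx+1, e32, m1, tu, ma
//   vmv.s.x     vTmp, a0          ; scalar into element 0
//   vslideup.vi vDst, vTmp, idx   ; slide to position idx, tail undisturbed
// For idx == 0 the slide is skipped and vmv.s.x/vfmv.s.f writes the element
// directly.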
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // Use vfirst.m to extract the first bit.
    if (isNullConstant(Idx)) {
      MVT ContainerVT = VecVT;
      if (VecVT.isFixedLengthVector()) {
        ContainerVT = getContainerForFixedLengthVector(VecVT);
        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      }
      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
      SDValue Vfirst =
          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
    }
    if (VecVT.isFixedLengthVector()) {
      unsigned NumElts = VecVT.getVectorNumElements();
      if (NumElts >= 8) {
        MVT WideEltVT;
        unsigned WidenVecLen;
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
        unsigned MaxEEW = Subtarget.getELen();
        MVT LargestEltVT = MVT::getIntegerVT(
            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
        if (NumElts <= LargestEltVT.getSizeInBits()) {
          assert(isPowerOf2_32(NumElts) &&
                 "the number of elements should be power of 2");
          WideEltVT = MVT::getIntegerVT(NumElts);
          WidenVecLen = 1;
          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
          ExtractBitIdx = Idx;
        } else {
          WideEltVT = LargestEltVT;
          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
          // extract element index = index / element width
          ExtractElementIdx = DAG.getNode(
              ISD::SRL, DL, XLenVT, Idx,
              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
          // mask bit index = index % element width
          ExtractBitIdx = DAG.getNode(
              ISD::AND, DL, XLenVT, Idx,
              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
        }
        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
                                         Vec, ExtractElementIdx);
        // Extract the bit from GPR.
        SDValue ShiftRight =
            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
                                  DAG.getConstant(1, DL, XLenVT));
        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
      }
    }
    // Otherwise, promote to an i8 vector and extract from that.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If we're compiling for an exact VLEN value and we have a known
  // constant index, we can always perform the extract in m1 (or
  // smaller) as we can determine the register corresponding to
  // the index in the register group.
  const unsigned MinVLen = Subtarget.getRealMinVLen();
  const unsigned MaxVLen = Subtarget.getRealMaxVLen();
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
      IdxC && MinVLen == MaxVLen &&
      VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
    MVT M1VT = getLMUL1VT(ContainerVT);
    unsigned OrigIdx = IdxC->getZExtValue();
    EVT ElemVT = VecVT.getVectorElementType();
    unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
    unsigned RemIdx = OrigIdx % ElemsPerVReg;
    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
    unsigned ExtractIdx =
        SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
                      DAG.getVectorIdxConstant(ExtractIdx, DL));
    Idx = DAG.getVectorIdxConstant(RemIdx, DL);
    ContainerVT = M1VT;
  }

  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
  // contains our index.
  std::optional<uint64_t> MaxIdx;
  if (VecVT.isFixedLengthVector())
    MaxIdx = VecVT.getVectorNumElements() - 1;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
    MaxIdx = IdxC->getZExtValue();
  if (MaxIdx) {
    if (auto SmallerVT =
            getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
      ContainerVT = *SmallerVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    }
  }

  // If after narrowing, the required slide is still greater than LMUL2,
  // fallback to generic expansion and go through the stack. This is done
  // for a subtle reason: extracting *all* elements out of a vector is
  // widely expected to be linear in vector size, but because vslidedown
  // is linear in LMUL, performing N extracts using vslidedown becomes
  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
  // seems to have the same problem (the store is linear in LMUL), but the
  // generic expansion *memoizes* the store, and thus for many extracts of
  // the same vector we end up with one store and a bunch of loads.
  // TODO: We don't have the same code for insert_vector_elt because we
  // have BUILD_VECTOR and handle the degenerate case there. Should we
  // consider adding an inverse BUILD_VECTOR node?
  MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
  if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
    return SDValue();

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
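// Typical result for an integer extract at a non-zero index (illustrative;
// register names are placeholders):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vx vTmp, vSrc, a0
//   vmv.x.s       a0, vTmp
// A zero index skips the slide and uses vmv.x.s (or the TableGen FP extract
// pattern) directly.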
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);

  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on
  // the instruction to sign-extend since SEW>XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
    SDValue ScalarLo, ScalarHi;
    std::tie(ScalarLo, ScalarHi) =
        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);

    // Double the VL since we halved SEW.
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      const auto [MinVLMAX, MaxVLMAX] =
          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);

      uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
      if (AVLInt <= MinVLMAX) {
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So let the following code handle it.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
      // Using vsetvli instruction to get the actually used length, which is
      // related to the hardware implementation.
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume Policy operand is the last operand.
    uint64_t Policy =
        cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
                         AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care mask policy.
    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       MaskedOff, AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
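// Illustrative sketch of the RV32 i64-scalar path above for vslide1down
// (register names are placeholders): the source is reinterpreted as an i32
// vector with twice the element count, VL is doubled, and the scalar halves
// are shifted in at SEW=32, roughly:
//   vsetvli t0, aAVL, e64, m1, ta, ma   ; VL for the original e64 request
//   slli    t0, t0, 1                   ; double VL since SEW was halved
//   vslide1down.vx vTmp, vSrc, aLo
//   vslide1down.vx vTmp, vTmp, aHi
// followed by a bitcast back to the i64-element type (and a vmerge when the
// intrinsic was masked).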
// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();

  unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");
  (void)MinVF;

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
  unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);

  SDLoc DL(N);

  SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
  SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);

  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));

  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
  SDValue Res =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
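// Worked example (illustrative, assuming RVVBitsPerBlock == 64): for
// llvm.experimental.get.vector.length(%avl, /*VF=*/2, /*scalable=*/true),
// LMul1VF is 64/8 = 8, so VF=2 is fractional with LMulVal = 4, and the node
// built above corresponds to roughly:
//   vsetvli a0, a0, e8, mf4, ta, ma
// which yields min(AVL, vscale * 2).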
static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
                            SmallVector<SDValue> &Ops) {
  const RISCVSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
  for (const SDValue &V : Op->op_values()) {
    EVT ValType = V.getValueType();
    if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
      MVT InterimIVT =
          MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
                           ValType.getVectorElementCount());
      Ops.push_back(DAG.getBitcast(InterimIVT, V));
    } else if (ValType.isFixedLengthVector()) {
      MVT OpContainerVT = getContainerForFixedLengthVector(
          DAG, V.getSimpleValueType(), Subtarget);
      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
    } else
      Ops.push_back(V);
  }
}
// LMUL * VLEN should be greater than or equal to EGS * SEW
static inline bool isValidEGW(int EGS, EVT VT,
                              const RISCVSubtarget &Subtarget) {
  return (Subtarget.getRealMinVLen() *
          VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
         EGS * VT.getScalarSizeInBits();
}
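// Worked example (illustrative): with a guaranteed VLEN >= 128 and
// VT = nxv4i32 (known minimum size 128 bits), the left-hand side is
// 128 * 128 / 64 = 256 bits of register group, which satisfies EGS = 4 at
// SEW=32 (4 * 32 = 128). An nxv1i32 operand under the same VLEN only gives
// 128 * 32 / 64 = 64 bits and fails the check.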
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
  case Intrinsic::riscv_brev8:
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    unsigned Opc;
    switch (IntNo) {
    case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
    case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
    case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
    case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
    case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
    case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
    case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
    case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
    }

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::riscv_zip:
  case Intrinsic::riscv_unzip: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_clmul:
    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }
    return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::riscv_clmulh:
  case Intrinsic::riscv_clmulr: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
                           DAG.getConstant(32, DL, MVT::i64));
      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
                           DAG.getConstant(32, DL, MVT::i64));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
                        DAG.getConstant(32, DL, MVT::i64));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::experimental_get_vector_length:
    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
  case Intrinsic::riscv_vmv_x_s: {
    SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
  }
  case Intrinsic::riscv_vfmv_f_s:
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                       Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
  case Intrinsic::riscv_vmv_v_x:
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
                            Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = getVLOperand(Op);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
    if (Op.getOperand(1).isUndef())
      return SplattedVal;
    SDValue SplattedIdx =
        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                    DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = getMaskTypeFor(VT);
    SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                    {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
                     DAG.getUNDEF(MaskVT), Mask, VL});
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  // EGS * EEW >= 128 bits
  case Intrinsic::riscv_vaesdf_vv:
  case Intrinsic::riscv_vaesdf_vs:
  case Intrinsic::riscv_vaesdm_vv:
  case Intrinsic::riscv_vaesdm_vs:
  case Intrinsic::riscv_vaesef_vv:
  case Intrinsic::riscv_vaesef_vs:
  case Intrinsic::riscv_vaesem_vv:
  case Intrinsic::riscv_vaesem_vs:
  case Intrinsic::riscv_vaeskf1:
  case Intrinsic::riscv_vaeskf2:
  case Intrinsic::riscv_vaesz_vs:
  case Intrinsic::riscv_vsm4k:
  case Intrinsic::riscv_vsm4r_vv:
  case Intrinsic::riscv_vsm4r_vs: {
    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
    return Op;
  }
  // EGS * EEW >= 256 bits
  case Intrinsic::riscv_vsm3c:
  case Intrinsic::riscv_vsm3me: {
    if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
    return Op;
  }
  // zvknha(SEW=32)/zvknhb(SEW=[32|64])
  case Intrinsic::riscv_vsha2ch:
  case Intrinsic::riscv_vsha2cl:
  case Intrinsic::riscv_vsha2ms: {
    if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
        !Subtarget.hasStdExtZvknhb())
      report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
    return Op;
  }
  case Intrinsic::riscv_sf_vc_v_x:
  case Intrinsic::riscv_sf_vc_v_i:
  case Intrinsic::riscv_sf_vc_v_xv:
  case Intrinsic::riscv_sf_vc_v_iv:
  case Intrinsic::riscv_sf_vc_v_vv:
  case Intrinsic::riscv_sf_vc_v_fv:
  case Intrinsic::riscv_sf_vc_v_xvv:
  case Intrinsic::riscv_sf_vc_v_ivv:
  case Intrinsic::riscv_sf_vc_v_vvv:
  case Intrinsic::riscv_sf_vc_v_fvv:
  case Intrinsic::riscv_sf_vc_v_xvw:
  case Intrinsic::riscv_sf_vc_v_ivw:
  case Intrinsic::riscv_sf_vc_v_vvw:
  case Intrinsic::riscv_sf_vc_v_fvw: {
    MVT VT = Op.getSimpleValueType();

    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    MVT RetVT = VT;
    if (VT.isFixedLengthVector())
      RetVT = getContainerForFixedLengthVector(VT);
    else if (VT.isFloatingPoint())
      RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                               VT.getVectorElementCount());

    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);

    if (VT.isFixedLengthVector())
      NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
    else if (VT.isFloatingPoint())
      NewNode = DAG.getBitcast(VT, NewNode);

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
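// Note (illustrative): under the experimental RV64LegalI32 mode the scalar
// crypto/bitmanip intrinsics above keep i32 legal on RV64, so an i32
// operation is wrapped roughly as
//   (i32 (int_riscv_orc_b x)) -> (trunc (ORC_B (any_extend x)))
// and clmulh/clmulr additionally shift both inputs left by 32 and the result
// right by 32 so the high half of the 64-bit carry-less product is the one
// that is kept.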
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_load: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector()) {
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
        PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
      }
    }

    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    SDValue Ptr = Op.getOperand(3);
    SDValue Stride = Op.getOperand(4);
    SDValue Result, Chain;

    // TODO: We restrict this to unmasked loads currently in consideration of
    // the complexity of handling all-false masks.
    if (IsUnmasked && isNullConstant(Stride)) {
      MVT ScalarVT = ContainerVT.getVectorElementType();
      SDValue ScalarLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
                         ScalarVT, Load->getMemOperand());
      Chain = ScalarLoad.getValue(1);
      Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
                                Subtarget);
    } else {
      SDValue IntID = DAG.getTargetConstant(
          IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
          XLenVT);

      SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
      if (IsUnmasked)
        Ops.push_back(DAG.getUNDEF(ContainerVT));
      else
        Ops.push_back(PassThru);
      Ops.push_back(Ptr);
      Ops.push_back(Stride);
      if (!IsUnmasked)
        Ops.push_back(Mask);
      Ops.push_back(VL);
      if (!IsUnmasked) {
        SDValue Policy =
            DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
        Ops.push_back(Policy);
      }

      SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
      Result =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                  Load->getMemoryVT(), Load->getMemOperand());
      Chain = Result.getValue(1);
    }
    if (VT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
    SDLoc DL(Op);
    static const Intrinsic::ID VlsegInts[7] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
    ContainerVTs.push_back(MVT::Other);
    SDVTList VTs = DAG.getVTList(ContainerVTs);
    SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
    Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
    Ops.push_back(Op.getOperand(2));
    Ops.push_back(VL);
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    SmallVector<SDValue, 9> Results;
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
      Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
                                                  DAG, Subtarget));
    Results.push_back(Result.getValue(NF));
    return DAG.getMergeValues(Results, DL);
  }
  case Intrinsic::riscv_sf_vc_v_x_se:
  case Intrinsic::riscv_sf_vc_v_i_se:
  case Intrinsic::riscv_sf_vc_v_xv_se:
  case Intrinsic::riscv_sf_vc_v_iv_se:
  case Intrinsic::riscv_sf_vc_v_vv_se:
  case Intrinsic::riscv_sf_vc_v_fv_se:
  case Intrinsic::riscv_sf_vc_v_xvv_se:
  case Intrinsic::riscv_sf_vc_v_ivv_se:
  case Intrinsic::riscv_sf_vc_v_vvv_se:
  case Intrinsic::riscv_sf_vc_v_fvv_se:
  case Intrinsic::riscv_sf_vc_v_xvw_se:
  case Intrinsic::riscv_sf_vc_v_ivw_se:
  case Intrinsic::riscv_sf_vc_v_vvw_se:
  case Intrinsic::riscv_sf_vc_v_fvw_se: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    MVT RetVT = VT;
    if (VT.isFixedLengthVector())
      RetVT = getContainerForFixedLengthVector(VT);
    else if (VT.isFloatingPoint())
      RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
                               RetVT.getVectorElementCount());

    SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);

    if (VT.isFixedLengthVector()) {
      SDValue FixedVector =
          convertFromScalableVector(VT, NewNode, DAG, Subtarget);
      NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
    } else if (VT.isFloatingPoint()) {
      SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
      NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
    }

    if (Op == NewNode)
      break;

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
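// Note on the zero-stride fast path above (illustrative): an unmasked
// riscv.masked.strided.load whose stride operand is constant zero reads a
// single memory location, so it is emitted as one scalar zero-extending load
// followed by a splat instead of a strided vector load (vlse) with a zero
// stride register.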
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_store: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    SDValue Val = Op.getOperand(2);
    MVT VT = Val.getSimpleValueType();
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    }
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector())
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
        XLenVT);

    auto *Store = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
    Ops.push_back(Val);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);

    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
                                   Ops, Store->getMemoryVT(),
                                   Store->getMemOperand());
  }
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store: {
    SDLoc DL(Op);
    static const Intrinsic::ID VssegInts[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};
    // Operands are (chain, int_id, vec*, ptr, vl)
    unsigned NF = Op->getNumOperands() - 4;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getOperand(2).getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
    SDValue Ptr = Op->getOperand(NF + 2);

    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
    for (unsigned i = 0; i < NF; i++)
      Ops.push_back(convertToScalableVector(
          ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
    Ops.append({Ptr, VL});

    return DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
  }
  case Intrinsic::riscv_sf_vc_x_se_e8mf8:
  case Intrinsic::riscv_sf_vc_x_se_e8mf4:
  case Intrinsic::riscv_sf_vc_x_se_e8mf2:
  case Intrinsic::riscv_sf_vc_x_se_e8m1:
  case Intrinsic::riscv_sf_vc_x_se_e8m2:
  case Intrinsic::riscv_sf_vc_x_se_e8m4:
  case Intrinsic::riscv_sf_vc_x_se_e8m8:
  case Intrinsic::riscv_sf_vc_x_se_e16mf4:
  case Intrinsic::riscv_sf_vc_x_se_e16mf2:
  case Intrinsic::riscv_sf_vc_x_se_e16m1:
  case Intrinsic::riscv_sf_vc_x_se_e16m2:
  case Intrinsic::riscv_sf_vc_x_se_e16m4:
  case Intrinsic::riscv_sf_vc_x_se_e16m8:
  case Intrinsic::riscv_sf_vc_x_se_e32mf2:
  case Intrinsic::riscv_sf_vc_x_se_e32m1:
  case Intrinsic::riscv_sf_vc_x_se_e32m2:
  case Intrinsic::riscv_sf_vc_x_se_e32m4:
  case Intrinsic::riscv_sf_vc_x_se_e32m8:
  case Intrinsic::riscv_sf_vc_x_se_e64m1:
  case Intrinsic::riscv_sf_vc_x_se_e64m2:
  case Intrinsic::riscv_sf_vc_x_se_e64m4:
  case Intrinsic::riscv_sf_vc_x_se_e64m8:
  case Intrinsic::riscv_sf_vc_i_se_e8mf8:
  case Intrinsic::riscv_sf_vc_i_se_e8mf4:
  case Intrinsic::riscv_sf_vc_i_se_e8mf2:
  case Intrinsic::riscv_sf_vc_i_se_e8m1:
  case Intrinsic::riscv_sf_vc_i_se_e8m2:
  case Intrinsic::riscv_sf_vc_i_se_e8m4:
  case Intrinsic::riscv_sf_vc_i_se_e8m8:
  case Intrinsic::riscv_sf_vc_i_se_e16mf4:
  case Intrinsic::riscv_sf_vc_i_se_e16mf2:
  case Intrinsic::riscv_sf_vc_i_se_e16m1:
  case Intrinsic::riscv_sf_vc_i_se_e16m2:
  case Intrinsic::riscv_sf_vc_i_se_e16m4:
  case Intrinsic::riscv_sf_vc_i_se_e16m8:
  case Intrinsic::riscv_sf_vc_i_se_e32mf2:
  case Intrinsic::riscv_sf_vc_i_se_e32m1:
  case Intrinsic::riscv_sf_vc_i_se_e32m2:
  case Intrinsic::riscv_sf_vc_i_se_e32m4:
  case Intrinsic::riscv_sf_vc_i_se_e32m8:
  case Intrinsic::riscv_sf_vc_i_se_e64m1:
  case Intrinsic::riscv_sf_vc_i_se_e64m2:
  case Intrinsic::riscv_sf_vc_i_se_e64m4:
  case Intrinsic::riscv_sf_vc_i_se_e64m8:
  case Intrinsic::riscv_sf_vc_xv_se:
  case Intrinsic::riscv_sf_vc_iv_se:
  case Intrinsic::riscv_sf_vc_vv_se:
  case Intrinsic::riscv_sf_vc_fv_se:
  case Intrinsic::riscv_sf_vc_xvv_se:
  case Intrinsic::riscv_sf_vc_ivv_se:
  case Intrinsic::riscv_sf_vc_vvv_se:
  case Intrinsic::riscv_sf_vc_fvv_se:
  case Intrinsic::riscv_sf_vc_xvw_se:
  case Intrinsic::riscv_sf_vc_ivw_se:
  case Intrinsic::riscv_sf_vc_vvw_se:
  case Intrinsic::riscv_sf_vc_fvw_se: {
    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    SDValue NewNode =
        DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);

    if (Op == NewNode)
      break;

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VP_REDUCE_ADD:
  case ISD::VECREDUCE_ADD:
    return RISCVISD::VECREDUCE_ADD_VL;
  case ISD::VP_REDUCE_UMAX:
  case ISD::VECREDUCE_UMAX:
    return RISCVISD::VECREDUCE_UMAX_VL;
  case ISD::VP_REDUCE_SMAX:
  case ISD::VECREDUCE_SMAX:
    return RISCVISD::VECREDUCE_SMAX_VL;
  case ISD::VP_REDUCE_UMIN:
  case ISD::VECREDUCE_UMIN:
    return RISCVISD::VECREDUCE_UMIN_VL;
  case ISD::VP_REDUCE_SMIN:
  case ISD::VECREDUCE_SMIN:
    return RISCVISD::VECREDUCE_SMIN_VL;
  case ISD::VP_REDUCE_AND:
  case ISD::VECREDUCE_AND:
    return RISCVISD::VECREDUCE_AND_VL;
  case ISD::VP_REDUCE_OR:
  case ISD::VECREDUCE_OR:
    return RISCVISD::VECREDUCE_OR_VL;
  case ISD::VP_REDUCE_XOR:
  case ISD::VECREDUCE_XOR:
    return RISCVISD::VECREDUCE_XOR_VL;
  case ISD::VP_REDUCE_FADD:
    return RISCVISD::VECREDUCE_FADD_VL;
  case ISD::VP_REDUCE_SEQ_FADD:
    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
  case ISD::VP_REDUCE_FMAX:
    return RISCVISD::VECREDUCE_FMAX_VL;
  case ISD::VP_REDUCE_FMIN:
    return RISCVISD::VECREDUCE_FMIN_VL;
  }
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  unsigned BaseOpc;
  ISD::CondCode CC;
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    BaseOpc = ISD::AND;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    BaseOpc = ISD::OR;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    BaseOpc = ISD::XOR;
    break;
  }
  }

  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
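// Net effect of the three cases above (illustrative, ignoring the VP start
// value and the fixed-length conversion):
//   vecreduce.and(m) -> (vcpop.m (vmnot.m m)) == 0   ; no zero bit present
//   vecreduce.or(m)  -> (vcpop.m m) != 0             ; some bit set
//   vecreduce.xor(m) -> ((vcpop.m m) & 1) != 0       ; odd number of set bits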
static bool isNonZeroAVL(SDValue AVL) {
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
}
/// Helper to lower a reduction sequence of the form:
///   scalar = reduce_op vec, scalar_start
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
                                 SDValue StartValue, SDValue Vec, SDValue Mask,
                                 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  const MVT VecVT = Vec.getSimpleValueType();
  const MVT M1VT = getLMUL1VT(VecVT);
  const MVT XLenVT = Subtarget.getXLenVT();
  const bool NonZeroAVL = isNonZeroAVL(VL);

  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
                                           DAG, Subtarget);
  if (M1VT != InnerVT)
    InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
                               DAG.getUNDEF(M1VT),
                               InitialValue, DAG.getConstant(0, DL, XLenVT));
  SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
  SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
}
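// Shape of the sequence built above for an integer add reduction with a known
// non-zero AVL (illustrative; register names are placeholders):
//   vmv.s.x    vStart, aStart        ; start value into element 0 at LMUL1
//   vredsum.vs vRes, vSrc, vStart    ; vRes[0] = reduce(vSrc) op vStart[0]
//   vmv.x.s    aRes, vRes            ; extract the scalar result
// Floating-point and min/max reductions follow the same structure with the
// corresponding vfred*/vred* instruction.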
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  switch (BaseOpc) {
  case ISD::AND:
  case ISD::OR:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
    MVT XLenVT = Subtarget.getXLenVT();
    StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
                         DAG.getConstant(0, DL, XLenVT));
  }
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
                           Mask, VL, DL, DAG, Subtarget);
}
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
                               const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_FADD: {
    // Use positive zero if we can. It is cheaper to materialize.
    SDValue Zero =
        DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
  }
  case ISD::VECREDUCE_SEQ_FADD:
    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                           Op.getOperand(0));
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX: {
    MVT XLenVT = Subtarget.getXLenVT();
    SDValue Front =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
                    DAG.getConstant(0, DL, XLenVT));
    unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
                          ? RISCVISD::VECREDUCE_FMIN_VL
                          : RISCVISD::VECREDUCE_FMAX_VL;
    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
  }
  }
}
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
  MVT VecVT = VectorVal.getSimpleValueType();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
                           VectorVal, Mask, VL, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                           Vec, Mask, VL, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), SubVec,
                           DAG.getConstant(0, DL, XLenVT));
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }

    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
    SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);

    // Use tail agnostic policy if we're inserting over Vec's tail.
    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
      Policy = RISCVII::TAIL_AGNOSTIC;

    // If we're inserting into the lowest elements, use a tail undisturbed
    // vmv.v.v.
    if (OrigIdx == 0) {
      SubVec =
          DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
    } else {
      SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
      SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                           SlideupAmt, Mask, VL, Policy);
    }

    if (VecVT.isFixedLengthVector())
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), SubVec);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
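  // For example, inserting a fractional-LMUL subvector at element RemIdx of an
  // LMUL=1 register uses OFFSET = RemIdx and VL = RemIdx plus the subvector's
  // element count, so only the targeted elements are written.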
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  VL = computeVLMax(SubVecVT, DL, DAG);

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (RemIdx == 0) {
    SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
                         SubVec, VL);
  } else {
    SDValue SlideupAmt =
        DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

    // Construct the vector length corresponding to RemIdx + length(SubVecVT).
    VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

    SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
                         SlideupAmt, Mask, VL);
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
                         DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
}
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (OrigIdx == 0)
    return Op;

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
    if (auto ShrunkVT =
            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }

    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else SubVecVT is a fractional LMUL and may need to be slid down.
  assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(SubRegIdx != RISCV::NoSubRegister);
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt =
      DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
                    Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1. All operand and result types must be the same.
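// For example, an i1 vector_deinterleave is handled here by zero-extending
// both operands to i8 vectors, deinterleaving those, and comparing each i8
// result against zero to recover the i1 values.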
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
                                  SelectionDAG &DAG) {
  MVT VT = N.getSimpleValueType();
  MVT WideVT = VT.changeVectorElementType(MVT::i8);
  SmallVector<SDValue, 4> WideOps;
  for (SDValue Op : N->ops()) {
    assert(Op.getSimpleValueType() == VT &&
           "Operands and result must be same type");
    WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
  }

  unsigned NumVals = N->getNumValues();

  SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
  SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
  SmallVector<SDValue, 4> TruncVals;
  for (unsigned I = 0; I < NumVals; I++) {
    TruncVals.push_back(
        DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
                     DAG.getConstant(0, DL, WideVT), ISD::SETNE));
  }

  if (TruncVals.size() > 1)
    return DAG.getMergeValues(TruncVals, DL);
  return TruncVals.front();
}
SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // 1 bit element vectors need to be widened to e8
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() ==
      (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);

    SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                               ResLo.getValue(0), ResHi.getValue(0));
    SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
                              ResHi.getValue(1));
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // Concatenate the two vectors as one vector to deinterleave
  MVT ConcatVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                               Op.getOperand(0), Op.getOperand(1));

  // We want to operate on all lanes, so get the mask and VL for it
  auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(ConcatVT);

  // We can deinterleave through vnsrl.wi if the element type is smaller than
  // ELEN
  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
    SDValue Even =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
    SDValue Odd =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // For the indices, use the same SEW to avoid an extra vsetvli
  MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
  // Create a vector of even indices {0, 2, 4, ...}
  SDValue EvenIdx =
      DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
  // Create a vector of odd indices {1, 3, 5, ... }
  SDValue OddIdx =
      DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));

  // Gather the even and odd elements into two separate vectors
  SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                 Concat, EvenIdx, Passthru, Mask, VL);
  SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                Concat, OddIdx, Passthru, Mask, VL);

  // Extract the result half of the gather for even and odd
  SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
                             DAG.getConstant(0, DL, XLenVT));
  SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
                            DAG.getConstant(0, DL, XLenVT));

  return DAG.getMergeValues({Even, Odd}, DL);
}
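// Lower vector_interleave of two scalable vectors. When SEW < ELEN this uses
// the widening interleave (vwaddu.vv/vwmaccu.vx); otherwise the sources are
// concatenated and gathered with vrgatherei16.vv using the interleaved index
// vector constructed below.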
SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // i1 vectors need to be widened to i8
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);

    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResLo.getValue(0), ResLo.getValue(1));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResHi.getValue(0), ResHi.getValue(1));
    return DAG.getMergeValues({Lo, Hi}, DL);
  }

  SDValue Interleaved;

  // If the element type is smaller than ELEN, then we can interleave with
  // vwaddu.vv and vwmaccu.vx
  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
    Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
                                        DAG, Subtarget);
  } else {
    // Otherwise, fallback to using vrgatherei16.vv
    MVT ConcatVT =
        MVT::getVectorVT(VecVT.getVectorElementType(),
                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                                 Op.getOperand(0), Op.getOperand(1));

    MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);

    // 0 1 2 3 4 5 6 7 ...
    SDValue StepVec = DAG.getStepVector(DL, IdxVT);

    // 1 1 1 1 1 1 1 1 ...
    SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));

    // 1 0 1 0 1 0 1 0 ...
    SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
    OddMask = DAG.getSetCC(
        DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
        ISD::CondCode::SETNE);

    SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));

    // Build up the index vector for interleaving the concatenated vector
    // 0 0 1 1 2 2 3 3 ...
    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
    // 0 n 1 n+1 2 n+2 3 n+3 ...
    Idx =
        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);

    // Then perform the interleave
    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
  }

  // Extract the two halves from the interleaved result
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));

  return DAG.getMergeValues({Lo, Hi}, DL);
}
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
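// For example, step_vector with a step of 8 becomes (vid.v << 3), while a
// non-power-of-two step such as 3 becomes (vid.v * splat(3)).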
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  assert(VT.isScalableVector() && "Expected scalable vector");
  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    if (isPowerOf2_64(StepValImm)) {
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      SDValue StepVal = lowerScalarSplat(
          SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
          VL, VT, DL, DAG, Subtarget);
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
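// For example, with VLMAX = 8 the gather indices are materialized as
// splat(VLMAX-1) - vid.v = (7, 6, 5, 4, 3, 2, 1, 0).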
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  if (VecVT.getVectorElementType() == MVT::i1) {
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
  }
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is potentially more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
                                 computeVLMax(VecVT, DL, DAG),
                                 DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                DAG.getUNDEF(IntVT), Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
                     DAG.getUNDEF(VecVT), Mask, VL);
}
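// Lower vector_splice by sliding V1 down by the splice offset and then
// sliding V2 up by (VLMAX - offset) on top of the result.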
SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VecVT = Op.getSimpleValueType();

  SDValue VLMax = computeVLMax(VecVT, DL, DAG);

  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
  }

  SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
                    DownOffset, TrueMask, UpOffset);
  return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
                     RISCVII::TAIL_AGNOSTIC);
}
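// Lower a fixed-length vector load either as a whole-register load (when the
// exact VLEN is known and the vector completely fills its container) or as a
// VL-limited vle/vlm intrinsic on the scalable container type.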
SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Load = cast<LoadSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Load->getMemoryVT(),
                                        *Load->getMemOperand()) &&
         "Expecting a correctly-aligned load");

  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // If we know the exact VLEN and our fixed length vector completely fills
  // the container, use a whole register load instead.
  const auto [MinVLMAX, MaxVLMAX] =
      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
    SDValue NewLoad =
        DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
                    Load->getMemOperand());
    SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
    return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
  }

  SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
  if (!IsMaskOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  Ops.push_back(Load->getBasePtr());
  Ops.push_back(VL);
  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());

  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Store = cast<StoreSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Store->getMemoryVT(),
                                        *Store->getMemOperand()) &&
         "Expecting a correctly-aligned store");

  SDValue StoreVal = Store->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If the size is less than a byte, we need to pad with zeros to make a byte.
  if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
    VT = MVT::v8i1;
    StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           DAG.getConstant(0, DL, VT), StoreVal,
                           DAG.getIntPtrConstant(0, DL));
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue NewValue =
      convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);

  // If we know the exact VLEN and our fixed length vector completely fills
  // the container, use a whole register store instead.
  const auto [MinVLMAX, MaxVLMAX] =
      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
      getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
    return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
                        Store->getMemOperand());

  SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
                       Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
  return DAG.getMemIntrinsicNode(
      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}
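// Lower masked.load and vp.load through the riscv_vle/riscv_vle_mask
// intrinsics; the mask operand is dropped entirely when it is known all-ones.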
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  SDValue Mask, PassThru, VL;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();
  SDValue Val, Mask, VL;

  if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
    Val = VPStore->getValue();
    Mask = VPStore->getMask();
    VL = VPStore->getVectorLength();
  } else {
    const auto *MStore = cast<MaskedStoreSDNode>(Op);
    Val = MStore->getValue();
    Mask = MStore->getMask();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT VT = Val.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  MVT InVT = Op.getOperand(0).getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(InVT);

  MVT VT = Op.getSimpleValueType();

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
                                    DAG, Subtarget);

  MVT MaskVT = getMaskTypeFor(ContainerVT);
  SDValue Cmp =
      DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                  {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});

  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDValue CC = Op.getOperand(3);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  MVT VT = Op.getSimpleValueType();
  MVT InVT = Op1.getSimpleValueType();

  // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
  // condition code.
  if (Opc == ISD::STRICT_FSETCCS) {
    // Expand strict_fsetccs(x, oeq) to
    // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
    SDVTList VTList = Op->getVTList();
    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                 Op2, OLECCVal);
      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
                                 Op1, OLECCVal);
      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                     Tmp1.getValue(1), Tmp2.getValue(1));
      // Tmp1 and Tmp2 might be the same node.
      if (Tmp1 != Tmp2)
        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
      return DAG.getMergeValues({Tmp1, OutChain}, DL);
    }

    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                Op2, OEQCCVal);
      SDValue Res = DAG.getNOT(DL, OEQ, VT);
      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
    }
  }

  MVT ContainerInVT = InVT;
  if (InVT.isFixedLengthVector()) {
    ContainerInVT = getContainerForFixedLengthVector(InVT);
    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
  }
  MVT MaskVT = getMaskTypeFor(ContainerInVT);

  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);

  SDValue Res;
  if (Opc == ISD::STRICT_FSETCC &&
      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
       CCVal == ISD::SETOLE)) {
    // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask that is
    // only active when both input elements are ordered.
    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
    SDValue OrderMask1 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    SDValue OrderMask2 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    Mask =
        DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
    // Use Mask as the merge operand to let the result be 0 if either of the
    // inputs is unordered.
    Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
                      DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, Mask, Mask, VL});
  } else {
    unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
                                                : RISCVISD::STRICT_FSETCCS_VL;
    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
  }

  if (VT.isFixedLengthVector()) {
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
// Lower vector ABS to smax(X, sub(0, X)).
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0);

  assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
         "Unexpected type for ISD::ABS");

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (Op->getOpcode() == ISD::VP_ABS) {
    Mask = Op->getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op->getOperand(2);
  } else
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue SplatZero = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
      DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
  SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
                             DAG.getUNDEF(ContainerVT), Mask, VL);
  SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
                            DAG.getUNDEF(ContainerVT), Mask, VL);

  if (VT.isFixedLengthVector())
    Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
  return Max;
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  assert(Mag.getValueType() == Sign.getValueType() &&
         "Can only handle COPYSIGN with matching types.");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
                                 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);

  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC =
      convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  SDValue Select =
      DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);

  return convertFromScalableVector(VT, Select, DAG, Subtarget);
}
RISCVTargetLowering::lowerToScalableOp(SDValue Op
,
10253 SelectionDAG
&DAG
) const {
10254 unsigned NewOpc
= getRISCVVLOp(Op
);
10255 bool HasMergeOp
= hasMergeOp(NewOpc
);
10256 bool HasMask
= hasMaskOp(NewOpc
);
10258 MVT VT
= Op
.getSimpleValueType();
10259 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
10261 // Create list of operands by converting existing ones to scalable types.
10262 SmallVector
<SDValue
, 6> Ops
;
10263 for (const SDValue
&V
: Op
->op_values()) {
10264 assert(!isa
<VTSDNode
>(V
) && "Unexpected VTSDNode node!");
10266 // Pass through non-vector operands.
10267 if (!V
.getValueType().isVector()) {
10272 // "cast" fixed length vector to a scalable vector.
10273 assert(useRVVForFixedLengthVectorVT(V
.getSimpleValueType()) &&
10274 "Only fixed length vectors are supported!");
10275 Ops
.push_back(convertToScalableVector(ContainerVT
, V
, DAG
, Subtarget
));
10279 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
10281 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10283 Ops
.push_back(Mask
);
10286 // StrictFP operations have two result values. Their lowered result should
10287 // have same result count.
10288 if (Op
->isStrictFPOpcode()) {
10289 SDValue ScalableRes
=
10290 DAG
.getNode(NewOpc
, DL
, DAG
.getVTList(ContainerVT
, MVT::Other
), Ops
,
10292 SDValue SubVec
= convertFromScalableVector(VT
, ScalableRes
, DAG
, Subtarget
);
10293 return DAG
.getMergeValues({SubVec
, ScalableRes
.getValue(1)}, DL
);
10296 SDValue ScalableRes
=
10297 DAG
.getNode(NewOpc
, DL
, ContainerVT
, Ops
, Op
->getFlags());
10298 return convertFromScalableVector(VT
, ScalableRes
, DAG
, Subtarget
);
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
//   types.
RISCVTargetLowering::lowerVPOp(SDValue Op
, SelectionDAG
&DAG
) const {
10307 unsigned RISCVISDOpc
= getRISCVVLOp(Op
);
10308 bool HasMergeOp
= hasMergeOp(RISCVISDOpc
);
10311 MVT VT
= Op
.getSimpleValueType();
10312 SmallVector
<SDValue
, 4> Ops
;
10314 MVT ContainerVT
= VT
;
10315 if (VT
.isFixedLengthVector())
10316 ContainerVT
= getContainerForFixedLengthVector(VT
);
10318 for (const auto &OpIdx
: enumerate(Op
->ops())) {
10319 SDValue V
= OpIdx
.value();
10320 assert(!isa
<VTSDNode
>(V
) && "Unexpected VTSDNode node!");
10321 // Add dummy merge value before the mask. Or if there isn't a mask, before
10324 auto MaskIdx
= ISD::getVPMaskIdx(Op
.getOpcode());
10326 if (*MaskIdx
== OpIdx
.index())
10327 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10328 } else if (ISD::getVPExplicitVectorLengthIdx(Op
.getOpcode()) ==
10330 // For VP_MERGE, copy the false operand instead of an undef value.
10331 assert(Op
.getOpcode() == ISD::VP_MERGE
);
10332 Ops
.push_back(Ops
.back());
10335 // Pass through operands which aren't fixed-length vectors.
10336 if (!V
.getValueType().isFixedLengthVector()) {
10340 // "cast" fixed length vector to a scalable vector.
10341 MVT OpVT
= V
.getSimpleValueType();
10342 MVT ContainerVT
= getContainerForFixedLengthVector(OpVT
);
10343 assert(useRVVForFixedLengthVectorVT(OpVT
) &&
10344 "Only fixed length vectors are supported!");
10345 Ops
.push_back(convertToScalableVector(ContainerVT
, V
, DAG
, Subtarget
));
10348 if (!VT
.isFixedLengthVector())
10349 return DAG
.getNode(RISCVISDOpc
, DL
, VT
, Ops
, Op
->getFlags());
10351 SDValue VPOp
= DAG
.getNode(RISCVISDOpc
, DL
, ContainerVT
, Ops
, Op
->getFlags());
10353 return convertFromScalableVector(VT
, VPOp
, DAG
, Subtarget
);
SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Src = Op.getOperand(0);
  // NOTE: Mask is dropped.
  SDValue VL = Op.getOperand(2);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                  DAG.getUNDEF(ContainerVT), Zero, VL);

  SDValue SplatValue = DAG.getConstant(
      Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
  SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                              DAG.getUNDEF(ContainerVT), SplatValue, VL);

  SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
                               Splat, ZeroSplat, VL);
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
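// Lower a VP setcc whose operands are mask vectors using mask-register logic
// (vmset/vmxor/vmand patterns), e.g. X != Y becomes a single vmxor.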
SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  // NOTE: Mask is dropped.
  SDValue VL = Op.getOperand(4);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDValue Result;
  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);

  switch (Condition) {
  default:
    break;
  // X != Y --> (X^Y)
  case ISD::SETNE:
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    break;
  // X == Y --> ~(X^Y)
  case ISD::SETEQ: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    Result =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
    break;
  }
  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
  // X <u Y --> X == 0 & Y == 1 --> ~X & Y
  case ISD::SETGT:
  case ISD::SETULT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
  // X >u Y --> X == 1 & Y == 0 --> ~Y & X
  case ISD::SETLT:
  case ISD::SETUGT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
    break;
  }
  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
  // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
  case ISD::SETGE:
  case ISD::SETULE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
  // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
  case ISD::SETLE:
  case ISD::SETUGE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
    break;
  }
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
// Lower Floating-Point/Integer Type-Convert VP SDNodes
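// Widening conversions may pre-extend the source first (e.g. a vp.fptosi from
// f16 to i64 extends to f32 before converting); narrowing conversions go via
// an intermediate type and are reduced in halves (e.g. f64 to i8 converts to
// i32 and then truncates to i16 and i8).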
SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  SDValue Src = Op.getOperand(0);
  SDValue Mask = Op.getOperand(1);
  SDValue VL = Op.getOperand(2);
  unsigned RISCVISDOpc = getRISCVVLOp(Op);

  MVT DstVT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  if (DstVT.isFixedLengthVector()) {
    DstVT = getContainerForFixedLengthVector(DstVT);
    SrcVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(DstVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  unsigned DstEltSize = DstVT.getScalarSizeInBits();
  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();

  SDValue Result;
  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");

      unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
                                    ? RISCVISD::VSEXT_VL
                                    : RISCVISD::VZEXT_VL;

      // Do we need to do any pre-widening before converting?
      if (SrcEltSize == 1) {
        MVT IntVT = DstVT.changeVectorElementTypeToInteger();
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue Zero = DAG.getConstant(0, DL, XLenVT);
        SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                        DAG.getUNDEF(IntVT), Zero, VL);
        SDValue One = DAG.getConstant(
            RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
        SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                       DAG.getUNDEF(IntVT), One, VL);
        Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
                          ZeroSplat, VL);
      } else if (DstEltSize > (2 * SrcEltSize)) {
        // Widen before converting.
        MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
                                     DstVT.getVectorElementCount());
        Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");

      // Convert f16 to f32 then convert f32 to i64.
      if (DstEltSize > (2 * SrcEltSize)) {
        assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        MVT InterimFVT =
            MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
        Src =
            DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    }
  } else { // Narrowing + Conversion
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
      // First do a narrowing convert to an FP type half the size, then round
      // the FP type to a small FP type if needed.

      MVT InterimFVT = DstVT;
      if (SrcEltSize > (2 * DstEltSize)) {
        assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
        assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
      }

      Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);

      if (InterimFVT != DstVT) {
        Src = Result;
        Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
      }
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");
      // First do a narrowing conversion to an integer half the size, then
      // truncate if needed.

      if (DstEltSize == 1) {
        // First convert to the same size integer, then convert to mask using
        // setcc.
        assert(SrcEltSize >= 16 && "Unexpected FP type!");
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
                                          DstVT.getVectorElementCount());
        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        // Compare the integer result to 0. The integer should be 0 or 1/-1,
        // otherwise the conversion was undefined.
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
        SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
                                DAG.getUNDEF(InterimIVT), SplatZero, VL);
        Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
                             {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
                              DAG.getUNDEF(DstVT), Mask, VL});
      } else {
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                          DstVT.getVectorElementCount());

        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        while (InterimIVT != DstVT) {
          SrcEltSize /= 2;
          Src = Result;
          InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                        DstVT.getVectorElementCount());
          Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
                               Src, Mask, VL);
        }
      }
    }
  }

  MVT VT = Op.getSimpleValueType();
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
SDValue
RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  SDValue Offset = Op.getOperand(2);
  SDValue Mask = Op.getOperand(3);
  SDValue EVL1 = Op.getOperand(4);
  SDValue EVL2 = Op.getOperand(5);

  const MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(ContainerVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
  if (IsMaskVector) {
    ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);

    // Expand input operands
    SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                      DAG.getUNDEF(ContainerVT),
                                      DAG.getConstant(1, DL, XLenVT), EVL1);
    SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                       DAG.getUNDEF(ContainerVT),
                                       DAG.getConstant(0, DL, XLenVT), EVL1);
    Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op1, SplatOneOp1,
                      SplatZeroOp1, EVL1);

    SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                      DAG.getUNDEF(ContainerVT),
                                      DAG.getConstant(1, DL, XLenVT), EVL2);
    SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                       DAG.getUNDEF(ContainerVT),
                                       DAG.getConstant(0, DL, XLenVT), EVL2);
    Op2 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op2, SplatOneOp2,
                      SplatZeroOp2, EVL2);
  }

  int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
  }

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    Op1, DownOffset, Mask, UpOffset);
  SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
                               UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);

  if (IsMaskVector) {
    // Truncate Result back to a mask vector (Result has same EVL as Op2)
    Result = DAG.getNode(
        RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
        {Result, DAG.getConstant(0, DL, ContainerVT),
         DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
         Mask, EVL2});
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
10684 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op
,
10685 SelectionDAG
&DAG
) const {
10687 MVT VT
= Op
.getSimpleValueType();
10688 MVT XLenVT
= Subtarget
.getXLenVT();
10690 SDValue Op1
= Op
.getOperand(0);
10691 SDValue Mask
= Op
.getOperand(1);
10692 SDValue EVL
= Op
.getOperand(2);
10694 MVT ContainerVT
= VT
;
10695 if (VT
.isFixedLengthVector()) {
10696 ContainerVT
= getContainerForFixedLengthVector(VT
);
10697 Op1
= convertToScalableVector(ContainerVT
, Op1
, DAG
, Subtarget
);
10698 MVT MaskVT
= getMaskTypeFor(ContainerVT
);
10699 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
10702 MVT GatherVT
= ContainerVT
;
10703 MVT IndicesVT
= ContainerVT
.changeVectorElementTypeToInteger();
10704 // Check if we are working with mask vectors
10705 bool IsMaskVector
= ContainerVT
.getVectorElementType() == MVT::i1
;
10706 if (IsMaskVector
) {
10707 GatherVT
= IndicesVT
= ContainerVT
.changeVectorElementType(MVT::i8
);
10709 // Expand input operand
10710 SDValue SplatOne
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, IndicesVT
,
10711 DAG
.getUNDEF(IndicesVT
),
10712 DAG
.getConstant(1, DL
, XLenVT
), EVL
);
10713 SDValue SplatZero
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, IndicesVT
,
10714 DAG
.getUNDEF(IndicesVT
),
10715 DAG
.getConstant(0, DL
, XLenVT
), EVL
);
10716 Op1
= DAG
.getNode(RISCVISD::VSELECT_VL
, DL
, IndicesVT
, Op1
, SplatOne
,
10720 unsigned EltSize
= GatherVT
.getScalarSizeInBits();
10721 unsigned MinSize
= GatherVT
.getSizeInBits().getKnownMinValue();
10722 unsigned VectorBitsMax
= Subtarget
.getRealMaxVLen();
10723 unsigned MaxVLMAX
=
10724 RISCVTargetLowering::computeVLMAX(VectorBitsMax
, EltSize
, MinSize
);
10726 unsigned GatherOpc
= RISCVISD::VRGATHER_VV_VL
;
10727 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
10728 // to use vrgatherei16.vv.
10729 // TODO: It's also possible to use vrgatherei16.vv for other types to
10730 // decrease register width for the index calculation.
10731 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10732 if (MaxVLMAX
> 256 && EltSize
== 8) {
10733 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
10734 // Split the vector in half and reverse each half using a full register
10736 // Swap the halves and concatenate them.
10737 // Slide the concatenated result by (VLMax - VL).
10738 if (MinSize
== (8 * RISCV::RVVBitsPerBlock
)) {
10739 auto [LoVT
, HiVT
] = DAG
.GetSplitDestVTs(GatherVT
);
10740 auto [Lo
, Hi
] = DAG
.SplitVector(Op1
, DL
);
10742 SDValue LoRev
= DAG
.getNode(ISD::VECTOR_REVERSE
, DL
, LoVT
, Lo
);
10743 SDValue HiRev
= DAG
.getNode(ISD::VECTOR_REVERSE
, DL
, HiVT
, Hi
);
10745 // Reassemble the low and high pieces reversed.
10746 // NOTE: this Result is unmasked (because we do not need masks for
10747 // shuffles). If in the future this has to change, we can use a SELECT_VL
10748 // between Result and UNDEF using the mask originally passed to VP_REVERSE
10750 DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, GatherVT
, HiRev
, LoRev
);
10752 // Slide off any elements from past EVL that were reversed into the low
10754 unsigned MinElts
= GatherVT
.getVectorMinNumElements();
10755 SDValue VLMax
= DAG
.getNode(ISD::VSCALE
, DL
, XLenVT
,
10756 DAG
.getConstant(MinElts
, DL
, XLenVT
));
10757 SDValue Diff
= DAG
.getNode(ISD::SUB
, DL
, XLenVT
, VLMax
, EVL
);
10759 Result
= getVSlidedown(DAG
, Subtarget
, DL
, GatherVT
,
10760 DAG
.getUNDEF(GatherVT
), Result
, Diff
, Mask
, EVL
);
10762 if (IsMaskVector
) {
10763 // Truncate Result back to a mask vector
10765 DAG
.getNode(RISCVISD::SETCC_VL
, DL
, ContainerVT
,
10766 {Result
, DAG
.getConstant(0, DL
, GatherVT
),
10767 DAG
.getCondCode(ISD::SETNE
),
10768 DAG
.getUNDEF(getMaskTypeFor(ContainerVT
)), Mask
, EVL
});
10771 if (!VT
.isFixedLengthVector())
10773 return convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
10776 // Just promote the int type to i16 which will double the LMUL.
10777 IndicesVT
= MVT::getVectorVT(MVT::i16
, IndicesVT
.getVectorElementCount());
10778 GatherOpc
= RISCVISD::VRGATHEREI16_VV_VL
;
10781 SDValue VID
= DAG
.getNode(RISCVISD::VID_VL
, DL
, IndicesVT
, Mask
, EVL
);
10783 DAG
.getNode(ISD::SUB
, DL
, XLenVT
, EVL
, DAG
.getConstant(1, DL
, XLenVT
));
10784 SDValue VecLenSplat
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, IndicesVT
,
10785 DAG
.getUNDEF(IndicesVT
), VecLen
, EVL
);
10786 SDValue VRSUB
= DAG
.getNode(RISCVISD::SUB_VL
, DL
, IndicesVT
, VecLenSplat
, VID
,
10787 DAG
.getUNDEF(IndicesVT
), Mask
, EVL
);
10788 SDValue Result
= DAG
.getNode(GatherOpc
, DL
, GatherVT
, Op1
, VRSUB
,
10789 DAG
.getUNDEF(GatherVT
), Mask
, EVL
);
10791 if (IsMaskVector
) {
10792 // Truncate Result back to a mask vector
10793 Result
= DAG
.getNode(
10794 RISCVISD::SETCC_VL
, DL
, ContainerVT
,
10795 {Result
, DAG
.getConstant(0, DL
, GatherVT
), DAG
.getCondCode(ISD::SETNE
),
10796 DAG
.getUNDEF(getMaskTypeFor(ContainerVT
)), Mask
, EVL
});
10799 if (!VT
.isFixedLengthVector())
10801 return convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
10804 SDValue
RISCVTargetLowering::lowerLogicVPOp(SDValue Op
,
10805 SelectionDAG
&DAG
) const {
10806 MVT VT
= Op
.getSimpleValueType();
10807 if (VT
.getVectorElementType() != MVT::i1
)
10808 return lowerVPOp(Op
, DAG
);
10810 // It is safe to drop mask parameter as masked-off elements are undef.
10811 SDValue Op1
= Op
->getOperand(0);
10812 SDValue Op2
= Op
->getOperand(1);
10813 SDValue VL
= Op
->getOperand(3);
10815 MVT ContainerVT
= VT
;
10816 const bool IsFixed
= VT
.isFixedLengthVector();
10818 ContainerVT
= getContainerForFixedLengthVector(VT
);
10819 Op1
= convertToScalableVector(ContainerVT
, Op1
, DAG
, Subtarget
);
10820 Op2
= convertToScalableVector(ContainerVT
, Op2
, DAG
, Subtarget
);
10824 SDValue Val
= DAG
.getNode(getRISCVVLOp(Op
), DL
, ContainerVT
, Op1
, Op2
, VL
);
10827 return convertFromScalableVector(VT
, Val
, DAG
, Subtarget
);
10830 SDValue
RISCVTargetLowering::lowerVPStridedLoad(SDValue Op
,
10831 SelectionDAG
&DAG
) const {
10833 MVT XLenVT
= Subtarget
.getXLenVT();
10834 MVT VT
= Op
.getSimpleValueType();
10835 MVT ContainerVT
= VT
;
10836 if (VT
.isFixedLengthVector())
10837 ContainerVT
= getContainerForFixedLengthVector(VT
);
10839 SDVTList VTs
= DAG
.getVTList({ContainerVT
, MVT::Other
});
10841 auto *VPNode
= cast
<VPStridedLoadSDNode
>(Op
);
10842 // Check if the mask is known to be all ones
10843 SDValue Mask
= VPNode
->getMask();
10844 bool IsUnmasked
= ISD::isConstantSplatVectorAllOnes(Mask
.getNode());
10846 SDValue IntID
= DAG
.getTargetConstant(IsUnmasked
? Intrinsic::riscv_vlse
10847 : Intrinsic::riscv_vlse_mask
,
10849 SmallVector
<SDValue
, 8> Ops
{VPNode
->getChain(), IntID
,
10850 DAG
.getUNDEF(ContainerVT
), VPNode
->getBasePtr(),
10851 VPNode
->getStride()};
10853 if (VT
.isFixedLengthVector()) {
10854 MVT MaskVT
= ContainerVT
.changeVectorElementType(MVT::i1
);
10855 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
10857 Ops
.push_back(Mask
);
10859 Ops
.push_back(VPNode
->getVectorLength());
10861 SDValue Policy
= DAG
.getTargetConstant(RISCVII::TAIL_AGNOSTIC
, DL
, XLenVT
);
10862 Ops
.push_back(Policy
);
10866 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, DL
, VTs
, Ops
,
10867 VPNode
->getMemoryVT(), VPNode
->getMemOperand());
10868 SDValue Chain
= Result
.getValue(1);
10870 if (VT
.isFixedLengthVector())
10871 Result
= convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
10873 return DAG
.getMergeValues({Result
, Chain
}, DL
);
10876 SDValue
RISCVTargetLowering::lowerVPStridedStore(SDValue Op
,
10877 SelectionDAG
&DAG
) const {
10879 MVT XLenVT
= Subtarget
.getXLenVT();
10881 auto *VPNode
= cast
<VPStridedStoreSDNode
>(Op
);
10882 SDValue StoreVal
= VPNode
->getValue();
10883 MVT VT
= StoreVal
.getSimpleValueType();
10884 MVT ContainerVT
= VT
;
10885 if (VT
.isFixedLengthVector()) {
10886 ContainerVT
= getContainerForFixedLengthVector(VT
);
10887 StoreVal
= convertToScalableVector(ContainerVT
, StoreVal
, DAG
, Subtarget
);
10890 // Check if the mask is known to be all ones
10891 SDValue Mask
= VPNode
->getMask();
10892 bool IsUnmasked
= ISD::isConstantSplatVectorAllOnes(Mask
.getNode());
10894 SDValue IntID
= DAG
.getTargetConstant(IsUnmasked
? Intrinsic::riscv_vsse
10895 : Intrinsic::riscv_vsse_mask
,
10897 SmallVector
<SDValue
, 8> Ops
{VPNode
->getChain(), IntID
, StoreVal
,
10898 VPNode
->getBasePtr(), VPNode
->getStride()};
10900 if (VT
.isFixedLengthVector()) {
10901 MVT MaskVT
= ContainerVT
.changeVectorElementType(MVT::i1
);
10902 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
10904 Ops
.push_back(Mask
);
10906 Ops
.push_back(VPNode
->getVectorLength());
10908 return DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
, DL
, VPNode
->getVTList(),
10909 Ops
, VPNode
->getMemoryVT(),
10910 VPNode
->getMemOperand());
10913 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
10914 // matched to a RVV indexed load. The RVV indexed load instructions only
10915 // support the "unsigned unscaled" addressing mode; indices are implicitly
10916 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
10917 // signed or scaled indexing is extended to the XLEN value type and scaled
10919 SDValue
RISCVTargetLowering::lowerMaskedGather(SDValue Op
,
10920 SelectionDAG
&DAG
) const {
10922 MVT VT
= Op
.getSimpleValueType();
10924 const auto *MemSD
= cast
<MemSDNode
>(Op
.getNode());
10925 EVT MemVT
= MemSD
->getMemoryVT();
10926 MachineMemOperand
*MMO
= MemSD
->getMemOperand();
10927 SDValue Chain
= MemSD
->getChain();
10928 SDValue BasePtr
= MemSD
->getBasePtr();
10930 ISD::LoadExtType LoadExtType
;
10931 SDValue Index
, Mask
, PassThru
, VL
;
10933 if (auto *VPGN
= dyn_cast
<VPGatherSDNode
>(Op
.getNode())) {
10934 Index
= VPGN
->getIndex();
10935 Mask
= VPGN
->getMask();
10936 PassThru
= DAG
.getUNDEF(VT
);
10937 VL
= VPGN
->getVectorLength();
10938 // VP doesn't support extending loads.
10939 LoadExtType
= ISD::NON_EXTLOAD
;
10941 // Else it must be a MGATHER.
10942 auto *MGN
= cast
<MaskedGatherSDNode
>(Op
.getNode());
10943 Index
= MGN
->getIndex();
10944 Mask
= MGN
->getMask();
10945 PassThru
= MGN
->getPassThru();
10946 LoadExtType
= MGN
->getExtensionType();
10949 MVT IndexVT
= Index
.getSimpleValueType();
10950 MVT XLenVT
= Subtarget
.getXLenVT();
10952 assert(VT
.getVectorElementCount() == IndexVT
.getVectorElementCount() &&
10953 "Unexpected VTs!");
10954 assert(BasePtr
.getSimpleValueType() == XLenVT
&& "Unexpected pointer type");
10955 // Targets have to explicitly opt-in for extending vector loads.
10956 assert(LoadExtType
== ISD::NON_EXTLOAD
&&
10957 "Unexpected extending MGATHER/VP_GATHER");
10960 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
10961 // the selection of the masked intrinsics doesn't do this for us.
10962 bool IsUnmasked
= ISD::isConstantSplatVectorAllOnes(Mask
.getNode());
10964 MVT ContainerVT
= VT
;
10965 if (VT
.isFixedLengthVector()) {
10966 ContainerVT
= getContainerForFixedLengthVector(VT
);
10967 IndexVT
= MVT::getVectorVT(IndexVT
.getVectorElementType(),
10968 ContainerVT
.getVectorElementCount());
10970 Index
= convertToScalableVector(IndexVT
, Index
, DAG
, Subtarget
);
10973 MVT MaskVT
= getMaskTypeFor(ContainerVT
);
10974 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
10975 PassThru
= convertToScalableVector(ContainerVT
, PassThru
, DAG
, Subtarget
);
10980 VL
= getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
10982 if (XLenVT
== MVT::i32
&& IndexVT
.getVectorElementType().bitsGT(XLenVT
)) {
10983 IndexVT
= IndexVT
.changeVectorElementType(XLenVT
);
10984 Index
= DAG
.getNode(ISD::TRUNCATE
, DL
, IndexVT
, Index
);
10988 IsUnmasked
? Intrinsic::riscv_vluxei
: Intrinsic::riscv_vluxei_mask
;
10989 SmallVector
<SDValue
, 8> Ops
{Chain
, DAG
.getTargetConstant(IntID
, DL
, XLenVT
)};
10991 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10993 Ops
.push_back(PassThru
);
10994 Ops
.push_back(BasePtr
);
10995 Ops
.push_back(Index
);
10997 Ops
.push_back(Mask
);
11000 Ops
.push_back(DAG
.getTargetConstant(RISCVII::TAIL_AGNOSTIC
, DL
, XLenVT
));
11002 SDVTList VTs
= DAG
.getVTList({ContainerVT
, MVT::Other
});
11004 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, DL
, VTs
, Ops
, MemVT
, MMO
);
11005 Chain
= Result
.getValue(1);
11007 if (VT
.isFixedLengthVector())
11008 Result
= convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
11010 return DAG
.getMergeValues({Result
, Chain
}, DL
);
11013 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11014 // matched to a RVV indexed store. The RVV indexed store instructions only
11015 // support the "unsigned unscaled" addressing mode; indices are implicitly
11016 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
11017 // signed or scaled indexing is extended to the XLEN value type and scaled
11019 SDValue
RISCVTargetLowering::lowerMaskedScatter(SDValue Op
,
11020 SelectionDAG
&DAG
) const {
11022 const auto *MemSD
= cast
<MemSDNode
>(Op
.getNode());
11023 EVT MemVT
= MemSD
->getMemoryVT();
11024 MachineMemOperand
*MMO
= MemSD
->getMemOperand();
11025 SDValue Chain
= MemSD
->getChain();
11026 SDValue BasePtr
= MemSD
->getBasePtr();
11028 bool IsTruncatingStore
= false;
11029 SDValue Index
, Mask
, Val
, VL
;
11031 if (auto *VPSN
= dyn_cast
<VPScatterSDNode
>(Op
.getNode())) {
11032 Index
= VPSN
->getIndex();
11033 Mask
= VPSN
->getMask();
11034 Val
= VPSN
->getValue();
11035 VL
= VPSN
->getVectorLength();
11036 // VP doesn't support truncating stores.
11037 IsTruncatingStore
= false;
11039 // Else it must be a MSCATTER.
11040 auto *MSN
= cast
<MaskedScatterSDNode
>(Op
.getNode());
11041 Index
= MSN
->getIndex();
11042 Mask
= MSN
->getMask();
11043 Val
= MSN
->getValue();
11044 IsTruncatingStore
= MSN
->isTruncatingStore();
11047 MVT VT
= Val
.getSimpleValueType();
11048 MVT IndexVT
= Index
.getSimpleValueType();
11049 MVT XLenVT
= Subtarget
.getXLenVT();
11051 assert(VT
.getVectorElementCount() == IndexVT
.getVectorElementCount() &&
11052 "Unexpected VTs!");
11053 assert(BasePtr
.getSimpleValueType() == XLenVT
&& "Unexpected pointer type");
11054 // Targets have to explicitly opt-in for extending vector loads and
11055 // truncating vector stores.
11056 assert(!IsTruncatingStore
&& "Unexpected truncating MSCATTER/VP_SCATTER");
11057 (void)IsTruncatingStore
;
11059 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11060 // the selection of the masked intrinsics doesn't do this for us.
11061 bool IsUnmasked
= ISD::isConstantSplatVectorAllOnes(Mask
.getNode());
11063 MVT ContainerVT
= VT
;
11064 if (VT
.isFixedLengthVector()) {
11065 ContainerVT
= getContainerForFixedLengthVector(VT
);
11066 IndexVT
= MVT::getVectorVT(IndexVT
.getVectorElementType(),
11067 ContainerVT
.getVectorElementCount());
11069 Index
= convertToScalableVector(IndexVT
, Index
, DAG
, Subtarget
);
11070 Val
= convertToScalableVector(ContainerVT
, Val
, DAG
, Subtarget
);
11073 MVT MaskVT
= getMaskTypeFor(ContainerVT
);
11074 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
11079 VL
= getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
11081 if (XLenVT
== MVT::i32
&& IndexVT
.getVectorElementType().bitsGT(XLenVT
)) {
11082 IndexVT
= IndexVT
.changeVectorElementType(XLenVT
);
11083 Index
= DAG
.getNode(ISD::TRUNCATE
, DL
, IndexVT
, Index
);
11087 IsUnmasked
? Intrinsic::riscv_vsoxei
: Intrinsic::riscv_vsoxei_mask
;
11088 SmallVector
<SDValue
, 8> Ops
{Chain
, DAG
.getTargetConstant(IntID
, DL
, XLenVT
)};
11089 Ops
.push_back(Val
);
11090 Ops
.push_back(BasePtr
);
11091 Ops
.push_back(Index
);
11093 Ops
.push_back(Mask
);
11096 return DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
, DL
,
11097 DAG
.getVTList(MVT::Other
), Ops
, MemVT
, MMO
);
11100 SDValue
RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op
,
11101 SelectionDAG
&DAG
) const {
11102 const MVT XLenVT
= Subtarget
.getXLenVT();
11104 SDValue Chain
= Op
->getOperand(0);
11105 SDValue SysRegNo
= DAG
.getTargetConstant(
11106 RISCVSysReg::lookupSysRegByName("FRM")->Encoding
, DL
, XLenVT
);
11107 SDVTList VTs
= DAG
.getVTList(XLenVT
, MVT::Other
);
11108 SDValue RM
= DAG
.getNode(RISCVISD::READ_CSR
, DL
, VTs
, Chain
, SysRegNo
);
11110 // Encoding used for rounding mode in RISC-V differs from that used in
11111 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11112 // table, which consists of a sequence of 4-bit fields, each representing
11113 // corresponding FLT_ROUNDS mode.
11114 static const int Table
=
11115 (int(RoundingMode::NearestTiesToEven
) << 4 * RISCVFPRndMode::RNE
) |
11116 (int(RoundingMode::TowardZero
) << 4 * RISCVFPRndMode::RTZ
) |
11117 (int(RoundingMode::TowardNegative
) << 4 * RISCVFPRndMode::RDN
) |
11118 (int(RoundingMode::TowardPositive
) << 4 * RISCVFPRndMode::RUP
) |
11119 (int(RoundingMode::NearestTiesToAway
) << 4 * RISCVFPRndMode::RMM
);
11122 DAG
.getNode(ISD::SHL
, DL
, XLenVT
, RM
, DAG
.getConstant(2, DL
, XLenVT
));
11123 SDValue Shifted
= DAG
.getNode(ISD::SRL
, DL
, XLenVT
,
11124 DAG
.getConstant(Table
, DL
, XLenVT
), Shift
);
11125 SDValue Masked
= DAG
.getNode(ISD::AND
, DL
, XLenVT
, Shifted
,
11126 DAG
.getConstant(7, DL
, XLenVT
));
11128 return DAG
.getMergeValues({Masked
, Chain
}, DL
);
11131 SDValue
RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op
,
11132 SelectionDAG
&DAG
) const {
11133 const MVT XLenVT
= Subtarget
.getXLenVT();
11135 SDValue Chain
= Op
->getOperand(0);
11136 SDValue RMValue
= Op
->getOperand(1);
11137 SDValue SysRegNo
= DAG
.getTargetConstant(
11138 RISCVSysReg::lookupSysRegByName("FRM")->Encoding
, DL
, XLenVT
);
11140 // Encoding used for rounding mode in RISC-V differs from that used in
11141 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11142 // a table, which consists of a sequence of 4-bit fields, each representing
11143 // corresponding RISC-V mode.
11144 static const unsigned Table
=
11145 (RISCVFPRndMode::RNE
<< 4 * int(RoundingMode::NearestTiesToEven
)) |
11146 (RISCVFPRndMode::RTZ
<< 4 * int(RoundingMode::TowardZero
)) |
11147 (RISCVFPRndMode::RDN
<< 4 * int(RoundingMode::TowardNegative
)) |
11148 (RISCVFPRndMode::RUP
<< 4 * int(RoundingMode::TowardPositive
)) |
11149 (RISCVFPRndMode::RMM
<< 4 * int(RoundingMode::NearestTiesToAway
));
11151 RMValue
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, XLenVT
, RMValue
);
11153 SDValue Shift
= DAG
.getNode(ISD::SHL
, DL
, XLenVT
, RMValue
,
11154 DAG
.getConstant(2, DL
, XLenVT
));
11155 SDValue Shifted
= DAG
.getNode(ISD::SRL
, DL
, XLenVT
,
11156 DAG
.getConstant(Table
, DL
, XLenVT
), Shift
);
11157 RMValue
= DAG
.getNode(ISD::AND
, DL
, XLenVT
, Shifted
,
11158 DAG
.getConstant(0x7, DL
, XLenVT
));
11159 return DAG
.getNode(RISCVISD::WRITE_CSR
, DL
, MVT::Other
, Chain
, SysRegNo
,
11163 SDValue
RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op
,
11164 SelectionDAG
&DAG
) const {
11165 MachineFunction
&MF
= DAG
.getMachineFunction();
11167 bool isRISCV64
= Subtarget
.is64Bit();
11168 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
11170 int FI
= MF
.getFrameInfo().CreateFixedObject(isRISCV64
? 8 : 4, 0, false);
11171 return DAG
.getFrameIndex(FI
, PtrVT
);
11174 // Returns the opcode of the target-specific SDNode that implements the 32-bit
11175 // form of the given Opcode.
11176 static RISCVISD::NodeType
getRISCVWOpcode(unsigned Opcode
) {
11179 llvm_unreachable("Unexpected opcode");
11181 return RISCVISD::SLLW
;
11183 return RISCVISD::SRAW
;
11185 return RISCVISD::SRLW
;
11187 return RISCVISD::DIVW
;
11189 return RISCVISD::DIVUW
;
11191 return RISCVISD::REMUW
;
11193 return RISCVISD::ROLW
;
11195 return RISCVISD::RORW
;
11199 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11200 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11201 // otherwise be promoted to i64, making it difficult to select the
11202 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of
11203 // type i8/i16/i32 is lost.
11204 static SDValue
customLegalizeToWOp(SDNode
*N
, SelectionDAG
&DAG
,
11205 unsigned ExtOpc
= ISD::ANY_EXTEND
) {
11207 RISCVISD::NodeType WOpcode
= getRISCVWOpcode(N
->getOpcode());
11208 SDValue NewOp0
= DAG
.getNode(ExtOpc
, DL
, MVT::i64
, N
->getOperand(0));
11209 SDValue NewOp1
= DAG
.getNode(ExtOpc
, DL
, MVT::i64
, N
->getOperand(1));
11210 SDValue NewRes
= DAG
.getNode(WOpcode
, DL
, MVT::i64
, NewOp0
, NewOp1
);
11211 // ReplaceNodeResults requires we maintain the same type for the return value.
11212 return DAG
.getNode(ISD::TRUNCATE
, DL
, N
->getValueType(0), NewRes
);
11215 // Converts the given 32-bit operation to a i64 operation with signed extension
11216 // semantic to reduce the signed extension instructions.
11217 static SDValue
customLegalizeToWOpWithSExt(SDNode
*N
, SelectionDAG
&DAG
) {
11219 SDValue NewOp0
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11220 SDValue NewOp1
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11221 SDValue NewWOp
= DAG
.getNode(N
->getOpcode(), DL
, MVT::i64
, NewOp0
, NewOp1
);
11222 SDValue NewRes
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, MVT::i64
, NewWOp
,
11223 DAG
.getValueType(MVT::i32
));
11224 return DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, NewRes
);
11227 void RISCVTargetLowering::ReplaceNodeResults(SDNode
*N
,
11228 SmallVectorImpl
<SDValue
> &Results
,
11229 SelectionDAG
&DAG
) const {
11231 switch (N
->getOpcode()) {
11233 llvm_unreachable("Don't know how to custom type legalize this operation!");
11234 case ISD::STRICT_FP_TO_SINT
:
11235 case ISD::STRICT_FP_TO_UINT
:
11236 case ISD::FP_TO_SINT
:
11237 case ISD::FP_TO_UINT
: {
11238 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11239 "Unexpected custom legalisation");
11240 bool IsStrict
= N
->isStrictFPOpcode();
11241 bool IsSigned
= N
->getOpcode() == ISD::FP_TO_SINT
||
11242 N
->getOpcode() == ISD::STRICT_FP_TO_SINT
;
11243 SDValue Op0
= IsStrict
? N
->getOperand(1) : N
->getOperand(0);
11244 if (getTypeAction(*DAG
.getContext(), Op0
.getValueType()) !=
11245 TargetLowering::TypeSoftenFloat
) {
11246 if (!isTypeLegal(Op0
.getValueType()))
11249 SDValue Chain
= N
->getOperand(0);
11250 // In absense of Zfh, promote f16 to f32, then convert.
11251 if (Op0
.getValueType() == MVT::f16
&&
11252 !Subtarget
.hasStdExtZfhOrZhinx()) {
11253 Op0
= DAG
.getNode(ISD::STRICT_FP_EXTEND
, DL
, {MVT::f32
, MVT::Other
},
11255 Chain
= Op0
.getValue(1);
11257 unsigned Opc
= IsSigned
? RISCVISD::STRICT_FCVT_W_RV64
11258 : RISCVISD::STRICT_FCVT_WU_RV64
;
11259 SDVTList VTs
= DAG
.getVTList(MVT::i64
, MVT::Other
);
11260 SDValue Res
= DAG
.getNode(
11261 Opc
, DL
, VTs
, Chain
, Op0
,
11262 DAG
.getTargetConstant(RISCVFPRndMode::RTZ
, DL
, MVT::i64
));
11263 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11264 Results
.push_back(Res
.getValue(1));
11267 // For bf16, or f16 in absense of Zfh, promote [b]f16 to f32 and then
11269 if ((Op0
.getValueType() == MVT::f16
&&
11270 !Subtarget
.hasStdExtZfhOrZhinx()) ||
11271 Op0
.getValueType() == MVT::bf16
)
11272 Op0
= DAG
.getNode(ISD::FP_EXTEND
, DL
, MVT::f32
, Op0
);
11274 unsigned Opc
= IsSigned
? RISCVISD::FCVT_W_RV64
: RISCVISD::FCVT_WU_RV64
;
11276 DAG
.getNode(Opc
, DL
, MVT::i64
, Op0
,
11277 DAG
.getTargetConstant(RISCVFPRndMode::RTZ
, DL
, MVT::i64
));
11278 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11281 // If the FP type needs to be softened, emit a library call using the 'si'
11282 // version. If we left it to default legalization we'd end up with 'di'. If
11283 // the FP type doesn't need to be softened just let generic type
11284 // legalization promote the result type.
11287 LC
= RTLIB::getFPTOSINT(Op0
.getValueType(), N
->getValueType(0));
11289 LC
= RTLIB::getFPTOUINT(Op0
.getValueType(), N
->getValueType(0));
11290 MakeLibCallOptions CallOptions
;
11291 EVT OpVT
= Op0
.getValueType();
11292 CallOptions
.setTypeListBeforeSoften(OpVT
, N
->getValueType(0), true);
11293 SDValue Chain
= IsStrict
? N
->getOperand(0) : SDValue();
11295 std::tie(Result
, Chain
) =
11296 makeLibCall(DAG
, LC
, N
->getValueType(0), Op0
, CallOptions
, DL
, Chain
);
11297 Results
.push_back(Result
);
11299 Results
.push_back(Chain
);
11302 case ISD::LROUND
: {
11303 SDValue Op0
= N
->getOperand(0);
11304 EVT Op0VT
= Op0
.getValueType();
11305 if (getTypeAction(*DAG
.getContext(), Op0
.getValueType()) !=
11306 TargetLowering::TypeSoftenFloat
) {
11307 if (!isTypeLegal(Op0VT
))
11310 // In absense of Zfh, promote f16 to f32, then convert.
11311 if (Op0
.getValueType() == MVT::f16
&& !Subtarget
.hasStdExtZfhOrZhinx())
11312 Op0
= DAG
.getNode(ISD::FP_EXTEND
, DL
, MVT::f32
, Op0
);
11315 DAG
.getNode(RISCVISD::FCVT_W_RV64
, DL
, MVT::i64
, Op0
,
11316 DAG
.getTargetConstant(RISCVFPRndMode::RMM
, DL
, MVT::i64
));
11317 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11320 // If the FP type needs to be softened, emit a library call to lround. We'll
11321 // need to truncate the result. We assume any value that doesn't fit in i32
11322 // is allowed to return an unspecified value.
11323 RTLIB::Libcall LC
=
11324 Op0
.getValueType() == MVT::f64
? RTLIB::LROUND_F64
: RTLIB::LROUND_F32
;
11325 MakeLibCallOptions CallOptions
;
11326 EVT OpVT
= Op0
.getValueType();
11327 CallOptions
.setTypeListBeforeSoften(OpVT
, MVT::i64
, true);
11328 SDValue Result
= makeLibCall(DAG
, LC
, MVT::i64
, Op0
, CallOptions
, DL
).first
;
11329 Result
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Result
);
11330 Results
.push_back(Result
);
11333 case ISD::READCYCLECOUNTER
: {
11334 assert(!Subtarget
.is64Bit() &&
11335 "READCYCLECOUNTER only has custom type legalization on riscv32");
11337 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::i32
, MVT::Other
);
11339 DAG
.getNode(RISCVISD::READ_CYCLE_WIDE
, DL
, VTs
, N
->getOperand(0));
11342 DAG
.getNode(ISD::BUILD_PAIR
, DL
, MVT::i64
, RCW
, RCW
.getValue(1)));
11343 Results
.push_back(RCW
.getValue(2));
11347 if (!ISD::isNON_EXTLoad(N
))
11350 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11351 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11352 LoadSDNode
*Ld
= cast
<LoadSDNode
>(N
);
11355 SDValue Res
= DAG
.getExtLoad(ISD::SEXTLOAD
, dl
, MVT::i64
, Ld
->getChain(),
11356 Ld
->getBasePtr(), Ld
->getMemoryVT(),
11357 Ld
->getMemOperand());
11358 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, Res
));
11359 Results
.push_back(Res
.getValue(1));
11363 unsigned Size
= N
->getSimpleValueType(0).getSizeInBits();
11364 unsigned XLen
= Subtarget
.getXLen();
11365 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11367 assert(Size
== (XLen
* 2) && "Unexpected custom legalisation");
11368 SDValue LHS
= N
->getOperand(0);
11369 SDValue RHS
= N
->getOperand(1);
11370 APInt HighMask
= APInt::getHighBitsSet(Size
, XLen
);
11372 bool LHSIsU
= DAG
.MaskedValueIsZero(LHS
, HighMask
);
11373 bool RHSIsU
= DAG
.MaskedValueIsZero(RHS
, HighMask
);
11374 // We need exactly one side to be unsigned.
11375 if (LHSIsU
== RHSIsU
)
11378 auto MakeMULPair
= [&](SDValue S
, SDValue U
) {
11379 MVT XLenVT
= Subtarget
.getXLenVT();
11380 S
= DAG
.getNode(ISD::TRUNCATE
, DL
, XLenVT
, S
);
11381 U
= DAG
.getNode(ISD::TRUNCATE
, DL
, XLenVT
, U
);
11382 SDValue Lo
= DAG
.getNode(ISD::MUL
, DL
, XLenVT
, S
, U
);
11383 SDValue Hi
= DAG
.getNode(RISCVISD::MULHSU
, DL
, XLenVT
, S
, U
);
11384 return DAG
.getNode(ISD::BUILD_PAIR
, DL
, N
->getValueType(0), Lo
, Hi
);
11387 bool LHSIsS
= DAG
.ComputeNumSignBits(LHS
) > XLen
;
11388 bool RHSIsS
= DAG
.ComputeNumSignBits(RHS
) > XLen
;
11390 // The other operand should be signed, but still prefer MULH when
11392 if (RHSIsU
&& LHSIsS
&& !RHSIsS
)
11393 Results
.push_back(MakeMULPair(LHS
, RHS
));
11394 else if (LHSIsU
&& RHSIsS
&& !LHSIsS
)
11395 Results
.push_back(MakeMULPair(RHS
, LHS
));
11403 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11404 "Unexpected custom legalisation");
11405 Results
.push_back(customLegalizeToWOpWithSExt(N
, DAG
));
11410 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11411 "Unexpected custom legalisation");
11412 if (N
->getOperand(1).getOpcode() != ISD::Constant
) {
11413 // If we can use a BSET instruction, allow default promotion to apply.
11414 if (N
->getOpcode() == ISD::SHL
&& Subtarget
.hasStdExtZbs() &&
11415 isOneConstant(N
->getOperand(0)))
11417 Results
.push_back(customLegalizeToWOp(N
, DAG
));
11421 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
11422 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
11424 if (N
->getOpcode() == ISD::SHL
) {
11427 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11429 DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11430 SDValue NewWOp
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, NewOp0
, NewOp1
);
11431 SDValue NewRes
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, MVT::i64
, NewWOp
,
11432 DAG
.getValueType(MVT::i32
));
11433 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, NewRes
));
11439 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11440 "Unexpected custom legalisation");
11441 assert((Subtarget
.hasStdExtZbb() || Subtarget
.hasStdExtZbkb() ||
11442 Subtarget
.hasVendorXTHeadBb()) &&
11443 "Unexpected custom legalization");
11444 if (!isa
<ConstantSDNode
>(N
->getOperand(1)) &&
11445 !(Subtarget
.hasStdExtZbb() || Subtarget
.hasStdExtZbkb()))
11447 Results
.push_back(customLegalizeToWOp(N
, DAG
));
11450 case ISD::CTTZ_ZERO_UNDEF
:
11452 case ISD::CTLZ_ZERO_UNDEF
: {
11453 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11454 "Unexpected custom legalisation");
11457 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11459 N
->getOpcode() == ISD::CTTZ
|| N
->getOpcode() == ISD::CTTZ_ZERO_UNDEF
;
11460 unsigned Opc
= IsCTZ
? RISCVISD::CTZW
: RISCVISD::CLZW
;
11461 SDValue Res
= DAG
.getNode(Opc
, DL
, MVT::i64
, NewOp0
);
11462 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11468 MVT VT
= N
->getSimpleValueType(0);
11469 assert((VT
== MVT::i8
|| VT
== MVT::i16
|| VT
== MVT::i32
) &&
11470 Subtarget
.is64Bit() && Subtarget
.hasStdExtM() &&
11471 "Unexpected custom legalisation");
11472 // Don't promote division/remainder by constant since we should expand those
11473 // to multiply by magic constant.
11474 AttributeList Attr
= DAG
.getMachineFunction().getFunction().getAttributes();
11475 if (N
->getOperand(1).getOpcode() == ISD::Constant
&&
11476 !isIntDivCheap(N
->getValueType(0), Attr
))
11479 // If the input is i32, use ANY_EXTEND since the W instructions don't read
11480 // the upper 32 bits. For other types we need to sign or zero extend
11481 // based on the opcode.
11482 unsigned ExtOpc
= ISD::ANY_EXTEND
;
11483 if (VT
!= MVT::i32
)
11484 ExtOpc
= N
->getOpcode() == ISD::SDIV
? ISD::SIGN_EXTEND
11485 : ISD::ZERO_EXTEND
;
11487 Results
.push_back(customLegalizeToWOp(N
, DAG
, ExtOpc
));
11491 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11492 "Unexpected custom legalisation");
11494 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
11495 // use the default legalization.
11496 if (!isa
<ConstantSDNode
>(N
->getOperand(1)))
11499 SDValue LHS
= DAG
.getNode(ISD::SIGN_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11500 SDValue RHS
= DAG
.getNode(ISD::SIGN_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11501 SDValue Res
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, LHS
, RHS
);
11502 Res
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, MVT::i64
, Res
,
11503 DAG
.getValueType(MVT::i32
));
11505 SDValue Zero
= DAG
.getConstant(0, DL
, MVT::i64
);
11507 // For an addition, the result should be less than one of the operands (LHS)
11508 // if and only if the other operand (RHS) is negative, otherwise there will
11510 // For a subtraction, the result should be less than one of the operands
11511 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11512 // otherwise there will be overflow.
11513 EVT OType
= N
->getValueType(1);
11514 SDValue ResultLowerThanLHS
= DAG
.getSetCC(DL
, OType
, Res
, LHS
, ISD::SETLT
);
11515 SDValue ConditionRHS
= DAG
.getSetCC(DL
, OType
, RHS
, Zero
, ISD::SETLT
);
11518 DAG
.getNode(ISD::XOR
, DL
, OType
, ConditionRHS
, ResultLowerThanLHS
);
11519 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11520 Results
.push_back(Overflow
);
11525 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11526 "Unexpected custom legalisation");
11527 bool IsAdd
= N
->getOpcode() == ISD::UADDO
;
11528 // Create an ADDW or SUBW.
11529 SDValue LHS
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11530 SDValue RHS
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11532 DAG
.getNode(IsAdd
? ISD::ADD
: ISD::SUB
, DL
, MVT::i64
, LHS
, RHS
);
11533 Res
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, MVT::i64
, Res
,
11534 DAG
.getValueType(MVT::i32
));
11537 if (IsAdd
&& isOneConstant(RHS
)) {
11538 // Special case uaddo X, 1 overflowed if the addition result is 0.
11539 // The general case (X + C) < C is not necessarily beneficial. Although we
11540 // reduce the live range of X, we may introduce the materialization of
11541 // constant C, especially when the setcc result is used by branch. We have
11542 // no compare with constant and branch instructions.
11543 Overflow
= DAG
.getSetCC(DL
, N
->getValueType(1), Res
,
11544 DAG
.getConstant(0, DL
, MVT::i64
), ISD::SETEQ
);
11545 } else if (IsAdd
&& isAllOnesConstant(RHS
)) {
11546 // Special case uaddo X, -1 overflowed if X != 0.
11547 Overflow
= DAG
.getSetCC(DL
, N
->getValueType(1), N
->getOperand(0),
11548 DAG
.getConstant(0, DL
, MVT::i32
), ISD::SETNE
);
11550 // Sign extend the LHS and perform an unsigned compare with the ADDW
11551 // result. Since the inputs are sign extended from i32, this is equivalent
11552 // to comparing the lower 32 bits.
11553 LHS
= DAG
.getNode(ISD::SIGN_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11554 Overflow
= DAG
.getSetCC(DL
, N
->getValueType(1), Res
, LHS
,
11555 IsAdd
? ISD::SETULT
: ISD::SETUGT
);
11558 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11559 Results
.push_back(Overflow
);
11563 case ISD::USUBSAT
: {
11564 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11565 "Unexpected custom legalisation");
11566 if (Subtarget
.hasStdExtZbb()) {
11567 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
11568 // sign extend allows overflow of the lower 32 bits to be detected on
11569 // the promoted size.
11571 DAG
.getNode(ISD::SIGN_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11573 DAG
.getNode(ISD::SIGN_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11574 SDValue Res
= DAG
.getNode(N
->getOpcode(), DL
, MVT::i64
, LHS
, RHS
);
11575 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11579 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
11580 // promotion for UADDO/USUBO.
11581 Results
.push_back(expandAddSubSat(N
, DAG
));
11585 assert(N
->getValueType(0) == MVT::i32
&& Subtarget
.is64Bit() &&
11586 "Unexpected custom legalisation");
11588 if (Subtarget
.hasStdExtZbb()) {
11589 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
11590 // This allows us to remember that the result is sign extended. Expanding
11591 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
11592 SDValue Src
= DAG
.getNode(ISD::SIGN_EXTEND
, DL
, MVT::i64
,
11594 SDValue Abs
= DAG
.getNode(RISCVISD::ABSW
, DL
, MVT::i64
, Src
);
11595 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Abs
));
11599 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
11600 SDValue Src
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(0));
11602 // Freeze the source so we can increase it's use count.
11603 Src
= DAG
.getFreeze(Src
);
11605 // Copy sign bit to all bits using the sraiw pattern.
11606 SDValue SignFill
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, MVT::i64
, Src
,
11607 DAG
.getValueType(MVT::i32
));
11608 SignFill
= DAG
.getNode(ISD::SRA
, DL
, MVT::i64
, SignFill
,
11609 DAG
.getConstant(31, DL
, MVT::i64
));
11611 SDValue NewRes
= DAG
.getNode(ISD::XOR
, DL
, MVT::i64
, Src
, SignFill
);
11612 NewRes
= DAG
.getNode(ISD::SUB
, DL
, MVT::i64
, NewRes
, SignFill
);
11614 // NOTE: The result is only required to be anyextended, but sext is
11615 // consistent with type legalization of sub.
11616 NewRes
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, MVT::i64
, NewRes
,
11617 DAG
.getValueType(MVT::i32
));
11618 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, NewRes
));
11621 case ISD::BITCAST
: {
11622 EVT VT
= N
->getValueType(0);
11623 assert(VT
.isInteger() && !VT
.isVector() && "Unexpected VT!");
11624 SDValue Op0
= N
->getOperand(0);
11625 EVT Op0VT
= Op0
.getValueType();
11626 MVT XLenVT
= Subtarget
.getXLenVT();
11627 if (VT
== MVT::i16
&& Op0VT
== MVT::f16
&&
11628 Subtarget
.hasStdExtZfhminOrZhinxmin()) {
11629 SDValue FPConv
= DAG
.getNode(RISCVISD::FMV_X_ANYEXTH
, DL
, XLenVT
, Op0
);
11630 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i16
, FPConv
));
11631 } else if (VT
== MVT::i16
&& Op0VT
== MVT::bf16
&&
11632 Subtarget
.hasStdExtZfbfmin()) {
11633 SDValue FPConv
= DAG
.getNode(RISCVISD::FMV_X_ANYEXTH
, DL
, XLenVT
, Op0
);
11634 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i16
, FPConv
));
11635 } else if (VT
== MVT::i32
&& Op0VT
== MVT::f32
&& Subtarget
.is64Bit() &&
11636 Subtarget
.hasStdExtFOrZfinx()) {
11638 DAG
.getNode(RISCVISD::FMV_X_ANYEXTW_RV64
, DL
, MVT::i64
, Op0
);
11639 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, FPConv
));
11640 } else if (VT
== MVT::i64
&& Op0VT
== MVT::f64
&& XLenVT
== MVT::i32
&&
11641 Subtarget
.hasStdExtZfa()) {
11642 SDValue NewReg
= DAG
.getNode(RISCVISD::SplitF64
, DL
,
11643 DAG
.getVTList(MVT::i32
, MVT::i32
), Op0
);
11644 SDValue RetReg
= DAG
.getNode(ISD::BUILD_PAIR
, DL
, MVT::i64
,
11645 NewReg
.getValue(0), NewReg
.getValue(1));
11646 Results
.push_back(RetReg
);
11647 } else if (!VT
.isVector() && Op0VT
.isFixedLengthVector() &&
11648 isTypeLegal(Op0VT
)) {
11649 // Custom-legalize bitcasts from fixed-length vector types to illegal
11650 // scalar types in order to improve codegen. Bitcast the vector to a
11651 // one-element vector type whose element type is the same as the result
11652 // type, and extract the first element.
11653 EVT BVT
= EVT::getVectorVT(*DAG
.getContext(), VT
, 1);
11654 if (isTypeLegal(BVT
)) {
11655 SDValue BVec
= DAG
.getBitcast(BVT
, Op0
);
11656 Results
.push_back(DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, VT
, BVec
,
11657 DAG
.getConstant(0, DL
, XLenVT
)));
11662 case RISCVISD::BREV8
: {
11663 MVT VT
= N
->getSimpleValueType(0);
11664 MVT XLenVT
= Subtarget
.getXLenVT();
11665 assert((VT
== MVT::i16
|| (VT
== MVT::i32
&& Subtarget
.is64Bit())) &&
11666 "Unexpected custom legalisation");
11667 assert(Subtarget
.hasStdExtZbkb() && "Unexpected extension");
11668 SDValue NewOp
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, XLenVT
, N
->getOperand(0));
11669 SDValue NewRes
= DAG
.getNode(N
->getOpcode(), DL
, XLenVT
, NewOp
);
11670 // ReplaceNodeResults requires we maintain the same type for the return
11672 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, NewRes
));
11675 case ISD::EXTRACT_VECTOR_ELT
: {
11676 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
11677 // type is illegal (currently only vXi64 RV32).
11678 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
11679 // transferred to the destination register. We issue two of these from the
11680 // upper- and lower- halves of the SEW-bit vector element, slid down to the
11682 SDValue Vec
= N
->getOperand(0);
11683 SDValue Idx
= N
->getOperand(1);
11685 // The vector type hasn't been legalized yet so we can't issue target
11686 // specific nodes if it needs legalization.
11687 // FIXME: We would manually legalize if it's important.
11688 if (!isTypeLegal(Vec
.getValueType()))
11691 MVT VecVT
= Vec
.getSimpleValueType();
11693 assert(!Subtarget
.is64Bit() && N
->getValueType(0) == MVT::i64
&&
11694 VecVT
.getVectorElementType() == MVT::i64
&&
11695 "Unexpected EXTRACT_VECTOR_ELT legalization");
11697 // If this is a fixed vector, we need to convert it to a scalable vector.
11698 MVT ContainerVT
= VecVT
;
11699 if (VecVT
.isFixedLengthVector()) {
11700 ContainerVT
= getContainerForFixedLengthVector(VecVT
);
11701 Vec
= convertToScalableVector(ContainerVT
, Vec
, DAG
, Subtarget
);
11704 MVT XLenVT
= Subtarget
.getXLenVT();
11706 // Use a VL of 1 to avoid processing more elements than we need.
11707 auto [Mask
, VL
] = getDefaultVLOps(1, ContainerVT
, DL
, DAG
, Subtarget
);
11709 // Unless the index is known to be 0, we must slide the vector down to get
11710 // the desired element into index 0.
11711 if (!isNullConstant(Idx
)) {
11712 Vec
= getVSlidedown(DAG
, Subtarget
, DL
, ContainerVT
,
11713 DAG
.getUNDEF(ContainerVT
), Vec
, Idx
, Mask
, VL
);
11716 // Extract the lower XLEN bits of the correct vector element.
11717 SDValue EltLo
= DAG
.getNode(RISCVISD::VMV_X_S
, DL
, XLenVT
, Vec
);
11719 // To extract the upper XLEN bits of the vector element, shift the first
11720 // element right by 32 bits and re-extract the lower XLEN bits.
11721 SDValue ThirtyTwoV
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
11722 DAG
.getUNDEF(ContainerVT
),
11723 DAG
.getConstant(32, DL
, XLenVT
), VL
);
11725 DAG
.getNode(RISCVISD::SRL_VL
, DL
, ContainerVT
, Vec
, ThirtyTwoV
,
11726 DAG
.getUNDEF(ContainerVT
), Mask
, VL
);
11728 SDValue EltHi
= DAG
.getNode(RISCVISD::VMV_X_S
, DL
, XLenVT
, LShr32
);
11730 Results
.push_back(DAG
.getNode(ISD::BUILD_PAIR
, DL
, MVT::i64
, EltLo
, EltHi
));
11733 case ISD::INTRINSIC_WO_CHAIN
: {
11734 unsigned IntNo
= N
->getConstantOperandVal(0);
11738 "Don't know how to custom type legalize this intrinsic!");
11739 case Intrinsic::experimental_get_vector_length
: {
11740 SDValue Res
= lowerGetVectorLength(N
, DAG
, Subtarget
);
11741 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11744 case Intrinsic::riscv_orc_b
:
11745 case Intrinsic::riscv_brev8
:
11746 case Intrinsic::riscv_sha256sig0
:
11747 case Intrinsic::riscv_sha256sig1
:
11748 case Intrinsic::riscv_sha256sum0
:
11749 case Intrinsic::riscv_sha256sum1
:
11750 case Intrinsic::riscv_sm3p0
:
11751 case Intrinsic::riscv_sm3p1
: {
11752 if (!Subtarget
.is64Bit() || N
->getValueType(0) != MVT::i32
)
11756 case Intrinsic::riscv_orc_b
: Opc
= RISCVISD::ORC_B
; break;
11757 case Intrinsic::riscv_brev8
: Opc
= RISCVISD::BREV8
; break;
11758 case Intrinsic::riscv_sha256sig0
: Opc
= RISCVISD::SHA256SIG0
; break;
11759 case Intrinsic::riscv_sha256sig1
: Opc
= RISCVISD::SHA256SIG1
; break;
11760 case Intrinsic::riscv_sha256sum0
: Opc
= RISCVISD::SHA256SUM0
; break;
11761 case Intrinsic::riscv_sha256sum1
: Opc
= RISCVISD::SHA256SUM1
; break;
11762 case Intrinsic::riscv_sm3p0
: Opc
= RISCVISD::SM3P0
; break;
11763 case Intrinsic::riscv_sm3p1
: Opc
= RISCVISD::SM3P1
; break;
11767 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11768 SDValue Res
= DAG
.getNode(Opc
, DL
, MVT::i64
, NewOp
);
11769 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11772 case Intrinsic::riscv_sm4ks
:
11773 case Intrinsic::riscv_sm4ed
: {
11775 IntNo
== Intrinsic::riscv_sm4ks
? RISCVISD::SM4KS
: RISCVISD::SM4ED
;
11777 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11779 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(2));
11781 DAG
.getNode(Opc
, DL
, MVT::i64
, NewOp0
, NewOp1
, N
->getOperand(3));
11782 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11785 case Intrinsic::riscv_clmul
: {
11786 if (!Subtarget
.is64Bit() || N
->getValueType(0) != MVT::i32
)
11790 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11792 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(2));
11793 SDValue Res
= DAG
.getNode(RISCVISD::CLMUL
, DL
, MVT::i64
, NewOp0
, NewOp1
);
11794 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11797 case Intrinsic::riscv_clmulh
:
11798 case Intrinsic::riscv_clmulr
: {
11799 if (!Subtarget
.is64Bit() || N
->getValueType(0) != MVT::i32
)
11802 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
11803 // to the full 128-bit clmul result of multiplying two xlen values.
11804 // Perform clmulr or clmulh on the shifted values. Finally, extract the
11807 // The alternative is to mask the inputs to 32 bits and use clmul, but
11808 // that requires two shifts to mask each input without zext.w.
11809 // FIXME: If the inputs are known zero extended or could be freely
11810 // zero extended, the mask form would be better.
11812 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11814 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(2));
11815 NewOp0
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, NewOp0
,
11816 DAG
.getConstant(32, DL
, MVT::i64
));
11817 NewOp1
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, NewOp1
,
11818 DAG
.getConstant(32, DL
, MVT::i64
));
11819 unsigned Opc
= IntNo
== Intrinsic::riscv_clmulh
? RISCVISD::CLMULH
11820 : RISCVISD::CLMULR
;
11821 SDValue Res
= DAG
.getNode(Opc
, DL
, MVT::i64
, NewOp0
, NewOp1
);
11822 Res
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, Res
,
11823 DAG
.getConstant(32, DL
, MVT::i64
));
11824 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
11827 case Intrinsic::riscv_vmv_x_s
: {
11828 EVT VT
= N
->getValueType(0);
11829 MVT XLenVT
= Subtarget
.getXLenVT();
11830 if (VT
.bitsLT(XLenVT
)) {
11831 // Simple case just extract using vmv.x.s and truncate.
11832 SDValue Extract
= DAG
.getNode(RISCVISD::VMV_X_S
, DL
,
11833 Subtarget
.getXLenVT(), N
->getOperand(1));
11834 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, Extract
));
11838 assert(VT
== MVT::i64
&& !Subtarget
.is64Bit() &&
11839 "Unexpected custom legalization");
11841 // We need to do the move in two steps.
11842 SDValue Vec
= N
->getOperand(1);
11843 MVT VecVT
= Vec
.getSimpleValueType();
11845 // First extract the lower XLEN bits of the element.
11846 SDValue EltLo
= DAG
.getNode(RISCVISD::VMV_X_S
, DL
, XLenVT
, Vec
);
11848 // To extract the upper XLEN bits of the vector element, shift the first
11849 // element right by 32 bits and re-extract the lower XLEN bits.
11850 auto [Mask
, VL
] = getDefaultVLOps(1, VecVT
, DL
, DAG
, Subtarget
);
11852 SDValue ThirtyTwoV
=
11853 DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, VecVT
, DAG
.getUNDEF(VecVT
),
11854 DAG
.getConstant(32, DL
, XLenVT
), VL
);
11855 SDValue LShr32
= DAG
.getNode(RISCVISD::SRL_VL
, DL
, VecVT
, Vec
, ThirtyTwoV
,
11856 DAG
.getUNDEF(VecVT
), Mask
, VL
);
11857 SDValue EltHi
= DAG
.getNode(RISCVISD::VMV_X_S
, DL
, XLenVT
, LShr32
);
11860 DAG
.getNode(ISD::BUILD_PAIR
, DL
, MVT::i64
, EltLo
, EltHi
));
11866 case ISD::VECREDUCE_ADD
:
11867 case ISD::VECREDUCE_AND
:
11868 case ISD::VECREDUCE_OR
:
11869 case ISD::VECREDUCE_XOR
:
11870 case ISD::VECREDUCE_SMAX
:
11871 case ISD::VECREDUCE_UMAX
:
11872 case ISD::VECREDUCE_SMIN
:
11873 case ISD::VECREDUCE_UMIN
:
11874 if (SDValue V
= lowerVECREDUCE(SDValue(N
, 0), DAG
))
11875 Results
.push_back(V
);
11877 case ISD::VP_REDUCE_ADD
:
11878 case ISD::VP_REDUCE_AND
:
11879 case ISD::VP_REDUCE_OR
:
11880 case ISD::VP_REDUCE_XOR
:
11881 case ISD::VP_REDUCE_SMAX
:
11882 case ISD::VP_REDUCE_UMAX
:
11883 case ISD::VP_REDUCE_SMIN
:
11884 case ISD::VP_REDUCE_UMIN
:
11885 if (SDValue V
= lowerVPREDUCE(SDValue(N
, 0), DAG
))
11886 Results
.push_back(V
);
11888 case ISD::GET_ROUNDING
: {
11889 SDVTList VTs
= DAG
.getVTList(Subtarget
.getXLenVT(), MVT::Other
);
11890 SDValue Res
= DAG
.getNode(ISD::GET_ROUNDING
, DL
, VTs
, N
->getOperand(0));
11891 Results
.push_back(Res
.getValue(0));
11892 Results
.push_back(Res
.getValue(1));
11898 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
11899 /// which corresponds to it.
11900 static unsigned getVecReduceOpcode(unsigned Opc
) {
11903 llvm_unreachable("Unhandled binary to transfrom reduction");
11905 return ISD::VECREDUCE_ADD
;
11907 return ISD::VECREDUCE_UMAX
;
11909 return ISD::VECREDUCE_SMAX
;
11911 return ISD::VECREDUCE_UMIN
;
11913 return ISD::VECREDUCE_SMIN
;
11915 return ISD::VECREDUCE_AND
;
11917 return ISD::VECREDUCE_OR
;
11919 return ISD::VECREDUCE_XOR
;
11921 // Note: This is the associative form of the generic reduction opcode.
11922 return ISD::VECREDUCE_FADD
;
11926 /// Perform two related transforms whose purpose is to incrementally recognize
11927 /// an explode_vector followed by scalar reduction as a vector reduction node.
11928 /// This exists to recover from a deficiency in SLP which can't handle
11929 /// forests with multiple roots sharing common nodes. In some cases, one
11930 /// of the trees will be vectorized, and the other will remain (unprofitably)
11933 combineBinOpOfExtractToReduceTree(SDNode
*N
, SelectionDAG
&DAG
,
11934 const RISCVSubtarget
&Subtarget
) {
11936 // This transforms need to run before all integer types have been legalized
11937 // to i64 (so that the vector element type matches the add type), and while
11938 // it's safe to introduce odd sized vector types.
11939 if (DAG
.NewNodesMustHaveLegalTypes
)
11942 // Without V, this transform isn't useful. We could form the (illegal)
11943 // operations and let them be scalarized again, but there's really no point.
11944 if (!Subtarget
.hasVInstructions())
11948 const EVT VT
= N
->getValueType(0);
11949 const unsigned Opc
= N
->getOpcode();
11951 // For FADD, we only handle the case with reassociation allowed. We
11952 // could handle strict reduction order, but at the moment, there's no
11953 // known reason to, and the complexity isn't worth it.
11954 // TODO: Handle fminnum and fmaxnum here
11955 if (!VT
.isInteger() &&
11956 (Opc
!= ISD::FADD
|| !N
->getFlags().hasAllowReassociation()))
11959 const unsigned ReduceOpc
= getVecReduceOpcode(Opc
);
11960 assert(Opc
== ISD::getVecReduceBaseOpcode(ReduceOpc
) &&
11961 "Inconsistent mappings");
11962 SDValue LHS
= N
->getOperand(0);
11963 SDValue RHS
= N
->getOperand(1);
11965 if (!LHS
.hasOneUse() || !RHS
.hasOneUse())
11968 if (RHS
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
11969 std::swap(LHS
, RHS
);
11971 if (RHS
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
||
11972 !isa
<ConstantSDNode
>(RHS
.getOperand(1)))
11975 uint64_t RHSIdx
= cast
<ConstantSDNode
>(RHS
.getOperand(1))->getLimitedValue();
11976 SDValue SrcVec
= RHS
.getOperand(0);
11977 EVT SrcVecVT
= SrcVec
.getValueType();
11978 assert(SrcVecVT
.getVectorElementType() == VT
);
11979 if (SrcVecVT
.isScalableVector())
11982 if (SrcVecVT
.getScalarSizeInBits() > Subtarget
.getELen())
11985 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
11986 // reduce_op (extract_subvector [2 x VT] from V). This will form the
11987 // root of our reduction tree. TODO: We could extend this to any two
11988 // adjacent aligned constant indices if desired.
11989 if (LHS
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&&
11990 LHS
.getOperand(0) == SrcVec
&& isa
<ConstantSDNode
>(LHS
.getOperand(1))) {
11992 cast
<ConstantSDNode
>(LHS
.getOperand(1))->getLimitedValue();
11993 if (0 == std::min(LHSIdx
, RHSIdx
) && 1 == std::max(LHSIdx
, RHSIdx
)) {
11994 EVT ReduceVT
= EVT::getVectorVT(*DAG
.getContext(), VT
, 2);
11995 SDValue Vec
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, ReduceVT
, SrcVec
,
11996 DAG
.getVectorIdxConstant(0, DL
));
11997 return DAG
.getNode(ReduceOpc
, DL
, VT
, Vec
, N
->getFlags());
12001 // Match (binop (reduce (extract_subvector V, 0),
12002 // (extract_vector_elt V, sizeof(SubVec))))
12003 // into a reduction of one more element from the original vector V.
12004 if (LHS
.getOpcode() != ReduceOpc
)
12007 SDValue ReduceVec
= LHS
.getOperand(0);
12008 if (ReduceVec
.getOpcode() == ISD::EXTRACT_SUBVECTOR
&&
12009 ReduceVec
.hasOneUse() && ReduceVec
.getOperand(0) == RHS
.getOperand(0) &&
12010 isNullConstant(ReduceVec
.getOperand(1)) &&
12011 ReduceVec
.getValueType().getVectorNumElements() == RHSIdx
) {
12012 // For illegal types (e.g. 3xi32), most will be combined again into a
12013 // wider (hopefully legal) type. If this is a terminal state, we are
12014 // relying on type legalization here to produce something reasonable
12015 // and this lowering quality could probably be improved. (TODO)
12016 EVT ReduceVT
= EVT::getVectorVT(*DAG
.getContext(), VT
, RHSIdx
+ 1);
12017 SDValue Vec
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, ReduceVT
, SrcVec
,
12018 DAG
.getVectorIdxConstant(0, DL
));
12019 auto Flags
= ReduceVec
->getFlags();
12020 Flags
.intersectWith(N
->getFlags());
12021 return DAG
.getNode(ReduceOpc
, DL
, VT
, Vec
, Flags
);
12028 // Try to fold (<bop> x, (reduction.<bop> vec, start))
12029 static SDValue
combineBinOpToReduce(SDNode
*N
, SelectionDAG
&DAG
,
12030 const RISCVSubtarget
&Subtarget
) {
12031 auto BinOpToRVVReduce
= [](unsigned Opc
) {
12034 llvm_unreachable("Unhandled binary to transfrom reduction");
12036 return RISCVISD::VECREDUCE_ADD_VL
;
12038 return RISCVISD::VECREDUCE_UMAX_VL
;
12040 return RISCVISD::VECREDUCE_SMAX_VL
;
12042 return RISCVISD::VECREDUCE_UMIN_VL
;
12044 return RISCVISD::VECREDUCE_SMIN_VL
;
12046 return RISCVISD::VECREDUCE_AND_VL
;
12048 return RISCVISD::VECREDUCE_OR_VL
;
12050 return RISCVISD::VECREDUCE_XOR_VL
;
12052 return RISCVISD::VECREDUCE_FADD_VL
;
12054 return RISCVISD::VECREDUCE_FMAX_VL
;
12056 return RISCVISD::VECREDUCE_FMIN_VL
;
12060 auto IsReduction
= [&BinOpToRVVReduce
](SDValue V
, unsigned Opc
) {
12061 return V
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&&
12062 isNullConstant(V
.getOperand(1)) &&
12063 V
.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc
);
12066 unsigned Opc
= N
->getOpcode();
12067 unsigned ReduceIdx
;
12068 if (IsReduction(N
->getOperand(0), Opc
))
12070 else if (IsReduction(N
->getOperand(1), Opc
))
12075 // Skip if FADD disallows reassociation but the combiner needs.
12076 if (Opc
== ISD::FADD
&& !N
->getFlags().hasAllowReassociation())
12079 SDValue Extract
= N
->getOperand(ReduceIdx
);
12080 SDValue Reduce
= Extract
.getOperand(0);
12081 if (!Extract
.hasOneUse() || !Reduce
.hasOneUse())
12084 SDValue ScalarV
= Reduce
.getOperand(2);
12085 EVT ScalarVT
= ScalarV
.getValueType();
12086 if (ScalarV
.getOpcode() == ISD::INSERT_SUBVECTOR
&&
12087 ScalarV
.getOperand(0)->isUndef() &&
12088 isNullConstant(ScalarV
.getOperand(2)))
12089 ScalarV
= ScalarV
.getOperand(1);
12091 // Make sure that ScalarV is a splat with VL=1.
12092 if (ScalarV
.getOpcode() != RISCVISD::VFMV_S_F_VL
&&
12093 ScalarV
.getOpcode() != RISCVISD::VMV_S_X_VL
&&
12094 ScalarV
.getOpcode() != RISCVISD::VMV_V_X_VL
)
12097 if (!isNonZeroAVL(ScalarV
.getOperand(2)))
12100 // Check the scalar of ScalarV is neutral element
12101 // TODO: Deal with value other than neutral element.
12102 if (!isNeutralConstant(N
->getOpcode(), N
->getFlags(), ScalarV
.getOperand(1),
12106 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12107 // FIXME: We might be able to improve this if operand 0 is undef.
12108 if (!isNonZeroAVL(Reduce
.getOperand(5)))
12111 SDValue NewStart
= N
->getOperand(1 - ReduceIdx
);
12114 SDValue NewScalarV
=
12115 lowerScalarInsert(NewStart
, ScalarV
.getOperand(2),
12116 ScalarV
.getSimpleValueType(), DL
, DAG
, Subtarget
);
12118 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12119 if (ScalarVT
!= ScalarV
.getValueType())
12121 DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, ScalarVT
, DAG
.getUNDEF(ScalarVT
),
12122 NewScalarV
, DAG
.getConstant(0, DL
, Subtarget
.getXLenVT()));
12124 SDValue Ops
[] = {Reduce
.getOperand(0), Reduce
.getOperand(1),
12125 NewScalarV
, Reduce
.getOperand(3),
12126 Reduce
.getOperand(4), Reduce
.getOperand(5)};
12127 SDValue NewReduce
=
12128 DAG
.getNode(Reduce
.getOpcode(), DL
, Reduce
.getValueType(), Ops
);
12129 return DAG
.getNode(Extract
.getOpcode(), DL
, Extract
.getValueType(), NewReduce
,
12130 Extract
.getOperand(1));
12133 // Optimize (add (shl x, c0), (shl y, c1)) ->
12134 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
12135 static SDValue
transformAddShlImm(SDNode
*N
, SelectionDAG
&DAG
,
12136 const RISCVSubtarget
&Subtarget
) {
12137 // Perform this optimization only when the Zba extension is available.
12138 if (!Subtarget
.hasStdExtZba())
12141 // Skip for vector types and larger types.
12142 EVT VT
= N
->getValueType(0);
12143 if (VT
.isVector() || VT
.getSizeInBits() > Subtarget
.getXLen())
12146 // The two operand nodes must be SHL and have no other use.
12147 SDValue N0
= N
->getOperand(0);
12148 SDValue N1
= N
->getOperand(1);
12149 if (N0
->getOpcode() != ISD::SHL
|| N1
->getOpcode() != ISD::SHL
||
12150 !N0
->hasOneUse() || !N1
->hasOneUse())
12153 // Check c0 and c1.
12154 auto *N0C
= dyn_cast
<ConstantSDNode
>(N0
->getOperand(1));
12155 auto *N1C
= dyn_cast
<ConstantSDNode
>(N1
->getOperand(1));
12158 int64_t C0
= N0C
->getSExtValue();
12159 int64_t C1
= N1C
->getSExtValue();
12160 if (C0
<= 0 || C1
<= 0)
12163 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12164 int64_t Bits
= std::min(C0
, C1
);
12165 int64_t Diff
= std::abs(C0
- C1
);
12166 if (Diff
!= 1 && Diff
!= 2 && Diff
!= 3)
12171 SDValue NS
= (C0
< C1
) ? N0
->getOperand(0) : N1
->getOperand(0);
12172 SDValue NL
= (C0
> C1
) ? N0
->getOperand(0) : N1
->getOperand(0);
12174 DAG
.getNode(ISD::SHL
, DL
, VT
, NL
, DAG
.getConstant(Diff
, DL
, VT
));
12175 SDValue NA1
= DAG
.getNode(ISD::ADD
, DL
, VT
, NA0
, NS
);
12176 return DAG
.getNode(ISD::SHL
, DL
, VT
, NA1
, DAG
.getConstant(Bits
, DL
, VT
));
12179 // Combine a constant select operand into its use:
12181 // (and (select cond, -1, c), x)
12182 // -> (select cond, x, (and x, c)) [AllOnes=1]
12183 // (or (select cond, 0, c), x)
12184 // -> (select cond, x, (or x, c)) [AllOnes=0]
12185 // (xor (select cond, 0, c), x)
12186 // -> (select cond, x, (xor x, c)) [AllOnes=0]
12187 // (add (select cond, 0, c), x)
12188 // -> (select cond, x, (add x, c)) [AllOnes=0]
12189 // (sub x, (select cond, 0, c))
12190 // -> (select cond, x, (sub x, c)) [AllOnes=0]
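// For example, (add (select cond, 0, 4), x) becomes
// (select cond, x, (add x, 4)), removing the constant select operand.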
12191 static SDValue
combineSelectAndUse(SDNode
*N
, SDValue Slct
, SDValue OtherOp
,
12192 SelectionDAG
&DAG
, bool AllOnes
,
12193 const RISCVSubtarget
&Subtarget
) {
12194 EVT VT
= N
->getValueType(0);
12200 if (!Subtarget
.hasShortForwardBranchOpt()) {
12201 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12202 if ((!Subtarget
.hasStdExtZicond() &&
12203 !Subtarget
.hasVendorXVentanaCondOps()) ||
12204 N
->getOpcode() != ISD::AND
)
12207 // This may be harmful when the condition code has multiple uses.
12208 if (Slct
.getOpcode() == ISD::SELECT
&& !Slct
.getOperand(0).hasOneUse())
12211 // This may be harmful when VT is wider than XLen.
12212 if (VT
.getSizeInBits() > Subtarget
.getXLen())
12216 if ((Slct
.getOpcode() != ISD::SELECT
&&
12217 Slct
.getOpcode() != RISCVISD::SELECT_CC
) ||
12221 auto isZeroOrAllOnes
= [](SDValue N
, bool AllOnes
) {
12222 return AllOnes
? isAllOnesConstant(N
) : isNullConstant(N
);
12225 bool SwapSelectOps
;
12226 unsigned OpOffset
= Slct
.getOpcode() == RISCVISD::SELECT_CC
? 2 : 0;
12227 SDValue TrueVal
= Slct
.getOperand(1 + OpOffset
);
12228 SDValue FalseVal
= Slct
.getOperand(2 + OpOffset
);
12229 SDValue NonConstantVal
;
12230 if (isZeroOrAllOnes(TrueVal
, AllOnes
)) {
12231 SwapSelectOps
= false;
12232 NonConstantVal
= FalseVal
;
12233 } else if (isZeroOrAllOnes(FalseVal
, AllOnes
)) {
12234 SwapSelectOps
= true;
12235 NonConstantVal
= TrueVal
;
12239 // Slct is now known to be the desired identity constant when CC is true.
12241 FalseVal
= DAG
.getNode(N
->getOpcode(), SDLoc(N
), VT
, OtherOp
, NonConstantVal
);
12242 // Unless SwapSelectOps says the condition should be false.
12244 std::swap(TrueVal
, FalseVal
);
12246 if (Slct
.getOpcode() == RISCVISD::SELECT_CC
)
12247 return DAG
.getNode(RISCVISD::SELECT_CC
, SDLoc(N
), VT
,
12248 {Slct
.getOperand(0), Slct
.getOperand(1),
12249 Slct
.getOperand(2), TrueVal
, FalseVal
});
12251 return DAG
.getNode(ISD::SELECT
, SDLoc(N
), VT
,
12252 {Slct
.getOperand(0), TrueVal
, FalseVal
});
12255 // Attempt combineSelectAndUse on each operand of a commutative operator N.
12256 static SDValue
combineSelectAndUseCommutative(SDNode
*N
, SelectionDAG
&DAG
,
12258 const RISCVSubtarget
&Subtarget
) {
12259 SDValue N0
= N
->getOperand(0);
12260 SDValue N1
= N
->getOperand(1);
12261 if (SDValue Result
= combineSelectAndUse(N
, N0
, N1
, DAG
, AllOnes
, Subtarget
))
12263 if (SDValue Result
= combineSelectAndUse(N
, N1
, N0
, DAG
, AllOnes
, Subtarget
))
12268 // Transform (add (mul x, c0), c1) ->
12269 // (add (mul (add x, c1/c0), c0), c1%c0).
12270 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12271 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
12272 // to an infinite loop in DAGCombine if transformed.
12273 // Or transform (add (mul x, c0), c1) ->
12274 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12275 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12276 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12277 // lead to an infinite loop in DAGCombine if transformed.
12278 // Or transform (add (mul x, c0), c1) ->
12279 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12280 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12281 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12282 // lead to an infinite loop in DAGCombine if transformed.
12283 // Or transform (add (mul x, c0), c1) ->
12284 // (mul (add x, c1/c0), c0).
12285 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
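// Worked example of the first form: (add (mul x, 100), 4098) with c0 = 100 and
// c1 = 4098 (not simm12) gives c1/c0 = 40 and c1%c0 = 98, both simm12, while
// c0*(c1/c0) = 4000 is not simm12, so it becomes (add (mul (add x, 40), 100), 98).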
12286 static SDValue
transformAddImmMulImm(SDNode
*N
, SelectionDAG
&DAG
,
12287 const RISCVSubtarget
&Subtarget
) {
12288 // Skip for vector types and larger types.
12289 EVT VT
= N
->getValueType(0);
12290 if (VT
.isVector() || VT
.getSizeInBits() > Subtarget
.getXLen())
12292 // The first operand node must be a MUL and have no other use.
12293 SDValue N0
= N
->getOperand(0);
12294 if (!N0
->hasOneUse() || N0
->getOpcode() != ISD::MUL
)
12296 // Check if c0 and c1 match the conditions above.
12297 auto *N0C
= dyn_cast
<ConstantSDNode
>(N0
->getOperand(1));
12298 auto *N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
12301 // If N0C has multiple uses it's possible one of the cases in
12302 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12303 // in an infinite loop.
12304 if (!N0C
->hasOneUse())
12306 int64_t C0
= N0C
->getSExtValue();
12307 int64_t C1
= N1C
->getSExtValue();
12309 if (C0
== -1 || C0
== 0 || C0
== 1 || isInt
<12>(C1
))
12311 // Search for a proper CA (non-zero) and CB such that both are simm12.
12312 if ((C1
/ C0
) != 0 && isInt
<12>(C1
/ C0
) && isInt
<12>(C1
% C0
) &&
12313 !isInt
<12>(C0
* (C1
/ C0
))) {
12316 } else if ((C1
/ C0
+ 1) != 0 && isInt
<12>(C1
/ C0
+ 1) &&
12317 isInt
<12>(C1
% C0
- C0
) && !isInt
<12>(C0
* (C1
/ C0
+ 1))) {
12320 } else if ((C1
/ C0
- 1) != 0 && isInt
<12>(C1
/ C0
- 1) &&
12321 isInt
<12>(C1
% C0
+ C0
) && !isInt
<12>(C0
* (C1
/ C0
- 1))) {
12326 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12328 SDValue New0
= DAG
.getNode(ISD::ADD
, DL
, VT
, N0
->getOperand(0),
12329 DAG
.getConstant(CA
, DL
, VT
));
12331 DAG
.getNode(ISD::MUL
, DL
, VT
, New0
, DAG
.getConstant(C0
, DL
, VT
));
12332 return DAG
.getNode(ISD::ADD
, DL
, VT
, New1
, DAG
.getConstant(CB
, DL
, VT
));
12335 // Try to turn (add (xor bool, 1) -1) into (neg bool).
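// When bool is known to be 0 or 1, (xor bool, 1) == 1 - bool, so
// (add (xor bool, 1), -1) == (1 - bool) - 1 == -bool, i.e. (sub 0, bool).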
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // RHS should be -1.
  if (!isAllOnesConstant(N1))
    return SDValue();

  // Look for (xor X, 1).
  if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
    return SDValue();

  // First xor input should be 0 or 1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
    return SDValue();

  // Emit a negate of the setcc.
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     N0.getOperand(0));
}
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineAddOfBooleanXor(N, DAG))
    return V;
  if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  // fold (add (select lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (add x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
12378 // Try to turn a sub boolean RHS and constant LHS into an addi.
12379 static SDValue
combineSubOfBoolean(SDNode
*N
, SelectionDAG
&DAG
) {
12380 SDValue N0
= N
->getOperand(0);
12381 SDValue N1
= N
->getOperand(1);
12382 EVT VT
= N
->getValueType(0);
12385 // Require a constant LHS.
12386 auto *N0C
= dyn_cast
<ConstantSDNode
>(N0
);
12390 // All our optimizations involve subtracting 1 from the immediate and forming
12391 // an ADDI. Make sure the new immediate is valid for an ADDI.
12392 APInt ImmValMinus1
= N0C
->getAPIntValue() - 1;
12393 if (!ImmValMinus1
.isSignedIntN(12))
12397 if (N1
.getOpcode() == ISD::SETCC
&& N1
.hasOneUse()) {
12398 // (sub constant, (setcc x, y, eq/neq)) ->
12399 // (add (setcc x, y, neq/eq), constant - 1)
12400 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(N1
.getOperand(2))->get();
12401 EVT SetCCOpVT
= N1
.getOperand(0).getValueType();
12402 if (!isIntEqualitySetCC(CCVal
) || !SetCCOpVT
.isInteger())
12404 CCVal
= ISD::getSetCCInverse(CCVal
, SetCCOpVT
);
12406 DAG
.getSetCC(SDLoc(N1
), VT
, N1
.getOperand(0), N1
.getOperand(1), CCVal
);
12407 } else if (N1
.getOpcode() == ISD::XOR
&& isOneConstant(N1
.getOperand(1)) &&
12408 N1
.getOperand(0).getOpcode() == ISD::SETCC
) {
12409 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
12410 // Since setcc returns a bool the xor is equivalent to 1-setcc.
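    // Hence C - (xor (setcc), 1) == C - (1 - setcc) == (setcc) + (C - 1).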
12411 NewLHS
= N1
.getOperand(0);
12415 SDValue NewRHS
= DAG
.getConstant(ImmValMinus1
, DL
, VT
);
12416 return DAG
.getNode(ISD::ADD
, DL
, VT
, NewLHS
, NewRHS
);
12419 static SDValue
performSUBCombine(SDNode
*N
, SelectionDAG
&DAG
,
12420 const RISCVSubtarget
&Subtarget
) {
12421 if (SDValue V
= combineSubOfBoolean(N
, DAG
))
12424 SDValue N0
= N
->getOperand(0);
12425 SDValue N1
= N
->getOperand(1);
12426 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
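  // (setcc x, 0, setlt) is 1 exactly when x is negative, so negating it yields
  // all-ones for negative x and zero otherwise, which is (sra x, xlen - 1).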
12427 if (isNullConstant(N0
) && N1
.getOpcode() == ISD::SETCC
&& N1
.hasOneUse() &&
12428 isNullConstant(N1
.getOperand(1))) {
12429 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(N1
.getOperand(2))->get();
12430 if (CCVal
== ISD::SETLT
) {
12431 EVT VT
= N
->getValueType(0);
12433 unsigned ShAmt
= N0
.getValueSizeInBits() - 1;
12434 return DAG
.getNode(ISD::SRA
, DL
, VT
, N1
.getOperand(0),
12435 DAG
.getConstant(ShAmt
, DL
, VT
));
12439 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
12440 // (select lhs, rhs, cc, x, (sub x, y))
12441 return combineSelectAndUse(N
, N1
, N0
, DAG
, /*AllOnes*/ false, Subtarget
);
12444 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
12445 // Legalizing setcc can introduce xors like this. Doing this transform reduces
12446 // the number of xors and may allow the xor to fold into a branch condition.
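// For 0/1 values X and Y this is just DeMorgan's law, e.g.
// (and (xor X, 1), (xor Y, 1)) == (xor (or X, Y), 1).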
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool IsAnd = N->getOpcode() == ISD::AND;

  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue N01 = N0.getOperand(1);
  SDValue N11 = N1.getOperand(1);

  // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
  // (xor X, -1) based on the upper bits of the other operand being 0. If the
  // operation is And, allow one of the Xors to use -1.
  if (isOneConstant(N01)) {
    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
      return SDValue();
  } else if (isOneConstant(N11)) {
    // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
    if (!(IsAnd && isAllOnesConstant(N01)))
      return SDValue();
  } else
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);

  // The LHS of the xors needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
    return SDValue();

  // Invert the opcode and insert a new xor.
  SDLoc DL(N);
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}
12491 static SDValue
performTRUNCATECombine(SDNode
*N
, SelectionDAG
&DAG
,
12492 const RISCVSubtarget
&Subtarget
) {
12493 SDValue N0
= N
->getOperand(0);
12494 EVT VT
= N
->getValueType(0);
12496 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
12497 // extending X. This is safe since we only need the LSB after the shift and
12498 // shift amounts larger than 31 would produce poison. If we wait until
12499 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12500 // to use a BEXT instruction.
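// Keeping only the LSB of (srl X, Y) is a single-bit extract of bit Y of X,
// which Zbs provides directly as bext once the whole computation is in i64.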
12501 if (!RV64LegalI32
&& Subtarget
.is64Bit() && Subtarget
.hasStdExtZbs() && VT
== MVT::i1
&&
12502 N0
.getValueType() == MVT::i32
&& N0
.getOpcode() == ISD::SRL
&&
12503 !isa
<ConstantSDNode
>(N0
.getOperand(1)) && N0
.hasOneUse()) {
12505 SDValue Op0
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N0
.getOperand(0));
12506 SDValue Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N0
.getOperand(1));
12507 SDValue Srl
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, Op0
, Op1
);
12508 return DAG
.getNode(ISD::TRUNCATE
, SDLoc(N
), VT
, Srl
);
12514 // Combines two comparison operations and a logic operation into one
12515 // selection operation (min, max) and logic operation. Returns the newly
12516 // constructed node if the conditions for the optimization are satisfied.
12517 static SDValue
performANDCombine(SDNode
*N
,
12518 TargetLowering::DAGCombinerInfo
&DCI
,
12519 const RISCVSubtarget
&Subtarget
) {
12520 SelectionDAG
&DAG
= DCI
.DAG
;
12522 SDValue N0
= N
->getOperand(0);
12523 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
12524 // extending X. This is safe since we only need the LSB after the shift and
12525 // shift amounts larger than 31 would produce poison. If we wait until
12526 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12527 // to use a BEXT instruction.
12528 if (!RV64LegalI32
&& Subtarget
.is64Bit() && Subtarget
.hasStdExtZbs() &&
12529 N
->getValueType(0) == MVT::i32
&& isOneConstant(N
->getOperand(1)) &&
12530 N0
.getOpcode() == ISD::SRL
&& !isa
<ConstantSDNode
>(N0
.getOperand(1)) &&
12533 SDValue Op0
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N0
.getOperand(0));
12534 SDValue Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N0
.getOperand(1));
12535 SDValue Srl
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, Op0
, Op1
);
12536 SDValue And
= DAG
.getNode(ISD::AND
, DL
, MVT::i64
, Srl
,
12537 DAG
.getConstant(1, DL
, MVT::i64
));
12538 return DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, And
);
12541 if (SDValue V
= combineBinOpToReduce(N
, DAG
, Subtarget
))
12543 if (SDValue V
= combineBinOpOfExtractToReduceTree(N
, DAG
, Subtarget
))
12546 if (DCI
.isAfterLegalizeDAG())
12547 if (SDValue V
= combineDeMorganOfBoolean(N
, DAG
))
12550 // fold (and (select lhs, rhs, cc, -1, y), x) ->
12551 // (select lhs, rhs, cc, x, (and x, y))
12552 return combineSelectAndUseCommutative(N
, DAG
, /*AllOnes*/ true, Subtarget
);
12555 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
12556 // FIXME: Generalize to other binary operators with same operand.
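// e.g. (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c)) selects
// between a^1 and b^1, so the xor can be hoisted past the or:
// (xor (or (czero_eqz a, c), (czero_nez b, c)), 1).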
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
                                SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::OR && "Unexpected opcode");

  if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
      !N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Should have the same condition.
  SDValue Cond = N0.getOperand(1);
  if (Cond != N1.getOperand(1))
    return SDValue();

  SDValue TrueV = N0.getOperand(0);
  SDValue FalseV = N1.getOperand(0);

  if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
      TrueV.getOperand(1) != FalseV.getOperand(1) ||
      !isOneConstant(TrueV.getOperand(1)) ||
      !TrueV.hasOneUse() || !FalseV.hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
                              Cond);
  SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
                              Cond);
  SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
  return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}
12591 static SDValue
performORCombine(SDNode
*N
, TargetLowering::DAGCombinerInfo
&DCI
,
12592 const RISCVSubtarget
&Subtarget
) {
12593 SelectionDAG
&DAG
= DCI
.DAG
;
12595 if (SDValue V
= combineBinOpToReduce(N
, DAG
, Subtarget
))
12597 if (SDValue V
= combineBinOpOfExtractToReduceTree(N
, DAG
, Subtarget
))
12600 if (DCI
.isAfterLegalizeDAG())
12601 if (SDValue V
= combineDeMorganOfBoolean(N
, DAG
))
12604 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
12605 // We may be able to pull a common operation out of the true and false value.
12606 SDValue N0
= N
->getOperand(0);
12607 SDValue N1
= N
->getOperand(1);
12608 if (SDValue V
= combineOrOfCZERO(N
, N0
, N1
, DAG
))
12610 if (SDValue V
= combineOrOfCZERO(N
, N1
, N0
, DAG
))
12613 // fold (or (select cond, 0, y), x) ->
12614 // (select cond, x, (or x, y))
12615 return combineSelectAndUseCommutative(N
, DAG
, /*AllOnes*/ false, Subtarget
);
12618 static SDValue
performXORCombine(SDNode
*N
, SelectionDAG
&DAG
,
12619 const RISCVSubtarget
&Subtarget
) {
12620 SDValue N0
= N
->getOperand(0);
12621 SDValue N1
= N
->getOperand(1);
12623 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
12624 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
12625 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
12626 if (!RV64LegalI32
&& Subtarget
.is64Bit() && Subtarget
.hasStdExtZbs() &&
12627 N
->getValueType(0) == MVT::i32
&& isAllOnesConstant(N1
) &&
12628 N0
.getOpcode() == ISD::SHL
&& isAllOnesConstant(N0
.getOperand(0)) &&
12629 !isa
<ConstantSDNode
>(N0
.getOperand(1)) && N0
.hasOneUse()) {
12631 SDValue Op0
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N0
.getOperand(0));
12632 SDValue Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N0
.getOperand(1));
12633 SDValue Shl
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, Op0
, Op1
);
12634 SDValue And
= DAG
.getNOT(DL
, Shl
, MVT::i64
);
12635 return DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, And
);
12638 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
12639 // NOTE: Assumes ROL being legal means ROLW is legal.
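  // ~1 is the all-ones value with only bit 0 clear; rotating it left by x
  // clears exactly bit x, which equals ~(1 << x) on the low 32 bits.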
12640 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
12641 if (N0
.getOpcode() == RISCVISD::SLLW
&&
12642 isAllOnesConstant(N1
) && isOneConstant(N0
.getOperand(0)) &&
12643 TLI
.isOperationLegal(ISD::ROTL
, MVT::i64
)) {
12645 return DAG
.getNode(RISCVISD::ROLW
, DL
, MVT::i64
,
12646 DAG
.getConstant(~1, DL
, MVT::i64
), N0
.getOperand(1));
12649 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
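  // For integers, !(C < y) == (y <= C) == (y < C + 1), so the inverted compare
  // still fits a single setlt as long as C + 1 is still simm12.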
12650 if (N0
.getOpcode() == ISD::SETCC
&& isOneConstant(N1
) && N0
.hasOneUse()) {
12651 auto *ConstN00
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(0));
12652 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N0
.getOperand(2))->get();
12653 if (ConstN00
&& CC
== ISD::SETLT
) {
12654 EVT VT
= N0
.getValueType();
12656 const APInt
&Imm
= ConstN00
->getAPIntValue();
12657 if ((Imm
+ 1).isSignedIntN(12))
12658 return DAG
.getSetCC(DL
, VT
, N0
.getOperand(1),
12659 DAG
.getConstant(Imm
+ 1, DL
, VT
), CC
);
12663 if (SDValue V
= combineBinOpToReduce(N
, DAG
, Subtarget
))
12665 if (SDValue V
= combineBinOpOfExtractToReduceTree(N
, DAG
, Subtarget
))
12668 // fold (xor (select cond, 0, y), x) ->
12669 // (select cond, x, (xor x, y))
12670 return combineSelectAndUseCommutative(N
, DAG
, /*AllOnes*/ false, Subtarget
);
12673 static SDValue
performMULCombine(SDNode
*N
, SelectionDAG
&DAG
) {
12674 EVT VT
= N
->getValueType(0);
12675 if (!VT
.isVector())
12679 SDValue N0
= N
->getOperand(0);
12680 SDValue N1
= N
->getOperand(1);
12682 unsigned AddSubOpc
;
12684 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
12685 // (mul x, add (y, 1)) -> (add x, (mul x, y))
12686 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
12687 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
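  // These hold because (x + 1) * y == x*y + y and (1 - x) * y == y - x*y,
  // which map onto the vmadd/vnmsub accumulate forms.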
12688 auto IsAddSubWith1
= [&](SDValue V
) -> bool {
12689 AddSubOpc
= V
->getOpcode();
12690 if ((AddSubOpc
== ISD::ADD
|| AddSubOpc
== ISD::SUB
) && V
->hasOneUse()) {
12691 SDValue Opnd
= V
->getOperand(1);
12692 MulOper
= V
->getOperand(0);
12693 if (AddSubOpc
== ISD::SUB
)
12694 std::swap(Opnd
, MulOper
);
12695 if (isOneOrOneSplat(Opnd
))
12701 if (IsAddSubWith1(N0
)) {
12702 SDValue MulVal
= DAG
.getNode(ISD::MUL
, DL
, VT
, N1
, MulOper
);
12703 return DAG
.getNode(AddSubOpc
, DL
, VT
, N1
, MulVal
);
12706 if (IsAddSubWith1(N1
)) {
12707 SDValue MulVal
= DAG
.getNode(ISD::MUL
, DL
, VT
, N0
, MulOper
);
12708 return DAG
.getNode(AddSubOpc
, DL
, VT
, N0
, MulVal
);
12714 /// Because indexed load/store instructions zero-extend their indices, try to
12715 /// narrow the type of the index operand.
12716 static bool narrowIndex(SDValue
&N
, ISD::MemIndexType IndexType
, SelectionDAG
&DAG
) {
12717 if (isIndexTypeSigned(IndexType
))
12720 if (!N
->hasOneUse())
12723 EVT VT
= N
.getValueType();
12726 // In general, what we're doing here is seeing if we can sink a truncate to
12727 // a smaller element type into the expression tree building our index.
12728 // TODO: We can generalize this and handle a bunch more cases if useful.
12730 // Narrow a buildvector to the narrowest element type. This requires less
12731 // work and less register pressure at high LMUL, and creates smaller constants
12732 // which may be cheaper to materialize.
12733 if (ISD::isBuildVectorOfConstantSDNodes(N
.getNode())) {
12734 KnownBits Known
= DAG
.computeKnownBits(N
);
12735 unsigned ActiveBits
= std::max(8u, Known
.countMaxActiveBits());
12736 LLVMContext
&C
= *DAG
.getContext();
12737 EVT ResultVT
= EVT::getIntegerVT(C
, ActiveBits
).getRoundIntegerType(C
);
12738 if (ResultVT
.bitsLT(VT
.getVectorElementType())) {
12739 N
= DAG
.getNode(ISD::TRUNCATE
, DL
,
12740 VT
.changeVectorElementType(ResultVT
), N
);
12745 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
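  // e.g. (shl (zext x:i8 to i64), 2) needs at most 10 significant bits, so the
  // index can instead be computed as (shl (zext x to i16), 2).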
12746 if (N
.getOpcode() != ISD::SHL
)
12749 SDValue N0
= N
.getOperand(0);
12750 if (N0
.getOpcode() != ISD::ZERO_EXTEND
&&
12751 N0
.getOpcode() != RISCVISD::VZEXT_VL
)
12753 if (!N0
->hasOneUse())
12757 SDValue N1
= N
.getOperand(1);
12758 if (!ISD::isConstantSplatVector(N1
.getNode(), ShAmt
))
12761 SDValue Src
= N0
.getOperand(0);
12762 EVT SrcVT
= Src
.getValueType();
12763 unsigned SrcElen
= SrcVT
.getScalarSizeInBits();
12764 unsigned ShAmtV
= ShAmt
.getZExtValue();
12765 unsigned NewElen
= PowerOf2Ceil(SrcElen
+ ShAmtV
);
12766 NewElen
= std::max(NewElen
, 8U);
12768 // Skip if NewElen is not narrower than the original extended type.
12769 if (NewElen
>= N0
.getValueType().getScalarSizeInBits())
12772 EVT NewEltVT
= EVT::getIntegerVT(*DAG
.getContext(), NewElen
);
12773 EVT NewVT
= SrcVT
.changeVectorElementType(NewEltVT
);
12775 SDValue NewExt
= DAG
.getNode(N0
->getOpcode(), DL
, NewVT
, N0
->ops());
12776 SDValue NewShAmtVec
= DAG
.getConstant(ShAmtV
, DL
, NewVT
);
12777 N
= DAG
.getNode(ISD::SHL
, DL
, NewVT
, NewExt
, NewShAmtVec
);
12781 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
12782 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
12783 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
12784 // can become a sext.w instead of a shift pair.
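// e.g. (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000).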
12785 static SDValue
performSETCCCombine(SDNode
*N
, SelectionDAG
&DAG
,
12786 const RISCVSubtarget
&Subtarget
) {
12787 SDValue N0
= N
->getOperand(0);
12788 SDValue N1
= N
->getOperand(1);
12789 EVT VT
= N
->getValueType(0);
12790 EVT OpVT
= N0
.getValueType();
12792 if (OpVT
!= MVT::i64
|| !Subtarget
.is64Bit())
12795 // RHS needs to be a constant.
12796 auto *N1C
= dyn_cast
<ConstantSDNode
>(N1
);
12800 // LHS needs to be (and X, 0xffffffff).
12801 if (N0
.getOpcode() != ISD::AND
|| !N0
.hasOneUse() ||
12802 !isa
<ConstantSDNode
>(N0
.getOperand(1)) ||
12803 N0
.getConstantOperandVal(1) != UINT64_C(0xffffffff))
12806 // Looking for an equality compare.
12807 ISD::CondCode Cond
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
12808 if (!isIntEqualitySetCC(Cond
))
12811 // Don't do this if the sign bit is provably zero, it will be turned back into
// an AND.
12813 APInt SignMask
= APInt::getOneBitSet(64, 31);
12814 if (DAG
.MaskedValueIsZero(N0
.getOperand(0), SignMask
))
12817 const APInt
&C1
= N1C
->getAPIntValue();
12820 // If the constant is larger than 2^32 - 1 it is impossible for both sides
// to be equal.
12822 if (C1
.getActiveBits() > 32)
12823 return DAG
.getBoolConstant(Cond
== ISD::SETNE
, dl
, VT
, OpVT
);
12825 SDValue SExtOp
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, N
, OpVT
,
12826 N0
.getOperand(0), DAG
.getValueType(MVT::i32
));
12827 return DAG
.getSetCC(dl
, VT
, SExtOp
, DAG
.getConstant(C1
.trunc(32).sext(64),
12832 performSIGN_EXTEND_INREGCombine(SDNode
*N
, SelectionDAG
&DAG
,
12833 const RISCVSubtarget
&Subtarget
) {
12834 SDValue Src
= N
->getOperand(0);
12835 EVT VT
= N
->getValueType(0);
12837 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
12838 if (Src
.getOpcode() == RISCVISD::FMV_X_ANYEXTH
&&
12839 cast
<VTSDNode
>(N
->getOperand(1))->getVT().bitsGE(MVT::i16
))
12840 return DAG
.getNode(RISCVISD::FMV_X_SIGNEXTH
, SDLoc(N
), VT
,
12841 Src
.getOperand(0));
12847 // Forward declaration of the structure holding the necessary information to
12848 // apply a combine.
12849 struct CombineResult
;
12851 /// Helper class for folding sign/zero extensions.
12852 /// In particular, this class is used for the following combines:
12853 /// add_vl -> vwadd(u) | vwadd(u)_w
12854 /// sub_vl -> vwsub(u) | vwsub(u)_w
12855 /// mul_vl -> vwmul(u) | vwmul_su
12857 /// An object of this class represents an operand of the operation we want to
12859 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
12860 /// NodeExtensionHelper for `a` and one for `b`.
12862 /// This class abstracts away how the extension is materialized and
12863 /// how its Mask, VL, and number of users affect the combines.
12866 /// - VWADD_W is conceptually == add(op0, sext(op1))
12867 /// - VWADDU_W == add(op0, zext(op1))
12868 /// - VWSUB_W == sub(op0, sext(op1))
12869 /// - VWSUBU_W == sub(op0, zext(op1))
12871 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
12872 /// zext|sext(smaller_value).
12873 struct NodeExtensionHelper
{
12874 /// Records if this operand behaves like a zero-extended value.
12876 /// Records if this operand behaves like a sign-extended value.
12877 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
12878 /// instance, a splat constant (e.g., 3), would support being both sign and
12881 /// This boolean captures whether we care if this operand would still be
12882 /// around after the folding happens.
12883 bool EnforceOneUse
;
12884 /// Records if this operand's mask needs to match the mask of the operation
12885 /// that it will fold into.
12887 /// Value of the Mask for this operand.
12888 /// It may be SDValue().
12890 /// Value of the vector length operand.
12891 /// It may be SDValue().
12893 /// Original value that this NodeExtensionHelper represents.
12894 SDValue OrigOperand
;
12896 /// Get the value feeding the extension or the value itself.
12897 /// E.g., for zext(a), this would return a.
12898 SDValue
getSource() const {
12899 switch (OrigOperand
.getOpcode()) {
12900 case RISCVISD::VSEXT_VL
:
12901 case RISCVISD::VZEXT_VL
:
12902 return OrigOperand
.getOperand(0);
12904 return OrigOperand
;
12908 /// Check if this instance represents a splat.
12909 bool isSplat() const {
12910 return OrigOperand
.getOpcode() == RISCVISD::VMV_V_X_VL
;
12913 /// Get or create a value that can feed \p Root with the given extension \p
12914 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
12915 /// \see ::getSource().
12916 SDValue
getOrCreateExtendedOp(const SDNode
*Root
, SelectionDAG
&DAG
,
12917 std::optional
<bool> SExt
) const {
12918 if (!SExt
.has_value())
12919 return OrigOperand
;
12921 MVT NarrowVT
= getNarrowType(Root
);
12923 SDValue Source
= getSource();
12924 if (Source
.getValueType() == NarrowVT
)
12927 unsigned ExtOpc
= *SExt
? RISCVISD::VSEXT_VL
: RISCVISD::VZEXT_VL
;
12929 // If we need an extension, we should be changing the type.
12931 auto [Mask
, VL
] = getMaskAndVL(Root
);
12932 switch (OrigOperand
.getOpcode()) {
12933 case RISCVISD::VSEXT_VL
:
12934 case RISCVISD::VZEXT_VL
:
12935 return DAG
.getNode(ExtOpc
, DL
, NarrowVT
, Source
, Mask
, VL
);
12936 case RISCVISD::VMV_V_X_VL
:
12937 return DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, NarrowVT
,
12938 DAG
.getUNDEF(NarrowVT
), Source
.getOperand(1), VL
);
12940 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
12941 // and that operand should already have the right NarrowVT so no
12942 // extension should be required at this point.
12943 llvm_unreachable("Unsupported opcode");
12947 /// Helper function to get the narrow type for \p Root.
12948 /// The narrow type is the type of \p Root where we divided the size of each
12949 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
12950 /// \pre The size of the type of the elements of Root must be a multiple of 2
12951 /// and be at least 16.
12952 static MVT
getNarrowType(const SDNode
*Root
) {
12953 MVT VT
= Root
->getSimpleValueType(0);
12955 // Determine the narrow size.
12956 unsigned NarrowSize
= VT
.getScalarSizeInBits() / 2;
12957 assert(NarrowSize
>= 8 && "Trying to extend something we can't represent");
12958 MVT NarrowVT
= MVT::getVectorVT(MVT::getIntegerVT(NarrowSize
),
12959 VT
.getVectorElementCount());
12963 /// Return the opcode required to materialize the folding of the sign
12964 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
12965 /// both operands for \p Opcode.
12966 /// Put differently, get the opcode to materialize:
12967 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
12968 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
12969 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
12970 static unsigned getSameExtensionOpcode(unsigned Opcode
, bool IsSExt
) {
12972 case RISCVISD::ADD_VL
:
12973 case RISCVISD::VWADD_W_VL
:
12974 case RISCVISD::VWADDU_W_VL
:
12975 return IsSExt
? RISCVISD::VWADD_VL
: RISCVISD::VWADDU_VL
;
12976 case RISCVISD::MUL_VL
:
12977 return IsSExt
? RISCVISD::VWMUL_VL
: RISCVISD::VWMULU_VL
;
12978 case RISCVISD::SUB_VL
:
12979 case RISCVISD::VWSUB_W_VL
:
12980 case RISCVISD::VWSUBU_W_VL
:
12981 return IsSExt
? RISCVISD::VWSUB_VL
: RISCVISD::VWSUBU_VL
;
12983 llvm_unreachable("Unexpected opcode");
12987 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
12988 /// newOpcode(a, b).
12989 static unsigned getSUOpcode(unsigned Opcode
) {
12990 assert(Opcode
== RISCVISD::MUL_VL
&& "SU is only supported for MUL");
12991 return RISCVISD::VWMULSU_VL
;
12994 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
12995 /// newOpcode(a, b).
12996 static unsigned getWOpcode(unsigned Opcode
, bool IsSExt
) {
12998 case RISCVISD::ADD_VL
:
12999 return IsSExt
? RISCVISD::VWADD_W_VL
: RISCVISD::VWADDU_W_VL
;
13000 case RISCVISD::SUB_VL
:
13001 return IsSExt
? RISCVISD::VWSUB_W_VL
: RISCVISD::VWSUBU_W_VL
;
13003 llvm_unreachable("Unexpected opcode");
13007 using CombineToTry
= std::function
<std::optional
<CombineResult
>(
13008 SDNode
* /*Root*/, const NodeExtensionHelper
& /*LHS*/,
13009 const NodeExtensionHelper
& /*RHS*/)>;
13011 /// Check if this node needs to be fully folded or extended for all users.
13012 bool needToPromoteOtherUsers() const { return EnforceOneUse
; }
13014 /// Helper method to set the various fields of this struct based on the
13015 /// type of \p Root.
13016 void fillUpExtensionSupport(SDNode
*Root
, SelectionDAG
&DAG
) {
13017 SupportsZExt
= false;
13018 SupportsSExt
= false;
13019 EnforceOneUse
= true;
13021 switch (OrigOperand
.getOpcode()) {
13022 case RISCVISD::VZEXT_VL
:
13023 SupportsZExt
= true;
13024 Mask
= OrigOperand
.getOperand(1);
13025 VL
= OrigOperand
.getOperand(2);
13027 case RISCVISD::VSEXT_VL
:
13028 SupportsSExt
= true;
13029 Mask
= OrigOperand
.getOperand(1);
13030 VL
= OrigOperand
.getOperand(2);
13032 case RISCVISD::VMV_V_X_VL
: {
13033 // Historically, we didn't care about splat values not disappearing during
// combines.
13035 EnforceOneUse
= false;
13037 VL
= OrigOperand
.getOperand(2);
13039 // The operand is a splat of a scalar.
13041 // The passthru must be undef for tail agnostic.
13042 if (!OrigOperand
.getOperand(0).isUndef())
13045 // Get the scalar value.
13046 SDValue Op
= OrigOperand
.getOperand(1);
13048 // See if we have enough sign bits or zero bits in the scalar to use a
13049 // widening opcode by splatting to smaller element size.
13050 MVT VT
= Root
->getSimpleValueType(0);
13051 unsigned EltBits
= VT
.getScalarSizeInBits();
13052 unsigned ScalarBits
= Op
.getValueSizeInBits();
13053 // Make sure we're getting all element bits from the scalar register.
13054 // FIXME: Support implicit sign extension of vmv.v.x?
13055 if (ScalarBits
< EltBits
)
13058 unsigned NarrowSize
= VT
.getScalarSizeInBits() / 2;
13059 // If the narrow type cannot be expressed with a legal VMV,
13060 // this is not a valid candidate.
13061 if (NarrowSize
< 8)
13064 if (DAG
.ComputeMaxSignificantBits(Op
) <= NarrowSize
)
13065 SupportsSExt
= true;
13066 if (DAG
.MaskedValueIsZero(Op
,
13067 APInt::getBitsSetFrom(ScalarBits
, NarrowSize
)))
13068 SupportsZExt
= true;
13076 /// Check if \p Root supports any extension folding combines.
13077 static bool isSupportedRoot(const SDNode
*Root
) {
13078 switch (Root
->getOpcode()) {
13079 case RISCVISD::ADD_VL
:
13080 case RISCVISD::MUL_VL
:
13081 case RISCVISD::VWADD_W_VL
:
13082 case RISCVISD::VWADDU_W_VL
:
13083 case RISCVISD::SUB_VL
:
13084 case RISCVISD::VWSUB_W_VL
:
13085 case RISCVISD::VWSUBU_W_VL
:
13092 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
13093 NodeExtensionHelper(SDNode
*Root
, unsigned OperandIdx
, SelectionDAG
&DAG
) {
13094 assert(isSupportedRoot(Root
) && "Trying to build an helper with an "
13095 "unsupported root");
13096 assert(OperandIdx
< 2 && "Requesting something else than LHS or RHS");
13097 OrigOperand
= Root
->getOperand(OperandIdx
);
13099 unsigned Opc
= Root
->getOpcode();
13101 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
13102 // <ADD|SUB>(LHS, S|ZEXT(RHS))
13103 case RISCVISD::VWADD_W_VL
:
13104 case RISCVISD::VWADDU_W_VL
:
13105 case RISCVISD::VWSUB_W_VL
:
13106 case RISCVISD::VWSUBU_W_VL
:
13107 if (OperandIdx
== 1) {
13109 Opc
== RISCVISD::VWADDU_W_VL
|| Opc
== RISCVISD::VWSUBU_W_VL
;
13110 SupportsSExt
= !SupportsZExt
;
13111 std::tie(Mask
, VL
) = getMaskAndVL(Root
);
13113 // There's no existing extension here, so we don't have to worry about
13114 // making sure it gets removed.
13115 EnforceOneUse
= false;
13120 fillUpExtensionSupport(Root
, DAG
);
13125 /// Check if this operand is compatible with the given vector length \p VL.
13126 bool isVLCompatible(SDValue VL
) const {
13127 return this->VL
!= SDValue() && this->VL
== VL
;
13130 /// Check if this operand is compatible with the given \p Mask.
13131 bool isMaskCompatible(SDValue Mask
) const {
13132 return !CheckMask
|| (this->Mask
!= SDValue() && this->Mask
== Mask
);
13135 /// Helper function to get the Mask and VL from \p Root.
13136 static std::pair
<SDValue
, SDValue
> getMaskAndVL(const SDNode
*Root
) {
13137 assert(isSupportedRoot(Root
) && "Unexpected root");
13138 return std::make_pair(Root
->getOperand(3), Root
->getOperand(4));
13141 /// Check if the Mask and VL of this operand are compatible with \p Root.
13142 bool areVLAndMaskCompatible(const SDNode
*Root
) const {
13143 auto [Mask
, VL
] = getMaskAndVL(Root
);
13144 return isMaskCompatible(Mask
) && isVLCompatible(VL
);
13147 /// Helper function to check if \p N is commutative with respect to the
13148 /// foldings that are supported by this class.
13149 static bool isCommutative(const SDNode
*N
) {
13150 switch (N
->getOpcode()) {
13151 case RISCVISD::ADD_VL
:
13152 case RISCVISD::MUL_VL
:
13153 case RISCVISD::VWADD_W_VL
:
13154 case RISCVISD::VWADDU_W_VL
:
13156 case RISCVISD::SUB_VL
:
13157 case RISCVISD::VWSUB_W_VL
:
13158 case RISCVISD::VWSUBU_W_VL
:
13161 llvm_unreachable("Unexpected opcode");
13165 /// Get a list of combines to try for folding extensions in \p Root.
13166 /// Note that each returned CombineToTry function doesn't actually modify
13167 /// anything. Instead they produce an optional CombineResult that, if not
13168 /// None, needs to be materialized for the combine to be applied.
13169 /// \see CombineResult::materialize.
13170 /// If the related CombineToTry function returns std::nullopt, that means the
13171 /// combine didn't match.
13172 static SmallVector
<CombineToTry
> getSupportedFoldings(const SDNode
*Root
);
13175 /// Helper structure that holds all the necessary information to materialize a
13176 /// combine that does some extension folding.
13177 struct CombineResult
{
13178 /// Opcode to be generated when materializing the combine.
13179 unsigned TargetOpcode
;
13180 // No value means no extension is needed. If extension is needed, the value
13181 // indicates if it needs to be sign extended.
13182 std::optional
<bool> SExtLHS
;
13183 std::optional
<bool> SExtRHS
;
13184 /// Root of the combine.
13186 /// LHS of the TargetOpcode.
13187 NodeExtensionHelper LHS
;
13188 /// RHS of the TargetOpcode.
13189 NodeExtensionHelper RHS
;
13191 CombineResult(unsigned TargetOpcode
, SDNode
*Root
,
13192 const NodeExtensionHelper
&LHS
, std::optional
<bool> SExtLHS
,
13193 const NodeExtensionHelper
&RHS
, std::optional
<bool> SExtRHS
)
13194 : TargetOpcode(TargetOpcode
), SExtLHS(SExtLHS
), SExtRHS(SExtRHS
),
13195 Root(Root
), LHS(LHS
), RHS(RHS
) {}
13197 /// Return a value that uses TargetOpcode and that can be used to replace
/// Root.
13199 /// The actual replacement is *not* done in that method.
13200 SDValue
materialize(SelectionDAG
&DAG
) const {
13201 SDValue Mask
, VL
, Merge
;
13202 std::tie(Mask
, VL
) = NodeExtensionHelper::getMaskAndVL(Root
);
13203 Merge
= Root
->getOperand(2);
13204 return DAG
.getNode(TargetOpcode
, SDLoc(Root
), Root
->getValueType(0),
13205 LHS
.getOrCreateExtendedOp(Root
, DAG
, SExtLHS
),
13206 RHS
.getOrCreateExtendedOp(Root
, DAG
, SExtRHS
), Merge
,
13211 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
13212 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
13213 /// are zext) and LHS and RHS can be folded into Root.
13214 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
13216 /// \note If the pattern can match with both zext and sext, the returned
13217 /// CombineResult will feature the zext result.
13219 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13220 /// can be used to apply the pattern.
13221 static std::optional
<CombineResult
>
13222 canFoldToVWWithSameExtensionImpl(SDNode
*Root
, const NodeExtensionHelper
&LHS
,
13223 const NodeExtensionHelper
&RHS
, bool AllowSExt
,
13225 assert((AllowSExt
|| AllowZExt
) && "Forgot to set what you want?");
13226 if (!LHS
.areVLAndMaskCompatible(Root
) || !RHS
.areVLAndMaskCompatible(Root
))
13227 return std::nullopt
;
13228 if (AllowZExt
&& LHS
.SupportsZExt
&& RHS
.SupportsZExt
)
13229 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
13230 Root
->getOpcode(), /*IsSExt=*/false),
13231 Root
, LHS
, /*SExtLHS=*/false, RHS
,
13232 /*SExtRHS=*/false);
13233 if (AllowSExt
&& LHS
.SupportsSExt
&& RHS
.SupportsSExt
)
13234 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
13235 Root
->getOpcode(), /*IsSExt=*/true),
13236 Root
, LHS
, /*SExtLHS=*/true, RHS
,
13238 return std::nullopt
;
13241 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
13242 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
13243 /// are zext) and LHS and RHS can be folded into Root.
13245 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13246 /// can be used to apply the pattern.
13247 static std::optional
<CombineResult
>
13248 canFoldToVWWithSameExtension(SDNode
*Root
, const NodeExtensionHelper
&LHS
,
13249 const NodeExtensionHelper
&RHS
) {
13250 return canFoldToVWWithSameExtensionImpl(Root
, LHS
, RHS
, /*AllowSExt=*/true,
13251 /*AllowZExt=*/true);
13254 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
13256 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13257 /// can be used to apply the pattern.
13258 static std::optional
<CombineResult
>
13259 canFoldToVW_W(SDNode
*Root
, const NodeExtensionHelper
&LHS
,
13260 const NodeExtensionHelper
&RHS
) {
13261 if (!RHS
.areVLAndMaskCompatible(Root
))
13262 return std::nullopt
;
13264 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
13266 // Control this behavior behind an option (AllowSplatInVW_W) for testing
13268 if (RHS
.SupportsZExt
&& (!RHS
.isSplat() || AllowSplatInVW_W
))
13269 return CombineResult(
13270 NodeExtensionHelper::getWOpcode(Root
->getOpcode(), /*IsSExt=*/false),
13271 Root
, LHS
, /*SExtLHS=*/std::nullopt
, RHS
, /*SExtRHS=*/false);
13272 if (RHS
.SupportsSExt
&& (!RHS
.isSplat() || AllowSplatInVW_W
))
13273 return CombineResult(
13274 NodeExtensionHelper::getWOpcode(Root
->getOpcode(), /*IsSExt=*/true),
13275 Root
, LHS
, /*SExtLHS=*/std::nullopt
, RHS
, /*SExtRHS=*/true);
13276 return std::nullopt
;
13279 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
13281 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13282 /// can be used to apply the pattern.
13283 static std::optional
<CombineResult
>
13284 canFoldToVWWithSEXT(SDNode
*Root
, const NodeExtensionHelper
&LHS
,
13285 const NodeExtensionHelper
&RHS
) {
13286 return canFoldToVWWithSameExtensionImpl(Root
, LHS
, RHS
, /*AllowSExt=*/true,
13287 /*AllowZExt=*/false);
13290 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
13292 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13293 /// can be used to apply the pattern.
13294 static std::optional
<CombineResult
>
13295 canFoldToVWWithZEXT(SDNode
*Root
, const NodeExtensionHelper
&LHS
,
13296 const NodeExtensionHelper
&RHS
) {
13297 return canFoldToVWWithSameExtensionImpl(Root
, LHS
, RHS
, /*AllowSExt=*/false,
13298 /*AllowZExt=*/true);
13301 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
13303 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13304 /// can be used to apply the pattern.
13305 static std::optional
<CombineResult
>
13306 canFoldToVW_SU(SDNode
*Root
, const NodeExtensionHelper
&LHS
,
13307 const NodeExtensionHelper
&RHS
) {
13308 if (!LHS
.SupportsSExt
|| !RHS
.SupportsZExt
)
13309 return std::nullopt
;
13310 if (!LHS
.areVLAndMaskCompatible(Root
) || !RHS
.areVLAndMaskCompatible(Root
))
13311 return std::nullopt
;
13312 return CombineResult(NodeExtensionHelper::getSUOpcode(Root
->getOpcode()),
13313 Root
, LHS
, /*SExtLHS=*/true, RHS
, /*SExtRHS=*/false);
13316 SmallVector
<NodeExtensionHelper::CombineToTry
>
13317 NodeExtensionHelper::getSupportedFoldings(const SDNode
*Root
) {
13318 SmallVector
<CombineToTry
> Strategies
;
13319 switch (Root
->getOpcode()) {
13320 case RISCVISD::ADD_VL
:
13321 case RISCVISD::SUB_VL
:
13322 // add|sub -> vwadd(u)|vwsub(u)
13323 Strategies
.push_back(canFoldToVWWithSameExtension
);
13324 // add|sub -> vwadd(u)_w|vwsub(u)_w
13325 Strategies
.push_back(canFoldToVW_W
);
13327 case RISCVISD::MUL_VL
:
13329 Strategies
.push_back(canFoldToVWWithSameExtension
);
13331 Strategies
.push_back(canFoldToVW_SU
);
13333 case RISCVISD::VWADD_W_VL
:
13334 case RISCVISD::VWSUB_W_VL
:
13335 // vwadd_w|vwsub_w -> vwadd|vwsub
13336 Strategies
.push_back(canFoldToVWWithSEXT
);
13338 case RISCVISD::VWADDU_W_VL
:
13339 case RISCVISD::VWSUBU_W_VL
:
13340 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
13341 Strategies
.push_back(canFoldToVWWithZEXT
);
13344 llvm_unreachable("Unexpected opcode");
13348 } // End anonymous namespace.
13350 /// Combine a binary operation to its equivalent VW or VW_W form.
13351 /// The supported combines are:
13352 /// add_vl -> vwadd(u) | vwadd(u)_w
13353 /// sub_vl -> vwsub(u) | vwsub(u)_w
13354 /// mul_vl -> vwmul(u) | vwmul_su
13355 /// vwadd_w(u) -> vwadd(u)
13356 /// vwsub_w(u) -> vwsub(u)
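/// e.g. add_vl (vsext a), (vsext b) with matching mask and VL can be rewritten
/// as vwadd a, b operating on the narrower element type.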
13358 combineBinOp_VLToVWBinOp_VL(SDNode
*N
, TargetLowering::DAGCombinerInfo
&DCI
) {
13359 SelectionDAG
&DAG
= DCI
.DAG
;
13361 assert(NodeExtensionHelper::isSupportedRoot(N
) &&
13362 "Shouldn't have called this method");
13363 SmallVector
<SDNode
*> Worklist
;
13364 SmallSet
<SDNode
*, 8> Inserted
;
13365 Worklist
.push_back(N
);
13366 Inserted
.insert(N
);
13367 SmallVector
<CombineResult
> CombinesToApply
;
13369 while (!Worklist
.empty()) {
13370 SDNode
*Root
= Worklist
.pop_back_val();
13371 if (!NodeExtensionHelper::isSupportedRoot(Root
))
13374 NodeExtensionHelper
LHS(N
, 0, DAG
);
13375 NodeExtensionHelper
RHS(N
, 1, DAG
);
13376 auto AppendUsersIfNeeded
= [&Worklist
,
13377 &Inserted
](const NodeExtensionHelper
&Op
) {
13378 if (Op
.needToPromoteOtherUsers()) {
13379 for (SDNode
*TheUse
: Op
.OrigOperand
->uses()) {
13380 if (Inserted
.insert(TheUse
).second
)
13381 Worklist
.push_back(TheUse
);
13386 // Control the compile time by limiting the number of nodes we look at in
// the combine.
13388 if (Inserted
.size() > ExtensionMaxWebSize
)
13391 SmallVector
<NodeExtensionHelper::CombineToTry
> FoldingStrategies
=
13392 NodeExtensionHelper::getSupportedFoldings(N
);
13394 assert(!FoldingStrategies
.empty() && "Nothing to be folded");
13395 bool Matched
= false;
13396 for (int Attempt
= 0;
13397 (Attempt
!= 1 + NodeExtensionHelper::isCommutative(N
)) && !Matched
;
13400 for (NodeExtensionHelper::CombineToTry FoldingStrategy
:
13401 FoldingStrategies
) {
13402 std::optional
<CombineResult
> Res
= FoldingStrategy(N
, LHS
, RHS
);
13405 CombinesToApply
.push_back(*Res
);
13406 // All the inputs that are extended need to be folded, otherwise
13407 // we would be leaving the old input (since it may still be used),
13408 // and the new one.
13409 if (Res
->SExtLHS
.has_value())
13410 AppendUsersIfNeeded(LHS
);
13411 if (Res
->SExtRHS
.has_value())
13412 AppendUsersIfNeeded(RHS
);
13416 std::swap(LHS
, RHS
);
13418 // Right now we take an all-or-nothing approach.
13422 // Store the value for the replacement of the input node separately.
13423 SDValue InputRootReplacement
;
13424 // We do the RAUW after we materialize all the combines, because some replaced
13425 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
13426 // some of these nodes may appear in the NodeExtensionHelpers of some of the
13427 // yet-to-be-visited CombinesToApply roots.
13428 SmallVector
<std::pair
<SDValue
, SDValue
>> ValuesToReplace
;
13429 ValuesToReplace
.reserve(CombinesToApply
.size());
13430 for (CombineResult Res
: CombinesToApply
) {
13431 SDValue NewValue
= Res
.materialize(DAG
);
13432 if (!InputRootReplacement
) {
13433 assert(Res
.Root
== N
&&
13434 "First element is expected to be the current node");
13435 InputRootReplacement
= NewValue
;
13437 ValuesToReplace
.emplace_back(SDValue(Res
.Root
, 0), NewValue
);
13440 for (std::pair
<SDValue
, SDValue
> OldNewValues
: ValuesToReplace
) {
13441 DAG
.ReplaceAllUsesOfValueWith(OldNewValues
.first
, OldNewValues
.second
);
13442 DCI
.AddToWorklist(OldNewValues
.second
.getNode());
13444 return InputRootReplacement
;
13447 // Helper function for performMemPairCombine.
13448 // Try to combine the memory loads/stores LSNode1 and LSNode2
13449 // into a single memory pair operation.
13450 static SDValue
tryMemPairCombine(SelectionDAG
&DAG
, LSBaseSDNode
*LSNode1
,
13451 LSBaseSDNode
*LSNode2
, SDValue BasePtr
,
13453 SmallPtrSet
<const SDNode
*, 32> Visited
;
13454 SmallVector
<const SDNode
*, 8> Worklist
= {LSNode1
, LSNode2
};
13456 if (SDNode::hasPredecessorHelper(LSNode1
, Visited
, Worklist
) ||
13457 SDNode::hasPredecessorHelper(LSNode2
, Visited
, Worklist
))
13460 MachineFunction
&MF
= DAG
.getMachineFunction();
13461 const RISCVSubtarget
&Subtarget
= MF
.getSubtarget
<RISCVSubtarget
>();
13463 // The new operation has twice the width.
13464 MVT XLenVT
= Subtarget
.getXLenVT();
13465 EVT MemVT
= LSNode1
->getMemoryVT();
13466 EVT NewMemVT
= (MemVT
== MVT::i32
) ? MVT::i64
: MVT::i128
;
13467 MachineMemOperand
*MMO
= LSNode1
->getMemOperand();
13468 MachineMemOperand
*NewMMO
= MF
.getMachineMemOperand(
13469 MMO
, MMO
->getPointerInfo(), MemVT
== MVT::i32
? 8 : 16);
13471 if (LSNode1
->getOpcode() == ISD::LOAD
) {
13472 auto Ext
= cast
<LoadSDNode
>(LSNode1
)->getExtensionType();
13474 if (MemVT
== MVT::i32
)
13475 Opcode
= (Ext
== ISD::ZEXTLOAD
) ? RISCVISD::TH_LWUD
: RISCVISD::TH_LWD
;
13477 Opcode
= RISCVISD::TH_LDD
;
13479 SDValue Res
= DAG
.getMemIntrinsicNode(
13480 Opcode
, SDLoc(LSNode1
), DAG
.getVTList({XLenVT
, XLenVT
, MVT::Other
}),
13481 {LSNode1
->getChain(), BasePtr
,
13482 DAG
.getConstant(Imm
, SDLoc(LSNode1
), XLenVT
)},
13486 DAG
.getMergeValues({Res
.getValue(0), Res
.getValue(2)}, SDLoc(LSNode1
));
13488 DAG
.getMergeValues({Res
.getValue(1), Res
.getValue(2)}, SDLoc(LSNode2
));
13490 DAG
.ReplaceAllUsesWith(LSNode2
, Node2
.getNode());
13493 unsigned Opcode
= (MemVT
== MVT::i32
) ? RISCVISD::TH_SWD
: RISCVISD::TH_SDD
;
13495 SDValue Res
= DAG
.getMemIntrinsicNode(
13496 Opcode
, SDLoc(LSNode1
), DAG
.getVTList(MVT::Other
),
13497 {LSNode1
->getChain(), LSNode1
->getOperand(1), LSNode2
->getOperand(1),
13498 BasePtr
, DAG
.getConstant(Imm
, SDLoc(LSNode1
), XLenVT
)},
13501 DAG
.ReplaceAllUsesWith(LSNode2
, Res
.getNode());
13506 // Try to combine two adjacent loads/stores to a single pair instruction from
13507 // the XTHeadMemPair vendor extension.
13508 static SDValue
performMemPairCombine(SDNode
*N
,
13509 TargetLowering::DAGCombinerInfo
&DCI
) {
13510 SelectionDAG
&DAG
= DCI
.DAG
;
13511 MachineFunction
&MF
= DAG
.getMachineFunction();
13512 const RISCVSubtarget
&Subtarget
= MF
.getSubtarget
<RISCVSubtarget
>();
13514 // Target does not support load/store pair.
13515 if (!Subtarget
.hasVendorXTHeadMemPair())
13518 LSBaseSDNode
*LSNode1
= cast
<LSBaseSDNode
>(N
);
13519 EVT MemVT
= LSNode1
->getMemoryVT();
13520 unsigned OpNum
= LSNode1
->getOpcode() == ISD::LOAD
? 1 : 2;
13522 // No volatile, indexed or atomic loads/stores.
13523 if (!LSNode1
->isSimple() || LSNode1
->isIndexed())
13526 // Function to get a base + constant representation from a memory value.
13527 auto ExtractBaseAndOffset
= [](SDValue Ptr
) -> std::pair
<SDValue
, uint64_t> {
13528 if (Ptr
->getOpcode() == ISD::ADD
)
13529 if (auto *C1
= dyn_cast
<ConstantSDNode
>(Ptr
->getOperand(1)))
13530 return {Ptr
->getOperand(0), C1
->getZExtValue()};
13534 auto [Base1
, Offset1
] = ExtractBaseAndOffset(LSNode1
->getOperand(OpNum
));
13536 SDValue Chain
= N
->getOperand(0);
13537 for (SDNode::use_iterator UI
= Chain
->use_begin(), UE
= Chain
->use_end();
13539 SDUse
&Use
= UI
.getUse();
13540 if (Use
.getUser() != N
&& Use
.getResNo() == 0 &&
13541 Use
.getUser()->getOpcode() == N
->getOpcode()) {
13542 LSBaseSDNode
*LSNode2
= cast
<LSBaseSDNode
>(Use
.getUser());
13544 // No volatile, indexed or atomic loads/stores.
13545 if (!LSNode2
->isSimple() || LSNode2
->isIndexed())
13548 // Check if LSNode1 and LSNode2 have the same type and extension.
13549 if (LSNode1
->getOpcode() == ISD::LOAD
)
13550 if (cast
<LoadSDNode
>(LSNode2
)->getExtensionType() !=
13551 cast
<LoadSDNode
>(LSNode1
)->getExtensionType())
13554 if (LSNode1
->getMemoryVT() != LSNode2
->getMemoryVT())
13557 auto [Base2
, Offset2
] = ExtractBaseAndOffset(LSNode2
->getOperand(OpNum
));
13559 // Check if the base pointer is the same for both instructions.
13560 if (Base1
!= Base2
)
13563 // Check if the offsets match the XTHeadMemPair encoding constraints.
13564 bool Valid
= false;
13565 if (MemVT
== MVT::i32
) {
13566 // Check for adjacent i32 values and a 2-bit index.
13567 if ((Offset1
+ 4 == Offset2
) && isShiftedUInt
<2, 3>(Offset1
))
13569 } else if (MemVT
== MVT::i64
) {
13570 // Check for adjacent i64 values and a 2-bit index.
13571 if ((Offset1
+ 8 == Offset2
) && isShiftedUInt
<2, 4>(Offset1
))
13580 tryMemPairCombine(DAG
, LSNode1
, LSNode2
, Base1
, Offset1
))
13589 // (fp_to_int (froundeven X)) -> fcvt X, rne
13590 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
13591 // (fp_to_int (ffloor X)) -> fcvt X, rdn
13592 // (fp_to_int (fceil X)) -> fcvt X, rup
13593 // (fp_to_int (fround X)) -> fcvt X, rmm
13594 // (fp_to_int (frint X)) -> fcvt X
13595 static SDValue
performFP_TO_INTCombine(SDNode
*N
,
13596 TargetLowering::DAGCombinerInfo
&DCI
,
13597 const RISCVSubtarget
&Subtarget
) {
13598 SelectionDAG
&DAG
= DCI
.DAG
;
13599 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
13600 MVT XLenVT
= Subtarget
.getXLenVT();
13602 SDValue Src
= N
->getOperand(0);
13604 // Don't do this for strict-fp Src.
13605 if (Src
->isStrictFPOpcode() || Src
->isTargetStrictFPOpcode())
13608 // Ensure the FP type is legal.
13609 if (!TLI
.isTypeLegal(Src
.getValueType()))
13612 // Don't do this for f16 with Zfhmin and not Zfh.
13613 if (Src
.getValueType() == MVT::f16
&& !Subtarget
.hasStdExtZfh())
13616 RISCVFPRndMode::RoundingMode FRM
= matchRoundingOp(Src
.getOpcode());
13617 // If the result is invalid, we didn't find a foldable instruction.
13618 if (FRM
== RISCVFPRndMode::Invalid
)
13622 bool IsSigned
= N
->getOpcode() == ISD::FP_TO_SINT
;
13623 EVT VT
= N
->getValueType(0);
13625 if (VT
.isVector() && TLI
.isTypeLegal(VT
)) {
13626 MVT SrcVT
= Src
.getSimpleValueType();
13627 MVT SrcContainerVT
= SrcVT
;
13628 MVT ContainerVT
= VT
.getSimpleVT();
13629 SDValue XVal
= Src
.getOperand(0);
13631 // For widening and narrowing conversions we just combine it into a
13632 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
13633 // end up getting lowered to their appropriate pseudo instructions based on
13634 // their operand types.
13635 if (VT
.getScalarSizeInBits() > SrcVT
.getScalarSizeInBits() * 2 ||
13636 VT
.getScalarSizeInBits() * 2 < SrcVT
.getScalarSizeInBits())
13639 // Make fixed-length vectors scalable first
13640 if (SrcVT
.isFixedLengthVector()) {
13641 SrcContainerVT
= getContainerForFixedLengthVector(DAG
, SrcVT
, Subtarget
);
13642 XVal
= convertToScalableVector(SrcContainerVT
, XVal
, DAG
, Subtarget
);
13644 getContainerForFixedLengthVector(DAG
, ContainerVT
, Subtarget
);
13648 getDefaultVLOps(SrcVT
, SrcContainerVT
, DL
, DAG
, Subtarget
);
13651 if (FRM
== RISCVFPRndMode::RTZ
) {
13652 // Use the dedicated trunc static rounding mode if we're truncating so we
13653 // don't need to generate calls to fsrmi/fsrm
13655 IsSigned
? RISCVISD::VFCVT_RTZ_X_F_VL
: RISCVISD::VFCVT_RTZ_XU_F_VL
;
13656 FpToInt
= DAG
.getNode(Opc
, DL
, ContainerVT
, XVal
, Mask
, VL
);
13657 } else if (FRM
== RISCVFPRndMode::DYN
) {
13659 IsSigned
? RISCVISD::VFCVT_X_F_VL
: RISCVISD::VFCVT_XU_F_VL
;
13660 FpToInt
= DAG
.getNode(Opc
, DL
, ContainerVT
, XVal
, Mask
, VL
);
13663 IsSigned
? RISCVISD::VFCVT_RM_X_F_VL
: RISCVISD::VFCVT_RM_XU_F_VL
;
13664 FpToInt
= DAG
.getNode(Opc
, DL
, ContainerVT
, XVal
, Mask
,
13665 DAG
.getTargetConstant(FRM
, DL
, XLenVT
), VL
);
13668 // If converted from fixed-length to scalable, convert back
13669 if (VT
.isFixedLengthVector())
13670 FpToInt
= convertFromScalableVector(VT
, FpToInt
, DAG
, Subtarget
);
13675 // Only handle XLen or i32 types. Other types narrower than XLen will
13676 // eventually be legalized to XLenVT.
13677 if (VT
!= MVT::i32
&& VT
!= XLenVT
)
13682 Opc
= IsSigned
? RISCVISD::FCVT_X
: RISCVISD::FCVT_XU
;
13684 Opc
= IsSigned
? RISCVISD::FCVT_W_RV64
: RISCVISD::FCVT_WU_RV64
;
13686 SDValue FpToInt
= DAG
.getNode(Opc
, DL
, XLenVT
, Src
.getOperand(0),
13687 DAG
.getTargetConstant(FRM
, DL
, XLenVT
));
13688 return DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, FpToInt
);
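
// For example, on RV64 with the D extension, (i64 (fp_to_sint (ffloor X:f64)))
// can become a single FCVT_X node with the rdn rounding mode, i.e. one
// "fcvt.l.d rd, rs, rdn", rather than rounding to an FP value first.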
// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
// (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
// (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
// (fp_to_int_sat (frint X))      -> (select X == nan, 0, (fcvt X, dyn))
static SDValue performFP_TO_INT_SATCombine(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  // Only handle XLen types. Other types narrower than XLen will eventually be
  // legalized to XLenVT.
  EVT DstVT = N->getValueType(0);
  if (DstVT != XLenVT)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is also legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;

  unsigned Opc;
  if (SatVT == DstVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  Src = Src.getOperand(0);

  SDLoc DL(N);
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
                                DAG.getTargetConstant(FRM, DL, XLenVT));

  // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
  // extend.
  if (Opc == RISCVISD::FCVT_WU_RV64)
    FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

  // RISC-V FP-to-int conversions saturate to the destination register size,
  // but don't produce 0 for nan.
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
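
// For example, (i64 (fp_to_sint_sat (ftrunc X:f64))) can be lowered to an
// FCVT_X with rtz plus a SETUO-based select that returns 0 when X is NaN,
// matching the saturating semantics described above.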
// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
// smaller than XLenVT.
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != ISD::BSWAP)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
      !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
}
// Convert from one FMA opcode to another based on whether we are negating the
// multiply result and/or the accumulator.
// NOTE: Only supports RVV operations with VL.
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
  // Negating the multiply result changes ADD<->SUB and toggles 'N'.
  if (NegMul) {
    // clang-format off
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
    case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
    case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
    }
    // clang-format on
  }

  // Negating the accumulator changes ADD<->SUB.
  if (NegAcc) {
    // clang-format off
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
    case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
    case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
    }
    // clang-format on
  }

  return Opcode;
}
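
// For example, negateFMAOpcode(RISCVISD::VFMADD_VL, /*NegMul=*/true,
// /*NegAcc=*/false) returns VFNMSUB_VL, and negating both the product and the
// accumulator maps VFMADD_VL to VFNMADD_VL.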
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
  // Fold FNEG_VL into FMA opcodes.
  // The first operand of strict-fp is chain.
  unsigned Offset = N->isTargetStrictFPOpcode();
  SDValue A = N->getOperand(0 + Offset);
  SDValue B = N->getOperand(1 + Offset);
  SDValue C = N->getOperand(2 + Offset);
  SDValue Mask = N->getOperand(3 + Offset);
  SDValue VL = N->getOperand(4 + Offset);

  auto invertIfNegative = [&Mask, &VL](SDValue &V) {
    if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
        V.getOperand(2) == VL) {
      // Return the negated input.
      V = V.getOperand(0);
      return true;
    }

    return false;
  };

  bool NegA = invertIfNegative(A);
  bool NegB = invertIfNegative(B);
  bool NegC = invertIfNegative(C);

  // If no operands are negated, we're done.
  if (!NegA && !NegB && !NegC)
    return SDValue();

  unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
  if (N->isTargetStrictFPOpcode())
    return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
                       {N->getOperand(0), A, B, C, Mask, VL});
  return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
                     VL);
}
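
// For example, (vfmadd_vl (fneg_vl A, Mask, VL), B, C, Mask, VL) becomes
// (vfnmsub_vl A, B, C, Mask, VL), so the explicit negation disappears.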
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
    return V;

  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16()))
    return SDValue();

  // FIXME: Ignore strict opcodes for now.
  if (N->isTargetStrictFPOpcode())
    return SDValue();

  // Try to form widening FMA.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
      Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
    return SDValue();

  // TODO: Refactor to handle more complex cases similar to
  // combineBinOp_VLToVWBinOp_VL.
  if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
      (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
    return SDValue();

  // Check the mask and VL are the same.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
      Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
    return SDValue();

  unsigned NewOpc;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCVISD::VFMADD_VL:
    NewOpc = RISCVISD::VFWMADD_VL;
    break;
  case RISCVISD::VFNMSUB_VL:
    NewOpc = RISCVISD::VFWNMSUB_VL;
    break;
  case RISCVISD::VFNMADD_VL:
    NewOpc = RISCVISD::VFWNMADD_VL;
    break;
  case RISCVISD::VFMSUB_VL:
    NewOpc = RISCVISD::VFWMSUB_VL;
    break;
  }

  Op0 = Op0.getOperand(0);
  Op1 = Op1.getOperand(0);

  return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
                     N->getOperand(2), Mask, VL);
}
static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16()))
    return SDValue();

  // FIXME: Ignore strict opcodes for now.
  assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");

  // Try to form widening multiply.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Merge = N->getOperand(2);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
      Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
    return SDValue();

  // TODO: Refactor to handle more complex cases similar to
  // combineBinOp_VLToVWBinOp_VL.
  if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
      (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
    return SDValue();

  // Check the mask and VL are the same.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
      Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
    return SDValue();

  Op0 = Op0.getOperand(0);
  Op1 = Op1.getOperand(0);

  return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
                     Op1, Merge, Mask, VL);
}
static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16()))
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Merge = N->getOperand(2);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;

  // Look for foldable FP_EXTENDS.
  bool Op0IsExtend =
      Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
      (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
  bool Op1IsExtend =
      (Op0 == Op1 && Op0IsExtend) ||
      (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());

  // Check the mask and VL.
  if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
    Op0IsExtend = false;
  if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
    Op1IsExtend = false;

  if (!Op1IsExtend) {
    // Sub requires at least operand 1 to be an extend.
    if (!IsAdd)
      return SDValue();

    // Add is commutable, if the other operand is foldable, swap them.
    if (!Op0IsExtend)
      return SDValue();

    std::swap(Op0, Op1);
    std::swap(Op0IsExtend, Op1IsExtend);
  }

  // Op1 is a foldable extend. Op0 might be foldable.
  Op1 = Op1.getOperand(0);
  if (Op0IsExtend)
    Op0 = Op0.getOperand(0);

  unsigned Opc;
  if (IsAdd)
    Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
  else
    Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;

  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
                     VL);
}
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");

  if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();
  uint64_t ShAmt = N->getConstantOperandVal(1);
  if (ShAmt > 32)
    return SDValue();

  SDValue N0 = N->getOperand(0);

  // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
  // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
  // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
  if (ShAmt < 32 &&
      N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
      cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
      N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
    if (LShAmt < 32) {
      SDLoc ShlDL(N0.getOperand(0));
      SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
                                N0.getOperand(0).getOperand(0),
                                DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
                         DAG.getConstant(ShAmt + 32, DL, MVT::i64));
    }
  }

  // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
  // FIXME: Should this be a generic combine? There's a similar combine on X86.
  //
  // Also try these folds where an add or sub is in the middle.
  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
  SDValue Shl;
  ConstantSDNode *AddC = nullptr;

  // We might have an ADD or SUB between the SRA and SHL.
  bool IsAdd = N0.getOpcode() == ISD::ADD;
  if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
    // Other operand needs to be a constant we can modify.
    AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
    if (!AddC)
      return SDValue();

    // AddC needs to have at least 32 trailing zeros.
    if (AddC->getAPIntValue().countr_zero() < 32)
      return SDValue();

    // All users should be a shift by constant less than or equal to 32. This
    // ensures we'll do this optimization for each of them to produce an
    // add/sub+sext_inreg they can all share.
    for (SDNode *U : N0->uses()) {
      if (U->getOpcode() != ISD::SRA ||
          !isa<ConstantSDNode>(U->getOperand(1)) ||
          U->getConstantOperandVal(1) > 32)
        return SDValue();
    }

    Shl = N0.getOperand(IsAdd ? 0 : 1);
  } else {
    // Not an ADD or SUB.
    Shl = N0;
  }

  // Look for a shift left by 32.
  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
      Shl.getConstantOperandVal(1) != 32)
    return SDValue();

  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  if (!AddC && !Shl.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  SDValue In = Shl.getOperand(0);

  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
  // constant.
  if (AddC) {
    SDValue ShiftedAddC =
        DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
    if (IsAdd)
      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
    else
      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
  }

  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                             DAG.getValueType(MVT::i32));
  if (ShAmt == 32)
    return SExt;

  return DAG.getNode(
      ISD::SHL, DL, MVT::i64, SExt,
      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}
// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
// br_cc/select_cc.
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // Invert (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}
// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  if (isNullConstant(RHS)) {
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}
);
14279 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
14280 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
14281 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
14282 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
14283 static SDValue
tryFoldSelectIntoOp(SDNode
*N
, SelectionDAG
&DAG
,
14284 SDValue TrueVal
, SDValue FalseVal
,
14286 bool Commutative
= true;
14287 unsigned Opc
= TrueVal
.getOpcode();
14295 Commutative
= false;
14303 if (!TrueVal
.hasOneUse() || isa
<ConstantSDNode
>(FalseVal
))
14307 if (FalseVal
== TrueVal
.getOperand(0))
14309 else if (Commutative
&& FalseVal
== TrueVal
.getOperand(1))
14314 EVT VT
= N
->getValueType(0);
14316 SDValue OtherOp
= TrueVal
.getOperand(1 - OpToFold
);
14317 EVT OtherOpVT
= OtherOp
->getValueType(0);
14318 SDValue IdentityOperand
=
14319 DAG
.getNeutralElement(Opc
, DL
, OtherOpVT
, N
->getFlags());
14321 IdentityOperand
= DAG
.getConstant(0, DL
, OtherOpVT
);
14322 assert(IdentityOperand
&& "No identity operand!");
14325 std::swap(OtherOp
, IdentityOperand
);
14327 DAG
.getSelect(DL
, OtherOpVT
, N
->getOperand(0), OtherOp
, IdentityOperand
);
14328 return DAG
.getNode(TrueVal
.getOpcode(), DL
, VT
, FalseVal
, NewSel
);
// This tries to get rid of `select` and `icmp` that are being used to handle
// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);

  // This represents either CTTZ or CTLZ instruction.
  SDValue CountZeroes;

  SDValue ValOnZero;

  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();

  if (!isNullConstant(Cond->getOperand(1)))
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
  if (CCVal == ISD::CondCode::SETEQ) {
    CountZeroes = N->getOperand(2);
    ValOnZero = N->getOperand(1);
  } else if (CCVal == ISD::CondCode::SETNE) {
    CountZeroes = N->getOperand(1);
    ValOnZero = N->getOperand(2);
  } else {
    return SDValue();
  }

  if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
      CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
    CountZeroes = CountZeroes.getOperand(0);

  if (CountZeroes.getOpcode() != ISD::CTTZ &&
      CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
      CountZeroes.getOpcode() != ISD::CTLZ &&
      CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
    return SDValue();

  if (!isNullConstant(ValOnZero))
    return SDValue();

  SDValue CountZeroesArgument = CountZeroes->getOperand(0);
  if (Cond->getOperand(0) != CountZeroesArgument)
    return SDValue();

  if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  }

  unsigned BitWidth = CountZeroes.getValueSizeInBits();
  SDValue BitWidthMinusOne =
      DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());

  auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
                             CountZeroes, BitWidthMinusOne);
  return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}
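
// For example, (select (seteq X, 0), 0, (cttz X)) for an i32 X becomes
// (and (cttz X), 31): ISD::CTTZ returns the bit width for a zero input and
// the mask folds that back to 0.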
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return SDValue();

  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
  // BEXTI, where C is power of 2.
  if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
      (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
      uint64_t MaskVal = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal))
        return DAG.getSelect(DL, VT,
                             DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
                             False, True);
    }
  }
  return SDValue();
}
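
// For example, with Zbs and Zicond, (select (seteq (and X, 4096), 0), T, F)
// becomes (select (setne (and X, 4096), 0), F, T), letting the single-bit
// test select as bexti.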
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
    return Folded;

  if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
    return V;

  if (Subtarget.hasShortForwardBranchOpt())
    return SDValue();

  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
    return V;
  return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget,
                                          const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  assert(!VT.isScalableVector() && "unexpected build vector");

  if (VT.getVectorNumElements() == 1)
    return SDValue();

  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
  if (!TLI.isBinOp(Opcode))
    return SDValue();

  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
    return SDValue();

  SmallVector<SDValue> LHSOps;
  SmallVector<SDValue> RHSOps;
  for (SDValue Op : N->ops()) {
    if (Op.isUndef()) {
      // We can't form a divide or remainder from undef.
      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
        return SDValue();

      LHSOps.push_back(Op);
      RHSOps.push_back(Op);
      continue;
    }

    // TODO: We can handle operations which have a neutral rhs value
    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
    // of profit in a more explicit manner.
    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
      return SDValue();

    LHSOps.push_back(Op.getOperand(0));
    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
        !isa<ConstantFPSDNode>(Op.getOperand(1)))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
      return SDValue();
    RHSOps.push_back(Op.getOperand(1));
  }

  return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
                     DAG.getBuildVector(VT, DL, RHSOps));
}
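
// For example, build_vector (add X0, 1), (add X1, 2), (add X2, 3), (add X3, 4)
// becomes (add (build_vector X0, X1, X2, X3), (build_vector 1, 2, 3, 4)), so a
// single vector add and one constant vector replace four scalar binops.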
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                               const RISCVSubtarget &Subtarget,
                                               const RISCVTargetLowering &TLI) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  if (VT.isScalableVector())
    return SDValue();

  if (!InVec.hasOneUse())
    return SDValue();

  // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
  // move the insert_vector_elts into the arms of the binop. Note that
  // the new RHS must be a constant.
  const unsigned InVecOpcode = InVec->getOpcode();
  if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
      InVal.hasOneUse()) {
    SDValue InVecLHS = InVec->getOperand(0);
    SDValue InVecRHS = InVec->getOperand(1);
    SDValue InValLHS = InVal->getOperand(0);
    SDValue InValRHS = InVal->getOperand(1);

    if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
      return SDValue();
    if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
      return SDValue();
    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                              InVecLHS, InValLHS, EltNo);
    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                              InVecRHS, InValRHS, EltNo);
    return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
  }

  // Given insert_vector_elt (concat_vectors ...), InVal, Elt
  // move the insert_vector_elt to the source operand of the concat_vector.
  if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  EVT ConcatVT = InVec.getOperand(0).getValueType();
  if (ConcatVT.getVectorElementType() != InVal.getValueType())
    return SDValue();
  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
  SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
                                   EltNo.getValueType());

  unsigned ConcatOpIdx = Elt / ConcatNumElts;
  SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
  ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
                         ConcatOp, InVal, NewIdx);

  SmallVector<SDValue> ConcatOps;
  ConcatOps.append(InVec->op_begin(), InVec->op_end());
  ConcatOps[ConcatOpIdx] = ConcatOp;
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
// vlse32 p, stride=n
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget,
                                            const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only perform this combine on legal MVTs.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // TODO: Potentially extend this to scalable vectors.
  if (VT.isScalableVector())
    return SDValue();

  auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
      !SDValue(BaseLd, 0).hasOneUse())
    return SDValue();

  EVT BaseLdVT = BaseLd->getValueType(0);

  // Go through the loads and check that they're strided.
  SmallVector<LoadSDNode *> Lds;
  Lds.push_back(BaseLd);
  Align Align = BaseLd->getAlign();
  for (SDValue Op : N->ops().drop_front()) {
    auto *Ld = dyn_cast<LoadSDNode>(Op);
    if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
        Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
        Ld->getValueType(0) != BaseLdVT)
      return SDValue();

    Lds.push_back(Ld);

    // The common alignment is the most restrictive (smallest) of all the loads.
    Align = std::min(Align, Ld->getAlign());
  }

  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
    // If the load ptrs can be decomposed into a common (Base + Index) with a
    // common constant stride, then return the constant stride.
    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
    if (BIO1.equalBaseIndex(BIO2, DAG))
      return {{BIO2.getOffset() - BIO1.getOffset(), false}};

    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride).
    SDValue P1 = Ld1->getBasePtr();
    SDValue P2 = Ld2->getBasePtr();
    if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
      return {{P2.getOperand(1), false}};
    if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
      return {{P1.getOperand(1), true}};

    return std::nullopt;
  };

  // Get the distance between the first and second loads.
  auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
  if (!BaseDiff)
    return SDValue();

  // Check all the loads are the same distance apart.
  for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
    if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
      return SDValue();

  // TODO: At this point, we've successfully matched a generalized gather
  // load. Maybe we should emit that, and then move the specialized
  // matchers above and below into a DAG combine?

  // Get the widened scalar type, e.g. v4i8 -> i64.
  unsigned WideScalarBitWidth =
      BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
  MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);

  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64.
  MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
  if (!TLI.isTypeLegal(WideVecVT))
    return SDValue();

  // Check that the operation is legal.
  if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
    return SDValue();

  auto [StrideVariant, MustNegateStride] = *BaseDiff;
  SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
                       ? std::get<SDValue>(StrideVariant)
                       : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
                                         Lds[0]->getOffset().getValueType());
  if (MustNegateStride)
    Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

  SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
  SDValue IntID =
      DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
                            Subtarget.getXLenVT());

  SDValue AllOneMask =
      DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                   DAG.getConstant(1, DL, MVT::i1));

  SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
                   BaseLd->getBasePtr(), Stride, AllOneMask};

  uint64_t MemSize;
  if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
      ConstStride && ConstStride->getSExtValue() >= 0)
    // total size = (elsize * n) + (stride - elsize) * (n-1)
    //            = elsize + stride * (n-1)
    MemSize = WideScalarVT.getSizeInBits() +
              ConstStride->getSExtValue() * (N->getNumOperands() - 1);
  else
    // If Stride isn't constant, then we can't know how much it will load.
    MemSize = MemoryLocation::UnknownSize;

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
      Align);

  SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
                                                Ops, WideVecVT, MMO);

  for (SDValue Ld : N->ops())
    DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

  return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
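
// For example, concat_vectors of four v4i8 loads at p, p+16, p+32 and p+48
// can become a single v4i32 strided load (vlse32) with stride 16, assuming
// v4i32 is legal and the stride/alignment checks above pass.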
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == RISCVISD::ADD_VL);
  SDValue Addend = N->getOperand(0);
  SDValue MulOp = N->getOperand(1);
  SDValue AddMergeOp = N->getOperand(2);

  if (!AddMergeOp.isUndef())
    return SDValue();

  auto IsVWMulOpc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VWMUL_VL:
    case RISCVISD::VWMULU_VL:
    case RISCVISD::VWMULSU_VL:
      return true;
    default:
      return false;
    }
  };

  if (!IsVWMulOpc(MulOp.getOpcode()))
    std::swap(Addend, MulOp);

  if (!IsVWMulOpc(MulOp.getOpcode()))
    return SDValue();

  SDValue MulMergeOp = MulOp.getOperand(2);

  if (!MulMergeOp.isUndef())
    return SDValue();

  SDValue AddMask = N->getOperand(3);
  SDValue AddVL = N->getOperand(4);
  SDValue MulMask = MulOp.getOperand(3);
  SDValue MulVL = MulOp.getOperand(4);

  if (AddMask != MulMask || AddVL != MulVL)
    return SDValue();

  unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
  static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
                "Unexpected opcode after VWMACC_VL");
  static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
                "Unexpected opcode after VWMACC_VL!");
  static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
                "Unexpected opcode after VWMUL_VL!");
  static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
                "Unexpected opcode after VWMUL_VL!");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
                   AddVL};
  return DAG.getNode(Opc, DL, VT, Ops);
}
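
// For example, (add_vl (vwmul_vl A, B, undef, Mask, VL), C, undef, Mask, VL)
// becomes (vwmacc_vl A, B, C, Mask, VL); the vwmulu/vwmulsu variants map to
// vwmaccu/vwmaccsu in the same way.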
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
                                           ISD::MemIndexType &IndexType,
                                           RISCVTargetLowering::DAGCombinerInfo &DCI) {
  if (!DCI.isBeforeLegalize())
    return false;

  SelectionDAG &DAG = DCI.DAG;
  MVT XLenVT =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();

  const EVT IndexVT = Index.getValueType();

  // RISC-V indexed loads only support the "unsigned unscaled" addressing
  // mode, so anything else must be manually legalized.
  if (!isIndexTypeSigned(IndexType))
    return false;

  if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
                        IndexVT.changeVectorElementType(XLenVT), Index);
  }
  IndexType = ISD::UNSIGNED_SCALED;
  return true;
}
/// Match the index vector of a scatter or gather node as the shuffle mask
/// which performs the rearrangement if possible. Will only match if
/// all lanes are touched, and thus replacing the scatter or gather with
/// a unit strided access and shuffle is legal.
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
                                SmallVector<int> &ShuffleMask) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned NumElems = VT.getVectorNumElements();

  // Create the shuffle mask and check all bits active.
  assert(ShuffleMask.empty());
  BitVector ActiveLanes(NumElems);
  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    uint64_t C = Index->getConstantOperandVal(i);
    if (C % ElementSize != 0)
      return false;
    C = C / ElementSize;
    if (C >= NumElems)
      return false;
    ShuffleMask.push_back(C);
    ActiveLanes.set(C);
  }
  return ActiveLanes.all();
}
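
// For example, a v4i32 gather with an all-ones mask and constant byte offsets
// <4, 0, 12, 8> touches every lane and corresponds to shuffle mask
// <1, 0, 3, 2>, so it can be replaced by a unit-strided load plus a shuffle.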
/// Match the index of a gather or scatter operation as an operation
/// with twice the element width and half the number of elements. This is
/// generally profitable (if legal) because these operations are linear
/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
/// come out ahead.
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
                                Align BaseAlign, const RISCVSubtarget &ST) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  // Attempt a doubling. If we can use an element type 4x or 8x in
  // size, this will happen via multiple iterations of the transform.
  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems % 2 != 0)
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned WiderElementSize = ElementSize * 2;
  if (WiderElementSize > ST.getELen()/8)
    return false;

  if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
    return false;

  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    // TODO: This offset check is too strict if we support fully
    // misaligned memory operations.
    uint64_t C = Index->getConstantOperandVal(i);
    if (i % 2 == 0) {
      if (C % WiderElementSize != 0)
        return false;
      continue;
    }
    uint64_t Last = Index->getConstantOperandVal(i-1);
    if (C != Last + ElementSize)
      return false;
  }
  return true;
}
RISCVTargetLowering::PerformDAGCombine(SDNode
*N
,
14869 DAGCombinerInfo
&DCI
) const {
14870 SelectionDAG
&DAG
= DCI
.DAG
;
14871 const MVT XLenVT
= Subtarget
.getXLenVT();
14874 // Helper to call SimplifyDemandedBits on an operand of N where only some low
14875 // bits are demanded. N will be added to the Worklist if it was not deleted.
14876 // Caller should return SDValue(N, 0) if this returns true.
14877 auto SimplifyDemandedLowBitsHelper
= [&](unsigned OpNo
, unsigned LowBits
) {
14878 SDValue Op
= N
->getOperand(OpNo
);
14879 APInt Mask
= APInt::getLowBitsSet(Op
.getValueSizeInBits(), LowBits
);
14880 if (!SimplifyDemandedBits(Op
, Mask
, DCI
))
14883 if (N
->getOpcode() != ISD::DELETED_NODE
)
14884 DCI
.AddToWorklist(N
);
14888 switch (N
->getOpcode()) {
14891 case RISCVISD::SplitF64
: {
14892 SDValue Op0
= N
->getOperand(0);
14893 // If the input to SplitF64 is just BuildPairF64 then the operation is
14894 // redundant. Instead, use BuildPairF64's operands directly.
14895 if (Op0
->getOpcode() == RISCVISD::BuildPairF64
)
14896 return DCI
.CombineTo(N
, Op0
.getOperand(0), Op0
.getOperand(1));
14898 if (Op0
->isUndef()) {
14899 SDValue Lo
= DAG
.getUNDEF(MVT::i32
);
14900 SDValue Hi
= DAG
.getUNDEF(MVT::i32
);
14901 return DCI
.CombineTo(N
, Lo
, Hi
);
14904 // It's cheaper to materialise two 32-bit integers than to load a double
14905 // from the constant pool and transfer it to integer registers through the
14907 if (ConstantFPSDNode
*C
= dyn_cast
<ConstantFPSDNode
>(Op0
)) {
14908 APInt V
= C
->getValueAPF().bitcastToAPInt();
14909 SDValue Lo
= DAG
.getConstant(V
.trunc(32), DL
, MVT::i32
);
14910 SDValue Hi
= DAG
.getConstant(V
.lshr(32).trunc(32), DL
, MVT::i32
);
14911 return DCI
.CombineTo(N
, Lo
, Hi
);
14914 // This is a target-specific version of a DAGCombine performed in
14915 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14916 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14917 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14918 if (!(Op0
.getOpcode() == ISD::FNEG
|| Op0
.getOpcode() == ISD::FABS
) ||
14919 !Op0
.getNode()->hasOneUse())
14921 SDValue NewSplitF64
=
14922 DAG
.getNode(RISCVISD::SplitF64
, DL
, DAG
.getVTList(MVT::i32
, MVT::i32
),
14923 Op0
.getOperand(0));
14924 SDValue Lo
= NewSplitF64
.getValue(0);
14925 SDValue Hi
= NewSplitF64
.getValue(1);
14926 APInt SignBit
= APInt::getSignMask(32);
14927 if (Op0
.getOpcode() == ISD::FNEG
) {
14928 SDValue NewHi
= DAG
.getNode(ISD::XOR
, DL
, MVT::i32
, Hi
,
14929 DAG
.getConstant(SignBit
, DL
, MVT::i32
));
14930 return DCI
.CombineTo(N
, Lo
, NewHi
);
14932 assert(Op0
.getOpcode() == ISD::FABS
);
14933 SDValue NewHi
= DAG
.getNode(ISD::AND
, DL
, MVT::i32
, Hi
,
14934 DAG
.getConstant(~SignBit
, DL
, MVT::i32
));
14935 return DCI
.CombineTo(N
, Lo
, NewHi
);
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::RORW:
  case RISCVISD::ROLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    if (SimplifyDemandedLowBitsHelper(0, 32) ||
        SimplifyDemandedLowBitsHelper(1, 5))
      return SDValue(N, 0);

    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read.
    if (SimplifyDemandedLowBitsHelper(0, 32))
      return SDValue(N, 0);
    break;
  }
  case RISCVISD::FMV_W_X_RV64: {
    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
    // conversion is unnecessary and can be replaced with the
    // FMV_X_ANYEXTW_RV64 operand.
    SDValue Op0 = N->getOperand(0);
    if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
      return Op0.getOperand(0);
    break;
  }
  case RISCVISD::FMV_X_ANYEXTH:
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    MVT VT = N->getSimpleValueType(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
    // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
    if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
         Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
        (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
         Op0->getOpcode() == RISCVISD::FMV_H_X)) {
      assert(Op0.getOperand(0).getValueType() == VT &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
    unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
    APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
                         DAG.getConstant(SignBit, DL, VT));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT, NewFMV,
                       DAG.getConstant(~SignBit, DL, VT));
  }
  case ISD::ADD:
    return performADDCombine(N, DAG, Subtarget);
  case ISD::SUB:
    return performSUBCombine(N, DAG, Subtarget);
  case ISD::AND:
    return performANDCombine(N, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::XOR:
    return performXORCombine(N, DAG, Subtarget);
  case ISD::MUL:
    return performMULCombine(N, DAG);
  case ISD::FADD:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::FMAXNUM:
  case ISD::FMINNUM: {
    if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
      return V;
    if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
      return V;
    return SDValue();
  }
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, Subtarget);
  case ISD::SIGN_EXTEND_INREG:
    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
  case ISD::ZERO_EXTEND:
    // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
    // type legalization. This is safe because fp_to_uint produces poison if
    // the value can't fit.
    if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
      SDValue Src = N->getOperand(0);
      if (Src.getOpcode() == ISD::FP_TO_UINT &&
          isTypeLegal(Src.getOperand(0).getValueType()))
        return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
                           Src.getOperand(0));
      if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
          isTypeLegal(Src.getOperand(1).getValueType())) {
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
                                  Src.getOperand(0), Src.getOperand(1));
        DCI.CombineTo(N, Res);
        DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(Src.getNode());
        return SDValue(N, 0); // Return N so it doesn't get rechecked.
      }
    }
    return SDValue();
  case RISCVISD::TRUNCATE_VECTOR_VL: {
    // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
    // This would benefit the cases where X and Y are both the same value
    // type of low precision vectors. Since the truncate would be lowered into
    // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
    // restriction, such pattern would be expanded into a series of "vsetvli"
    // and "vnsrl" instructions later to reach this point.
    auto IsTruncNode = [](SDValue V) {
      if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
        return false;
      SDValue VL = V.getOperand(2);
      auto *C = dyn_cast<ConstantSDNode>(VL);
      // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
      bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
                             (isa<RegisterSDNode>(VL) &&
                              cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
      return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
             IsVLMAXForVMSET;
    };

    SDValue Op = N->getOperand(0);

    // We need to first find the inner level of TRUNCATE_VECTOR_VL node
    // to distinguish such pattern.
    while (IsTruncNode(Op)) {
      if (!Op.hasOneUse())
        return SDValue();
      Op = Op.getOperand(0);
    }

    if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
      SDValue N0 = Op.getOperand(0);
      SDValue N1 = Op.getOperand(1);
      if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
          N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
        SDValue N00 = N0.getOperand(0);
        SDValue N10 = N1.getOperand(0);
        if (N00.getValueType().isVector() &&
            N00.getValueType() == N10.getValueType() &&
            N->getValueType(0) == N10.getValueType()) {
          unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
          SDValue SMin = DAG.getNode(
              ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
              DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
          return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
        }
      }
    }
    break;
  }
  case ISD::TRUNCATE:
    return performTRUNCATECombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return performSELECTCombine(N, DAG, Subtarget);
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
    // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
    if (N->getOperand(1).getOpcode() == ISD::XOR &&
        isOneConstant(N->getOperand(1).getOperand(1))) {
      SDValue Cond = N->getOperand(1).getOperand(0);
      APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
      if (DAG.MaskedValueIsZero(Cond, Mask)) {
        unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
                              ? RISCVISD::CZERO_NEZ
                              : RISCVISD::CZERO_EQZ;
        return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
                           N->getOperand(0), Cond);
      }
    }
    return SDValue();
  case RISCVISD::SELECT_CC: {
    // Transform
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    SDValue CC = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
    SDValue TrueV = N->getOperand(3);
    SDValue FalseV = N->getOperand(4);
    SDLoc DL(N);
    EVT VT = N->getValueType(0);

    // If the True and False values are the same, we don't need a select_cc.
    if (TrueV == FalseV)
      return TrueV;

    // (select (x < 0), y, z)  -> x >> (XLEN - 1) & (y - z) + z
    // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
    if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
        isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
        (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
      if (CCVal == ISD::CondCode::SETGE)
        std::swap(TrueV, FalseV);

      int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
      int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
      // Only handle simm12, if it is not in this range, it can be considered as
      // register.
      if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
          isInt<12>(TrueSImm - FalseSImm)) {
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, VT, LHS,
                        DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
        SDValue AND =
            DAG.getNode(ISD::AND, DL, VT, SRA,
                        DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
        return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
      }

      if (CCVal == ISD::CondCode::SETGE)
        std::swap(TrueV, FalseV);
    }

    if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
      return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
                         {LHS, RHS, CC, TrueV, FalseV});

    if (!Subtarget.hasShortForwardBranchOpt()) {
      // (select c, -1, y) -> -c | y
      if (isAllOnesConstant(TrueV)) {
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
      }
      // (select c, y, -1) -> -!c | y
      if (isAllOnesConstant(FalseV)) {
        SDValue C =
            DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
      }

      // (select c, 0, y) -> -!c & y
      if (isNullConstant(TrueV)) {
        SDValue C =
            DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
      }
      // (select c, y, 0) -> -c & y
      if (isNullConstant(FalseV)) {
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
      }
      // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
      // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
      if (((isOneConstant(FalseV) && LHS == TrueV &&
            CCVal == ISD::CondCode::SETNE) ||
           (isOneConstant(TrueV) && LHS == FalseV &&
            CCVal == ISD::CondCode::SETEQ)) &&
          isNullConstant(RHS)) {
        // freeze it to be safe.
        LHS = DAG.getFreeze(LHS);
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
        return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
      }
    }

    // If both true/false are an xor with 1, pull through the select.
    // This can occur after op legalization if both operands are setccs that
    // require an xor to invert.
    // FIXME: Generalize to other binary ops with identical operand?
    if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
        TrueV.getOperand(1) == FalseV.getOperand(1) &&
        isOneConstant(TrueV.getOperand(1)) &&
        TrueV.hasOneUse() && FalseV.hasOneUse()) {
      SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
                                   TrueV.getOperand(0), FalseV.getOperand(0));
      return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
    }

    return SDValue();
  }
: {
15227 SDValue LHS
= N
->getOperand(1);
15228 SDValue RHS
= N
->getOperand(2);
15229 SDValue CC
= N
->getOperand(3);
15232 if (combine_CC(LHS
, RHS
, CC
, DL
, DAG
, Subtarget
))
15233 return DAG
.getNode(RISCVISD::BR_CC
, DL
, N
->getValueType(0),
15234 N
->getOperand(0), LHS
, RHS
, CC
, N
->getOperand(4));
15238 case ISD::BITREVERSE
:
15239 return performBITREVERSECombine(N
, DAG
, Subtarget
);
15240 case ISD::FP_TO_SINT
:
15241 case ISD::FP_TO_UINT
:
15242 return performFP_TO_INTCombine(N
, DCI
, Subtarget
);
15243 case ISD::FP_TO_SINT_SAT
:
15244 case ISD::FP_TO_UINT_SAT
:
15245 return performFP_TO_INT_SATCombine(N
, DCI
, Subtarget
);
15246 case ISD::FCOPYSIGN
: {
15247 EVT VT
= N
->getValueType(0);
15248 if (!VT
.isVector())
15250 // There is a form of VFSGNJ which injects the negated sign of its second
15251 // operand. Try and bubble any FNEG up after the extend/round to produce
15252 // this optimized pattern. Avoid modifying cases where FP_ROUND and
15254 SDValue In2
= N
->getOperand(1);
15255 // Avoid cases where the extend/round has multiple uses, as duplicating
15256 // those is typically more expensive than removing a fneg.
15257 if (!In2
.hasOneUse())
15259 if (In2
.getOpcode() != ISD::FP_EXTEND
&&
15260 (In2
.getOpcode() != ISD::FP_ROUND
|| In2
.getConstantOperandVal(1) != 0))
15262 In2
= In2
.getOperand(0);
15263 if (In2
.getOpcode() != ISD::FNEG
)
15266 SDValue NewFPExtRound
= DAG
.getFPExtendOrRound(In2
.getOperand(0), DL
, VT
);
15267 return DAG
.getNode(ISD::FCOPYSIGN
, DL
, VT
, N
->getOperand(0),
15268 DAG
.getNode(ISD::FNEG
, DL
, VT
, NewFPExtRound
));
  case ISD::MGATHER: {
    const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
    const EVT VT = N->getValueType(0);
    SDValue Index = MGN->getIndex();
    SDValue ScaleOp = MGN->getScale();
    ISD::MemIndexType IndexType = MGN->getIndexType();
    assert(!MGN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getMaskedGather(
          N->getVTList(), MGN->getMemoryVT(), DL,
          {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
           MGN->getBasePtr(), Index, ScaleOp},
          MGN->getMemOperand(), IndexType, MGN->getExtensionType());

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getMaskedGather(
          N->getVTList(), MGN->getMemoryVT(), DL,
          {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
           MGN->getBasePtr(), Index, ScaleOp},
          MGN->getMemOperand(), IndexType, MGN->getExtensionType());

    if (Index.getOpcode() == ISD::BUILD_VECTOR &&
        MGN->getExtensionType() == ISD::NON_EXTLOAD) {
      if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
          SimpleVID && SimpleVID->StepDenominator == 1) {
        const int64_t StepNumerator = SimpleVID->StepNumerator;
        const int64_t Addend = SimpleVID->Addend;

        // Note: We don't need to check alignment here since (by assumption
        // from the existence of the gather), our offsets must be sufficiently
        // aligned.

        const EVT PtrVT = getPointerTy(DAG.getDataLayout());
        assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
        assert(IndexType == ISD::UNSIGNED_SCALED);
        SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
                                      DAG.getConstant(Addend, DL, PtrVT));

        SDVTList VTs = DAG.getVTList({VT, MVT::Other});
        SDValue IntID =
            DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
                                  XLenVT);
        SDValue Ops[] =
            {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
             DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
        return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
                                       Ops, VT, MGN->getMemOperand());
      }
    }

    SmallVector<int> ShuffleMask;
    if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
        matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
      SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
                                       MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
                                       MGN->getMask(), DAG.getUNDEF(VT),
                                       MGN->getMemoryVT(), MGN->getMemOperand(),
                                       ISD::UNINDEXED, ISD::NON_EXTLOAD);
      SDValue Shuffle =
          DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
      return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
    }

    if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
        matchIndexAsWiderOp(VT, Index, MGN->getMask(),
                            MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
      SmallVector<SDValue> NewIndices;
      for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
        NewIndices.push_back(Index.getOperand(i));
      EVT IndexVT = Index.getValueType()
                        .getHalfNumVectorElementsVT(*DAG.getContext());
      Index = DAG.getBuildVector(IndexVT, DL, NewIndices);

      unsigned ElementSize = VT.getScalarStoreSize();
      EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
      auto EltCnt = VT.getVectorElementCount();
      assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
      EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
                                    EltCnt.divideCoefficientBy(2));
      SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
      EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                    EltCnt.divideCoefficientBy(2));
      SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));

      SDValue Gather =
          DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
                              {MGN->getChain(), Passthru, Mask,
                               MGN->getBasePtr(), Index, ScaleOp},
                              MGN->getMemOperand(), IndexType,
                              ISD::NON_EXTLOAD);
      SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
      return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
    }
    break;
  }
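  // Masked scatters get the analogous treatment: legalize or narrow the index
  // vector where possible, and match an index that is just a permutation as a
  // shuffle followed by a contiguous masked store.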
  case ISD::MSCATTER: {
    const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
    SDValue Index = MSN->getIndex();
    SDValue ScaleOp = MSN->getScale();
    ISD::MemIndexType IndexType = MSN->getIndexType();
    assert(!MSN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getMaskedScatter(
          N->getVTList(), MSN->getMemoryVT(), DL,
          {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
           Index, ScaleOp},
          MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getMaskedScatter(
          N->getVTList(), MSN->getMemoryVT(), DL,
          {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
           Index, ScaleOp},
          MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());

    EVT VT = MSN->getValue()->getValueType(0);
    SmallVector<int> ShuffleMask;
    if (!MSN->isTruncatingStore() &&
        matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
      SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
                                             DAG.getUNDEF(VT), ShuffleMask);
      return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
                                DAG.getUNDEF(XLenVT), MSN->getMask(),
                                MSN->getMemoryVT(), MSN->getMemOperand(),
                                ISD::UNINDEXED, false);
    }
    break;
  }
  case ISD::VP_GATHER: {
    const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
    SDValue Index = VPGN->getIndex();
    SDValue ScaleOp = VPGN->getScale();
    ISD::MemIndexType IndexType = VPGN->getIndexType();
    assert(!VPGN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
                             {VPGN->getChain(), VPGN->getBasePtr(), Index,
                              ScaleOp, VPGN->getMask(),
                              VPGN->getVectorLength()},
                             VPGN->getMemOperand(), IndexType);

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
                             {VPGN->getChain(), VPGN->getBasePtr(), Index,
                              ScaleOp, VPGN->getMask(),
                              VPGN->getVectorLength()},
                             VPGN->getMemOperand(), IndexType);
    break;
  }
  case ISD::VP_SCATTER: {
    const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
    SDValue Index = VPSN->getIndex();
    SDValue ScaleOp = VPSN->getScale();
    ISD::MemIndexType IndexType = VPSN->getIndexType();
    assert(!VPSN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
                              {VPSN->getChain(), VPSN->getValue(),
                               VPSN->getBasePtr(), Index, ScaleOp,
                               VPSN->getMask(), VPSN->getVectorLength()},
                              VPSN->getMemOperand(), IndexType);

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
                              {VPSN->getChain(), VPSN->getValue(),
                               VPSN->getBasePtr(), Index, ScaleOp,
                               VPSN->getMask(), VPSN->getVectorLength()},
                              VPSN->getMemOperand(), IndexType);
    break;
  }
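  // Vector shifts only consume log2(SEW) bits of each shift-amount element, so
  // a 64-bit splat that was split for RV32 can be rebuilt from just its low
  // half.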
  case RISCVISD::SRA_VL:
  case RISCVISD::SRL_VL:
  case RISCVISD::SHL_VL: {
    SDValue ShAmt = N->getOperand(1);
    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
      SDValue VL = N->getOperand(4);
      EVT VT = N->getValueType(0);
      ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                          ShAmt.getOperand(1), VL);
      return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
                         N->getOperand(2), N->getOperand(3), N->getOperand(4));
    }
    break;
  }
  case ISD::SRA:
    if (SDValue V = performSRACombine(N, DAG, Subtarget))
      return V;
    [[fallthrough]];
  case ISD::SRL:
  case ISD::SHL: {
    SDValue ShAmt = N->getOperand(1);
    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
      EVT VT = N->getValueType(0);
      ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                          ShAmt.getOperand(1),
                          DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
      return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
    }
    break;
  }
  case RISCVISD::ADD_VL:
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
      return V;
    return combineToVWMACC(N, DAG, Subtarget);
  case RISCVISD::SUB_VL:
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUB_W_VL:
  case RISCVISD::VWSUBU_W_VL:
  case RISCVISD::MUL_VL:
    return combineBinOp_VLToVWBinOp_VL(N, DCI);
  case RISCVISD::VFMADD_VL:
  case RISCVISD::VFNMADD_VL:
  case RISCVISD::VFMSUB_VL:
  case RISCVISD::VFNMSUB_VL:
  case RISCVISD::STRICT_VFMADD_VL:
  case RISCVISD::STRICT_VFNMADD_VL:
  case RISCVISD::STRICT_VFMSUB_VL:
  case RISCVISD::STRICT_VFNMSUB_VL:
    return performVFMADD_VLCombine(N, DAG, Subtarget);
  case RISCVISD::FMUL_VL:
    return performVFMUL_VLCombine(N, DAG, Subtarget);
  case RISCVISD::FADD_VL:
  case RISCVISD::FSUB_VL:
    return performFADDSUB_VLCombine(N, DAG, Subtarget);
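  // The remaining memory-op combines try to pair loads/stores after
  // legalization and to scalarize small fixed-length vector stores: constant
  // vectors, whole-vector copies, and stores of a single extracted element.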
  case ISD::LOAD:
  case ISD::STORE: {
    if (DCI.isAfterLegalizeDAG())
      if (SDValue V = performMemPairCombine(N, DCI))
        return V;

    if (N->getOpcode() != ISD::STORE)
      break;

    auto *Store = cast<StoreSDNode>(N);
    SDValue Chain = Store->getChain();
    EVT MemVT = Store->getMemoryVT();
    SDValue Val = Store->getValue();

    bool IsScalarizable =
        MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
        Store->isSimple() &&
        MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
        isPowerOf2_64(MemVT.getSizeInBits()) &&
        MemVT.getSizeInBits() <= Subtarget.getXLen();

    // If sufficiently aligned we can scalarize stores of constant vectors of
    // any power-of-two size up to XLen bits, provided that they aren't too
    // expensive to materialize.
    //   vsetivli   zero, 2, e8, m1, ta, ma
    //   vse64.v    v8, (a0)
    if (DCI.isBeforeLegalize() && IsScalarizable &&
        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
      // Get the constant vector bits
      APInt NewC(Val.getValueSizeInBits(), 0);
      uint64_t EltSize = Val.getScalarValueSizeInBits();
      for (unsigned i = 0; i < Val.getNumOperands(); i++) {
        if (Val.getOperand(i).isUndef())
          continue;
        NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
                        i * EltSize);
      }
      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());

      if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
                                     true) <= 2 &&
          allowsMemoryAccessForAlignment(*DAG.getContext(),
                                         DAG.getDataLayout(), NewVT,
                                         *Store->getMemOperand())) {
        SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
        return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
                            Store->getPointerInfo(), Store->getOriginalAlign(),
                            Store->getMemOperand()->getFlags());
      }
    }

    // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
    //   vsetivli   zero, 2, e16, m1, ta, ma
    //   vle16.v    v8, (a0)
    //   vse16.v    v8, (a1)
    if (auto *L = dyn_cast<LoadSDNode>(Val);
        L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
        L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
        Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
        L->getMemoryVT() == MemVT) {
      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
      if (allowsMemoryAccessForAlignment(*DAG.getContext(),
                                         DAG.getDataLayout(), NewVT,
                                         *Store->getMemOperand()) &&
          allowsMemoryAccessForAlignment(*DAG.getContext(),
                                         DAG.getDataLayout(), NewVT,
                                         *L->getMemOperand())) {
        SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
                                   L->getPointerInfo(), L->getOriginalAlign(),
                                   L->getMemOperand()->getFlags());
        return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
                            Store->getPointerInfo(), Store->getOriginalAlign(),
                            Store->getMemOperand()->getFlags());
      }
    }

    // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
    // vfmv.f.s is represented as extract element from 0. Match it late to
    // avoid any illegal types.
    if (Val.getOpcode() == RISCVISD::VMV_X_S ||
        (DCI.isAfterLegalizeDAG() &&
         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
         isNullConstant(Val.getOperand(1)))) {
      SDValue Src = Val.getOperand(0);
      MVT VecVT = Src.getSimpleValueType();
      // VecVT should be scalable and memory VT should match the element type.
      if (!Store->isIndexed() && VecVT.isScalableVector() &&
          MemVT == VecVT.getVectorElementType()) {
        MVT MaskVT = getMaskTypeFor(VecVT);
        return DAG.getStoreVP(
            Store->getChain(), DL, Src, Store->getBasePtr(),
            Store->getOffset(), DAG.getConstant(1, DL, MaskVT),
            DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
            Store->getMemOperand(), Store->getAddressingMode(),
            Store->isTruncatingStore(), /*IsCompress*/ false);
      }
    }
    break;
  }
  case ISD::SPLAT_VECTOR: {
    EVT VT = N->getValueType(0);
    // Only perform this combine on legal MVT types.
    if (!isTypeLegal(VT))
      break;
    if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
                                         DAG, Subtarget))
      return Gather;
    break;
  }
  case ISD::BUILD_VECTOR:
    if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case ISD::CONCAT_VECTORS:
    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case ISD::INSERT_VECTOR_ELT:
    if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case RISCVISD::VFMV_V_F_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Scalar = N->getOperand(1);
    SDValue VL = N->getOperand(2);

    // If VL is 1, we can use vfmv.s.f.
    if (isOneConstant(VL))
      return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
    break;
  }
  case RISCVISD::VMV_V_X_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Scalar = N->getOperand(1);
    SDValue VL = N->getOperand(2);

    // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
    // scalar input.
    unsigned ScalarSize = Scalar.getValueSizeInBits();
    unsigned EltWidth = VT.getScalarSizeInBits();
    if (ScalarSize > EltWidth && Passthru.isUndef())
      if (SimplifyDemandedLowBitsHelper(1, EltWidth))
        return SDValue(N, 0);

    // If VL is 1 and the scalar value won't benefit from immediate, we can
    // use vmv.s.x.
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
    if (isOneConstant(VL) &&
        (!Const || Const->isZero() ||
         !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
    break;
  }
  case RISCVISD::VFMV_S_F_VL: {
    SDValue Src = N->getOperand(1);
    // Try to remove vector->scalar->vector if the scalar->vector is inserting
    // into an undef vector.
    // TODO: Could use a vslide or vmv.v.v for non-undef.
    if (N->getOperand(0).isUndef() &&
        Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        isNullConstant(Src.getOperand(1)) &&
        Src.getOperand(0).getValueType().isScalableVector()) {
      EVT VT = N->getValueType(0);
      EVT SrcVT = Src.getOperand(0).getValueType();
      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
      // Widths match, just return the original vector.
      if (SrcVT == VT)
        return Src.getOperand(0);
      // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
    }
    break;
  }
  case RISCVISD::VMV_S_X_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Scalar = N->getOperand(1);
    SDValue VL = N->getOperand(2);

    // Use M1 or smaller to avoid over constraining register allocation
    const MVT M1VT = getLMUL1VT(VT);
    if (M1VT.bitsLT(VT)) {
      SDValue M1Passthru =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
                      DAG.getVectorIdxConstant(0, DL));
      SDValue Result =
          DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
                           DAG.getConstant(0, DL, XLenVT));
      return Result;
    }

    // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
    // higher would involve overly constraining the register allocator for
    // no purpose.
    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
        Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
        VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
    break;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
    unsigned IntNo = N->getConstantOperandVal(IntOpNo);
    switch (IntNo) {
      // By default we do not combine any intrinsic.
    default:
      break;
    case Intrinsic::riscv_masked_strided_load: {
      MVT VT = N->getSimpleValueType(0);
      auto *Load = cast<MemIntrinsicSDNode>(N);
      SDValue PassThru = N->getOperand(2);
      SDValue Base = N->getOperand(3);
      SDValue Stride = N->getOperand(4);
      SDValue Mask = N->getOperand(5);

      // If the stride is equal to the element size in bytes, we can use
      // a masked load.
      const unsigned ElementSize = VT.getScalarStoreSize();
      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
          StrideC && StrideC->getZExtValue() == ElementSize)
        return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
                                 DAG.getUNDEF(XLenVT), Mask, PassThru,
                                 Load->getMemoryVT(), Load->getMemOperand(),
                                 ISD::UNINDEXED, ISD::NON_EXTLOAD);
      break;
    }
    case Intrinsic::riscv_masked_strided_store: {
      auto *Store = cast<MemIntrinsicSDNode>(N);
      SDValue Value = N->getOperand(2);
      SDValue Base = N->getOperand(3);
      SDValue Stride = N->getOperand(4);
      SDValue Mask = N->getOperand(5);

      // If the stride is equal to the element size in bytes, we can use
      // a masked store.
      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
          StrideC && StrideC->getZExtValue() == ElementSize)
        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
                                  DAG.getUNDEF(XLenVT), Mask,
                                  Store->getMemoryVT(), Store->getMemOperand(),
                                  ISD::UNINDEXED, false);
      break;
    }
    case Intrinsic::riscv_vcpop:
    case Intrinsic::riscv_vcpop_mask:
    case Intrinsic::riscv_vfirst:
    case Intrinsic::riscv_vfirst_mask: {
      SDValue VL = N->getOperand(2);
      if (IntNo == Intrinsic::riscv_vcpop_mask ||
          IntNo == Intrinsic::riscv_vfirst_mask)
        VL = N->getOperand(3);
      if (!isNullConstant(VL))
        break;
      // If VL is 0, vcpop -> li 0, vfirst -> li -1.
      EVT VT = N->getValueType(0);
      if (IntNo == Intrinsic::riscv_vfirst ||
          IntNo == Intrinsic::riscv_vfirst_mask)
        return DAG.getConstant(-1, DL, VT);
      return DAG.getConstant(0, DL, VT);
    }
    }
    break;
  }
  case ISD::BITCAST: {
    assert(Subtarget.useRVVForFixedLengthVectors());
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    EVT SrcVT = N0.getValueType();
    // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
    // type, widen both sides to avoid a trip through memory.
    if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
        VT.isScalarInteger()) {
      unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
      SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
      Ops[0] = N0;
      N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
      N0 = DAG.getBitcast(MVT::i8, N0);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
    }
    break;
  }
  }

  return SDValue();
}
bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
    EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
  if (XVT.isVector())
    return false;

  if (XVT != MVT::i32 && XVT != MVT::i64)
    return false;

  // We can use sext.w for RV64 or an srai 31 on RV32.
  if (KeptBits == 32 || KeptBits == 64)
    return true;

  // With Zbb we can use sext.h/sext.b.
  return Subtarget.hasStdExtZbb() &&
         ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
          KeptBits == 16);
}
bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) &&
         "Expected shift op");

  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2)  -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getSignificantBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getSignificantBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost =
          RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
                                     /*CompressionCost*/ true);
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
          /*CompressionCost*/ true);

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Opcode = Op.getOpcode();
  if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.
  APInt ExpandedMask = Mask | ~DemandedBits;

  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return false;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
    SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                                    Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // And has a few special cases for zext.
  if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff), if zext.h exists use zext.h,
    // otherwise use SLLI + SRLI.
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);

    // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
    if (VT == MVT::i64) {
      APInt NewMask = APInt(64, 0xffffffff);
      if (IsLegalMask(NewMask))
        return UseMask(NewMask);
    }
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getSignificantBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (!C->isOpaque() && MinSignedBits <= 32 &&
           !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
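// Emulates the generalized bit-reverse / or-combine network: each of the six
// butterfly stages swaps (and, for GORC, also ORs in) bit groups of width
// 1 << Stage whenever the corresponding bit of ShAmt is set.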
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1 << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = GREVMasks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
      if (IsGORC)
        Res |= x;
      x = Res;
    }
  }

  return x;
}
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.
    Known.One.clearAllBits();
    break;
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SLLW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // To compute zeros, we need to invert the value and invert it back after.
    Known.Zero =
        ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
    break;
  }
  case RISCVISD::READ_VLENB: {
    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
    const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    Known.Zero.setLowBits(Log2_32(MinVLenB));
    Known.Zero.setBitsFrom(Log2_32(MaxVLenB) + 1);
    if (MaxVLenB == MinVLenB)
      Known.One.setBit(Log2_32(MinVLenB));
    break;
  }
  case RISCVISD::FCLASS: {
    // fclass will only set one of the low 10 bits.
    Known.Zero.setBitsFrom(10);
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
      unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
      RISCVII::VLMUL VLMUL =
          static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
      unsigned SEW = RISCVVType::decodeVSEW(VSEW);
      auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
      uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
      MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;

      // Result of vsetvli must be not larger than AVL.
      if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
        MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));

      unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
      if (BitWidth > KnownZeroFirstBit)
        Known.Zero.setBitsFrom(KnownZeroFirstBit);
      break;
    }
    }
    break;
  }
  }
}
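// Sign-bit counting for target nodes. The *W opcodes and the masked atomic
// intrinsics below produce results that are sign-extended from bit 31, so a
// conservative count of 33 sign bits can be reported for them on RV64.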
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1;  // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // Output is either all zero or operand 0. We can propagate sign bit count
    // from operand 0.
    return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
  case RISCVISD::ABSW: {
    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp < 33) return 1;
    return 33;
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining
    // the element type of the input vector operand, subtracting its width from
    // the XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
    case Intrinsic::riscv_masked_atomicrmw_add_i64:
    case Intrinsic::riscv_masked_atomicrmw_sub_i64:
    case Intrinsic::riscv_masked_atomicrmw_nand_i64:
    case Intrinsic::riscv_masked_atomicrmw_max_i64:
    case Intrinsic::riscv_masked_atomicrmw_min_i64:
    case Intrinsic::riscv_masked_atomicrmw_umax_i64:
    case Intrinsic::riscv_masked_atomicrmw_umin_i64:
    case Intrinsic::riscv_masked_cmpxchg_i64:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.getXLen() == 64);
      assert(getMinCmpXchgSizeInBits() == 32);
      assert(Subtarget.hasStdExtA());
      return 33;
    }
    break;
  }
  }

  return 1;
}
const Constant *
RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
  assert(Ld && "Unexpected null LoadSDNode");
  if (!ISD::isNormalLoad(Ld))
    return nullptr;

  SDValue Ptr = Ld->getBasePtr();

  // Only constant pools with no offset are supported.
  auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
    if (!CNode || CNode->isMachineConstantPoolEntry() ||
        CNode->getOffset() != 0)
      return nullptr;
    return CNode;
  };

  // Simple case, LLA.
  if (Ptr.getOpcode() == RISCVISD::LLA) {
    auto *CNode = GetSupportedConstantPool(Ptr);
    if (!CNode || CNode->getTargetFlags() != 0)
      return nullptr;
    return CNode->getConstVal();
  }

  // Look for a HI and ADD_LO pair.
  if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
      Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
    return nullptr;

  auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
  auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));

  if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
      !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
    return nullptr;

  if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
    return nullptr;

  return CNodeLo->getConstVal();
}
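// Custom-inserter helpers follow. ReadCycleWide is expanded on RV32 into a
// loop that re-reads CYCLEH until two consecutive reads of the high half
// match, guaranteeing a consistent 64-bit cycle value.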
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  //   rdcycleh x3 # load high word of cycle
  //   rdcycle  x2 # load low word of cycle
  //   rdcycleh x4 # load high word of cycle
  //   bne x3, x4, read # check if high word reads match, otherwise try again

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB,
                                             const RISCVSubtarget &Subtarget) {
  assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
          MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();

  const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
                                         ? &RISCV::GPRPF64RegClass
                                         : &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI, Register());
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB,
                                                 const RISCVSubtarget &Subtarget) {
  assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
          MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();

  const TargetRegisterClass *DstRC =
      MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
                                                      : &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
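// Returns true for the Select_* pseudos that emitSelectPseudo below expands
// into branch-and-PHI control flow.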
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return true;
  }
}
static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned RelOpcode, unsigned EqOpcode,
                                        const RISCVSubtarget &Subtarget) {
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  // Save the current FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);

  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
                 .addReg(Src1Reg)
                 .addReg(Src2Reg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFlags, RegState::Kill);

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
static MachineBasicBlock *
EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
                          MachineBasicBlock *ThisMBB,
                          const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
  // Without this, custom-inserter would have generated:
  //
  //   A: X = ...; Y = ...
  //   C: Z = PHI [X, A], [Y, B]
  //   E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A: X = ...; Y = ...
  //   E: PHI [X, A], [X, C], [Y, D]

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = First.getDebugLoc();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(First)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstMBB);
  // Fallthrough block for FirstMBB.
  FirstMBB->addSuccessor(SecondMBB);
  ThisMBB->addSuccessor(SinkMBB);
  FirstMBB->addSuccessor(SinkMBB);
  // This is fallthrough.
  SecondMBB->addSuccessor(SinkMBB);

  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
  Register FLHS = First.getOperand(1).getReg();
  Register FRHS = First.getOperand(2).getReg();
  // Insert appropriate branch.
  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
      .addReg(FLHS)
      .addReg(FRHS)
      .addMBB(SinkMBB);

  Register SLHS = Second.getOperand(1).getReg();
  Register SRHS = Second.getOperand(2).getReg();
  Register Op1Reg4 = First.getOperand(4).getReg();
  Register Op1Reg5 = First.getOperand(5).getReg();

  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
  // Insert appropriate branch.
  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
      .addReg(SLHS)
      .addReg(SRHS)
      .addMBB(SinkMBB);

  Register DestReg = Second.getOperand(0).getReg();
  Register Op2Reg4 = Second.getOperand(4).getReg();
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
      .addReg(Op2Reg4)
      .addMBB(ThisMBB)
      .addReg(Op1Reg4)
      .addMBB(FirstMBB)
      .addReg(Op1Reg5)
      .addMBB(SecondMBB);

  // Now remove the Select_FPRX_s.
  First.eraseFromParent();
  Second.eraseFromParent();
  return SinkMBB;
}
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;
  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
  if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
      Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill()) {
    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
  }

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
                                                    MachineBasicBlock *BB,
                                                    unsigned CVTXOpc,
                                                    unsigned CVTFOpc) {
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the old value of FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);

  assert(MI.getNumOperands() == 7);

  // Emit a VFCVT_X_F
  const TargetRegisterInfo *TRI =
      BB->getParent()->getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
  Register Tmp = MRI.createVirtualRegister(RC);
  BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
      .add(MI.getOperand(1))
      .add(MI.getOperand(2))
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Emit a VFCVT_F_X
  BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .addReg(Tmp)
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Restore FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFLAGS, RegState::Kill);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
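// PseudoFROUND_* is expanded below by comparing |src| (fsgnjx) against the
// bound passed in operand 2; if it is smaller, the value is rounded via an
// FP->int->FP conversion pair using the requested rounding mode and the
// original sign is restored with fsgnj, otherwise the source value is passed
// through unchanged via the merging PHI.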
static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
                                     const RISCVSubtarget &Subtarget) {
  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
  const TargetRegisterClass *RC;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::PseudoFROUND_H:
    CmpOpc = RISCV::FLT_H;
    F2IOpc = RISCV::FCVT_W_H;
    I2FOpc = RISCV::FCVT_H_W;
    FSGNJOpc = RISCV::FSGNJ_H;
    FSGNJXOpc = RISCV::FSGNJX_H;
    RC = &RISCV::FPR16RegClass;
    break;
  case RISCV::PseudoFROUND_H_INX:
    CmpOpc = RISCV::FLT_H_INX;
    F2IOpc = RISCV::FCVT_W_H_INX;
    I2FOpc = RISCV::FCVT_H_W_INX;
    FSGNJOpc = RISCV::FSGNJ_H_INX;
    FSGNJXOpc = RISCV::FSGNJX_H_INX;
    RC = &RISCV::GPRF16RegClass;
    break;
  case RISCV::PseudoFROUND_S:
    CmpOpc = RISCV::FLT_S;
    F2IOpc = RISCV::FCVT_W_S;
    I2FOpc = RISCV::FCVT_S_W;
    FSGNJOpc = RISCV::FSGNJ_S;
    FSGNJXOpc = RISCV::FSGNJX_S;
    RC = &RISCV::FPR32RegClass;
    break;
  case RISCV::PseudoFROUND_S_INX:
    CmpOpc = RISCV::FLT_S_INX;
    F2IOpc = RISCV::FCVT_W_S_INX;
    I2FOpc = RISCV::FCVT_S_W_INX;
    FSGNJOpc = RISCV::FSGNJ_S_INX;
    FSGNJXOpc = RISCV::FSGNJX_S_INX;
    RC = &RISCV::GPRF32RegClass;
    break;
  case RISCV::PseudoFROUND_D:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D;
    F2IOpc = RISCV::FCVT_L_D;
    I2FOpc = RISCV::FCVT_D_L;
    FSGNJOpc = RISCV::FSGNJ_D;
    FSGNJXOpc = RISCV::FSGNJX_D;
    RC = &RISCV::FPR64RegClass;
    break;
  case RISCV::PseudoFROUND_D_INX:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D_INX;
    F2IOpc = RISCV::FCVT_L_D_INX;
    I2FOpc = RISCV::FCVT_D_L_INX;
    FSGNJOpc = RISCV::FSGNJ_D_INX;
    FSGNJXOpc = RISCV::FSGNJX_D_INX;
    RC = &RISCV::GPRRegClass;
    break;
  }

  const BasicBlock *BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++MBB->getIterator();

  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);
  // Move all instructions after the sequence to DoneMBB.
  DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
                  MBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
  // Set the successors for MBB.
  MBB->addSuccessor(CvtMBB);
  MBB->addSuccessor(DoneMBB);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register MaxReg = MI.getOperand(2).getReg();
  int64_t FRM = MI.getOperand(3).getImm();

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  Register FabsReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto MIB =
      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  BuildMI(MBB, DL, TII.get(RISCV::BEQ))
      .addReg(CmpReg)
      .addReg(RISCV::X0)
      .addMBB(DoneMBB);

  CvtMBB->addSuccessor(DoneMBB);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Convert back to FP.
  Register I2FReg = MRI.createVirtualRegister(RC);
  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the sign bit.
  Register CvtReg = MRI.createVirtualRegister(RC);
  BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);

  // Merge the results.
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(MBB)
      .addReg(CvtReg)
      .addMBB(CvtMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWrite is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case RISCV::BuildPairF64Pseudo:
  case RISCV::BuildPairF64Pseudo_INX:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case RISCV::SplitF64Pseudo:
  case RISCV::SplitF64Pseudo_INX:
    return emitSplitF64Pseudo(MI, BB, Subtarget);
  case RISCV::PseudoQuietFLE_H:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoQuietFLT_D:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M1_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M4_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M8_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
    return emitFROUND(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    if (!Subtarget.is64Bit())
      report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
                         "supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
                                                        SDNode *Node) const {
  // Add FRM dependency to any instructions with dynamic rounding mode.
  int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
  if (Idx < 0) {
    // Vector pseudos have FRM index indicated by TSFlags.
    Idx = RISCVII::getFRMOpNum(MI.getDesc());
    if (Idx < 0)
      return;
  }
  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
    return;
  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM))
    return;
  MI.addOperand(
      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
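// For example, under a hard-float ABI such as ILP32D a small struct holding a
// double and an int32_t would typically be passed as two separate arguments
// (one FPR, one GPR) while both register classes still have free registers,
// and falls back to the integer rules once either class is exhausted.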
static const MCPhysReg ArgFPR16s[] = {
    RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
    RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H};
static const MCPhysReg ArgFPR32s[] = {
    RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
    RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F};
static const MCPhysReg ArgFPR64s[] = {
    RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
    RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};

ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
  static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
                                      RISCV::X13, RISCV::X14, RISCV::X15,
                                      RISCV::X16, RISCV::X17};
  return ArrayRef(ArgGPRs);
}
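// In ABI terms, the tables above are the argument registers a0-a7 (x10-x17),
// fa0-fa7 (f10-f17, in their H/F/D subregister views) and v8-v23 (grouped as
// needed for LMUL=2/4/8).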
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}
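// For instance, on RV32 an i64 split into two i32 halves lands in a pair of
// GPRs when two are free, in one GPR plus an XLEN-aligned stack slot when only
// one is free, or in two stack slots once the argument GPRs are exhausted.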
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
                               std::optional<unsigned> FirstMaskArgument,
                               CCState &State,
                               const RISCVTargetLowering &TLI) {
  const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
  if (RC == &RISCV::VRRegClass) {
    // Assign the first mask argument to V0.
    // This is an interim calling convention and it may be changed in the
    // future.
    if (FirstMaskArgument && ValNo == *FirstMaskArgument)
      return State.AllocateReg(RISCV::V0);
    return State.AllocateReg(ArgVRs);
  }
  if (RC == &RISCV::VRM2RegClass)
    return State.AllocateReg(ArgVRM2s);
  if (RC == &RISCV::VRM4RegClass)
    return State.AllocateReg(ArgVRM4s);
  if (RC == &RISCV::VRM8RegClass)
    return State.AllocateReg(ArgVRM8s);
  llvm_unreachable("Unhandled register class for ValueType");
}
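// Vector values are drawn from register groups sized to their register class
// (LMUL=1/2/4/8), and the first i1 mask argument, when preAssignMask found
// one, is pinned to v0 so masked instructions can use it directly.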
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     std::optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Static chain parameter must not be passed in normal argument registers,
  // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
  if (ArgFlags.isNest()) {
    if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 &&
      (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(PendingLocs.empty() && "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    LocVT = MVT::i32;
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    Register HiReg = State.AllocateReg(ArgGPRs);
    if (HiReg) {
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
    } else {
      unsigned StackOffset = State.AllocateStack(4, Align(4));
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
    }
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a scalar floating-point value is passed on the stack, no
  // bit-conversion is needed.
  if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
    assert(!ValVT.isVector());
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
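// In outline, CC_RISCV tries the following in order: the static chain register
// t2 for 'nest' arguments, the custom f64-on-RV32 pairing when the value must
// travel through GPRs, pending/indirect handling for split scalars, FPRs for
// fixed FP arguments under a hard-float ABI, vector registers for RVV types,
// then GPRs, and finally an XLEN-aligned stack slot.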
template <typename ArgTy>
static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
      return ArgIdx.index();
  }
  return std::nullopt;
}
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Outs.size();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
    } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      if (RV64LegalI32) {
        Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
      } else {
        Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      }
    } else {
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    }
    break;
  }
  return Val;
}
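// The BCvt cases above are the inverse of convertValVTToLocVT below: f16/bf16
// values travel in a GPR via FMV_H_X / FMV_X_ANYEXTH, and f32 values travel in
// an i64 GPR on RV64 via FMV_W_X_RV64 / FMV_X_ANYEXTW_RV64 (or a
// truncate+bitcast pair when i32 is a legal type).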
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const RISCVTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
        RVFI->addSExt32Register(VReg);
      }
    }
  }

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (LocVT.isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
    } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
      if (RV64LegalI32) {
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
      } else {
        Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
      }
    } else {
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    }
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, we save the pointer which points to
    // the scalable vector value in the stack. The ValVT will be the pointer
    // type, instead of the scalable vector type.
    ValVT = LocVT;
  }
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA,
                                       const CCValAssign &HiVA,
                                       const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
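// Depending on how CC_RISCV assigned the two halves, the upper 32 bits are
// either reloaded from the caller's stack or copied from a second GPR before
// the value is recombined with BuildPairF64.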
// FastCC has less than 1% performance improvement for some particular
// benchmarks. But theoretically, it may have benefit for some cases.
bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            std::optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget = TLI.getSubtarget();

  if (LocVT == MVT::f16 &&
      (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Check if there is an available GPR before hitting the stack.
  if ((LocVT == MVT::f16 &&
       (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
      (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.is64Bit() &&
       Subtarget.hasStdExtZdinx())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    unsigned Offset2 = State.AllocateStack(2, Align(2));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
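// FastCC widens the usual a-register pools with caller-saved temporaries
// (t2 and t3-t6 for integers, ft0-ft7 and ft8-ft11 for floats), deliberately
// skipping t0/t1 (x5/x6), which the save/restore libcalls may use.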
bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (ArgFlags.isNest()) {
    report_fatal_error(
        "Attribute 'nest' is not supported in GHC calling convention");
  }

  static const MCPhysReg GPRList[] = {
      RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
      RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget =
      State.getMachineFunction().getSubtarget<RISCVSubtarget>();

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F,  RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
       Subtarget.is64Bit())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}
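// The GHC convention pins the STG virtual registers to callee-saved registers
// (s1-s11 for integers, fs0-fs11 for floats, or their GPR equivalents under
// Zfinx/Zdinx) and never spills; running out of registers is a fatal error.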
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::GRAAL:
    break;
  case CallingConv::GHC:
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
                         "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                   : RISCV::CC_RISCV);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Size of the vararg save area. For now, the varargs save area is either
    // zero or large enough to hold a0-a7.
    int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
    int FI;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (VarArgsSaveSize == 0) {
      int VaArgOffset = CCInfo.getStackSize();
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    } else {
      int VaArgOffset = -VarArgsSaveSize;
      FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);

      // If saving an odd number of registers then create an extra stack slot
      // to ensure that the frame pointer is 2*XLEN-aligned, which in turn
      // ensures offsets to even-numbered registers remain 2*XLEN-aligned.
      if (Idx % 2) {
        MFI.CreateFixedObject(
            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
        VarArgsSaveSize += XLenInBytes;
      }

      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Copy the integer registers that may have been used for passing varargs
      // to the vararg save area.
      for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
        const Register Reg = RegInfo.createVirtualRegister(RC);
        RegInfo.addLiveIn(ArgRegs[I], Reg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
        SDValue Store = DAG.getStore(
            Chain, DL, ArgValue, FIN,
            MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
        OutChains.push_back(Store);
        FIN =
            DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
      }
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    RVFI->setVarArgsFrameIndex(FI);
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
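// For a varargs function, any of a0-a7 not consumed by fixed arguments are
// spilled into a save area placed directly below the on-stack arguments, so
// va_arg can walk from the spilled registers onto the caller-pushed arguments
// with a single moving pointer.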
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getStackSize() != 0
  // check is not enough and we need to check if any CCValAssign in ArgLocs is
  // passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                    : RISCV::CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if
  // this is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
         "Unexpected CFI type for a direct call");

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
    if (CLI.CFIType)
      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  if (CLI.CFIType)
    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}
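// The call node assembled above carries, in order: the chain, the callee, the
// argument registers (so they are live into the call), the register mask for
// the callee's calling convention on non-tail calls, and optional glue tying
// the call to the preceding copy-to-reg nodes.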
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                        ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
                        nullptr, *this, FirstMaskArgument))
      return false;
  }
  return true;
}
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, RISCV::CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      Register RegHi = RVLocs[++i].getLocReg();

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  if (any_of(RVLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  unsigned RetOpc = RISCVISD::RET_GLUE;
  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_GLUE;
    else
      RetOpc = RISCVISD::MRET_GLUE;
  }

  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

// Check if the result of the node is only used as a return value, as
// otherwise we can't perform a tail-call.
bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();

  if (Copy->getOpcode() == ISD::BITCAST) {
    return isUsedByReturnOnly(Copy, Chain);
  }

  // TODO: Handle additional opcodes in order to support tail-calling libcalls
  // with soft float ABIs.
  if (Copy->getOpcode() != ISD::CopyToReg) {
    return false;
  }

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
    return false;

  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != RISCVISD::RET_GLUE)
      return false;
    HasRet = true;
  }
  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_GLUE)
  NODE_NAME_CASE(SRET_GLUE)
  NODE_NAME_CASE(MRET_GLUE)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(ADD_LO)
  NODE_NAME_CASE(HI)
  NODE_NAME_CASE(LLA)
  NODE_NAME_CASE(ADD_TPREL)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(ABSW)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_X_SIGNEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(FCVT_X)
  NODE_NAME_CASE(FCVT_XU)
  NODE_NAME_CASE(FCVT_W_RV64)
  NODE_NAME_CASE(FCVT_WU_RV64)
  NODE_NAME_CASE(STRICT_FCVT_W_RV64)
  NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
  NODE_NAME_CASE(FP_ROUND_BF16)
  NODE_NAME_CASE(FP_EXTEND_BF16)
  NODE_NAME_CASE(FROUND)
  NODE_NAME_CASE(FCLASS)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(BREV8)
  NODE_NAME_CASE(ORC_B)
  NODE_NAME_CASE(ZIP)
  NODE_NAME_CASE(UNZIP)
  NODE_NAME_CASE(CLMUL)
  NODE_NAME_CASE(CLMULH)
  NODE_NAME_CASE(CLMULR)
  NODE_NAME_CASE(SHA256SIG0)
  NODE_NAME_CASE(SHA256SIG1)
  NODE_NAME_CASE(SHA256SUM0)
  NODE_NAME_CASE(SHA256SUM1)
  NODE_NAME_CASE(SM4KS)
  NODE_NAME_CASE(SM4ED)
  NODE_NAME_CASE(SM3P0)
  NODE_NAME_CASE(SM3P1)
  NODE_NAME_CASE(TH_LWD)
  NODE_NAME_CASE(TH_LWUD)
  NODE_NAME_CASE(TH_LDD)
  NODE_NAME_CASE(TH_SWD)
  NODE_NAME_CASE(TH_SDD)
  NODE_NAME_CASE(VMV_V_V_VL)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VFSLIDE1UP_VL)
  NODE_NAME_CASE(VFSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(ROTL_VL)
  NODE_NAME_CASE(ROTR_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(SADDSAT_VL)
  NODE_NAME_CASE(UADDSAT_VL)
  NODE_NAME_CASE(SSUBSAT_VL)
  NODE_NAME_CASE(USUBSAT_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FCLASS_VL)
  NODE_NAME_CASE(VFMADD_VL)
  NODE_NAME_CASE(VFNMADD_VL)
  NODE_NAME_CASE(VFMSUB_VL)
  NODE_NAME_CASE(VFNMSUB_VL)
  NODE_NAME_CASE(VFWMADD_VL)
  NODE_NAME_CASE(VFWNMADD_VL)
  NODE_NAME_CASE(VFWMSUB_VL)
  NODE_NAME_CASE(VFWNMSUB_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(BITREVERSE_VL)
  NODE_NAME_CASE(BSWAP_VL)
  NODE_NAME_CASE(CTLZ_VL)
  NODE_NAME_CASE(CTTZ_VL)
  NODE_NAME_CASE(CTPOP_VL)
  NODE_NAME_CASE(VFMIN_VL)
  NODE_NAME_CASE(VFMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
  NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
  NODE_NAME_CASE(VFCVT_RM_X_F_VL)
  NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
  NODE_NAME_CASE(VFCVT_X_F_VL)
  NODE_NAME_CASE(VFCVT_XU_F_VL)
  NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
  NODE_NAME_CASE(VFCVT_RM_F_X_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(STRICT_FADD_VL)
  NODE_NAME_CASE(STRICT_FSUB_VL)
  NODE_NAME_CASE(STRICT_FMUL_VL)
  NODE_NAME_CASE(STRICT_FDIV_VL)
  NODE_NAME_CASE(STRICT_FSQRT_VL)
  NODE_NAME_CASE(STRICT_VFMADD_VL)
  NODE_NAME_CASE(STRICT_VFNMADD_VL)
  NODE_NAME_CASE(STRICT_VFMSUB_VL)
  NODE_NAME_CASE(STRICT_VFNMSUB_VL)
  NODE_NAME_CASE(STRICT_FP_ROUND_VL)
  NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
  NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
  NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
  NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
  NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
  NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
  NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
  NODE_NAME_CASE(STRICT_FSETCC_VL)
  NODE_NAME_CASE(STRICT_FSETCCS_VL)
  NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
  NODE_NAME_CASE(VWMUL_VL)
  NODE_NAME_CASE(VWMULU_VL)
  NODE_NAME_CASE(VWMULSU_VL)
  NODE_NAME_CASE(VWADD_VL)
  NODE_NAME_CASE(VWADDU_VL)
  NODE_NAME_CASE(VWSUB_VL)
  NODE_NAME_CASE(VWSUBU_VL)
  NODE_NAME_CASE(VWADD_W_VL)
  NODE_NAME_CASE(VWADDU_W_VL)
  NODE_NAME_CASE(VWSUB_W_VL)
  NODE_NAME_CASE(VWSUBU_W_VL)
  NODE_NAME_CASE(VWSLL_VL)
  NODE_NAME_CASE(VFWMUL_VL)
  NODE_NAME_CASE(VFWADD_VL)
  NODE_NAME_CASE(VFWSUB_VL)
  NODE_NAME_CASE(VFWADD_W_VL)
  NODE_NAME_CASE(VFWSUB_W_VL)
  NODE_NAME_CASE(VWMACC_VL)
  NODE_NAME_CASE(VWMACCU_VL)
  NODE_NAME_CASE(VWMACCSU_VL)
  NODE_NAME_CASE(VNSRL_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMERGE_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VCPOP_VL)
  NODE_NAME_CASE(VFIRST_VL)
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  NODE_NAME_CASE(CZERO_EQZ)
  NODE_NAME_CASE(CZERO_NEZ)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    case 'S': // A symbolic address
      return C_Other;
    }
  } else {
    if (Constraint == "vr" || Constraint == "vm")
      return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a RISC-V
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT.isVector())
        break;
      if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
        return std::make_pair(0U, &RISCV::GPRF16RegClass);
      if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
        return std::make_pair(0U, &RISCV::GPRF32RegClass);
      if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRPF64RegClass);
      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
    case 'f':
      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  } else if (Constraint == "vr") {
    for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
                           &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vm") {
    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
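  //
  // Illustrative note (added comment, not in the upstream source): with the
  // table below a constraint such as "{a0}" resolves to RISCV::X10 here,
  // while the architectural spelling "{x10}" is expected to be handled by the
  // generic TargetLowering::getRegForInlineAsmConstraint call at the end of
  // this function.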
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      if (VT == MVT::f32 || VT == MVT::Other)
        return std::make_pair(FReg, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
    }
  }

  if (Subtarget.hasVInstructions()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  std::pair<Register, const TargetRegisterClass *> Res =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // If we picked one of the Zfinx register classes, remap it to the GPR class.
  // FIXME: When Zfinx is supported in CodeGen this will need to take the
  // Subtarget into account.
  if (Res.second == &RISCV::GPRF16RegClass ||
      Res.second == &RISCV::GPRF32RegClass ||
      Res.second == &RISCV::GPRPF64RegClass)
    return std::make_pair(Res.first, &RISCV::GPRRegClass);

  return Res;
}
InlineAsm::ConstraintCode
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::ConstraintCode::A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (isNullConstant(Op))
        Ops.push_back(
            DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'S':
      if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0)));
      } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
                                                BA->getValueType(0)));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (Subtarget.hasStdExtZtso()) {
    if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
      return Builder.CreateFence(Ord);
    return nullptr;
  }

  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (Subtarget.hasStdExtZtso()) {
    if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
      return Builder.CreateFence(Ord);
    return nullptr;
  }

  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
      Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  return nullptr;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LR/SC loop emitted by
  // int_riscv_masked_atomicrmw_xchg.
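  //
  // Illustrative sketch (added comment, not from the upstream source): for
  //   %old = atomicrmw xchg ptr %p, i8 0 monotonic
  // the code below emits an AtomicRMWInst::And of the aligned word with
  // ~Mask, clearing only the addressed byte lane, while a constant -1 operand
  // becomes an AtomicRMWInst::Or with Mask instead.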
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
                                     Builder.CreateNot(Mask, "Inv_Mask"),
                                     AI->getAlign(), Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
                                     AI->getAlign(), Ord);
  }

  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
                                                        EVT DataVT) const {
  // We have indexed loads for all legal index types. Indices are always
  // zero extended.
  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
         isTypeLegal(Extend.getValueType()) &&
         isTypeLegal(Extend.getOperand(0).getValueType());
}

bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
                                               EVT VT) const {
  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
    return false;

  switch (FPVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfhmin();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    return false;
  }
}

unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce size of jump table
  // entry to 4 bytes.
  if (Subtarget.is64Bit() && !isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Small) {
    return MachineJumpTableInfo::EK_Custom32;
  }
  return TargetLowering::getJumpTableEncoding();
}

const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned uid, MCContext &Ctx) const {
  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
         getTargetMachine().getCodeModel() == CodeModel::Small);
  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}

bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
  // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
  // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
  // a power of two as well.
  // FIXME: This doesn't work for zve32, but that's already broken
  // elsewhere for the same reason.
  assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
  static_assert(RISCV::RVVBitsPerBlock == 64,
                "RVVBitsPerBlock changed, audit needed");
  return true;
}
bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
                                                 SDValue &Offset,
                                                 ISD::MemIndexedMode &AM,
                                                 bool &IsInc,
                                                 SelectionDAG &DAG) const {
  // Target does not support indexed loads.
  if (!Subtarget.hasVendorXTHeadMemIdx())
    return false;

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
    return false;

  Base = Op->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -(uint64_t)RHSC;

    // The constants that can be encoded in the THeadMemIdx instructions
    // are of the form (sign_extend(imm5) << imm2).
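    // Illustrative note (added comment, not in the upstream source): with
    // imm5 in [-16, 15] and imm2 in [0, 3], offsets such as 15, 30, 60, 120
    // or -128 pass the check below, while e.g. 121 does not.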
    bool isLegalIndexedOffset = false;
    for (unsigned i = 0; i < 4; i++)
      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
        isLegalIndexedOffset = true;
        break;
      }

    if (!isLegalIndexedOffset)
      return false;

    IsInc = (Op->getOpcode() == ISD::ADD);
    Offset = Op->getOperand(1);
    return true;
  }

  return false;
}

bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                    SDValue &Offset,
                                                    ISD::MemIndexedMode &AM,
                                                    SelectionDAG &DAG) const {
  EVT VT;
  SDValue Ptr;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;

  bool IsInc;
  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
    return false;

  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}

bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     ISD::MemIndexedMode &AM,
                                                     SelectionDAG &DAG) const {
  EVT VT;
  SDValue Ptr;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;

  bool IsInc;
  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
    return false;
  // Post-indexing updates the base, so it's not a valid transform
  // if that's not the same as the load's pointer.
  if (Ptr != Base)
    return false;

  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  EVT SVT = VT.getScalarType();

  if (!SVT.isSimple())
    return false;

  switch (SVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return VT.isVector() ? Subtarget.hasVInstructionsF16()
                         : Subtarget.hasStdExtZfhOrZhinx();
  case MVT::f32:
    return Subtarget.hasStdExtFOrZfinx();
  case MVT::f64:
    return Subtarget.hasStdExtDOrZdinx();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than XLEN on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getXLen()))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  const bool HasExtMOrZmmul =
      Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the sub target has the M extension and the data
  // size exceeds XLen.
  if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    // Break the MUL to a SLLI and an ADD/SUB.
&Imm
= ConstNode
->getAPIntValue();
19411 if ((Imm
+ 1).isPowerOf2() || (Imm
- 1).isPowerOf2() ||
19412 (1 - Imm
).isPowerOf2() || (-1 - Imm
).isPowerOf2())
19415 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
19416 if (Subtarget
.hasStdExtZba() && !Imm
.isSignedIntN(12) &&
19417 ((Imm
- 2).isPowerOf2() || (Imm
- 4).isPowerOf2() ||
19418 (Imm
- 8).isPowerOf2()))
19421 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
19422 // a pair of LUI/ADDI.
19423 if (!Imm
.isSignedIntN(12) && Imm
.countr_zero() < 12 &&
19424 ConstNode
->hasOneUse()) {
19425 APInt ImmS
= Imm
.ashr(Imm
.countr_zero());
19426 if ((ImmS
+ 1).isPowerOf2() || (ImmS
- 1).isPowerOf2() ||
19427 (1 - ImmS
).isPowerOf2())
19435 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode
,
19436 SDValue ConstNode
) const {
19437 // Let the DAGCombiner decide for vectors.
19438 EVT VT
= AddNode
.getValueType();
19442 // Let the DAGCombiner decide for larger types.
19443 if (VT
.getScalarSizeInBits() > Subtarget
.getXLen())
19446 // It is worse if c1 is simm12 while c1*c2 is not.
19447 ConstantSDNode
*C1Node
= cast
<ConstantSDNode
>(AddNode
.getOperand(1));
19448 ConstantSDNode
*C2Node
= cast
<ConstantSDNode
>(ConstNode
);
19449 const APInt
&C1
= C1Node
->getAPIntValue();
19450 const APInt
&C2
= C2Node
->getAPIntValue();
19451 if (C1
.isSignedIntN(12) && !(C1
* C2
).isSignedIntN(12))
19454 // Default to true and let the DAGCombiner decide.
19458 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
19459 EVT VT
, unsigned AddrSpace
, Align Alignment
, MachineMemOperand::Flags Flags
,
19460 unsigned *Fast
) const {
19461 if (!VT
.isVector()) {
19463 *Fast
= Subtarget
.hasFastUnalignedAccess();
19464 return Subtarget
.hasFastUnalignedAccess();
19467 // All vector implementations must support element alignment
19468 EVT ElemVT
= VT
.getVectorElementType();
19469 if (Alignment
>= ElemVT
.getStoreSize()) {
19475 // Note: We lower an unmasked unaligned vector access to an equally sized
19476 // e8 element type access. Given this, we effectively support all unmasked
19477 // misaligned accesses. TODO: Work through the codegen implications of
19478 // allowing such accesses to be formed, and considered fast.
19480 *Fast
= Subtarget
.hasFastUnalignedAccess();
19481 return Subtarget
.hasFastUnalignedAccess();
EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
                                             const AttributeList &FuncAttributes) const {
  if (!Subtarget.hasVInstructions())
    return MVT::Other;

  if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
    return MVT::Other;

  // We use LMUL1 memory operations here for a non-obvious reason.  Our caller
  // has an expansion threshold, and we want the number of hardware memory
  // operations to correspond roughly to that threshold.  LMUL>1 operations
  // are typically expanded linearly internally, and thus correspond to more
  // than one actual memory operation.  Note that store merging and load
  // combining will typically form larger LMUL operations from the LMUL1
  // operations emitted here, and that's okay because combining isn't
  // introducing new memory operations; it's just merging existing ones.
  const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
  if (Op.size() < MinVLenInBytes)
    // TODO: Figure out short memops.  For the moment, do the default thing
    // which ends up using scalar sequences.
    return MVT::Other;

  // Prefer i8 for non-zero memset as it allows us to avoid materializing
  // a large scalar constant and instead use vmv.v.x/i to do the
  // broadcast.  For everything else, prefer ELenVT to minimize VL and thus
  // maximize the chance we can encode the size in the vsetvli.
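  //
  // Illustrative note (added comment, not in the upstream source): with
  // VLEN >= 128 (MinVLenInBytes == 16) a non-zero memset is typed as v16i8 by
  // the code below, while a memcpy with ELEN == 64 gets v2i64.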
  MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
  MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;

  // Do we have sufficient alignment for our preferred VT?  If not, revert
  // to largest size allowed by our alignment criteria.
  if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
    Align RequiredAlign(PreferredVT.getStoreSize());
    if (Op.isFixedDstAlign())
      RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
    if (Op.isMemcpy())
      RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
    PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
  }
  return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
}
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
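    // Illustrative note (added comment, not in the upstream source): an f16
    // value with bit pattern 0x3C00 is passed as the f32 bit pattern
    // 0xFFFF3C00, i.e. NaN-boxed into the upper bits of the wider register.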
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      // Give an example here, we want copy a <vscale x 1 x i8> value to
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
      // subvector, then we can bitcast to <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of element should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      // Give an example here, we want copy a <vscale x 1 x i8> value from
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
      // then we can extract <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarize zero_ext and sign_ext might stop match to widening instruction in
  // some cases.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}
static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}

bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  }

  // Need to make sure that EMUL * NFIELDS ≤ 8
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}
bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.hasFastUnalignedAccess() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}

static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                        %ptr, i64 4)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  IRBuilder<> Builder(LI);

  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());

  Function *VlsegNFunc =
      Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
                                {VTy, LI->getPointerOperandType(), XLenTy});

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());

  CallInst *VlsegN =
      Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});

  for (unsigned i = 0; i < Shuffles.size(); i++) {
    Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }

  return true;
}
static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};

/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                              %ptr, i32 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  IRBuilder<> Builder(SI);
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  Function *VssegNFunc =
      Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
                                {VTy, SI->getPointerOperandType(), XLenTy});

  auto Mask = SVI->getShuffleMask();
  SmallVector<Value *, 10> Ops;

  for (unsigned i = 0; i < Factor; i++) {
    Value *Shuffle = Builder.CreateShuffleVector(
        SVI->getOperand(0), SVI->getOperand(1),
        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
    Ops.push_back(Shuffle);
  }
  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});

  Builder.CreateCall(VssegNFunc, Ops);

  return true;
}
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                           LoadInst *LI) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

  // Only deinterleave2 supported at present.
  if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  Function *VlsegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
  SmallVector<Value *, 10> Ops;

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
  }

  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);

  return true;
}
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                                          StoreInst *SI) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

  // Only interleave2 supported at present.
  if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);

  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
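
// Resolve a register referenced by name (e.g. via llvm.read_register /
// llvm.write_register) using the tablegen'd matchers included above, and
// reject any register that has not been reserved, since an unreserved
// register may be clobbered by register allocation.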
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
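
// Translate !nontemporal metadata, refined by the optional
// !riscv-nontemporal-domain annotation, into the target-specific
// MONontemporalBit0/MONontemporalBit1 memory-operand flags the backend uses
// when selecting Zihintntl (ntl.*) hints.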
MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 (the default) is treated as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");
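
  // Fold the five source-level domains onto the two MONontemporal target flag
  // bits: levels 2..5 map to 0..3 after the subtraction below, while the
  // default level 1 wraps to -1, setting both bits, which is equivalent to
  // __RISCV_NTLH_ALL.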
  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);

  return TargetFlags;
}
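
// Two memory nodes may only have their MMO flags merged when they agree on
// the nontemporal domain bits, so combining accesses never mixes hints.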
bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}
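
// CTPOP is "fast" when there is direct hardware support: Zvbb for vector
// types, or Zbb's cpop/cpopw for i32/i64 scalars (fixed-length vectors are
// also accepted in the Zbb case).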
bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}

unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}

bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
  // At the moment, the only scalable instruction GISel knows how to lower is
  // ret with a scalable argument.

  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy() &&
        !isa<ReturnInst>(&Inst))
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}
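
// On cores with the short-forward-branch optimization, expand sdiv by a
// power of two via buildSDIVPow2WithCMov, which adds the (2**k - 1) bias
// under a condition instead of using the generic shift-based sequence. The
// +/-2048 bound keeps that bias within a single addi/addiw immediate.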
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV

  // Only perform this transform if short forward branch opt is supported.
  if (!Subtarget.hasShortForwardBranchOpt())
    return SDValue();
  EVT VT = N->getValueType(0);
  if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
    return SDValue();

  // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();

  return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
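
// When a conditional-move extension (Zicond or XVentanaCondOps) is available,
// prefer the single-bit-test form only if Zbs is absent and the AND mask
// constant exceeds 1024; otherwise defer to the generic TargetLowering
// heuristic.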
bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}

unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable