//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "riscv-lower"
STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),

    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),

    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),

    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRVE())
    report_fatal_error("Codegen not yet implemented for RVE");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;

    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:

  MVT XLenVT = Subtarget.getXLenVT();
  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhOrZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxOrZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
      addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
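
  // Scalable vector value types, grouped by element type. These tables drive
  // the RVV register class and operation-action setup below.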
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
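
  // RVV register classes and most vector operation actions are only
  // configured when the subtarget provides vector instructions.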
  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
      addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));

      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,

  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SETCC, MVT::i32, Promote);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    setOperationAction(ISD::LOAD, MVT::i32, Custom);
    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},

    setOperationAction(ISD::SADDO, MVT::i32, Custom);
    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},

      {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},

  setLibcallName(RTLIB::MULO_I64, nullptr);
  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::MUL, MVT::i32, Promote);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::MUL, MVT::i128, Custom);
      setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
  } else if (Subtarget.is64Bit()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);

  if (RV64LegalI32 && Subtarget.is64Bit()) {
    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,

      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (!RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BSWAP, MVT::i32,
                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                        Subtarget.hasVendorXTHeadBb())

  // Zbkb can use rev8+brev8 to implement bitreverse.
  setOperationAction(ISD::BITREVERSE, XLenVT,
                     Subtarget.hasStdExtZbkb() ? Custom : Expand);

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,

    if (Subtarget.is64Bit()) {
        setOperationAction(ISD::CTTZ, MVT::i32, Legal);
        setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);

    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit()) {
        setOperationAction(ISD::CTLZ, MVT::i32,
                           Subtarget.hasStdExtZbb() ? Legal : Promote);
        if (!Subtarget.hasStdExtZbb())
          setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);

    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  if (!RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  if (!Subtarget.hasVendorXTHeadCondMov())
    setOperationAction(ISD::SELECT, XLenVT, Custom);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT, MVT::i32, Promote);
  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
      ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
      ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,

  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  static const unsigned ZfhminZfbfminPromoteOps[] = {
      ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FADD,
      ISD::FSUB,         ISD::FMUL,          ISD::FMA,
      ISD::FDIV,         ISD::FSQRT,         ISD::FABS,
      ISD::FNEG,         ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
      ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
      ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
      ISD::FROUNDEVEN,   ISD::SELECT};

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
    setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
    setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
    setOperationAction(ISD::FREM, MVT::bf16, Promote);
    // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
    // DAGCombiner::visitFP_ROUND probably needs improvements first.
    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);

  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
      setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    if (!Subtarget.hasStdExtZfa())
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa())
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
    setOperationAction(ISD::LROUND, MVT::i32, Custom);

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
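
  // Atomics: how wide an atomic operation can be handled inline depends on
  // whether the A extension is present or __sync libcalls are forced.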
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMaxAtomicSizeInBitsSupported(0);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::VSCALE, MVT::i32, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
        ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
        ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
        ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
        ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
        ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
        ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
        ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX};
    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the vector
      // element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,

      setOperationAction(ISD::SELECT, VT, Custom);
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(OtherVT, VT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,

      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,

      // Custom-lower reduction operations to set up the corresponding custom
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
      if (Subtarget.hasStdExtZvkb()) {
        setOperationAction(ISD::BSWAP, VT, Legal);
        setOperationAction(ISD::VP_BSWAP, VT, Custom);
        setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction(ISD::BITREVERSE, VT, Legal);
        setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
        setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,

    // TODO: support more ops.
    static const unsigned ZvfhminPromoteOps[] = {
        ISD::FMINNUM,    ISD::FMAXNUM,    ISD::FADD,        ISD::FSUB,
        ISD::FMUL,       ISD::FMA,        ISD::FDIV,        ISD::FSQRT,
        ISD::FABS,       ISD::FNEG,       ISD::FCOPYSIGN,   ISD::FCEIL,
        ISD::FFLOOR,     ISD::FROUND,     ISD::FROUNDEVEN,  ISD::FRINT,
        ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC,       ISD::FMAXIMUM,

    // TODO: support more vp ops.
    static const unsigned ZvfhminPromoteVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,         ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,         ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD,  ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,  ISD::VP_SQRT,
        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,      ISD::VP_FCEIL,
        ISD::VP_FFLOOR,      ISD::VP_FROUND,       ISD::VP_FROUNDEVEN,
        ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
        ISD::VP_FNEARBYINT,  ISD::VP_SETCC};
    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},

    // Sets common extload/truncstore actions on RVV floating-point vector
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
        SetCommonVFPActions(VT);
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
        setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16 since nxv32f32 is not legal.
        if (VT == MVT::nxv32f16) {
          setOperationAction(ZvfhminPromoteOps, VT, Custom);
          setOperationAction(ZvfhminPromoteVPOps, VT, Custom);

        // Add more promote ops.
        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
        setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
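
    // Fixed-length vectors are lowered onto RVV containers: start with
    // everything expanded, then mark the operations that map onto
    // scalable-vector patterns per element type.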
    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
                           OtherVT, VT, Expand);

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

                            ISD::STRICT_SINT_TO_FP,
                            ISD::STRICT_UINT_TO_FP,
                            ISD::STRICT_FP_TO_SINT,
                            ISD::STRICT_FP_TO_UINT,

        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below are different for between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},

            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

            {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},

        setOperationAction(IntegerVPOps, VT, Custom);

        if (Subtarget.hasStdExtZvkb())
          setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);

        if (Subtarget.hasStdExtZvbb()) {
          setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},

          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
          EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
          if (isTypeLegal(FloatVT))
              {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        // There are no extending loads or truncating stores.
        for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
          setTruncStoreAction(VT, InnerVT, Expand);

        if (!useRVVForFixedLengthVectorVT(VT))

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        if (VT.getVectorElementType() == MVT::f16 &&
            !Subtarget.hasVInstructionsF16()) {
          setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
          setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
          setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
              {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
          setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                              ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
          setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                              ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
          setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
          MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
          // Don't promote f16 vector operations to f32 if f32 vector type is
          // TODO: could split the f16 vector into two vectors and do promotion.
          if (!isTypeLegal(F32VecVT))
          setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
          setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
                            ISD::EXTRACT_VECTOR_ELT},

        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                            ISD::MGATHER, ISD::MSCATTER},

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
                            ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},

        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);

        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                            ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},

        setCondCodeAction(VFPCCToExpand, VT, Expand);

        setOperationAction(ISD::SETCC, VT, Custom);
        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(FloatingPointVecReduceOps, VT, Custom);

        setOperationAction(FloatingPointVPOps, VT, Custom);

        setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
            {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
             ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
             ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
             ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
             ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
      if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      if (Subtarget.hasStdExtFOrZfinx())
        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      if (Subtarget.hasStdExtDOrZdinx())
        setOperationAction(ISD::BITCAST, MVT::f64, Custom);
  if (Subtarget.hasStdExtA()) {
    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);

  if (Subtarget.hasForcedAtomics()) {
    // Force __sync libcalls to be emitted for atomic rmw/cas operations.
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},

  if (Subtarget.hasVendorXTHeadMemIdx()) {
    for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);

      if (Subtarget.is64Bit()) {
        setIndexedLoadAction(im, MVT::i64, Legal);
        setIndexedStoreAction(im, MVT::i64, Legal);

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
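
  // Only form jump tables for switches with at least five cases.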
  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
                       ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
    setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
  return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      bool IsScalable) const {
  if (!Subtarget.hasVInstructions())

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())

  // Don't allow VF=1 if those types aren't legal.
  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())

  // VLEN=32 support is incomplete.
  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.
  unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
  return VF > MaxVF || !isPowerOf2_32(VF);

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
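
  // Common helper for RVV load/store intrinsics: fills in the IntrinsicInfo
  // opcode, pointer operand, memory type, and flags. The pointer operand
  // index and whether the access is unit-strided vary per intrinsic.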
  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided) {
    Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(PtrOp);
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
      // Use return type. If it's segment load, return type is a struct.
      MemTy = I.getType();
      if (MemTy->isStructTy())
        MemTy = MemTy->getStructElementType(0);
      MemTy = MemTy->getScalarType();

    Info.memVT = getValueType(DL, MemTy);
    Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
        IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;

  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    Info.flags |= MachineMemOperand::MONonTemporal;

  Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
  switch (Intrinsic) {
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
  case Intrinsic::riscv_masked_strided_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_masked_strided_store:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask
:
1649 case Intrinsic::riscv_vsseg7_mask
:
1650 case Intrinsic::riscv_vsseg8_mask
:
1651 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 3,
1653 /*IsUnitStrided*/ false);
1654 case Intrinsic::riscv_vssseg2
:
1655 case Intrinsic::riscv_vssseg3
:
1656 case Intrinsic::riscv_vssseg4
:
1657 case Intrinsic::riscv_vssseg5
:
1658 case Intrinsic::riscv_vssseg6
:
1659 case Intrinsic::riscv_vssseg7
:
1660 case Intrinsic::riscv_vssseg8
:
1661 case Intrinsic::riscv_vsoxseg2
:
1662 case Intrinsic::riscv_vsoxseg3
:
1663 case Intrinsic::riscv_vsoxseg4
:
1664 case Intrinsic::riscv_vsoxseg5
:
1665 case Intrinsic::riscv_vsoxseg6
:
1666 case Intrinsic::riscv_vsoxseg7
:
1667 case Intrinsic::riscv_vsoxseg8
:
1668 case Intrinsic::riscv_vsuxseg2
:
1669 case Intrinsic::riscv_vsuxseg3
:
1670 case Intrinsic::riscv_vsuxseg4
:
1671 case Intrinsic::riscv_vsuxseg5
:
1672 case Intrinsic::riscv_vsuxseg6
:
1673 case Intrinsic::riscv_vsuxseg7
:
1674 case Intrinsic::riscv_vsuxseg8
:
1675 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 3,
1677 /*IsUnitStrided*/ false);
1678 case Intrinsic::riscv_vssseg2_mask
:
1679 case Intrinsic::riscv_vssseg3_mask
:
1680 case Intrinsic::riscv_vssseg4_mask
:
1681 case Intrinsic::riscv_vssseg5_mask
:
1682 case Intrinsic::riscv_vssseg6_mask
:
1683 case Intrinsic::riscv_vssseg7_mask
:
1684 case Intrinsic::riscv_vssseg8_mask
:
1685 case Intrinsic::riscv_vsoxseg2_mask
:
1686 case Intrinsic::riscv_vsoxseg3_mask
:
1687 case Intrinsic::riscv_vsoxseg4_mask
:
1688 case Intrinsic::riscv_vsoxseg5_mask
:
1689 case Intrinsic::riscv_vsoxseg6_mask
:
1690 case Intrinsic::riscv_vsoxseg7_mask
:
1691 case Intrinsic::riscv_vsoxseg8_mask
:
1692 case Intrinsic::riscv_vsuxseg2_mask
:
1693 case Intrinsic::riscv_vsuxseg3_mask
:
1694 case Intrinsic::riscv_vsuxseg4_mask
:
1695 case Intrinsic::riscv_vsuxseg5_mask
:
1696 case Intrinsic::riscv_vsuxseg6_mask
:
1697 case Intrinsic::riscv_vsuxseg7_mask
:
1698 case Intrinsic::riscv_vsuxseg8_mask
:
1699 return SetRVVLoadStoreInfo(/*PtrOp*/ I
.arg_size() - 4,
1701 /*IsUnitStrided*/ false);
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // RVV instructions only support register addressing.
  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}
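// Illustrative examples of the rules above (editorial, not from the upstream
// comment): "reg + 2047" is accepted because the offset fits in a signed
// 12-bit immediate, "reg + 2048" is rejected, and "reg + reg" is rejected
// because the base ISA has no scaled or two-register addressing mode.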
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
// isTruncateFree?
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  // We consider i64->i32 free on RV64 since we have good selection of W
  // instructions that make promoting operations back to i64 free in many cases.
  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
      !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
    return false;
  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!Mask)
    return false;
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
}

bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.
  if (VT.isVector())
    return false;

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);
}

bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  return C && C->getAPIntValue().ule(10);
}

bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
                                                               EVT VT) const {
  // Only enable for rvv.
  if (!VT.isVector() || !Subtarget.hasVInstructions())
    return false;

  if (VT.isFixedLengthVector() && !isTypeLegal(VT))
    return false;

  return true;
}
bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                            Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())
    return true;

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a
  // constant pool?

  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}

bool RISCVTargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.
  return !XC;
}
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
  switch (Opcode) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;
  default:
    return false;
  }
}

bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  if (canSplatOperand(I->getOpcode(), Operand))
    return true;

  auto *II = dyn_cast<IntrinsicInst>(I);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  // These intrinsics are commutative.
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  // These intrinsics have 'vr' versions.
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  default:
    return false;
  }
}
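// Illustrative note (editorial, not part of the original comment): a UDiv can
// only splat operand 1, since vdivu.vx takes a scalar divisor, whereas Sub can
// splat either operand because vrsub.vx covers the reversed form.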
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  for (auto OpIdx : enumerate(I->operands())) {
    if (!canSplatOperand(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // Don't sink i1 splats.
    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!canSplatOperand(Insn, U.getOperandNo()))
        return false;
    }

    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}
bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  // Permit a vector binary operation to be converted to a scalar binary
  // operation that is custom lowered with an illegal type.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
         isOperationCustom(Opc, ScalarVT);
}

bool RISCVTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}
// Return one of the following:
// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
// positive counterpart, which will be materialized from the first returned
// element. The second returned element indicates that an FNEG should follow.
// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
                                                           MVT VT) const {
  if (!Subtarget.hasStdExtZfa())
    return std::make_pair(-1, false);

  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
  }

  if (!IsSupportedVT)
    return std::make_pair(-1, false);

  int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
  if (Index < 0 && Imm.isNegative())
    // Try the combination of its positive counterpart + FNEG.
    return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
  return std::make_pair(Index, false);
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
  if (VT == MVT::f16)
    IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
  else if (VT == MVT::f32)
    IsLegalVT = Subtarget.hasStdExtFOrZfinx();
  else if (VT == MVT::f64)
    IsLegalVT = Subtarget.hasStdExtDOrZdinx();
  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  if (!IsLegalVT)
    return false;

  if (getLegalZfaFPImm(Imm, VT).first >= 0)
    return true;

  // Cannot create a 64 bit floating-point immediate value for rv32.
  if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
    // td can handle +0.0 or -0.0 already.
    // -0.0 can be created by fmv + fneg.
    return Imm.isZero();
  }

  // Special case: fmv + fneg
  if (Imm.isNegZero())
    return true;

  // Building an integer and then converting requires a fmv at the end of
  // the integer sequence.
  const int Cost =
      1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
                                     Subtarget.getFeatureBits());
  return Cost <= FPImmCost;
}
// TODO: This is very conservative.
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Only support extracting a fixed from a fixed vector for now.
  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
    return false;

  unsigned ResElts = ResVT.getVectorNumElements();
  unsigned SrcElts = SrcVT.getVectorNumElements();

  // Conservatively only handle extracting half of a vector.
  // TODO: Relax this.
  if ((ResElts * 2) != SrcElts)
    return false;

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (ResVT.getVectorElementType() == MVT::i1)
    return false;

  // Slide can support arbitrary index, but we only treat vslidedown.vi as
  // cheap.
  if (Index >= 32)
    return false;

  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  return Index == 0 || Index == ResElts;
}
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    return MVT::f32;

  MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);

  if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
    return MVT::i64;

  return PartVT;
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                            CallingConv::ID CC,
                                                            EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);

  if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
    IntermediateVT = MVT::i64;

  if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
    RegisterVT = MVT::i64;

  return NumRegs;
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default: break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}
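// Illustrative example of the single-bit test rewrite above (editorial, not
// from the original comment): on RV64, (X & 0x800) != 0 has a mask that does
// not fit in a 12-bit ANDI immediate, so it becomes (X << 52) < 0, moving
// bit 11 into the sign position.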
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVII::VLMUL::LMUL_F8;
  case 16:
    return RISCVII::VLMUL::LMUL_F4;
  case 32:
    return RISCVII::VLMUL::LMUL_F2;
  case 64:
    return RISCVII::VLMUL::LMUL_1;
  case 128:
    return RISCVII::VLMUL::LMUL_2;
  case 256:
    return RISCVII::VLMUL::LMUL_4;
  case 512:
    return RISCVII::VLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVII::VLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}
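// Worked example for the two helpers above (illustrative): nxv4i32 has a known
// minimum size of 128 bits, i.e. two 64-bit RVV blocks, so getLMUL returns
// LMUL_2 and the register class is VRM2; nxv1i8 (8 bits) maps to the
// fractional LMUL_F8 and the plain VR class.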
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVII::VLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
      LMUL == RISCVII::VLMUL::LMUL_F4 ||
      LMUL == RISCVII::VLMUL::LMUL_F2 ||
      LMUL == RISCVII::VLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
  return !Subtarget.useRVVForFixedLengthVectors() ||
         (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
  if (!ScalarTy.isSimple())
    return false;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::iPTR:
    return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  case MVT::i64:
    return Subtarget.hasVInstructionsI64();
  case MVT::f16:
    return Subtarget.hasVInstructionsF16();
  case MVT::f32:
    return Subtarget.hasVInstructionsF32();
  case MVT::f64:
    return Subtarget.hasVInstructionsF64();
  default:
    return false;
  }
}

unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
  return NumRepeatedDivisors;
}

static SDValue getVLOperand(SDValue Op) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasVLOperand())
    return SDValue();
  return Op.getOperand(II->VLOperand + 1 + HasChain);
}
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getRealMinVLen();

  MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (EltVT.SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  case MVT::i64:
    if (!Subtarget.hasVInstructionsI64())
      return false;
    break;
  case MVT::f16:
    if (!Subtarget.hasVInstructionsF16Minimal())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasVInstructionsF32())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasVInstructionsF64())
      return false;
    break;
  }

  // Reject elements larger than ELEN.
  if (EltVT.getSizeInBits() > Subtarget.getELen())
    return false;

  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
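// Illustrative example (assuming a subtarget with VLEN=128): v16i32 is 512
// bits, so LMul = divideCeil(512, 128) = 4 and the type is only handled when
// getMaxLMULForFixedLengthVectors() is at least 4.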
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getRealMinVLen();
  unsigned MaxELen = Subtarget.getELen();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
    unsigned NumElts =
        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}

static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
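// Worked example (illustrative, assuming MinVLen=128 and ELEN=64): v4i16 gives
// NumElts = (4 * 64) / 128 = 2, clamped to at least 64 / 64 = 1, so the
// container is nxv2i16, which getLMUL classifies as LMUL_F2.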
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable) length.
static MVT getMaskTypeFor(MVT VecVT) {
  assert(VecVT.isVector());
  ElementCount EC = VecVT.getVectorElementCount();
  return MVT::getVectorVT(MVT::i1, EC);
}

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
                              SelectionDAG &DAG) {
  MVT MaskVT = getMaskTypeFor(VecVT);
  return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
}
static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
                       const RISCVSubtarget &Subtarget) {
  return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
}

static std::pair<SDValue, SDValue>
getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
                SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
  return {Mask, VL};
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that the fixed-length vector is contained in. Otherwise if
// VecVT is scalable, then ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  if (VecVT.isFixedLengthVector())
    return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                           Subtarget);
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
                                          SelectionDAG &DAG) const {
  assert(VecVT.isScalableVector() && "Expected scalable vector");
  return DAG.getElementCount(DL, Subtarget.getXLenVT(),
                             VecVT.getVectorElementCount());
}
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either are (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
  // implementation-defined.
  if (!VT.isVector())
    return InstructionCost::getInvalid();
  unsigned DLenFactor = Subtarget.getDLenFactor();
  unsigned Cost;
  if (VT.isScalableVector()) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) =
        RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
    if (Fractional)
      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
    else
      Cost = (LMul * DLenFactor);
  } else {
    Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
  }
  return Cost;
}
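// Illustrative example (editorial): with DLEN = VLEN/2 (DLenFactor == 2), an
// LMUL_1 scalable type costs 2, an LMUL_F2 type costs 1, and an LMUL_4 type
// costs 8 under the model above.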
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vregs implied by LMUL. Note that
/// the operands (index and possibly mask) are handled separately.
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
  return getLMULCost(VT) * getLMULCost(VT);
}

/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
  return getLMULCost(VT);
}

/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
  return getLMULCost(VT);
}
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // RISC-V FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan. We can use a conversion instruction and fix the
  // nan case with a compare and a select.
  SDValue Src = Op.getOperand(0);

  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;

  if (!DstVT.isVector()) {
    // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
    // the result.
    if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
        Src.getValueType() == MVT::bf16) {
      Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
    }

    unsigned Opc;
    if (SatVT == DstVT)
      Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
    else if (DstVT == MVT::i64 && SatVT == MVT::i32)
      Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
    else
      return SDValue();
    // FIXME: Support other SatVTs by clamping before or after the conversion.

    SDLoc DL(Op);
    SDValue FpToInt = DAG.getNode(
        Opc, DL, DstVT, Src,
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));

    if (Opc == RISCVISD::FCVT_WU_RV64)
      FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

    SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
    return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  // Vectors.

  MVT DstEltVT = DstVT.getVectorElementType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();
  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
  unsigned DstEltSize = DstEltVT.getSizeInBits();

  // Only handle saturating to the destination type.
  if (SatVT != DstEltVT)
    return SDValue();

  // FIXME: Don't support narrowing by more than 1 steps for now.
  if (SrcEltSize > (2 * DstEltSize))
    return SDValue();

  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;
  if (DstVT.isFixedLengthVector()) {
    DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
    SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
    assert(DstContainerVT.getVectorElementCount() ==
               SrcContainerVT.getVectorElementCount() &&
           "Expected same element count");
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  SDLoc DL(Op);

  auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);

  SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                              {Src, Src, DAG.getCondCode(ISD::SETNE),
                               DAG.getUNDEF(Mask.getValueType()), Mask, VL});

  // Need to widen by more than 1 step, promote the FP type, then do a widening
  // convert.
  if (DstEltSize > (2 * SrcEltSize)) {
    assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
    MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
  }

  unsigned RVVOpc =
      IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
  SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);

  SDValue SplatZero = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
      DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
  Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
                    Res, VL);

  if (DstVT.isFixedLengthVector())
    Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);

  return Res;
}
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
  switch (Opc) {
  case ISD::FROUNDEVEN:
  case ISD::STRICT_FROUNDEVEN:
  case ISD::VP_FROUNDEVEN:
    return RISCVFPRndMode::RNE;
  case ISD::FTRUNC:
  case ISD::STRICT_FTRUNC:
  case ISD::VP_FROUNDTOZERO:
    return RISCVFPRndMode::RTZ;
  case ISD::FFLOOR:
  case ISD::STRICT_FFLOOR:
  case ISD::VP_FFLOOR:
    return RISCVFPRndMode::RDN;
  case ISD::FCEIL:
  case ISD::STRICT_FCEIL:
  case ISD::VP_FCEIL:
    return RISCVFPRndMode::RUP;
  case ISD::FROUND:
  case ISD::STRICT_FROUND:
  case ISD::VP_FROUND:
    return RISCVFPRndMode::RMM;
  case ISD::FRINT:
    return RISCVFPRndMode::DYN;
  }

  return RISCVFPRndMode::Invalid;
}
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back. Taking care to avoid converting values that are
// nan or already correct.
static SDValue
lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);

  SDValue Src = Op.getOperand(0);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  } else {
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  }

  // Freeze the source since we are increasing the number of uses.
  Src = DAG.getFreeze(Src);

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to be
  // converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode =
      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.
  MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  Mask =
      DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
                  {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
                   Mask, Mask, VL});

  // Truncate to integer and convert back to FP.
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Truncated;

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::FCEIL:
  case ISD::VP_FCEIL:
  case ISD::FFLOOR:
  case ISD::VP_FFLOOR:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    break;
  }
  case ISD::FTRUNC:
    Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
                            Mask, VL);
    break;
  case ISD::FRINT:
  case ISD::VP_FRINT:
    Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
    break;
  case ISD::FNEARBYINT:
  case ISD::VP_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
                            Mask, VL);
    break;
  }

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
    Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
                            Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

  if (!VT.isFixedLengthVector())
    return Truncated;

  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
}
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN elements of the
// source to qNaN and converting the new source to integer and back to FP.
static SDValue
lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Freeze the source since we are increasing the number of uses.
  Src = DAG.getFreeze(Src);

  // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
  MVT MaskVT = Mask.getSimpleValueType();
  SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
                                DAG.getVTList(MaskVT, MVT::Other),
                                {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
                                 DAG.getUNDEF(MaskVT), Mask, VL});
  Chain = Unorder.getValue(1);
  Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
                    DAG.getVTList(ContainerVT, MVT::Other),
                    {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
  Chain = Src.getValue(1);

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to be
  // converted.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode =
      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.
  Mask = DAG.getNode(
      RISCVISD::SETCC_VL, DL, MaskVT,
      {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});

  // Truncate to integer and convert back to FP.
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Truncated;

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::STRICT_FCEIL:
  case ISD::STRICT_FFLOOR:
  case ISD::STRICT_FROUND:
  case ISD::STRICT_FROUNDEVEN: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(
        RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
        {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
    break;
  }
  case ISD::STRICT_FTRUNC:
    Truncated =
        DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
                    DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
    break;
  case ISD::STRICT_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
                            DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
                            Mask, VL);
    break;
  }
  Chain = Truncated.getValue(1);

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
    Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
                            DAG.getVTList(ContainerVT, MVT::Other), Chain,
                            Truncated, Mask, VL);
    Chain = Truncated.getValue(1);
  }

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

  if (VT.isFixedLengthVector())
    Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
  return DAG.getMergeValues({Truncated, Chain}, DL);
}
static SDValue
lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  if (VT.isVector())
    return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);

  if (DAG.shouldOptForSize())
    return SDValue();

  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);

  // Create an integer the size of the mantissa with the MSB set. This and all
  // values larger than it don't have any fractional bits so don't need to be
  // corrected.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
  return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
                     DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
}
// Expand vector LRINT and LLRINT by converting to the integer domain.
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  MVT ContainerVT = VT;

  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  SDValue Truncated =
      DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);

  if (!VT.isFixedLengthVector())
    return Truncated;

  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
}
static SDValue
getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
              const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
              SDValue Offset, SDValue Mask, SDValue VL,
              unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
  if (Merge.isUndef())
    Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
  SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
  SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
  return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
}

static SDValue
getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
            EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
            SDValue VL,
            unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
  if (Merge.isUndef())
    Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
  SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
  SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
  return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
}
struct VIDSequence {
  int64_t StepNumerator;
  unsigned StepDenominator;
  int64_t Addend;
};

static std::optional<uint64_t> getExactInteger(const APFloat &APF,
                                               uint32_t BitWidth) {
  APSInt ValInt(BitWidth, !APF.isNegative());
  // We use an arbitrary rounding mode here. If a floating-point is an exact
  // integer (e.g., 1.0), the rounding mode does not affect the output value. If
  // the rounding mode changes the output value, then it is not an exact
  // integer.
  RoundingMode ArbitraryRM = RoundingMode::TowardZero;
  bool IsExact;
  // If it is out of signed integer range, it will return an invalid operation.
  // If it is not an exact integer, IsExact is false.
  if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
       APFloatBase::opInvalidOp) ||
      !IsExact)
    return std::nullopt;
  return ValInt.extractBitsAsZExtValue(BitWidth, 0);
}
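// Illustrative behaviour of getExactInteger (editorial): 2.0 with BitWidth 32
// yields 2, 0.5 yields std::nullopt because the conversion is inexact, and
// 1e30 yields std::nullopt because it is out of 32-bit integer range.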
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can then be lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
  unsigned NumElts = Op.getNumOperands();
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.
    if (Op.getOperand(Idx).isUndef())
      continue;

    uint64_t Val;
    if (IsInteger) {
      // The BUILD_VECTOR must be all constants.
      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
    } else {
      // The BUILD_VECTOR must be all constants.
      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      if (auto ExactInteger = getExactInteger(
              cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
              EltSizeInBits))
        Val = *ExactInteger;
      else
        return std::nullopt;
    }

    if (PrevElt) {
      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.
      if (ValDiff == 0)
        continue;

      int64_t Remainder = ValDiff % IdxDiff;
      // Normalize the step if it's greater than 1.
      if (Remainder != ValDiff) {
        // The difference must cleanly divide the element span.
        if (Remainder != 0)
          return std::nullopt;
        ValDiff /= IdxDiff;
        IdxDiff = 1;
      }

      if (!SeqStepNum)
        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return std::nullopt;

      if (!SeqStepDenom)
        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return std::nullopt;
    }

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }

  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)
    return std::nullopt;

  // Loop back through the sequence and validate elements we might have skipped
  // while waiting for a valid step. While doing this, log any sequence addend.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      continue;
    uint64_t Val;
    if (IsInteger) {
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
    } else {
      Val = *getExactInteger(
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
          EltSizeInBits);
    }
    uint64_t ExpectedVal =
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    if (!SeqAddend)
      SeqAddend = Addend;
    else if (Addend != SeqAddend)
      return std::nullopt;
  }

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
// and lower it as a VRGATHER_VX_VL from the source vector.
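// For example, splatting element 3 of a v4i32 source becomes a vrgather.vx of
// that source with scalar index 3, avoiding a separate element extract into a
// scalar register.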
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = SplatVal.getOperand(0);
  // Only perform this optimization on vectors of the same size for simplicity.
  // Don't perform this optimization for i1 vectors.
  // FIXME: Support i1 vectors, maybe by promoting to i8?
  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
    return SDValue();
  SDValue Idx = SplatVal.getOperand(1);
  // The index must be a legal type.
  if (Idx.getValueType() != Subtarget.getXLenVT())
    return SDValue();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
                               Idx, DAG.getUNDEF(ContainerVT), Mask, VL);

  if (!VT.isFixedLengthVector())
    return Gather;

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}

/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
/// Note that this includes vectors of 2 elements by association. The
/// upper-most element is the "dominant" one, allowing us to use a splat to
/// "insert" the upper element, and an insert of the lower element at position
/// 0, which improves codegen.
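/// For example, <i32 7, i32 7, i32 5, i32 7> is lowered as a splat of 7
/// followed by a single INSERT_VECTOR_ELT of 5 at index 2.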
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
                                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  SDValue DominantValue;
  unsigned MostCommonCount = 0;
  DenseMap<SDValue, unsigned> ValueCounts;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  // Track the number of scalar loads we know we'd be inserting, estimated as
  // any non-zero floating-point constant. Other kinds of element are either
  // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materialization and
  // vector-insertion instructions is not known.
  unsigned NumScalarLoads = 0;

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];
    if (0 == Count)
      if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
        NumScalarLoads += !CFP->isExactlyValue(+0.0);

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    // end.
    if (++Count >= MostCommonCount) {
      DominantValue = V;
      MostCommonCount = Count;
    }
  }

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
      (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
      ((MostCommonCount > DominantValueCountThreshold) ||
       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
    // Start by splatting the most common element.
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    DenseSet<SDValue> Processed{DominantValue};

    // We can handle an insert into the last element (of a splat) via
    // v(f)slide1down. This is slightly better than the vslideup insert
    // lowering as it avoids the need for a vector group temporary. It
    // is also better than using vmerge.vx as it avoids the need to
    // materialize the mask in a vector register.
    if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
        !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
        LastOp != DominantValue) {
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      auto OpCode =
          VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
      if (!VT.isFloatingPoint())
        LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
      Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                        LastOp, Mask, VL);
      Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
      Processed.insert(LastOp);
    }

    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
        continue;
      if (ValueCounts[V] == 1) {
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
      } else {
        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
        // we're after.
        SmallVector<SDValue> Ops;
        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
        });
        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
                          DAG.getBuildVector(SelMaskTy, DL, Ops),
                          DAG.getSplatBuildVector(VT, DL, V), Vec);
      }
    }

    return Vec;
  }

  return SDValue();
}

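// Lower a fixed-length BUILD_VECTOR whose operands are all constants.
// Strategies are tried roughly in order of decreasing profitability: i1 masks
// via vmclr/vmset or packed integer chunks, plain splats, VID-based index
// sequences, very small vectors via a single scalar insert, "hidden" splats at
// a wider element type, narrowing to i8 when the sign bits allow, and finally
// the dominant-value lowering. If nothing applies, returning SDValue() falls
// back to the generic constant-pool lowering.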
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }
    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // bits.
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
    // If we have to use more than one INSERT_VECTOR_ELT then this
    // optimization is likely to increase code size; avoid performing it in
    // such a case. We can use a load from a constant pool in this case.
    if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
      return SDValue();
    // Now we can create our integer vector type. Note that it may be larger
    // than the resulting mask type: v4i1 would use v1i8 as its integer type.
    unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
    MVT IntegerViaVecVT =
        MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                         IntegerViaVecElts);

    uint64_t Bits = 0;
    unsigned BitPos = 0, IntegerEltIdx = 0;
    SmallVector<SDValue, 8> Elts(IntegerViaVecElts);

    for (unsigned I = 0; I < NumElts;) {
      SDValue V = Op.getOperand(I);
      bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
      Bits |= ((uint64_t)BitValue << BitPos);
      ++BitPos;
      ++I;

      // Once we accumulate enough bits to fill our scalar type or process the
      // last element, insert into our vector and clear our accumulated data.
      if (I % NumViaIntegerBits == 0 || I == NumElts) {
        if (NumViaIntegerBits <= 32)
          Bits = SignExtend64<32>(Bits);
        SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
        Elts[IntegerEltIdx] = Elt;
        Bits = 0;
        BitPos = 0;
        IntegerEltIdx++;
      }
    }

    SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);

    if (NumElts < NumViaIntegerBits) {
      // If we're producing a smaller vector than our minimum legal integer
      // type, bitcast to the equivalent (known-legal) mask type, and extract
      // our final mask.
      assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
      Vec = DAG.getBitcast(MVT::v8i1, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    } else {
      // Else we must have produced an integer type with the same size as the
      // mask type; bitcast for the final result.
      assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
      Vec = DAG.getBitcast(VT, Vec);
    }

    return Vec;
  }
  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }
  // Try and match index sequences, which we can lower to the vid instruction
  // with optional modifications. An all-undef vector is matched by
  // getSplatValue, above.
  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;

    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;
    unsigned StepOpcode = ISD::MUL;
    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
    // anyway as the shift of 63 won't fit in uimm5.
    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
        isPowerOf2_64(std::abs(StepNumerator))) {
      Negate = StepNumerator < 0;
      StepOpcode = ISD::SHL;
      SplatStepVal = Log2_64(std::abs(StepNumerator));
    }

    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
    // threshold since it's the immediate value many RVV instructions accept.
    // There is no vmul.vi instruction so ensure multiply constant can fit in
    // a single addi instruction.
    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        isPowerOf2_32(StepDenominator) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
      MVT VIDVT =
          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
      MVT VIDContainerVT =
          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
      // Convert right out of the scalable type so we can use standard ISD
      // nodes for the rest of the computation. If we used scalable types with
      // these, we'd lose the fixed-length vector info and generate worse
      // vsetvli code.
      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
        SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
      }
      if (StepDenominator != 1) {
        SDValue SplatStep =
            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
      }
      if (Addend != 0 || Negate) {
        SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
                          VID);
      }
      if (VT.isFloatingPoint()) {
        // TODO: Use vfwcvt to reduce register pressure.
        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
      }
      return VID;
    }
  }
  // For very small build_vectors, use a single scalar insert of a constant.
  // TODO: Base this on constant rematerialization cost, not size.
  const unsigned EltBitSize = VT.getScalarSizeInBits();
  if (VT.getSizeInBits() <= 32 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
           "Unexpected sequence type");
    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value at this larger vector type.
    for (const auto &OpIdx : enumerate(Op->op_values())) {
      const auto &SeqV = OpIdx.value();
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (OpIdx.index() * EltBitSize));
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
                              DAG.getUNDEF(ViaVecVT),
                              DAG.getConstant(SplatValue, DL, XLenVT),
                              DAG.getConstant(0, DL, XLenVT));
    if (ViaVecLen != 1)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                        MVT::getVectorVT(ViaIntVT, 1), Vec,
                        DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(VT, Vec);
  }
  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could be instead splat as
  //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    const unsigned RequiredVL = NumElts / SeqLen;
    const unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
        NumElts : RequiredVL;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value. That
    // way we can use RVV instructions to splat.
    assert((ViaIntVT.bitsLE(XLenVT) ||
            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      SDValue ViaVL =
          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
      MVT ViaContainerVT =
          getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
      SDValue Splat =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
                      DAG.getUNDEF(ViaContainerVT),
                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
      if (ViaVecLen != RequiredVL)
        Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                            MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
                            DAG.getConstant(0, DL, XLenVT));
      return DAG.getBitcast(VT, Splat);
    }
  }
  // If the number of signbits allows, see if we can lower as a <N x i8>.
  // Our main goal here is to reduce LMUL (and thus work) required to
  // build the constant, but we will also narrow if the resulting
  // narrow vector is known to materialize cheaply.
  // TODO: We really should be costing the smaller vector. There are
  // profitable cases this misses.
  if (EltBitSize > 8 && VT.isInteger() &&
      (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
    unsigned SignBits = DAG.ComputeNumSignBits(Op);
    if (EltBitSize - SignBits < 8) {
      SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
                                          DL, Op->ops());
      Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
                                       Source, DAG, Subtarget);
      SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source,
                                Mask, VL);
      return convertFromScalableVector(VT, Res, DAG, Subtarget);
    }
  }

  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
    return Res;

  // For constant vectors, use generic constant pool lowering. Otherwise,
  // we'd have to materialize constants in GPRs just to move them into the
  // vector.
  return SDValue();
}

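// Lower a general fixed-length BUILD_VECTOR. All-constant nodes are delegated
// to lowerBuildVectorOfConstants above; i1 vectors are built as an i8 vector
// compared against zero; splats become vmv.v.x/vfmv.v.f (or a vrgather of an
// extracted element); everything else is assembled with a chain of
// v(f)slide1down operations, provided that fits within a budget linear in the
// number of elements.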
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();

  if (VT.getVectorElementType() == MVT::i1) {
    // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
    // vector type, we have a legal equivalently-sized i8 type, so we can use
    // that.
    MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
    SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);

    SDValue WideVec;
    if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
      // For a splat, perform a scalar truncate before creating the wider
      // vector.
      Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
                          DAG.getConstant(1, DL, Splat.getValueType()));
      WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
    } else {
      SmallVector<SDValue, 8> Ops(Op->op_values());
      WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
      SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
      WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
    }

    return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
      return Gather;
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
    return Res;

  // Cap the cost at a value linear to the number of elements in the vector.
  // The default lowering is to use the stack. The vector store + scalar loads
  // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
  // being (at least) linear in LMUL. As a result, using the vslidedown
  // lowering for every element ends up being VL*LMUL.
  // TODO: Should we be directly costing the stack alternative? Doing so might
  // give us a more accurate upper bound.
  InstructionCost LinearBudget = VT.getVectorNumElements() * 2;

  // TODO: unify with TTI getSlideCost.
  InstructionCost PerSlideCost = 1;
  switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
  default: break;
  case RISCVII::VLMUL::LMUL_2:
    PerSlideCost = 2;
    break;
  case RISCVII::VLMUL::LMUL_4:
    PerSlideCost = 4;
    break;
  case RISCVII::VLMUL::LMUL_8:
    PerSlideCost = 8;
    break;
  }

  // TODO: Should we be using the build instseq then cost + evaluate scheme
  // we use for integer constants here?
  unsigned UndefCount = 0;
  for (const SDValue &V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }
    if (UndefCount) {
      LinearBudget -= PerSlideCost;
      UndefCount = 0;
    }
    LinearBudget -= PerSlideCost;
  }
  if (UndefCount) {
    LinearBudget -= PerSlideCost;
  }

  if (LinearBudget < 0)
    return SDValue();

  assert((!VT.isFloatingPoint() ||
          VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
         "Illegal type which will result in reserved encoding");

  const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;

  SDValue Vec = DAG.getUNDEF(ContainerVT);
  UndefCount = 0;
  for (SDValue V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }
    if (UndefCount) {
      const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                          Vec, Offset, Mask, VL, Policy);
      UndefCount = 0;
    }
    auto OpCode =
        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
    if (!VT.isFloatingPoint())
      V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
    Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                      V, Mask, VL);
  }
  if (UndefCount) {
    const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                        Vec, Offset, Mask, VL, Policy);
  }
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}

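// Lower a splat of the i64 value formed by the pair {Hi, Lo} on RV32 with
// vector length VL. Constant and sign-extension patterns can use a single
// vmv.v.x; otherwise we fall back to SPLAT_VECTOR_SPLIT_I64_VL, a stack store
// followed by a stride-x0 vector load.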
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Lo, SDValue Hi, SDValue VL,
                                   SelectionDAG &DAG) {
  if (!Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

    // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
    // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
    // vlmax vsetvli or vsetivli to change the VL.
    // FIXME: Support larger constants?
    // FIXME: Support non-constant VLs by saturating?
    if (LoC == HiC) {
      SDValue NewVL;
      if (isAllOnesConstant(VL) ||
          (isa<RegisterSDNode>(VL) &&
           cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
      else if (isa<ConstantSDNode>(VL) &&
               isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);

      if (NewVL) {
        MVT InterVT =
            MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
        auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
                                    DAG.getUNDEF(InterVT), Lo,
                                    DAG.getRegister(RISCV::X0, MVT::i32));
        return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
      }
    }
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
  // even if it might be sign extended.
  if (Hi.isUndef())
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // Fall back to a stack store and stride x0 vector load.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
                     Hi, VL);
}

// Called by type legalization to handle splat of i64 on RV32.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Scalar, SDValue VL,
                                   SelectionDAG &DAG) {
  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
}

// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
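// Scalars no wider than XLEN become a single vmv.v.x/vfmv.v.f; the only hard
// case is an i64 scalar on RV32, which is handed off to splatSplitI64WithVL.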
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
                                MVT VT, const SDLoc &DL, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);

  MVT XLenVT = Subtarget.getXLenVT();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (Scalar.getValueType().bitsLE(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
  }

  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
         "Unexpected scalar for splat lowering!");

  if (isOneConstant(VL) && isNullConstant(Scalar))
    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
                       DAG.getConstant(0, DL, XLenVT), VL);

  // Otherwise use the more complicated splatting algorithm.
  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}

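// Return the scalable vector type that fills exactly one vector register
// (LMUL=1) for VT's element type, e.g. nxv2i32 for an i32 element type when
// RVVBitsPerBlock is 64.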
static MVT getLMUL1VT(MVT VT) {
  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
         "Unexpected vector MVT");
  return MVT::getScalableVectorVT(
      VT.getVectorElementType(),
      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
}

// This function lowers an insert of a scalar operand Scalar into lane
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
// to be non-zero.
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                                 const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() && "Expect VT is scalable vector type.");

  const MVT XLenVT = Subtarget.getXLenVT();
  SDValue Passthru = DAG.getUNDEF(VT);

  if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isNullConstant(Scalar.getOperand(1))) {
    SDValue ExtractedVal = Scalar.getOperand(0);
    MVT ExtractedVT = ExtractedVal.getSimpleValueType();
    MVT ExtractedContainerVT = ExtractedVT;
    if (ExtractedContainerVT.isFixedLengthVector()) {
      ExtractedContainerVT = getContainerForFixedLengthVector(
          DAG, ExtractedContainerVT, Subtarget);
      ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
                                             DAG, Subtarget);
    }
    if (ExtractedContainerVT.bitsLE(VT))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
                         DAG.getConstant(0, DL, XLenVT));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
                       DAG.getConstant(0, DL, XLenVT));
  }

  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
                       DAG.getUNDEF(VT), Scalar, VL);

  // Avoid the tricky legalization cases by falling back to using the
  // splat code which already handles it gracefully.
  if (!Scalar.getValueType().bitsLE(XLenVT))
    return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
                            DAG.getConstant(1, DL, XLenVT),
                            VT, DL, DAG, Subtarget);

  // If the operand is a constant, sign extend to increase our chances
  // of being able to use a .vi instruction. ANY_EXTEND would become a
  // a zero extend and the simm5 check in isel would fail.
  // FIXME: Should we ignore the upper bits in isel instead?
  unsigned ExtOpc =
      isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
                     DAG.getUNDEF(VT), Scalar, VL);
}

// Is this a shuffle that extracts either the even or odd elements of a vector?
// That is, specifically, either (a) or (b) below.
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns {Src Vector, Even Elements} on success
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const RISCVSubtarget &Subtarget) {
  // Need to be able to widen the vector.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  // Both input must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return false;

  // Src needs to have twice the number of elements.
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
    return false;

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != Mask.size())
    return false;

  // First index must be the first even or odd element from V1.
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;

  // The others must increase by 2 each time.
  // TODO: Support undef elements?
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;

  return true;
}

/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements. \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element. These can be the first element in a source or the element half
/// way through the source.
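/// For example, for a v8i8 shuffle the mask <0, 8, 1, 9, 2, 10, 3, 11> sets
/// EvenSrc = 0 and OddSrc = 8: even result elements come from the low half of
/// the first source and odd result elements from the low half of the second.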
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                int &OddSrc, const RISCVSubtarget &Subtarget) {
  // We need to be able to widen elements to the next larger integer type.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  int Size = Mask.size();
  int NumElts = VT.getVectorNumElements();
  assert(Size == (int)NumElts && "Unexpected mask size");

  SmallVector<unsigned, 2> StartIndexes;
  if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
    return false;

  EvenSrc = StartIndexes[0];
  OddSrc = StartIndexes[1];

  // One source should be low half of first vector.
  if (EvenSrc != 0 && OddSrc != 0)
    return false;

  // Subvectors will be extracted from either the start of the two input
  // vectors, or the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts will be extracted.
  // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
  // we'll create an illegal extract_subvector.
  // FIXME: We could support other values using a slidedown first.
  int HalfNumElts = NumElts / 2;
  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}

/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.
/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
/// returned rotation amount is for a rotate right, where elements move from
/// higher elements to lower elements. \p LoSrc indicates the first source
/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
/// 0 or 1 if a rotation is found.
///
/// NOTE: We talk about rotate to the right which matches how bit shift and
/// rotate instructions are described where LSBs are on the right, but LLVM IR
/// and the table below write vectors with the lowest elements on the left.
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
  int Size = Mask.size();

  // We need to detect various ways of spelling a rotation:
  //   [11, 12, 13, 14, 15,  0,  1,  2]
  //   [-1, 12, 13, 14, -1, -1,  1, -1]
  //   [-1, -1, -1, -1, -1, -1,  1,  2]
  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
  //   [-1,  4,  5,  6, -1, -1,  9, -1]
  //   [-1,  4,  5,  6, -1, -1, -1, -1]
  int Rotation = 0;
  LoSrc = -1;
  HiSrc = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;

    // Determine where a rotate vector would have started.
    int StartIdx = i - (M % Size);
    // The identity rotation isn't interesting, stop.
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      // The rotations don't match, so we can't match this mask.
      return -1;

    // Compute which value this mask is pointing at.
    int MaskSrc = M < Size ? 0 : 1;

    // Compute which of the two target values this index should be assigned to.
    // This reflects whether the high elements are remaining or the low elements
    // are remaining.
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (TargetSrc < 0)
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
      // This may be a rotation, but it pulls from the inputs in some
      // unsupported interleaving.
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((LoSrc >= 0 || HiSrc >= 0) &&
         "Failed to find a rotated input vector!");

  return Rotation;
}

// Lower a deinterleave shuffle to vnsrl.
// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
//                          -> [p, q, r, s] (EvenElts == false)
// VT is the type of the vector to return, <[vscale x ]n x ty>
// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
                                       bool EvenElts,
                                       const RISCVSubtarget &Subtarget,
                                       SelectionDAG &DAG) {
  // The result is a vector of type <m x n x ty>
  MVT ContainerVT = VT;
  // Convert fixed vectors to scalable if needed
  if (ContainerVT.isFixedLengthVector()) {
    assert(Src.getSimpleValueType().isFixedLengthVector());
    ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);

    // The source is a vector of type <m x n*2 x ty>
    MVT SrcContainerVT =
        MVT::getVectorVT(ContainerVT.getVectorElementType(),
                         ContainerVT.getVectorElementCount() * 2);
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
  // This also converts FP to int.
  unsigned EltBits = ContainerVT.getScalarSizeInBits();
  MVT WideSrcContainerVT = MVT::getVectorVT(
      MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
  Src = DAG.getBitcast(WideSrcContainerVT, Src);

  // The integer version of the container type.
  MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();

  // If we want even elements, then the shift amount is 0. Otherwise, shift by
  // the original element size.
  unsigned Shift = EvenElts ? 0 : EltBits;
  SDValue SplatShift = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
      DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
  SDValue Res =
      DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
                  DAG.getUNDEF(IntContainerVT), TrueMask, VL);
  // Cast back to FP if needed.
  Res = DAG.getBitcast(ContainerVT, Res);

  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}

// Lower the following shuffle to vslidedown.
// a)
// t49: v8i8 = extract_subvector t13, Constant:i64<0>
// t109: v8i8 = extract_subvector t13, Constant:i64<8>
// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
// b)
// t69: v16i16 = extract_subvector t68, Constant:i64<0>
// t23: v8i16 = extract_subvector t69, Constant:i64<0>
// t29: v4i16 = extract_subvector t23, Constant:i64<4>
// t26: v8i16 = extract_subvector t69, Constant:i64<8>
// t30: v4i16 = extract_subvector t26, Constant:i64<0>
// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
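// In both cases the shuffle reduces to sliding the common source vector down
// by the first rebuilt mask index and extracting the leading VT-sized
// subvector.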
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
                                               SDValue V1, SDValue V2,
                                               ArrayRef<int> Mask,
                                               const RISCVSubtarget &Subtarget,
                                               SelectionDAG &DAG) {
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
    uint64_t Offset = 0;
    while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector. But we don't want to match the case.
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask is also continuous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}

// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e. inserting right
  // up to the very end of it, then we don't actually care about the tail.
  if (NumSubElts + Index >= (int)NumElts)
    Policy |= RISCVII::TAIL_AGNOSTIC;

  InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
  ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
  SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);

  SDValue Res;
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (Index == 0)
    Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
                      VL);
  else
    Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
                      DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}

/// Match v(f)slide1up/down idioms. These operations involve sliding
/// N-1 elements to make room for an inserted scalar at one end.
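/// For example, a mask that shifts every element of the non-splat source up by
/// one lane and fills lane 0 from a splatted scalar maps to v(f)slide1up; the
/// mirrored pattern that shifts down and fills the last lane maps to
/// v(f)slide1down.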
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
                                            SDValue V1, SDValue V2,
                                            ArrayRef<int> Mask,
                                            const RISCVSubtarget &Subtarget,
                                            SelectionDAG &DAG) {
  bool OpsSwapped = false;
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))
      return SDValue();
    std::swap(V1, V2);
    OpsSwapped = true;
  }
  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
  if (!Splat)
    return SDValue();

  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
    const unsigned S = (Offset > 0) ? 0 : -Offset;
    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
        return false;
    return true;
  };

  const unsigned NumElts = VT.getVectorNumElements();
  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
    return SDValue();

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // Inserted lane must come from splat, undef scalar is legal but not profitable.
  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
    return SDValue();

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  auto OpCode = IsVSlidedown ?
      (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
      (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
  if (!VT.isFloatingPoint())
    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT),
                         convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
                         Splat, TrueMask, VL);
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}

// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
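// Elementwise the result is zext(EvenV) + (zext(OddV) << SEW): the even and
// odd inputs land in the low and high halves of each widened lane before the
// final bitcast back to the narrow element type.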
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
                                     const SDLoc &DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  MVT VecVT = EvenV.getSimpleValueType();
  MVT VecContainerVT = VecVT; // <vscale x n x ty>
  // Convert fixed vectors to scalable if needed
  if (VecContainerVT.isFixedLengthVector()) {
    VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
    OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
  }

  assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());

  // We're working with a vector of the same size as the resulting
  // interleaved vector, but with half the number of elements and
  // twice the SEW (Hence the restriction on not using the maximum
  // ELEN)
  MVT WideVT =
      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
                       VecVT.getVectorElementCount());
  MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
  if (WideContainerVT.isFixedLengthVector())
    WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);

  // Bitcast the input vectors to integers in case they are FP
  VecContainerVT = VecContainerVT.changeTypeToInteger();
  EvenV = DAG.getBitcast(VecContainerVT, EvenV);
  OddV = DAG.getBitcast(VecContainerVT, OddV);

  auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(WideContainerVT);

  SDValue Interleaved;
  if (Subtarget.hasStdExtZvbb()) {
    // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
    SDValue OffsetVec =
        DAG.getSplatVector(VecContainerVT, DL,
                           DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
                                           Subtarget.getXLenVT()));
    Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                              OffsetVec, Passthru, Mask, VL);
    Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
                              Interleaved, EvenV, Passthru, Mask, VL);
  } else {
    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
    // vwaddu.vv
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    // = (OddV * 0x100...00) + EvenV
    // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use original type
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed
  MVT ResultVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}

// If we have a vector of bits that we want to reverse, we can use a vbrev on a
// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
                                      SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  assert(VT.getVectorElementType() == MVT::i1);

  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
                                        SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have zvbb or the larger element type > ELEN, the operation will
  // be expanded.
  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
                                                               ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we need
  // to insert it into the larger vector and then shift up the reversed bits
  // afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
                    V, DAG.getVectorIdxConstant(0, DL));

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift up the reversed bits if the vector didn't exactly fit into the larger
  // element type.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
                      DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));

  Res = DAG.getBitcast(ViaBitVT, Res);

  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                      DAG.getVectorIdxConstant(0, DL));
  return Res;
}

// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
                                           SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);

  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned NumSubElts, RotateAmt;
  if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
                                          NumElts, NumSubElts, RotateAmt))
    return SDValue();
  MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
                                  NumElts / NumSubElts);

  // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
  if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
    return SDValue();

  SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));

  SDValue Rotate;
  // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
  // so canonicalize to vrev8.
  if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
    Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
  else
    Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
                         DAG.getConstant(RotateAmt, DL, RotateVT));

  return DAG.getBitcast(VT, Rotate);
}

4558 static SDValue
lowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
,
4559 const RISCVSubtarget
&Subtarget
) {
4560 SDValue V1
= Op
.getOperand(0);
4561 SDValue V2
= Op
.getOperand(1);
4563 MVT XLenVT
= Subtarget
.getXLenVT();
4564 MVT VT
= Op
.getSimpleValueType();
4565 unsigned NumElts
= VT
.getVectorNumElements();
4566 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(Op
.getNode());
4568 if (VT
.getVectorElementType() == MVT::i1
) {
4569 // Lower to a vror.vi of a larger element type if possible before we promote
4571 if (SDValue V
= lowerVECTOR_SHUFFLEAsRotate(SVN
, DAG
, Subtarget
))
4573 if (SDValue V
= lowerBitreverseShuffle(SVN
, DAG
, Subtarget
))
4576 // Promote i1 shuffle to i8 shuffle.
4577 MVT WidenVT
= MVT::getVectorVT(MVT::i8
, VT
.getVectorElementCount());
4578 V1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, WidenVT
, V1
);
4579 V2
= V2
.isUndef() ? DAG
.getUNDEF(WidenVT
)
4580 : DAG
.getNode(ISD::ZERO_EXTEND
, DL
, WidenVT
, V2
);
4581 SDValue Shuffled
= DAG
.getVectorShuffle(WidenVT
, DL
, V1
, V2
, SVN
->getMask());
4582 return DAG
.getSetCC(DL
, VT
, Shuffled
, DAG
.getConstant(0, DL
, WidenVT
),
4586 MVT ContainerVT
= getContainerForFixedLengthVector(DAG
, VT
, Subtarget
);
4588 auto [TrueMask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(
            Ld->getBasePtr(), TypeSize::Fixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          SDValue Ops[] = {Ld->getChain(),
                           IntID,
                           DAG.getUNDEF(ContainerVT),
                           NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT),
                           VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it into
        // the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat =
            DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
                                   V1, DAG.getConstant(Lane, DL, XLenVT),
                                   DAG.getUNDEF(ContainerVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }
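
  // Non-splat shuffles: try the cheap slide and rotate forms before
  // considering interleaves, vector selects, or the vrgather fallback below.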
  ArrayRef<int> Mask = SVN->getMask();

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
  // available.
  if (Subtarget.hasStdExtZvkb())
    if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
      return V;

  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
  // be undef which can be handled with a single SLIDEDOWN/UP.
  int LoSrc, HiSrc;
  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
  if (Rotation > 0) {
    SDValue LoV, HiV;
    if (LoSrc >= 0) {
      LoV = LoSrc == 0 ? V1 : V2;
      LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
    }
    if (HiSrc >= 0) {
      HiV = HiSrc == 0 ? V1 : V2;
      HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
    }

    // We found a rotation. We need to slide HiV down by Rotation. Then we need
    // to slide LoV up by (NumElts - Rotation).
    unsigned InvRotate = NumElts - Rotation;

    SDValue Res = DAG.getUNDEF(ContainerVT);
    if (HiV) {
      // Even though we could use a smaller VL, don't to avoid a vsetivli
      // toggle.
      Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
                          DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
    }
    if (LoV)
      Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
                        DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
                        RISCVII::TAIL_AGNOSTIC);

    return convertFromScalableVector(VT, Res, DAG, Subtarget);
  }
  // If this is a deinterleave and we can widen the vector, then we can use
  // vnsrl to deinterleave.
  if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
    return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
                                   Subtarget, DAG);
  }

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Detect an interleave shuffle and lower to
  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
  int EvenSrc, OddSrc;
  if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
    // Extract the halves of the vectors.
    MVT HalfVT = VT.getHalfNumVectorElementsVT();

    int Size = Mask.size();
    SDValue EvenV, OddV;
    assert(EvenSrc >= 0 && "Undef source?");
    EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
    EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
                        DAG.getConstant(EvenSrc % Size, DL, XLenVT));

    assert(OddSrc >= 0 && "Undef source?");
    OddV = (OddSrc / Size) == 0 ? V1 : V2;
    OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
                       DAG.getConstant(OddSrc % Size, DL, XLenVT));

    return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
  }
  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an element
  // at the corresponding index in either source vectors.
  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  SmallVector<SDValue> MaskVals;
  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
  bool InvertMask = IsSelect == SwapOps;

  // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
  // half.
  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;

  // Now construct the mask that will be used by the vselect or blended
  // vrgather operation. For vrgathers, construct the appropriate indices into
  // each vector.
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    if (!IsSelect) {
      bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
      GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                                     ? DAG.getConstant(MaskIndex, DL, XLenVT)
                                     : DAG.getUNDEF(XLenVT));
      GatherIndicesRHS.push_back(
          IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
                            : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
      if (IsLHSOrUndefIndex && MaskIndex >= 0)
        ++LHSIndexCounts[MaskIndex];
      if (!IsLHSOrUndefIndex)
        ++RHSIndexCounts[MaskIndex - NumElts];
    }
  }

  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  if (IsSelect)
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
  // We might be able to express the shuffle as a bitrotate. But even if we
  // don't have Zvkb and have to expand, the expanded sequence of approx. 2
  // shifts and a vor will have a higher throughput than a vrgather.
  if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
    return V;

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  // If the mask allows, we can do all the index computation in 16 bits. This
  // requires less work and less register pressure at high LMUL, and creates
  // smaller constants which may be cheaper to materialize.
  if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
      (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
                              Subtarget);
  } else {
    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                           DAG.getConstant(SplatIndex, DL, XLenVT),
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    } else {
      SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
      LHSIndices =
          convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    }
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);

    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
                           DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
                           SelectMask, VL);
    } else {
      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
      RHSIndices =
          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
                           SelectMask, VL);
    }
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
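
// Tell the generic DAG combiner which shuffle masks we expect to lower
// cheaply: splats of any type, and element rotates or interleaves of legal
// non-i1 vector types.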
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Support splats for any type. These should type legalize well.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  // Only support legal VTs for other shuffles for now.
  if (!isTypeLegal(VT))
    return false;

  MVT SVT = VT.getSimpleVT();

  // Not for i1 vectors.
  if (SVT.getScalarType() == MVT::i1)
    return false;

  int Dummy1, Dummy2;
  return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
         isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
}
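
// For an operand reduced to a single set bit (the X & -X step below for the
// trailing-zero cases), unsigned int-to-float conversion is exact, so the
// biased exponent of the result encodes log2 of the input, from which both
// zero counts can be recovered with simple arithmetic.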
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
// the exponent.
SDValue
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  SDValue Src = Op.getOperand(0);

  MVT ContainerVT = VT;

  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  }

  // We choose FP type that can represent the value if possible. Otherwise, we
  // use rounding to zero conversion for correct exponent of the result.
  // TODO: Use f16 for i8 when possible?
  MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
  if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
    FloatEltVT = MVT::f32;
  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

  // Legal types should have been checked in the RISCVTargetLowering
  // constructor.
  // TODO: Splitting may make sense in some cases.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
         "Expected legal float type!");

  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
  // The trailing zero count is equal to log2 of this single bit value.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNegative(Src, DL, VT);
    Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
                              Src, Mask, VL);
    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
  }

  // We have a legal FP type, convert to it.
  SDValue FloatVal;
  if (FloatVT.bitsGT(VT)) {
    if (Op->isVPOpcode())
      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
    else
      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
  } else {
    // Use RTZ to avoid rounding influencing exponent of FloatVal.
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }
    if (!Op->isVPOpcode())
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
    SDValue RTZRM =
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
    MVT ContainerFloatVT =
        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
                           Src, Mask, RTZRM, VL);
    if (VT.isFixedLengthVector())
      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
  }

  // Bitcast to integer and shift the exponent to the LSB.
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  SDValue Exp;
  // Restore back to original type. Truncation after SRL is to generate vnsrl.
  if (Op->isVPOpcode()) {
    Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
  } else {
    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT));
    if (IntVT.bitsLT(VT))
      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
    else if (IntVT.bitsGT(VT))
      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
  }

  // The exponent contains log2 of the value in biased form.
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  // For trailing zeros, we just need to subtract the bias.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT));
  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);

  // For leading zeros, we need to remove the bias and convert from log2 to
  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
  unsigned Adjust = ExponentBias + (EltSize - 1);
  SDValue Res;
  if (Op->isVPOpcode())
    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
                      Mask, VL);
  else
    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);

  // The above result with zero input equals to Adjust which is greater than
  // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
  if (Op.getOpcode() == ISD::CTLZ)
    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
  else if (Op.getOpcode() == ISD::VP_CTLZ)
    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
  return Res;
}
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
// the load is already correctly-aligned, it returns SDValue().
SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Load = cast<LoadSDNode>(Op);
  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");

  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                     Load->getMemoryVT(),
                                     *Load->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
                          Load->getPointerInfo(), Load->getOriginalAlign(),
                          Load->getMemOperand()->getFlags());
  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
}
// While RVV has alignment restrictions, we should always be able to store as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
// returns SDValue() if the store is already correctly aligned.
SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);
  assert(Store && Store->getValue().getValueType().isVector() &&
         "Expected vector store");

  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                     Store->getMemoryVT(),
                                     *Store->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  SDValue StoredVal = Store->getValue();
  MVT VT = StoredVal.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV store type");
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  StoredVal = DAG.getBitcast(NewVT, StoredVal);
  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
                      Store->getPointerInfo(), Store->getOriginalAlign(),
                      Store->getMemOperand()->getFlags());
}
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");

  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();

  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return Op;

  // We only need to cost the immediate, if constant pool lowering is enabled.
  if (!Subtarget.useConstantPoolForLargeInts())
    return Op;

  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
    return Op;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (DAG.shouldOptForSize())
    return SDValue();

  // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
  // that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq(
      Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc);
  if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
    return Op;

  return SDValue();
}
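
// Lower ATOMIC_FENCE. With Ztso only a sequentially-consistent cross-thread
// fence needs a real fence instruction; singlethread fences only have to act
// as compiler barriers.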
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDLoc dl(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  if (Subtarget.hasStdExtZtso()) {
    // The only fence that needs an instruction is a sequentially-consistent
    // cross-thread fence.
    if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
        FenceSSID == SyncScope::System)
      return Op;

    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
  }

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));

  return Op;
}
SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned Check = Op.getConstantOperandVal(1);
  unsigned TDCMask = 0;
  if (Check & fcSNan)
    TDCMask |= RISCV::FPMASK_Signaling_NaN;
  if (Check & fcQNan)
    TDCMask |= RISCV::FPMASK_Quiet_NaN;
  if (Check & fcPosInf)
    TDCMask |= RISCV::FPMASK_Positive_Infinity;
  if (Check & fcNegInf)
    TDCMask |= RISCV::FPMASK_Negative_Infinity;
  if (Check & fcPosNormal)
    TDCMask |= RISCV::FPMASK_Positive_Normal;
  if (Check & fcNegNormal)
    TDCMask |= RISCV::FPMASK_Negative_Normal;
  if (Check & fcPosSubnormal)
    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
  if (Check & fcNegSubnormal)
    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
  if (Check & fcPosZero)
    TDCMask |= RISCV::FPMASK_Positive_Zero;
  if (Check & fcNegZero)
    TDCMask |= RISCV::FPMASK_Negative_Zero;

  bool IsOneBitMask = isPowerOf2_32(TDCMask);

  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);

  if (VT.isVector()) {
    SDValue Op0 = Op.getOperand(0);
    MVT VT0 = Op.getOperand(0).getSimpleValueType();

    if (VT.isScalableVector()) {
      MVT DstVT = VT0.changeVectorElementTypeToInteger();
      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
      if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
        Mask = Op.getOperand(2);
        VL = Op.getOperand(3);
      }
      SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
                                    VL, Op->getFlags());
      if (IsOneBitMask)
        return DAG.getSetCC(DL, VT, FPCLASS,
                            DAG.getConstant(TDCMask, DL, DstVT),
                            ISD::CondCode::SETEQ);
      SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
                                DAG.getConstant(TDCMask, DL, DstVT));
      return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
                          ISD::SETNE);
    }

    MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);
    MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
    auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
    if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
      Mask = Op.getOperand(2);
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
      VL = Op.getOperand(3);
    }

    Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);

    SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
                                  Mask, VL, Op->getFlags());

    TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                           DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
    if (IsOneBitMask) {
      SDValue VMSEQ =
          DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                      {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
                       DAG.getUNDEF(ContainerVT), Mask, VL});
      return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
    }
    SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
                              TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);

    SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
    SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                            DAG.getUNDEF(ContainerDstVT), SplatZero, VL);

    SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                                {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
                                 DAG.getUNDEF(ContainerVT), Mask, VL});
    return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
  }

  SDValue FPCLASS =
      DAG.getNode(RISCVISD::FPCLASS, DL, XLenVT, Op.getOperand(0));
  SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FPCLASS, TDCMaskV);
  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
                             ISD::CondCode::SETNE);
  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
// operations propagate nans.
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);

  if (!VT.isVector()) {
    MVT XLenVT = Subtarget.getXLenVT();

    // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
    // ensures that when one input is a nan, the other will also be a nan
    // allowing the nan to propagate. If both inputs are nan, this will swap the
    // inputs which is harmless.

    SDValue NewY = Y;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
      SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
      NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
    }

    SDValue NewX = X;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
      SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
      NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
    }

    unsigned Opc =
        Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
    return DAG.getNode(Opc, DL, VT, NewX, NewY);
  }

  // Check no NaNs before converting to fixed vector scalable.
  bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
  bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
    Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue NewY = Y;
  if (!XIsNeverNan) {
    SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {X, X, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewY =
        DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
  }

  SDValue NewX = X;
  if (!YIsNeverNan) {
    SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {Y, Y, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewX =
        DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
  }

  unsigned Opc =
      Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
                            DAG.getUNDEF(ContainerVT), Mask, VL);
  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}
/// Get a RISC-V target specified VL op for a given SDNode.
static unsigned getRISCVVLOp(SDValue Op) {
#define OP_CASE(NODE)                                                          \
  case ISD::NODE:                                                              \
    return RISCVISD::NODE##_VL;
#define VP_CASE(NODE)                                                          \
  case ISD::VP_##NODE:                                                         \
    return RISCVISD::NODE##_VL;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
  OP_CASE(STRICT_FADD)
  OP_CASE(STRICT_FSUB)
  OP_CASE(STRICT_FMUL)
  OP_CASE(STRICT_FDIV)
  OP_CASE(STRICT_FSQRT)
  VP_CASE(ADD)        // VP_ADD
  VP_CASE(SUB)        // VP_SUB
  VP_CASE(MUL)        // VP_MUL
  VP_CASE(SDIV)       // VP_SDIV
  VP_CASE(SREM)       // VP_SREM
  VP_CASE(UDIV)       // VP_UDIV
  VP_CASE(UREM)       // VP_UREM
  VP_CASE(SHL)        // VP_SHL
  VP_CASE(FADD)       // VP_FADD
  VP_CASE(FSUB)       // VP_FSUB
  VP_CASE(FMUL)       // VP_FMUL
  VP_CASE(FDIV)       // VP_FDIV
  VP_CASE(FNEG)       // VP_FNEG
  VP_CASE(FABS)       // VP_FABS
  VP_CASE(SMIN)       // VP_SMIN
  VP_CASE(SMAX)       // VP_SMAX
  VP_CASE(UMIN)       // VP_UMIN
  VP_CASE(UMAX)       // VP_UMAX
  VP_CASE(FCOPYSIGN)  // VP_FCOPYSIGN
  VP_CASE(SETCC)      // VP_SETCC
  VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
  VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
  VP_CASE(BITREVERSE) // VP_BITREVERSE
  VP_CASE(BSWAP)      // VP_BSWAP
  VP_CASE(CTLZ)       // VP_CTLZ
  VP_CASE(CTTZ)       // VP_CTTZ
  VP_CASE(CTPOP)      // VP_CTPOP
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    return RISCVISD::CTLZ_VL;
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    return RISCVISD::CTTZ_VL;
  case ISD::FMA:
  case ISD::VP_FMA:
    return RISCVISD::VFMADD_VL;
  case ISD::STRICT_FMA:
    return RISCVISD::STRICT_VFMADD_VL;
  case ISD::AND:
  case ISD::VP_AND:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMAND_VL;
    return RISCVISD::AND_VL;
  case ISD::OR:
  case ISD::VP_OR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMOR_VL;
    return RISCVISD::OR_VL;
  case ISD::XOR:
  case ISD::VP_XOR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMXOR_VL;
    return RISCVISD::XOR_VL;
  case ISD::VP_SELECT:
    return RISCVISD::VSELECT_VL;
  case ISD::VP_MERGE:
    return RISCVISD::VP_MERGE_VL;
  case ISD::VP_ASHR:
    return RISCVISD::SRA_VL;
  case ISD::VP_LSHR:
    return RISCVISD::SRL_VL;
  case ISD::VP_SQRT:
    return RISCVISD::FSQRT_VL;
  case ISD::VP_SIGN_EXTEND:
    return RISCVISD::VSEXT_VL;
  case ISD::VP_ZERO_EXTEND:
    return RISCVISD::VZEXT_VL;
  case ISD::VP_FP_TO_SINT:
    return RISCVISD::VFCVT_RTZ_X_F_VL;
  case ISD::VP_FP_TO_UINT:
    return RISCVISD::VFCVT_RTZ_XU_F_VL;
  case ISD::FMINNUM:
  case ISD::VP_FMINNUM:
    return RISCVISD::VFMIN_VL;
  case ISD::FMAXNUM:
  case ISD::VP_FMAXNUM:
    return RISCVISD::VFMAX_VL;
  }
#undef OP_CASE
#undef VP_CASE
}
/// Return true if a RISC-V target specified op has a merge operand.
static bool hasMergeOp(unsigned Opcode) {
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
         "not a RISC-V target specific op");
  static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
                RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                    ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                "adding target specific op should update this function");
  if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
    return true;
  if (Opcode == RISCVISD::FCOPYSIGN_VL)
    return true;
  if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
    return true;
  if (Opcode == RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
    return true;
  return false;
}

/// Return true if a RISC-V target specified op has a mask operand.
static bool hasMaskOp(unsigned Opcode) {
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
         "not a RISC-V target specific op");
  static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
                RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                    ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                "adding target specific op should update this function");
  if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL &&
      Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
    return true;
  return false;
}
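
// Split a vector operation into operations on the low and high halves of the
// vector operands (scalar operands are reused by both halves), then
// concatenate the results.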
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);
  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}
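
// Same as SplitVectorOp, but for VP operations: the explicit vector length
// operand is split with SplitEVL instead of SplitVector.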
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);
  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
      std::tie(LoOperands[j], HiOperands[j]) =
          DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
      continue;
    }
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}
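
// Split a VP reduction into two halves, chained by feeding the result of the
// low-half reduction in as the start value of the high-half reduction.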
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);

  auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
  auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
  auto [EVLLo, EVLHi] =
      DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);

  SDValue ResLo =
      DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                  {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
  return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                     {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
}
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::ATOMIC_FENCE:
    return LowerATOMIC_FENCE(Op, DAG, Subtarget);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::Constant:
    return lowerConstant(Op, DAG, Subtarget);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ROTL:
  case ISD::ROTR:
    if (Op.getValueType().isFixedLengthVector()) {
      assert(Subtarget.hasStdExtZvkb());
      return lowerToScalableOp(Op, DAG);
    }
    assert(Subtarget.hasVendorXTHeadBb() &&
           !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
           "Unexpected custom legalization");
    // XTHeadBb only supports rotate by constant.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    return Op;
  case ISD::BITCAST: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::f16 && Op0VT == MVT::i16 &&
        Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
        Subtarget.hasStdExtZfbfmin()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
        Subtarget.hasStdExtFOrZfinx()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
        Subtarget.hasStdExtZfa()) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
      SDValue RetReg =
          DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
      return RetReg;
    }

    // Consider other scalar<->scalar casts as legal if the types are legal.
    // Otherwise expand them.
    if (!VT.isVector() && !Op0VT.isVector()) {
      if (isTypeLegal(VT) && isTypeLegal(Op0VT))
        return Op;
      return SDValue();
    }

    assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
           "Unexpected types");

    if (VT.isFixedLengthVector()) {
      // We can handle fixed length vector bitcasts with a simple replacement
      // in isel.
      if (Op0VT.isFixedLengthVector())
        return Op;
      // When bitcasting from scalar to fixed-length vector, insert the scalar
      // into a one-element vector of the result type, and perform a vector
      // bitcast.
      if (!Op0VT.isVector()) {
        EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
        if (!isTypeLegal(BVT))
          return SDValue();
        return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
                                              DAG.getUNDEF(BVT), Op0,
                                              DAG.getConstant(0, DL, XLenVT)));
      }
    }
    // Custom-legalize bitcasts from fixed-length vector types to scalar types
    // thus: bitcast the vector to a one-element vector type whose element type
    // is the same as the result type, and extract the first element.
    if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (!isTypeLegal(BVT))
        return SDValue();
      SDValue BVec = DAG.getBitcast(BVT, Op0);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                         DAG.getConstant(0, DL, XLenVT));
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::BITREVERSE: {
    MVT VT = Op.getSimpleValueType();
    if (VT.isFixedLengthVector()) {
      assert(Subtarget.hasStdExtZvbb());
      return lowerToScalableOp(Op, DAG);
    }
    SDLoc DL(Op);
    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
    assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
    // Expand bitreverse to a bswap(rev8) followed by brev8.
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
    return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
  }
  case ISD::TRUNCATE:
    // Only custom-lower vector truncates
    if (!Op.getSimpleValueType().isVector())
      return Op;
    return lowerVectorTruncLike(Op, DAG);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue Scalar = Op.getOperand(0);
    if (VT.getVectorElementType() == MVT::i1) {
      MVT WideVT = VT.changeVectorElementType(MVT::i8);
      SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
    }
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
    SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
                            DAG.getUNDEF(ContainerVT), Scalar, VL);
    if (VT.isFixedLengthVector())
      V = convertFromScalableVector(VT, V, DAG, Subtarget);
    return V;
  }
  case ISD::VSCALE: {
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
    if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
      report_fatal_error("Support for VLEN==32 is incomplete.");
    // We assume VLENB is a multiple of 8. We manually choose the best shift
    // here because SimplifyDemandedBits isn't always able to simplify it.
    uint64_t Val = Op.getConstantOperandVal(0);
    if (isPowerOf2_64(Val)) {
      uint64_t Log2 = Log2_64(Val);
      if (Log2 < 3)
        Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
                          DAG.getConstant(3 - Log2, DL, VT));
      else if (Log2 > 3)
        Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
                          DAG.getConstant(Log2 - 3, DL, XLenVT));
    } else if ((Val % 8) == 0) {
      // If the multiplier is a multiple of 8, scale it down to avoid needing
      // to shift the VLENB value.
      Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
                        DAG.getConstant(Val / 8, DL, XLenVT));
    } else {
      SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
                                   DAG.getConstant(3, DL, XLenVT));
      Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
                        DAG.getConstant(Val, DL, XLenVT));
    }
    return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
  }
  case ISD::FPOWI: {
    // Custom promote f16 powi with illegal i32 integer type on RV64. Once
    // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
    if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
        Op.getOperand(1).getValueType() == MVT::i32) {
      SDLoc DL(Op);
      SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
      SDValue Powi =
          DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
      return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    return SDValue();
  }
  case ISD::FMAXIMUM:
  case ISD::FMINIMUM:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
  case ISD::FP_EXTEND: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
      return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
    if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
      SDValue FloatVal =
          DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
      return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
    }

    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  }
  case ISD::FP_ROUND: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
      return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
    if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
        Subtarget.hasStdExtDOrZdinx()) {
      SDValue FloatVal =
          DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
                      DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
      return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
    }

    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  }
  case ISD::STRICT_FP_ROUND:
  case ISD::STRICT_FP_EXTEND:
    return lowerStrictFPExtendOrRoundLike(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        Op.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      SDLoc DL(Op);
      MVT NVT =
          MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
      SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
      return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    [[fallthrough]];
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    if (SDValue Op1 = Op.getOperand(0);
        Op1.getValueType().isVector() &&
        Op1.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op1.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      SDLoc DL(Op);
      MVT NVT = MVT::getVectorVT(MVT::f32,
                                 Op1.getValueType().getVectorElementCount());
      SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
      return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
    }
    [[fallthrough]];
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    bool IsStrict = Op->isStrictFPOpcode();
    SDValue Src = Op.getOperand(0 + IsStrict);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > (2 * SrcEltSize)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
                              Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        if (IsStrict)
          return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
                             Op.getOperand(0), Ext);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to integers.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      if (IsStrict) {
        auto [FExt, Chain] =
            DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
        return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
      }
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > (2 * EltSize)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (IsStrict) {
          SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
                                       DAG.getVTList(InterimFVT, MVT::Other),
                                       Op.getOperand(0), Src);
          SDValue Chain = Int2FP.getValue(1);
          return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
        }
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                    VT.getVectorElementCount());
      if (IsStrict) {
        SDValue FP2Int =
            DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
                        Op.getOperand(0), Src);
        SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
        return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
      }
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions halving/doubling ones.
    if (!VT.isFixedLengthVector())
      return Op;

    // For fixed-length vectors we lower to a custom "VL" node.
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Impossible opcode");
    case ISD::FP_TO_SINT:
      RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
      break;
    case ISD::FP_TO_UINT:
      RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
      break;
    case ISD::SINT_TO_FP:
      RVVOpc = RISCVISD::SINT_TO_FP_VL;
      break;
    case ISD::UINT_TO_FP:
      RVVOpc = RISCVISD::UINT_TO_FP_VL;
      break;
    case ISD::STRICT_FP_TO_SINT:
      RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
      break;
    case ISD::STRICT_FP_TO_UINT:
      RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
      break;
    case ISD::STRICT_SINT_TO_FP:
      RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
      break;
    case ISD::STRICT_UINT_TO_FP:
      RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
      break;
    }

    MVT ContainerVT = getContainerForFixedLengthVector(VT);
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    assert(ContainerVT.getVectorElementCount() ==
               SrcContainerVT.getVectorElementCount() &&
           "Expected same element count");

    auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsStrict) {
      Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                        Op.getOperand(0), Src, Mask, VL);
      SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
      return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
    }
    Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
    return convertFromScalableVector(VT, Src, DAG, Subtarget);
  }
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
  case ISD::FP_TO_BF16: {
    // Custom lower to ensure the libcall return is passed in an FPR on hard
    // float ABIs.
    assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
    SDLoc DL(Op);
    MakeLibCallOptions CallOptions;
    RTLIB::Libcall LC =
        RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
    SDValue Res =
        makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
    if (Subtarget.is64Bit() && !RV64LegalI32)
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
    return DAG.getBitcast(MVT::i32, Res);
  }
  case ISD::BF16_TO_FP: {
    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    Op = DAG.getNode(
        ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
        DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
    SDValue Res = Subtarget.is64Bit()
                      ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
                      : DAG.getBitcast(MVT::f32, Op);
    // fp_extend if the target VT is bigger than f32.
    if (VT != MVT::f32)
      return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
    return Res;
  }
  case ISD::FP_TO_FP16: {
    // Custom lower to ensure the libcall return is passed in an FPR on hard
    // float ABIs.
    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
    SDLoc DL(Op);
    MakeLibCallOptions CallOptions;
    RTLIB::Libcall LC =
        RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
    SDValue Res =
        makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
    if (Subtarget.is64Bit() && !RV64LegalI32)
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
    return DAG.getBitcast(MVT::i32, Res);
  }
  case ISD::FP16_TO_FP: {
    // Custom lower to ensure the libcall argument is passed in an FPR on hard
    // float ABIs.
    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
    SDLoc DL(Op);
    MakeLibCallOptions CallOptions;
    SDValue Arg = Subtarget.is64Bit()
                      ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
                                    Op.getOperand(0))
                      : DAG.getBitcast(MVT::f32, Op.getOperand(0));
    SDValue Res =
        makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
            .first;
    return Res;
  }
  case ISD::FTRUNC:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
    return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
  case ISD::LRINT:
  case ISD::LLRINT:
    return lowerVectorXRINT(Op, DAG, Subtarget);
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAX:
    if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorReductionOp(Op, DAG);
    return lowerVPREDUCE(Op, DAG);
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
    if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
    return lowerVPREDUCE(Op, DAG);
  case ISD::UNDEF: {
    MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
    return convertFromScalableVector(Op.getSimpleValueType(),
                                     DAG.getUNDEF(ContainerVT), DAG, Subtarget);
  }
6163 case ISD::INSERT_SUBVECTOR
:
6164 return lowerINSERT_SUBVECTOR(Op
, DAG
);
6165 case ISD::EXTRACT_SUBVECTOR
:
6166 return lowerEXTRACT_SUBVECTOR(Op
, DAG
);
6167 case ISD::VECTOR_DEINTERLEAVE
:
6168 return lowerVECTOR_DEINTERLEAVE(Op
, DAG
);
6169 case ISD::VECTOR_INTERLEAVE
:
6170 return lowerVECTOR_INTERLEAVE(Op
, DAG
);
6171 case ISD::STEP_VECTOR
:
6172 return lowerSTEP_VECTOR(Op
, DAG
);
6173 case ISD::VECTOR_REVERSE
:
6174 return lowerVECTOR_REVERSE(Op
, DAG
);
6175 case ISD::VECTOR_SPLICE
:
6176 return lowerVECTOR_SPLICE(Op
, DAG
);
6177 case ISD::BUILD_VECTOR
:
6178 return lowerBUILD_VECTOR(Op
, DAG
, Subtarget
);
6179 case ISD::SPLAT_VECTOR
:
6180 if (Op
.getValueType().getScalarType() == MVT::f16
&&
6181 (Subtarget
.hasVInstructionsF16Minimal() &&
6182 !Subtarget
.hasVInstructionsF16())) {
6183 if (Op
.getValueType() == MVT::nxv32f16
)
6184 return SplitVectorOp(Op
, DAG
);
6187 DAG
.getNode(ISD::FP_EXTEND
, DL
, MVT::f32
, Op
.getOperand(0));
6188 SDValue NewSplat
= DAG
.getNode(
6189 ISD::SPLAT_VECTOR
, DL
,
6190 MVT::getVectorVT(MVT::f32
, Op
.getValueType().getVectorElementCount()),
6192 return DAG
.getNode(ISD::FP_ROUND
, DL
, Op
.getValueType(), NewSplat
,
6193 DAG
.getIntPtrConstant(0, DL
, /*isTarget=*/true));
6195 if (Op
.getValueType().getVectorElementType() == MVT::i1
)
6196 return lowerVectorMaskSplat(Op
, DAG
);
6198 case ISD::VECTOR_SHUFFLE
:
6199 return lowerVECTOR_SHUFFLE(Op
, DAG
, Subtarget
);
6200 case ISD::CONCAT_VECTORS
: {
6201 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6202 // better than going through the stack, as the default expansion does.
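    // Illustration (added example, not from the upstream comment): concatenating
    // two v4i32 operands A and B into a v8i32 becomes
    //   (insert_subvector (insert_subvector undef:v8i32, A, 0), B, 4)
    // rather than a pair of stack stores followed by a reload.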
    MVT VT = Op.getSimpleValueType();
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      SDValue SubVec = OpIdx.value();
      // Don't insert undef subvectors.
      if (SubVec.isUndef())
        continue;
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    }
    return Vec;
  }
    if (auto V = expandUnalignedRVVLoad(Op, DAG))
      return V;
    if (Op.getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorLoadToRVV(Op, DAG);
    if (auto V = expandUnalignedRVVStore(Op, DAG))
      return V;
    if (Op.getOperand(1).getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return lowerMaskedLoad(Op, DAG);
    return lowerMaskedStore(Op, DAG);
  case ISD::SELECT_CC: {
    // This occurs because we custom legalize SETGT and SETUGT for setcc. That
    // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
    // into separate SETCC+SELECT just like LegalizeDAG.
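    // Illustration (added example): (select_cc a, b, setgt, t, f) expands to
    //   %cond = setcc a, b, setgt
    //   %res  = select %cond, t, f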
    SDValue Tmp1 = Op.getOperand(0);
    SDValue Tmp2 = Op.getOperand(1);
    SDValue True = Op.getOperand(2);
    SDValue False = Op.getOperand(3);
    EVT VT = Op.getValueType();
    SDValue CC = Op.getOperand(4);
    EVT CmpVT = Tmp1.getValueType();
    EVT CCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
    SDValue Cond =
        DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
    return DAG.getSelect(DL, VT, Cond, True, False);
  }
    MVT OpVT = Op.getOperand(0).getSimpleValueType();
    if (OpVT.isScalarInteger()) {
      MVT VT = Op.getSimpleValueType();
      SDValue LHS = Op.getOperand(0);
      SDValue RHS = Op.getOperand(1);
      ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
      assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
             "Unexpected CondCode");

      // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
      // convert this to the equivalent of (set(u)ge X, C+1) by using
      // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
      // into a register.
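      // Illustration (added example): (setugt X, 5) becomes
      //   (xori (sltiu X, 6), 1)
      // i.e. X >u 5 is computed as !(X <u 6), using only an immediate compare.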
      if (isa<ConstantSDNode>(RHS)) {
        int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
        if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
          // If this is an unsigned compare and the constant is -1, incrementing
          // the constant would change behavior. The result should be false.
          if (CCVal == ISD::SETUGT && Imm == -1)
            return DAG.getConstant(0, DL, VT);
          // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
          CCVal = ISD::getSetCCSwappedOperands(CCVal);
          SDValue SetCC = DAG.getSetCC(
              DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
          return DAG.getLogicalNOT(DL, SetCC, VT);
        }
      }

      // Not a constant we could handle, swap the operands and condition code to
      // SET(U)LT.
      CCVal = ISD::getSetCCSwappedOperands(CCVal);
      return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
    }

    if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);

    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
    return lowerToScalableOp(Op, DAG);
    if (Op.getSimpleValueType().isFixedLengthVector())
      return lowerToScalableOp(Op, DAG);
    // This can be called for an i32 shift amount that needs to be promoted.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerToScalableOp(Op, DAG);
    return lowerABS(Op, DAG);
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerToScalableOp(Op, DAG);
    assert(Op.getOpcode() != ISD::CTTZ);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::STRICT_FADD:
  case ISD::STRICT_FSUB:
  case ISD::STRICT_FMUL:
  case ISD::STRICT_FDIV:
  case ISD::STRICT_FSQRT:
  case ISD::STRICT_FMA:
    return lowerToScalableOp(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
    return lowerVectorStrictFSetcc(Op, DAG);
  case ISD::STRICT_FCEIL:
  case ISD::STRICT_FRINT:
  case ISD::STRICT_FFLOOR:
  case ISD::STRICT_FTRUNC:
  case ISD::STRICT_FNEARBYINT:
  case ISD::STRICT_FROUND:
  case ISD::STRICT_FROUNDEVEN:
    return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
  case ISD::VP_GATHER:
    return lowerMaskedGather(Op, DAG);
  case ISD::VP_SCATTER:
    return lowerMaskedScatter(Op, DAG);
  case ISD::GET_ROUNDING:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return lowerSET_ROUNDING(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::VP_SELECT:
    return lowerVPOp(Op, DAG);
    return lowerLogicVPOp(Op, DAG);
  case ISD::VP_FMINNUM:
  case ISD::VP_FMAXNUM:
  case ISD::VP_FCOPYSIGN:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVPOp(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::VP_IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPExtMaskOp(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::VP_TRUNCATE:
    return lowerVectorTruncLike(Op, DAG);
  case ISD::VP_FP_EXTEND:
  case ISD::VP_FP_ROUND:
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  case ISD::VP_SINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        Op.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op.getValueType() == MVT::nxv32f16)
        return SplitVPOp(Op, DAG);
      MVT NVT =
          MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
      auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
      return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
    if (SDValue Op1 = Op.getOperand(0);
        Op1.getValueType().isVector() &&
        Op1.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op1.getValueType() == MVT::nxv32f16)
        return SplitVPOp(Op, DAG);
      MVT NVT = MVT::getVectorVT(MVT::f32,
                                 Op1.getValueType().getVectorElementCount());
      SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
      return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                         {WidenVec, Op.getOperand(1), Op.getOperand(2)});
    }
    return lowerVPFPIntConvOp(Op, DAG);
    if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVPOp(Op, DAG);
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPSetCCMaskOp(Op, DAG);
  case ISD::VP_BITREVERSE:
    return lowerVPOp(Op, DAG);
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    return lowerVPStridedLoad(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    return lowerVPStridedStore(Op, DAG);
  case ISD::VP_FFLOOR:
  case ISD::VP_FNEARBYINT:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVPOp(Op, DAG);
    return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
  }
}
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal, bool IsExternWeak) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
  // code.
  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
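    // In assembly this is, for example on RV64 (added illustration):
    //   .Lpcrel_hi0:
    //     auipc a0, %got_pcrel_hi(sym)
    //     ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)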
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
    return Load;
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
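    // In assembly (added illustration):
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)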
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
    return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
  }
  case CodeModel::Medium: {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsExternWeak) {
      // An extern weak symbol may be undefined, i.e. have value 0, which may
      // not be within 2GiB of PC, so use GOT-indirect addressing to access the
      // symbol. This generates the pattern (PseudoLGA sym), which expands to
      // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
      SDValue Load =
          SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *MemOp = MF.getMachineMemOperand(
          MachinePointerInfo::getGOT(MF),
          MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
              MachineMemOperand::MOInvariant,
          LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
      DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
      return Load;
    }

    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
  }
  }
}
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  const GlobalValue *GV = N->getGlobal();
  return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
                                            SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
  return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
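  // which, in assembly, is the usual local-exec sequence (added illustration):
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)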
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd =
      DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
}
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(N, DAG);

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  return Addr;
}
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
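// Illustration (added example): with Val = (setcc a, b, setlt),
//   matchSetCC(a, b, setlt, Val) == true
//   matchSetCC(a, b, setge, Val) == false   (inverse condition)
//   matchSetCC(b, a, setgt, Val) == true    (swapped operands)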
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  if (!Subtarget.hasShortForwardBranchOpt()) {
    // (select c, -1, y) -> -c | y
    if (isAllOnesConstant(TrueV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
    }
    // (select c, y, -1) -> (c-1) | y
    if (isAllOnesConstant(FalseV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
    }

    // (select c, 0, y) -> (c-1) & y
    if (isNullConstant(TrueV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
    }
    // (select c, y, 0) -> -c & y
    if (isNullConstant(FalseV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         FalseV);
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
                         FalseV);
    }
  }

  return SDValue();
}
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
// being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
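// Illustration (added example): (add (select cond, x, -1), 1) becomes
//   (select cond, (add x, 1), 0)
// and a select whose false arm is 0 can in turn be folded to (-cond & y) by
// combineSelectToBinOp above, avoiding a branchy select altogether.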
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  if (Subtarget.hasShortForwardBranchOpt())
    return SDValue();

  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt =
      cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Lower vector SELECTs to VSELECTs by splatting the condition.
  if (VT.isVector()) {
    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
    SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
  }

  // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
  // nodes to implement the SELECT. Performing the lowering here allows for
  // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
  // sequence or RISCVISD::SELECT_CC node (branch-based select).
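  // Illustration (added example): with Zicond, (select c, t, f) can be emitted
  // as a branchless sequence:
  //   czero.eqz tmp1, t, c     // tmp1 = (c == 0) ? 0 : t
  //   czero.nez tmp2, f, c     // tmp2 = (c != 0) ? 0 : f
  //   or        res, tmp1, tmp2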
  if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
      VT.isScalarInteger()) {
    // (select c, t, 0) -> (czero_eqz t, c)
    if (isNullConstant(FalseV))
      return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
    // (select c, 0, f) -> (czero_nez f, c)
    if (isNullConstant(TrueV))
      return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);

    // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
    if (TrueV.getOpcode() == ISD::AND &&
        (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
      return DAG.getNode(
          ISD::OR, DL, VT, TrueV,
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
    // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
    if (FalseV.getOpcode() == ISD::AND &&
        (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
      return DAG.getNode(
          ISD::OR, DL, VT, FalseV,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));

    // Try some other optimizations before falling back to generic lowering.
    if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
      return V;

    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    // Unless we have the short forward branch optimization.
    if (!Subtarget.hasShortForwardBranchOpt())
      return DAG.getNode(
          ISD::OR, DL, VT,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
  }

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->use_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->use_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        return lowerSELECT(NewSel, DAG);
      }
    }
  }

  // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
  // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
  const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
  const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
  if (FPTV && FPFV) {
    if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
    if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
      SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
                                DAG.getConstant(1, DL, XLenVT));
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
    }
  }

  // If the condition is not an integer SETCC which operates on XLenVT, we need
  // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != XLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, XLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
  // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
  // advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
    const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV)
    RHS = DAG.getConstant(0, DL, VT);

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
}
SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  if (CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                       LHS, RHS, TargetCC, Op.getOperand(2));
  }

  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                     CondV, DAG.getConstant(0, DL, XLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)
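  // Worked example (added illustration), XLEN = 32: for Shamt = 40,
  // Shamt-XLEN = 8 >= 0, so Lo = 0 and Hi = Lo << 8; for Shamt = 4,
  // Lo = Lo << 4 and Hi = (Hi << 4) | (Lo >>u 28).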
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
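// Illustration (added example): splatting a non-constant i1 %c into nxv4i1 is
// emitted roughly as
//   andi     a0, a0, 1
//   vmv.v.x  v8, a0         ; splat as an i8 vector
//   vmsne.vi v0, v8, 0      ; compare back down to a mask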
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
                         DAG.getConstant(1, DL, SplatVal.getValueType()));
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}
7268 // illegal (currently only vXi64 RV32).
7269 // FIXME: We could also catch non-constant sign-extended i32 values and lower
7270 // them to VMV_V_X_VL.
7271 SDValue
RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op
,
7272 SelectionDAG
&DAG
) const {
7274 MVT VecVT
= Op
.getSimpleValueType();
7275 assert(!Subtarget
.is64Bit() && VecVT
.getVectorElementType() == MVT::i64
&&
7276 "Unexpected SPLAT_VECTOR_PARTS lowering");
7278 assert(Op
.getNumOperands() == 2 && "Unexpected number of operands!");
7279 SDValue Lo
= Op
.getOperand(0);
7280 SDValue Hi
= Op
.getOperand(1);
7282 MVT ContainerVT
= VecVT
;
7283 if (VecVT
.isFixedLengthVector())
7284 ContainerVT
= getContainerForFixedLengthVector(VecVT
);
7286 auto VL
= getDefaultVLOps(VecVT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
7289 splatPartsI64WithVL(DL
, ContainerVT
, SDValue(), Lo
, Hi
, VL
, DAG
);
7291 if (VecVT
.isFixedLengthVector())
7292 Res
= convertFromScalableVector(VecVT
, Res
, DAG
, Subtarget
);
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
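// Illustration (added example): (nxv4i32 (sext nxv4i1 %m)) becomes
//   (vselect %m, splat(-1), splat(0))
// which is emitted as vmv.v.i vd, 0 followed by vmerge.vim vd, vd, -1, v0.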
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  if (VecVT.isScalableVector()) {
    SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
    SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
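// Illustration (added example): truncating v8i8 to v8i1 is emitted roughly as
//   vand.vi  v8, v8, 1
//   vmsne.vi v0, v8, 0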
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                  SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);

  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  // Truncates to mask types are handled differently
  if (VT.getVectorElementType() == MVT::i1)
    return lowerVectorMaskTruncLike(Op, DAG);

  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (SrcVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  LLVMContext &Context = *DAG.getContext();
  const ElementCount Count = ContainerVT.getVectorElementCount();
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                         Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       SrcVT.getVectorElementType() == MVT::f16) ||
      (VT.getVectorElementType() == MVT::f16 &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
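  // Illustration (added example): an f64 -> f16 round is emitted as two
  // narrowing converts,
  //   vfncvt.rod.f.f.w   (f64 -> f32, round-to-odd)
  //   vfncvt.f.f.w       (f32 -> f16)
  // so only the final step performs ordinary rounding, avoiding double rounding.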
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                                     SrcVT.getVectorElementType() != MVT::f16);
  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
// Given a scalable vector type and an index into it, returns the type for the
// smallest subvector that the index fits in. This can be used to reduce LMUL
// for operations like vslidedown.
//
// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
static std::optional<MVT>
getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector());
  const unsigned EltSize = VecVT.getScalarSizeInBits();
  const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
  const unsigned MinVLMAX = VectorBitsMin / EltSize;
  MVT SmallerVT;
  if (MaxIdx < MinVLMAX)
    SmallerVT = getLMUL1VT(VecVT);
  else if (MaxIdx < MinVLMAX * 2)
    SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
  else if (MaxIdx < MinVLMAX * 4)
    SmallerVT = getLMUL1VT(VecVT)
                    .getDoubleNumVectorElementsVT()
                    .getDoubleNumVectorElementsVT();
  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
    return std::nullopt;
  return SmallerVT;
}
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
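// Illustration (added example): inserting %x at index 2 of a v4i32 %v is
// emitted roughly as
//   vsetivli zero, 3, e32, m1, tu, ma   ; VL = index+1, tail undisturbed
//   vmv.s.x  v9, a0                     ; %x into element 0 of a temporary
//   vslideup.vi v8, v9, 2               ; slide it up to index 2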
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT OrigContainerVT = ContainerVT;
  SDValue OrigVec = Vec;
  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
    if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
                                              DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }
  }

  MVT XLenVT = Subtarget.getXLenVT();

  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      if (!VecVT.isFloatingPoint())
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);

      if (ContainerVT != OrigContainerVT)
        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Vec, DAG.getVectorIdxConstant(0, DL));
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1down instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this purpose.
    SDValue ValLo, ValHi;
    std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // If the Idx is 0 we can insert directly into the vector.
    if (isNullConstant(Idx)) {
      // First slide in the lo value, then the hi in above it. We use slide1down
      // to avoid the register group overlap constraint of vslide1up.
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
      // If the source vector is undef don't pass along the tail elements from
      // the previous slide1down.
      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
      // Bitcast back to the right container type.
      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);

      if (ContainerVT != OrigContainerVT)
        ValInVec =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                        ValInVec, DAG.getVectorIdxConstant(0, DL));
      if (!VecVT.isFixedLengthVector())
        return ValInVec;
      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
    }

    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT),
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));

  // Use tail agnostic policy if Idx is the last index of Vec.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
      cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
          VecVT.getVectorNumElements())
    Policy = RISCVII::TAIL_AGNOSTIC;
  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                Idx, Mask, InsertVL, Policy);

  if (ContainerVT != OrigContainerVT)
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Slideup, DAG.getVectorIdxConstant(0, DL));
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
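// As a rough illustration (not the exact output), extracting element 2 of a
// v4i32 becomes a vslidedown by 2 followed by a vmv.x.s of element 0; the
// vsetvli/LMUL used depend on the container type chosen below.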
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // Use vfirst.m to extract the first bit.
    if (isNullConstant(Idx)) {
      MVT ContainerVT = VecVT;
      if (VecVT.isFixedLengthVector()) {
        ContainerVT = getContainerForFixedLengthVector(VecVT);
        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      }
      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
      SDValue Vfirst =
          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
    }
    if (VecVT.isFixedLengthVector()) {
      unsigned NumElts = VecVT.getVectorNumElements();
      if (NumElts >= 8) {
        MVT WideEltVT;
        unsigned WidenVecLen;
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
        unsigned MaxEEW = Subtarget.getELen();
        MVT LargestEltVT = MVT::getIntegerVT(
            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
        if (NumElts <= LargestEltVT.getSizeInBits()) {
          assert(isPowerOf2_32(NumElts) &&
                 "the number of elements should be power of 2");
          WideEltVT = MVT::getIntegerVT(NumElts);
          WidenVecLen = 1;
          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
          ExtractBitIdx = Idx;
        } else {
          WideEltVT = LargestEltVT;
          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
          // extract element index = index / element width
          ExtractElementIdx = DAG.getNode(
              ISD::SRL, DL, XLenVT, Idx,
              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
          // mask bit index = index % element width
          ExtractBitIdx = DAG.getNode(
              ISD::AND, DL, XLenVT, Idx,
              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
        }
        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
                                         Vec, ExtractElementIdx);
        // Extract the bit from GPR.
        SDValue ShiftRight =
            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
                                  DAG.getConstant(1, DL, XLenVT));
        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
      }
    }
    // Otherwise, promote to an i8 vector and extract from that.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
  // contains our index.
  std::optional<uint64_t> MaxIdx;
  if (VecVT.isFixedLengthVector())
    MaxIdx = VecVT.getVectorNumElements() - 1;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
    MaxIdx = IdxC->getZExtValue();
  if (MaxIdx) {
    if (auto SmallerVT =
            getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
      ContainerVT = *SmallerVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    }
  }

  // If after narrowing, the required slide is still greater than LMUL2,
  // fallback to generic expansion and go through the stack. This is done
  // for a subtle reason: extracting *all* elements out of a vector is
  // widely expected to be linear in vector size, but because vslidedown
  // is linear in LMUL, performing N extracts using vslidedown becomes
  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
  // seems to have the same problem (the store is linear in LMUL), but the
  // generic expansion *memoizes* the store, and thus for many extracts of
  // the same vector we end up with one store and a bunch of loads.
  // TODO: We don't have the same code for insert_vector_elt because we
  // have BUILD_VECTOR and handle the degenerate case there. Should we
  // consider adding an inverse BUILD_VECTOR node?
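  // As a rough illustration: extracting all N elements of an LMUL=8 vector via
  // N vslidedowns performs N slides whose cost each scales with LMUL, whereas
  // the generic expansion stores the vector once and then issues N cheap
  // scalar loads.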
  MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
  if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
    return SDValue();

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
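// The scalar operand is either promoted to XLenVT or, for an i64 scalar on
// RV32, split and handled below (see the vslide1up/vslide1down special case
// and the splat fallback).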
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);

  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
    SDValue ScalarLo, ScalarHi;
    std::tie(ScalarLo, ScalarHi) =
        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);

    // Double the VL since we halved SEW.
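    // (Each i64 element is viewed as two i32 elements, so processing AVL i64
    // elements at SEW=32 needs a VL of 2*AVL; the constant-AVL fast path below
    // computes exactly that when it can.)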
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      unsigned EltSize = VT.getScalarSizeInBits();
      unsigned MinSize = VT.getSizeInBits().getKnownMinValue();

      unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
      unsigned MaxVLMAX =
          RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

      unsigned VectorBitsMin = Subtarget.getRealMinVLen();
      unsigned MinVLMAX =
          RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);

      uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
      if (AVLInt <= MinVLMAX) {
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So let the following code handle it.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
      // Use a vsetvli instruction to get the actually used length, which is
      // related to the hardware implementation.
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume Policy operand is the last operand.
    uint64_t Policy =
        cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
                         AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care about mask policy.
    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
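//
// For example (assuming RVVBitsPerBlock == 64): at SEW=8 an LMUL=1 register
// group holds VLEN/8 = 8*vscale elements, so a request for VF=4 maps to
// LMUL=1/2 and a request for VF=16 maps to LMUL=2.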
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();

  unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");
  (void)MinVF;

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
  unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);

  SDLoc DL(N);

  SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
  SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);

  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));

  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
  SDValue Res =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
// LMUL * VLEN should be greater than or equal to EGS * SEW
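// Equivalently: the element group (EGS elements of SEW bits each) must fit in
// the register group, whose size in bits is LMUL * VLEN; the check below uses
// the minimum VLEN guaranteed by the subtarget.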
static inline bool isValidEGW(int EGS, EVT VT,
                              const RISCVSubtarget &Subtarget) {
  return (Subtarget.getRealMinVLen() *
          VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
         EGS * VT.getScalarSizeInBits();
}
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
  case Intrinsic::riscv_brev8:
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    unsigned Opc;
    switch (IntNo) {
    case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
    case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
    case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
    case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
    case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
    case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
    case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
    case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
    }

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::riscv_zip:
  case Intrinsic::riscv_unzip: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_clmul:
    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }
    return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::riscv_clmulh:
  case Intrinsic::riscv_clmulr: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
                           DAG.getConstant(32, DL, MVT::i64));
      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
                           DAG.getConstant(32, DL, MVT::i64));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
                        DAG.getConstant(32, DL, MVT::i64));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::experimental_get_vector_length:
    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
  case Intrinsic::riscv_vmv_x_s: {
    SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
  }
  case Intrinsic::riscv_vfmv_f_s:
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                       Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
  case Intrinsic::riscv_vmv_v_x:
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
                            Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = getVLOperand(Op);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
    if (Op.getOperand(1).isUndef())
      return SplattedVal;
    SDValue SplattedIdx =
        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                    DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = getMaskTypeFor(VT);
    SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                    {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
                     DAG.getUNDEF(MaskVT), Mask, VL});
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  // EGS * EEW >= 128 bits
  case Intrinsic::riscv_vaesdf_vv:
  case Intrinsic::riscv_vaesdf_vs:
  case Intrinsic::riscv_vaesdm_vv:
  case Intrinsic::riscv_vaesdm_vs:
  case Intrinsic::riscv_vaesef_vv:
  case Intrinsic::riscv_vaesef_vs:
  case Intrinsic::riscv_vaesem_vv:
  case Intrinsic::riscv_vaesem_vs:
  case Intrinsic::riscv_vaeskf1:
  case Intrinsic::riscv_vaeskf2:
  case Intrinsic::riscv_vaesz_vs:
  case Intrinsic::riscv_vsm4k:
  case Intrinsic::riscv_vsm4r_vv:
  case Intrinsic::riscv_vsm4r_vs: {
    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
    return Op;
  }
  // EGS * EEW >= 256 bits
  case Intrinsic::riscv_vsm3c:
  case Intrinsic::riscv_vsm3me: {
    if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
    return Op;
  }
  // zvknha(SEW=32)/zvknhb(SEW=[32|64])
  case Intrinsic::riscv_vsha2ch:
  case Intrinsic::riscv_vsha2cl:
  case Intrinsic::riscv_vsha2ms: {
    if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
        !Subtarget.hasStdExtZvknhb())
      report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
    return Op;
  }
  case Intrinsic::riscv_sf_vc_v_x:
  case Intrinsic::riscv_sf_vc_v_i:
  case Intrinsic::riscv_sf_vc_v_xv:
  case Intrinsic::riscv_sf_vc_v_iv:
  case Intrinsic::riscv_sf_vc_v_vv:
  case Intrinsic::riscv_sf_vc_v_fv:
  case Intrinsic::riscv_sf_vc_v_xvv:
  case Intrinsic::riscv_sf_vc_v_ivv:
  case Intrinsic::riscv_sf_vc_v_vvv:
  case Intrinsic::riscv_sf_vc_v_fvv:
  case Intrinsic::riscv_sf_vc_v_xvw:
  case Intrinsic::riscv_sf_vc_v_ivw:
  case Intrinsic::riscv_sf_vc_v_vvw:
  case Intrinsic::riscv_sf_vc_v_fvw: {
    MVT VT = Op.getSimpleValueType();

    if (!VT.isFixedLengthVector())
      break;

    SmallVector<SDValue, 6> Ops;
    for (const SDValue &V : Op->op_values()) {
      // Skip non-fixed vector operands.
      if (!V.getValueType().isFixedLengthVector()) {
        Ops.push_back(V);
        continue;
      }

      MVT OpContainerVT =
          getContainerForFixedLengthVector(V.getSimpleValueType());
      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
    }

    MVT RetContainerVT = getContainerForFixedLengthVector(VT);
    SDValue Scalable =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetContainerVT, Ops);
    return convertFromScalableVector(VT, Scalable, DAG, Subtarget);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::riscv_masked_strided_load: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector()) {
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
        PassThru =
            convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
      }
    }

    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    SDValue Ptr = Op.getOperand(3);
    SDValue Stride = Op.getOperand(4);
    SDValue Result, Chain;

    // TODO: We restrict this to unmasked loads currently in consideration of
    // the complexity of handling all falses masks.
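    // Note: when the stride is zero and the mask is all ones, every lane reads
    // the same address, so the code below emits a single scalar zero-extending
    // load and splats it instead of going through the vlse path.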
    if (IsUnmasked && isNullConstant(Stride)) {
      MVT ScalarVT = ContainerVT.getVectorElementType();
      SDValue ScalarLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
                         ScalarVT, Load->getMemOperand());
      Chain = ScalarLoad.getValue(1);
      Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
                                Subtarget);
    } else {
      SDValue IntID = DAG.getTargetConstant(
          IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
          XLenVT);

      SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
      if (IsUnmasked)
        Ops.push_back(DAG.getUNDEF(ContainerVT));
      else
        Ops.push_back(PassThru);
      Ops.push_back(Ptr);
      Ops.push_back(Stride);
      if (!IsUnmasked)
        Ops.push_back(Mask);
      Ops.push_back(VL);
      if (!IsUnmasked) {
        SDValue Policy =
            DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
        Ops.push_back(Policy);
      }

      SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
      Result =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                  Load->getMemoryVT(), Load->getMemOperand());
      Chain = Result.getValue(1);
    }

    if (VT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
    SDLoc DL(Op);
    static const Intrinsic::ID VlsegInts[7] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
    SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
    ContainerVTs.push_back(MVT::Other);
    SDVTList VTs = DAG.getVTList(ContainerVTs);
    SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
    Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
    Ops.push_back(Op.getOperand(2));
    Ops.push_back(VL);
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    SmallVector<SDValue, 9> Results;
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
      Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
                                                  DAG, Subtarget));
    Results.push_back(Result.getValue(NF));
    return DAG.getMergeValues(Results, DL);
  }
  case Intrinsic::riscv_sf_vc_v_x_se:
  case Intrinsic::riscv_sf_vc_v_i_se:
  case Intrinsic::riscv_sf_vc_v_xv_se:
  case Intrinsic::riscv_sf_vc_v_iv_se:
  case Intrinsic::riscv_sf_vc_v_vv_se:
  case Intrinsic::riscv_sf_vc_v_fv_se:
  case Intrinsic::riscv_sf_vc_v_xvv_se:
  case Intrinsic::riscv_sf_vc_v_ivv_se:
  case Intrinsic::riscv_sf_vc_v_vvv_se:
  case Intrinsic::riscv_sf_vc_v_fvv_se:
  case Intrinsic::riscv_sf_vc_v_xvw_se:
  case Intrinsic::riscv_sf_vc_v_ivw_se:
  case Intrinsic::riscv_sf_vc_v_vvw_se:
  case Intrinsic::riscv_sf_vc_v_fvw_se: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);

    if (!VT.isFixedLengthVector())
      break;

    SmallVector<SDValue, 6> Ops;
    for (const SDValue &V : Op->op_values()) {
      // Skip non-fixed vector operands.
      if (!V.getValueType().isFixedLengthVector()) {
        Ops.push_back(V);
        continue;
      }

      MVT OpContainerVT =
          getContainerForFixedLengthVector(V.getSimpleValueType());
      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
    }

    MVT RetContainerVT = getContainerForFixedLengthVector(VT);
    SDVTList VTs = DAG.getVTList({RetContainerVT, MVT::Other});
    SDValue ScalableVector = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
    SDValue FixedVector =
        convertFromScalableVector(VT, ScalableVector, DAG, Subtarget);
    return DAG.getMergeValues({FixedVector, ScalableVector.getValue(1)}, DL);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::riscv_masked_strided_store: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    SDValue Val = Op.getOperand(2);
    MVT VT = Val.getSimpleValueType();
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    }
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector())
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
        XLenVT);

    auto *Store = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
    Ops.push_back(Val);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);

    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
                                   Ops, Store->getMemoryVT(),
                                   Store->getMemOperand());
  }
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store: {
    SDLoc DL(Op);
    static const Intrinsic::ID VssegInts[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};
    // Operands are (chain, int_id, vec*, ptr, vl)
    unsigned NF = Op->getNumOperands() - 4;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getOperand(2).getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
    SDValue Ptr = Op->getOperand(NF + 2);

    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
    for (unsigned i = 0; i < NF; i++)
      Ops.push_back(convertToScalableVector(
          ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
    Ops.append({Ptr, VL});

    return DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
  }
  case Intrinsic::riscv_sf_vc_x_se_e8mf8:
  case Intrinsic::riscv_sf_vc_x_se_e8mf4:
  case Intrinsic::riscv_sf_vc_x_se_e8mf2:
  case Intrinsic::riscv_sf_vc_x_se_e8m1:
  case Intrinsic::riscv_sf_vc_x_se_e8m2:
  case Intrinsic::riscv_sf_vc_x_se_e8m4:
  case Intrinsic::riscv_sf_vc_x_se_e8m8:
  case Intrinsic::riscv_sf_vc_x_se_e16mf4:
  case Intrinsic::riscv_sf_vc_x_se_e16mf2:
  case Intrinsic::riscv_sf_vc_x_se_e16m1:
  case Intrinsic::riscv_sf_vc_x_se_e16m2:
  case Intrinsic::riscv_sf_vc_x_se_e16m4:
  case Intrinsic::riscv_sf_vc_x_se_e16m8:
  case Intrinsic::riscv_sf_vc_x_se_e32mf2:
  case Intrinsic::riscv_sf_vc_x_se_e32m1:
  case Intrinsic::riscv_sf_vc_x_se_e32m2:
  case Intrinsic::riscv_sf_vc_x_se_e32m4:
  case Intrinsic::riscv_sf_vc_x_se_e32m8:
  case Intrinsic::riscv_sf_vc_x_se_e64m1:
  case Intrinsic::riscv_sf_vc_x_se_e64m2:
  case Intrinsic::riscv_sf_vc_x_se_e64m4:
  case Intrinsic::riscv_sf_vc_x_se_e64m8:
  case Intrinsic::riscv_sf_vc_i_se_e8mf8:
  case Intrinsic::riscv_sf_vc_i_se_e8mf4:
  case Intrinsic::riscv_sf_vc_i_se_e8mf2:
  case Intrinsic::riscv_sf_vc_i_se_e8m1:
  case Intrinsic::riscv_sf_vc_i_se_e8m2:
  case Intrinsic::riscv_sf_vc_i_se_e8m4:
  case Intrinsic::riscv_sf_vc_i_se_e8m8:
  case Intrinsic::riscv_sf_vc_i_se_e16mf4:
  case Intrinsic::riscv_sf_vc_i_se_e16mf2:
  case Intrinsic::riscv_sf_vc_i_se_e16m1:
  case Intrinsic::riscv_sf_vc_i_se_e16m2:
  case Intrinsic::riscv_sf_vc_i_se_e16m4:
  case Intrinsic::riscv_sf_vc_i_se_e16m8:
  case Intrinsic::riscv_sf_vc_i_se_e32mf2:
  case Intrinsic::riscv_sf_vc_i_se_e32m1:
  case Intrinsic::riscv_sf_vc_i_se_e32m2:
  case Intrinsic::riscv_sf_vc_i_se_e32m4:
  case Intrinsic::riscv_sf_vc_i_se_e32m8:
  case Intrinsic::riscv_sf_vc_i_se_e64m1:
  case Intrinsic::riscv_sf_vc_i_se_e64m2:
  case Intrinsic::riscv_sf_vc_i_se_e64m4:
  case Intrinsic::riscv_sf_vc_i_se_e64m8:
  case Intrinsic::riscv_sf_vc_xv_se:
  case Intrinsic::riscv_sf_vc_iv_se:
  case Intrinsic::riscv_sf_vc_vv_se:
  case Intrinsic::riscv_sf_vc_fv_se:
  case Intrinsic::riscv_sf_vc_xvv_se:
  case Intrinsic::riscv_sf_vc_ivv_se:
  case Intrinsic::riscv_sf_vc_vvv_se:
  case Intrinsic::riscv_sf_vc_fvv_se:
  case Intrinsic::riscv_sf_vc_xvw_se:
  case Intrinsic::riscv_sf_vc_ivw_se:
  case Intrinsic::riscv_sf_vc_vvw_se:
  case Intrinsic::riscv_sf_vc_fvw_se: {
    if (!llvm::any_of(Op->op_values(), [&](const SDValue &V) {
          return V.getValueType().isFixedLengthVector();
        }))
      return Op;

    SmallVector<SDValue, 6> Ops;
    for (const SDValue &V : Op->op_values()) {
      // Skip non-fixed vector operands.
      if (!V.getValueType().isFixedLengthVector()) {
        Ops.push_back(V);
        continue;
      }

      MVT OpContainerVT =
          getContainerForFixedLengthVector(V.getSimpleValueType());
      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
    }

    return DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VP_REDUCE_ADD:
  case ISD::VECREDUCE_ADD:
    return RISCVISD::VECREDUCE_ADD_VL;
  case ISD::VP_REDUCE_UMAX:
  case ISD::VECREDUCE_UMAX:
    return RISCVISD::VECREDUCE_UMAX_VL;
  case ISD::VP_REDUCE_SMAX:
  case ISD::VECREDUCE_SMAX:
    return RISCVISD::VECREDUCE_SMAX_VL;
  case ISD::VP_REDUCE_UMIN:
  case ISD::VECREDUCE_UMIN:
    return RISCVISD::VECREDUCE_UMIN_VL;
  case ISD::VP_REDUCE_SMIN:
  case ISD::VECREDUCE_SMIN:
    return RISCVISD::VECREDUCE_SMIN_VL;
  case ISD::VP_REDUCE_AND:
  case ISD::VECREDUCE_AND:
    return RISCVISD::VECREDUCE_AND_VL;
  case ISD::VP_REDUCE_OR:
  case ISD::VECREDUCE_OR:
    return RISCVISD::VECREDUCE_OR_VL;
  case ISD::VP_REDUCE_XOR:
  case ISD::VECREDUCE_XOR:
    return RISCVISD::VECREDUCE_XOR_VL;
  case ISD::VP_REDUCE_FADD:
    return RISCVISD::VECREDUCE_FADD_VL;
  case ISD::VP_REDUCE_SEQ_FADD:
    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
  case ISD::VP_REDUCE_FMAX:
    return RISCVISD::VECREDUCE_FMAX_VL;
  case ISD::VP_REDUCE_FMIN:
    return RISCVISD::VECREDUCE_FMIN_VL;
  }
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  ISD::CondCode CC;
  unsigned BaseOpc;
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    BaseOpc = ISD::AND;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    BaseOpc = ISD::OR;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    BaseOpc = ISD::XOR;
    break;
  }
  }

  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
static bool isNonZeroAVL(SDValue AVL) {
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
}
/// Helper to lower a reduction sequence of the form:
/// scalar = reduce_op vec, scalar_start
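/// The start value is inserted into element 0 of an LMUL<=1 vector and passed
/// as the scalar operand of the RVV reduction; when the AVL may be zero, the
/// same value is also used as the passthru so the result defaults to the start
/// value.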
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
                                 SDValue StartValue, SDValue Vec, SDValue Mask,
                                 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  const MVT VecVT = Vec.getSimpleValueType();
  const MVT M1VT = getLMUL1VT(VecVT);
  const MVT XLenVT = Subtarget.getXLenVT();
  const bool NonZeroAVL = isNonZeroAVL(VL);

  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
                                           DAG, Subtarget);
  if (M1VT != InnerVT)
    InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
                               DAG.getUNDEF(M1VT),
                               InitialValue, DAG.getConstant(0, DL, XLenVT));
  SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
  SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
}
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  switch (BaseOpc) {
  case ISD::AND:
  case ISD::OR:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
    MVT XLenVT = Subtarget.getXLenVT();
    StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
                         DAG.getConstant(0, DL, XLenVT));
  }
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
                           Mask, VL, DL, DAG, Subtarget);
}

// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
                               const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_FADD: {
    // Use positive zero if we can. It is cheaper to materialize.
    SDValue Zero =
        DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
  }
  case ISD::VECREDUCE_SEQ_FADD:
    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                           Op.getOperand(0));
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX: {
    MVT XLenVT = Subtarget.getXLenVT();
    SDValue Front =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
                    DAG.getConstant(0, DL, XLenVT));
    unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
                          ? RISCVISD::VECREDUCE_FMIN_VL
                          : RISCVISD::VECREDUCE_FMAX_VL;
    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
  }
  }
}

SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
  MVT VecVT = VectorVal.getSimpleValueType();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
                           VectorVal, Mask, VL, DL, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                           Vec, Mask, VL, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), SubVec,
                           DAG.getConstant(0, DL, XLenVT));
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }

    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
    SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);

    // Use tail agnostic policy if we're inserting over Vec's tail.
    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
      Policy = RISCVII::TAIL_AGNOSTIC;

    // If we're inserting into the lowest elements, use a tail undisturbed
    // vmv.v.v.
    if (OrigIdx == 0) {
      SubVec =
          DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
    } else {
      SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
      SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                           SlideupAmt, Mask, VL, Policy);
    }

    if (VecVT.isFixedLengthVector())
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), SubVec);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
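  // For example, inserting a 2-element subvector at offset RemIdx=4 would use
  // OFFSET=4 and VL=6: elements [0,4) of the destination are left untouched,
  // elements [4,6) receive the subvector, and the tail stays undisturbed.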
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  VL = computeVLMax(SubVecVT, DL, DAG);

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (RemIdx == 0) {
    SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
                         SubVec, VL);
  } else {
    SDValue SlideupAmt =
        DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

    // Construct the vector length corresponding to RemIdx + length(SubVecVT).
    VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

    SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
                         SlideupAmt, Mask, VL);
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
                         DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
}
9185 SDValue
RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op
,
9186 SelectionDAG
&DAG
) const {
9187 SDValue Vec
= Op
.getOperand(0);
9188 MVT SubVecVT
= Op
.getSimpleValueType();
9189 MVT VecVT
= Vec
.getSimpleValueType();
9192 MVT XLenVT
= Subtarget
.getXLenVT();
9193 unsigned OrigIdx
= Op
.getConstantOperandVal(1);
9194 const RISCVRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
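  // For example (illustrative): extracting v16i1 at element 8 from nxv16i1 can
  // be re-expressed as extracting v2i8 at element 1 from nxv2i8, since the
  // index and both element counts are multiples of 8.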
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }
  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (OrigIdx == 0)
    return Op;

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
    if (auto ShrunkVT =
            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }

    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else SubVecVT is a fractional LMUL and may need to be slid down.
  assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(SubRegIdx != RISCV::NoSubRegister);
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt =
      DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
                    Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1. All operand and result types must be the same.
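// For example (illustrative): an i1 VECTOR_DEINTERLEAVE on nxv16i1 operands is
// performed on nxv16i8 zero-extended copies, and each i8 result is turned back
// into a mask with a SETNE-against-zero comparison.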
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
                                  SelectionDAG &DAG) {
  MVT VT = N.getSimpleValueType();
  MVT WideVT = VT.changeVectorElementType(MVT::i8);
  SmallVector<SDValue, 4> WideOps;
  for (SDValue Op : N->ops()) {
    assert(Op.getSimpleValueType() == VT &&
           "Operands and result must be same type");
    WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
  }

  unsigned NumVals = N->getNumValues();

  SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
  SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
  SmallVector<SDValue, 4> TruncVals;
  for (unsigned I = 0; I < NumVals; I++) {
    TruncVals.push_back(
        DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
                     DAG.getConstant(0, DL, WideVT), ISD::SETNE));
  }

  if (TruncVals.size() > 1)
    return DAG.getMergeValues(TruncVals, DL);
  return TruncVals.front();
}
SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // 1 bit element vectors need to be widened to e8
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() ==
      (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);

    SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                               ResLo.getValue(0), ResHi.getValue(0));
    SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
                              ResHi.getValue(1));
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // Concatenate the two vectors as one vector to deinterleave
  MVT ConcatVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                               Op.getOperand(0), Op.getOperand(1));

  // We want to operate on all lanes, so get the mask and VL for the whole
  // concatenated vector.
  auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(ConcatVT);

  // We can deinterleave through vnsrl.wi if the element type is smaller than
  // ELEN.
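  // For example (illustrative): with SEW=8, viewing the concatenated data as
  // 16-bit pairs, a narrowing shift of 0 keeps the low (even) bytes and a
  // narrowing shift of 8 keeps the high (odd) bytes.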
  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
    SDValue Even =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
    SDValue Odd =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // For the indices, use the same SEW to avoid an extra vsetvli
  MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
  // Create a vector of even indices {0, 2, 4, ...}
  SDValue EvenIdx =
      DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
  // Create a vector of odd indices {1, 3, 5, ... }
  SDValue OddIdx =
      DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));

  // Gather the even and odd elements into two separate vectors
  SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                 Concat, EvenIdx, Passthru, Mask, VL);
  SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                Concat, OddIdx, Passthru, Mask, VL);

  // Extract the result half of the gather for even and odd
  SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
                             DAG.getConstant(0, DL, XLenVT));
  SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
                            DAG.getConstant(0, DL, XLenVT));

  return DAG.getMergeValues({Even, Odd}, DL);
}
SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // i1 vectors need to be widened to i8
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() ==
      (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);

    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResLo.getValue(0), ResLo.getValue(1));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResHi.getValue(0), ResHi.getValue(1));
    return DAG.getMergeValues({Lo, Hi}, DL);
  }

  SDValue Interleaved;

  // If the element type is smaller than ELEN, then we can interleave with
  // vwaddu.vv and vwmaccu.vx
  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
    Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
                                        DAG, Subtarget);
  } else {
    // Otherwise, fall back to using vrgatherei16.vv
    MVT ConcatVT =
        MVT::getVectorVT(VecVT.getVectorElementType(),
                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                                 Op.getOperand(0), Op.getOperand(1));

    MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);

    // 0 1 2 3 4 5 6 7 ...
    SDValue StepVec = DAG.getStepVector(DL, IdxVT);

    // 1 1 1 1 1 1 1 1 ...
    SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));

    // 1 0 1 0 1 0 1 0 ...
    SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
    OddMask = DAG.getSetCC(
        DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
        ISD::CondCode::SETNE);

    SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));

    // Build up the index vector for interleaving the concatenated vector
    // 0 0 1 1 2 2 3 3 ...
    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
    // 0 n 1 n+1 2 n+2 3 n+3 ...
    Idx =
        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);

    // Then perform the interleave
    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
  }

  // Extract the two halves from the interleaved result
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));

  return DAG.getMergeValues({Lo, Hi}, DL);
}
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
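// For example (illustrative): a step_vector with a step of 4 becomes
// (shl (vid), splat(2)), while a non-power-of-two step such as 3 becomes
// (mul (vid), splat(3)).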
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  assert(VT.isScalableVector() && "Expected scalable vector");
  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    if (isPowerOf2_64(StepValImm)) {
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      SDValue StepVal = lowerScalarSplat(
          SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
          VL, VT, DL, DAG, Subtarget);
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
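// For example (illustrative): with VLMAX=8, element i of the result reads from
// index 7 - i, i.e. the index vector is (7, 6, 5, 4, 3, 2, 1, 0).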
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  if (VecVT.getVectorElementType() == MVT::i1) {
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
  }
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is potentially more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting, VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
                                 computeVLMax(VecVT, DL, DAG),
                                 DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                DAG.getUNDEF(IntVT), Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
                     DAG.getUNDEF(VecVT), Mask, VL);
}
SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VecVT = Op.getSimpleValueType();

  SDValue VLMax = computeVLMax(VecVT, DL, DAG);

  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
  }

  SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
                    DownOffset, TrueMask, UpOffset);
  return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
                     RISCVII::TAIL_AGNOSTIC);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Load = cast<LoadSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Load->getMemoryVT(),
                                        *Load->getMemOperand()) &&
         "Expecting a correctly-aligned load");

  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
  if (!IsMaskOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  Ops.push_back(Load->getBasePtr());
  Ops.push_back(VL);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());

  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Store = cast<StoreSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Store->getMemoryVT(),
                                        *Store->getMemOperand()) &&
         "Expecting a correctly-aligned store");

  SDValue StoreVal = Store->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If the size is less than a byte, we need to pad with zeros to make a byte.
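  // For example (illustrative): a v4i1 store is first inserted into a zero
  // v8i1 vector, so the mask store emitted below writes a full, well-defined
  // byte rather than four loose bits.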
  if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
    VT = MVT::v8i1;
    StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           DAG.getConstant(0, DL, VT), StoreVal,
                           DAG.getIntPtrConstant(0, DL));
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);

  SDValue NewValue =
      convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
  return DAG.getMemIntrinsicNode(
      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  SDValue Mask, PassThru, VL;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();
  SDValue Val, Mask, VL;

  if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
    Val = VPStore->getValue();
    Mask = VPStore->getMask();
    VL = VPStore->getVectorLength();
  } else {
    const auto *MStore = cast<MaskedStoreSDNode>(Op);
    Val = MStore->getValue();
    Mask = MStore->getMask();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT VT = Val.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  MVT InVT = Op.getOperand(0).getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(InVT);

  MVT VT = Op.getSimpleValueType();

  SDLoc DL(Op);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);

  auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
                                    DAG, Subtarget);
  MVT MaskVT = getMaskTypeFor(ContainerVT);

  SDValue Cmp =
      DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                  {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});

  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDValue CC = Op.getOperand(3);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  MVT VT = Op.getSimpleValueType();
  MVT InVT = Op1.getSimpleValueType();

  // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
  // condition code.
  if (Opc == ISD::STRICT_FSETCCS) {
    // Expand strict_fsetccs(x, oeq) to
    // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
    SDVTList VTList = Op->getVTList();
    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                 Op2, OLECCVal);
      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
                                 Op1, OLECCVal);
      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                     Tmp1.getValue(1), Tmp2.getValue(1));
      // Tmp1 and Tmp2 might be the same node.
      if (Tmp1 != Tmp2)
        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
      return DAG.getMergeValues({Tmp1, OutChain}, DL);
    }

    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                Op2, OEQCCVal);
      SDValue Res = DAG.getNOT(DL, OEQ, VT);
      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
    }
  }

  MVT ContainerInVT = InVT;
  if (InVT.isFixedLengthVector()) {
    ContainerInVT = getContainerForFixedLengthVector(InVT);
    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
  }
  MVT MaskVT = getMaskTypeFor(ContainerInVT);

  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);

  SDValue Res;
  if (Opc == ISD::STRICT_FSETCC &&
      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
       CCVal == ISD::SETOLE)) {
    // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask
    // that is only active when both input elements are ordered.
    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
    SDValue OrderMask1 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    SDValue OrderMask2 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    Mask =
        DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
    // Use Mask as the merge operand to let the result be 0 if either of the
    // inputs is unordered.
    Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
                      DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, Mask, Mask, VL});
  } else {
    unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
                                                : RISCVISD::STRICT_FSETCCS_VL;
    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
  }

  if (VT.isFixedLengthVector()) {
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
// Lower vector ABS to smax(X, sub(0, X)).
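// For example (illustrative): for X = -3 this computes smax(-3, 0 - (-3)) =
// smax(-3, 3) = 3; note that INT_MIN stays INT_MIN under this expansion.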
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0);

  assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
         "Unexpected type for ISD::ABS");

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (Op->getOpcode() == ISD::VP_ABS) {
    Mask = Op->getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op->getOperand(2);
  } else
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue SplatZero = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
      DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
  SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
                             DAG.getUNDEF(ContainerVT), Mask, VL);
  SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
                            DAG.getUNDEF(ContainerVT), Mask, VL);

  if (VT.isFixedLengthVector())
    Max = convertFromScalableVector(VT, Max, DAG, Subtarget);

  return Max;
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  assert(Mag.getValueType() == Sign.getValueType() &&
         "Can only handle COPYSIGN with matching types.");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
                                 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);

  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC =
      convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  SDValue Select =
      DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);

  return convertFromScalableVector(VT, Select, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned NewOpc = getRISCVVLOp(Op);
  bool HasMergeOp = hasMergeOp(NewOpc);
  bool HasMask = hasMaskOp(NewOpc);

  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  if (HasMergeOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  if (HasMask)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  // StrictFP operations have two result values. Their lowered result should
  // have same result count.
  if (Op->isStrictFPOpcode()) {
    SDValue ScalableRes =
        DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
                    Op->getFlags());
    SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
  }

  SDValue ScalableRes =
      DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
//   types.
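// For example (illustrative): a fixed-length vp.add is lowered to
// RISCVISD::ADD_VL on its scalable container type, with the VP mask and EVL
// carried through as the trailing mask/VL operands.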
SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
  unsigned RISCVISDOpc = getRISCVVLOp(Op);
  bool HasMergeOp = hasMergeOp(RISCVISDOpc);

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SmallVector<SDValue, 4> Ops;

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VT);

  for (const auto &OpIdx : enumerate(Op->ops())) {
    SDValue V = OpIdx.value();
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
    // Add dummy merge value before the mask.
    if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
      Ops.push_back(DAG.getUNDEF(ContainerVT));
    // Pass through operands which aren't fixed-length vectors.
    if (!V.getValueType().isFixedLengthVector()) {
      Ops.push_back(V);
      continue;
    }
    // "cast" fixed length vector to a scalable vector.
    MVT OpVT = V.getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
    assert(useRVVForFixedLengthVectorVT(OpVT) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  if (!VT.isFixedLengthVector())
    return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());

  SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());

  return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Src = Op.getOperand(0);
  // NOTE: Mask is dropped.
  SDValue VL = Op.getOperand(2);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                  DAG.getUNDEF(ContainerVT), Zero, VL);

  SDValue SplatValue = DAG.getConstant(
      Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
  SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                              DAG.getUNDEF(ContainerVT), SplatValue, VL);

  SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
                               Splat, ZeroSplat, VL);
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  // NOTE: Mask is dropped.
  SDValue VL = Op.getOperand(4);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDValue Result;
  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);

  switch (Condition) {
  default:
    break;
  // X != Y --> (X^Y)
  case ISD::SETNE:
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    break;
  // X == Y --> ~(X^Y)
  case ISD::SETEQ: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    Result =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
    break;
  }
  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
  // X <u Y --> X == 0 & Y == 1 --> ~X & Y
  case ISD::SETGT:
  case ISD::SETULT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
  // X >u Y --> X == 1 & Y == 0 --> ~Y & X
  case ISD::SETLT:
  case ISD::SETUGT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
    break;
  }
  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
  // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
  case ISD::SETGE:
  case ISD::SETULE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
  // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
  case ISD::SETLE:
  case ISD::SETUGE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
    break;
  }
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
// Lower Floating-Point/Integer Type-Convert VP SDNodes
SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  SDValue Mask = Op.getOperand(1);
  SDValue VL = Op.getOperand(2);
  unsigned RISCVISDOpc = getRISCVVLOp(Op);

  MVT DstVT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  if (DstVT.isFixedLengthVector()) {
    DstVT = getContainerForFixedLengthVector(DstVT);
    SrcVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(DstVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  unsigned DstEltSize = DstVT.getScalarSizeInBits();
  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();

  SDValue Result;
  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");

      unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
                                    ? RISCVISD::VSEXT_VL
                                    : RISCVISD::VZEXT_VL;

      // Do we need to do any pre-widening before converting?
      if (SrcEltSize == 1) {
        MVT IntVT = DstVT.changeVectorElementTypeToInteger();
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue Zero = DAG.getConstant(0, DL, XLenVT);
        SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                        DAG.getUNDEF(IntVT), Zero, VL);
        SDValue One = DAG.getConstant(
            RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
        SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                       DAG.getUNDEF(IntVT), One, VL);
        Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
                          ZeroSplat, VL);
      } else if (DstEltSize > (2 * SrcEltSize)) {
        // Widen before converting.
        MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
                                     DstVT.getVectorElementCount());
        Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");

      // Convert f16 to f32 then convert f32 to i64.
      if (DstEltSize > (2 * SrcEltSize)) {
        assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        MVT InterimFVT =
            MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
        Src =
            DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    }
  } else { // Narrowing + Conversion
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
      // First do a narrowing convert to an FP type half the size, then round
      // the FP type to a small FP type if needed.

      MVT InterimFVT = DstVT;
      if (SrcEltSize > (2 * DstEltSize)) {
        assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
        assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
      }

      Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);

      if (InterimFVT != DstVT) {
        Src = Result;
        Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
      }
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");
      // First do a narrowing conversion to an integer half the size, then
      // truncate if needed.

      if (DstEltSize == 1) {
        // First convert to the same size integer, then convert to mask using
        // a vector setcc.
        assert(SrcEltSize >= 16 && "Unexpected FP type!");
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
                                          DstVT.getVectorElementCount());
        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        // Compare the integer result to 0. The integer should be 0 or 1/-1,
        // otherwise the conversion was undefined.
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
        SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
                                DAG.getUNDEF(InterimIVT), SplatZero, VL);
        Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
                             {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
                              DAG.getUNDEF(DstVT), Mask, VL});
      } else {
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                          DstVT.getVectorElementCount());

        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        while (InterimIVT != DstVT) {
          SrcEltSize /= 2;
          InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                        DstVT.getVectorElementCount());
          Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
                               Result, Mask, VL);
        }
      }
    }
  }

  MVT VT = Op.getSimpleValueType();
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
                                            SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  if (VT.getVectorElementType() != MVT::i1)
    return lowerVPOp(Op, DAG);

  // It is safe to drop the mask parameter as masked-off elements are undef.
  SDValue Op1 = Op->getOperand(0);
  SDValue Op2 = Op->getOperand(1);
  SDValue VL = Op->getOperand(3);

  MVT ContainerVT = VT;
  const bool IsFixed = VT.isFixedLengthVector();
  if (IsFixed) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDLoc DL(Op);
  SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
  if (!IsFixed)
    return Val;
  return convertFromScalableVector(VT, Val, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VT);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  auto *VPNode = cast<VPStridedLoadSDNode>(Op);
  // Check if the mask is known to be all ones
  SDValue Mask = VPNode->getMask();
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
                                                   : Intrinsic::riscv_vlse_mask,
                                        DL, XLenVT);
  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
                              DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
                              VPNode->getStride()};
  if (!IsUnmasked) {
    if (VT.isFixedLengthVector()) {
      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
    Ops.push_back(Mask);
  }
  Ops.push_back(VPNode->getVectorLength());
  if (!IsUnmasked) {
    SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
    Ops.push_back(Policy);
  }

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              VPNode->getMemoryVT(), VPNode->getMemOperand());
  SDValue Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  auto *VPNode = cast<VPStridedStoreSDNode>(Op);
  SDValue StoreVal = VPNode->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
  }

  // Check if the mask is known to be all ones
  SDValue Mask = VPNode->getMask();
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
                                                   : Intrinsic::riscv_vsse_mask,
                                        DL, XLenVT);
  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
                              VPNode->getBasePtr(), VPNode->getStride()};
  if (!IsUnmasked) {
    if (VT.isFixedLengthVector()) {
      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
    Ops.push_back(Mask);
  }
  Ops.push_back(VPNode->getVectorLength());

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
                                 Ops, VPNode->getMemoryVT(),
                                 VPNode->getMemOperand());
}
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
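// For example (illustrative, per the note above): a gather of i32 elements
// whose i8 indices are scaled by the element size ends up with indices
// extended to XLEN and pre-multiplied into byte offsets, which is the only
// form the unsigned-unscaled vluxei instructions accept.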
SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  ISD::LoadExtType LoadExtType;
  SDValue Index, Mask, PassThru, VL;

  if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
    Index = VPGN->getIndex();
    Mask = VPGN->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPGN->getVectorLength();
    // VP doesn't support extending loads.
    LoadExtType = ISD::NON_EXTLOAD;
  } else {
    // Else it must be a MGATHER.
    auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
    Index = MGN->getIndex();
    Mask = MGN->getMask();
    PassThru = MGN->getPassThru();
    LoadExtType = MGN->getExtensionType();
  }

  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(LoadExtType == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER/VP_GATHER");
  (void)LoadExtType;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
  }

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then
// be matched to a RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  bool IsTruncatingStore = false;
  SDValue Index, Mask, Val, VL;

  if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
    Index = VPSN->getIndex();
    Mask = VPSN->getMask();
    Val = VPSN->getValue();
    VL = VPSN->getVectorLength();
    // VP doesn't support truncating stores.
    IsTruncatingStore = false;
  } else {
    // Else it must be a MSCATTER.
    auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
    Index = MSN->getIndex();
    Mask = MSN->getMask();
    Val = MSN->getValue();
    IsTruncatingStore = MSN->isTruncatingStore();
  }

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
  (void)IsTruncatingStore;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
  }

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue SysRegNo = DAG.getTargetConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
  SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
  SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);

  // Encoding used for rounding mode in RISC-V differs from that used in
  // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
  // into a table, which consists of a sequence of 4-bit fields, each
  // representing the corresponding FLT_ROUNDS mode.
  static const int Table =
      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
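
  // The lookup below computes Table >> (RM * 4) and keeps the low three bits,
  // i.e. it reads the 4-bit field selected by the current FRM value.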
  SDValue Shift =
      DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                               DAG.getConstant(7, DL, XLenVT));

  return DAG.getMergeValues({Masked, Chain}, DL);
}

SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue RMValue = Op->getOperand(1);
  SDValue SysRegNo = DAG.getTargetConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);

  // Encoding used for rounding mode in RISC-V differs from that used in
  // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
  // a table, which consists of a sequence of 4-bit fields, each representing
  // the corresponding RISC-V mode.
  static const unsigned Table =
      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
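
  // This is the inverse of the GET_ROUNDING table above: the incoming
  // FLT_ROUNDS value selects the 4-bit field that holds the FRM encoding to
  // write back to the CSR.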
  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);

  SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                              DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                        DAG.getConstant(0x7, DL, XLenVT));

  return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
                     RMValue);
}

SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isRISCV64 = Subtarget.is64Bit();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLLW/DIVUW/.../*W later on because the fact that the operation was
// originally of type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to an i64 operation with signed
// extension semantics to reduce the number of sign extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsStrict = N->isStrictFPOpcode();
    bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
                    N->getOpcode() == ISD::STRICT_FP_TO_SINT;
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Op0.getValueType()))
        return;
      if (IsStrict) {
        SDValue Chain = N->getOperand(0);
        // In absence of Zfh, promote f16 to f32, then convert.
        if (Op0.getValueType() == MVT::f16 &&
            !Subtarget.hasStdExtZfhOrZhinx()) {
          Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
                            {Chain, Op0});
          Chain = Op0.getValue(1);
        }
        unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
                                : RISCVISD::STRICT_FCVT_WU_RV64;
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(
            Opc, DL, VTs, Chain, Op0,
            DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
        Results.push_back(Res.getValue(1));
        return;
      }
      // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
      // convert.
      if ((Op0.getValueType() == MVT::f16 &&
           !Subtarget.hasStdExtZfhOrZhinx()) ||
          Op0.getValueType() == MVT::bf16)
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    RTLIB::Libcall LC;
    if (IsSigned)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Op0VT))
        return;

      // In absence of Zfh, promote f16 to f32, then convert.
      if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      SDValue Res =
          DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call to lround. We'll
    // need to truncate the result. We assume any value that doesn't fit in i32
    // is allowed to return an unspecified value.
    RTLIB::Libcall LC =
        Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::LOAD: {
    if (!ISD::isNON_EXTLoad(N))
      return;

    // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
    // sext_inreg we emit for ADD/SUB/MUL/SLLI.
    LoadSDNode *Ld = cast<LoadSDNode>(N);

    SDLoc dl(N);
    SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
                                 Ld->getBasePtr(), Ld->getMemoryVT(),
                                 Ld->getMemOperand());
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
    Results.push_back(Res.getValue(1));
    return;
  }
  case ISD::MUL: {
    unsigned Size = N->getSimpleValueType(0).getSizeInBits();
    unsigned XLen = Subtarget.getXLen();
    // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
    if (Size > XLen) {
      assert(Size == (XLen * 2) && "Unexpected custom legalisation");
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      APInt HighMask = APInt::getHighBitsSet(Size, XLen);

      bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
      bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
      // We need exactly one side to be unsigned.
      if (LHSIsU == RHSIsU)
        return;

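      // MULHSU multiplies a signed value by an unsigned value, so with one
      // signed and one unsigned operand the full 2*XLEN-bit product can be
      // built as the pair {S * U, mulhsu(S, U)}.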
      auto MakeMULPair = [&](SDValue S, SDValue U) {
        MVT XLenVT = Subtarget.getXLenVT();
        S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
        U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
        SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
        SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
        return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
      };

      bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
      bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;

      // The other operand should be signed, but still prefer MULH when
      // possible.
      if (RHSIsU && LHSIsS && !RHSIsS)
        Results.push_back(MakeMULPair(LHS, RHS));
      else if (LHSIsU && RHSIsS && !LHSIsS)
        Results.push_back(MakeMULPair(RHS, LHS));

      return;
    }
    [[fallthrough]];
  }
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      // If we can use a BSET instruction, allow default promotion to apply.
      if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
          isOneConstant(N->getOperand(0)))
        break;
      Results.push_back(customLegalizeToWOp(N, DAG));
      break;
    }

    // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
    // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
    // shift amount.
    if (N->getOpcode() == ISD::SHL) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue NewOp1 =
          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
      SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                                   DAG.getValueType(MVT::i32));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
            Subtarget.hasVendorXTHeadBb()) &&
           "Unexpected custom legalization");
    if (!isa<ConstantSDNode>(N->getOperand(1)) &&
        !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    bool IsCTZ =
        N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
    unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
    SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    return;
  }
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    // Don't promote division/remainder by constant since we should expand
    // those to multiply by magic constant.
    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    if (N->getOperand(1).getOpcode() == ISD::Constant &&
        !isIntDivCheap(N->getValueType(0), Attr))
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
                                           : ISD::ZERO_EXTEND;

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::SADDO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
    // use the default legalization.
    if (!isa<ConstantSDNode>(N->getOperand(1)))
      return;

    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);

    // For an addition, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is negative, otherwise
    // there will be overflow.
    // For a subtraction, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
    // otherwise there will be overflow.
    EVT OType = N->getValueType(1);
    SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
    SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);

    SDValue Overflow =
        DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsAdd = N->getOpcode() == ISD::UADDO;
    // Create an ADDW or SUBW.
    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res =
        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Overflow;
    if (IsAdd && isOneConstant(RHS)) {
      // Special case uaddo X, 1 overflowed if the addition result is 0.
      // The general case (X + C) < C is not necessarily beneficial. Although we
      // reduce the live range of X, we may introduce the materialization of
      // constant C, especially when the setcc result is used by branch. We have
      // no compare with constant and branch instructions.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
                              DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
    } else if (IsAdd && isAllOnesConstant(RHS)) {
      // Special case uaddo X, -1 overflowed if X != 0.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
                              DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
    } else {
      // Sign extend the LHS and perform an unsigned compare with the ADDW
      // result. Since the inputs are sign extended from i32, this is equivalent
      // to comparing the lower 32 bits.
      LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
                              IsAdd ? ISD::SETULT : ISD::SETUGT);
    }

    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDSAT:
  case ISD::USUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Subtarget.hasStdExtZbb()) {
      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
      // sign extend allows overflow of the lower 32 bits to be detected on
      // the promoted size.
      SDValue LHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue RHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }

    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
    // promotion for UADDO/USUBO.
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::ABS: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    if (Subtarget.hasStdExtZbb()) {
      // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
      // This allows us to remember that the result is sign extended. Expanding
      // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
      SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
                                N->getOperand(0));
      SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
      return;
    }

    // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
    SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));

    // Freeze the source so we can increase its use count.
    Src = DAG.getFreeze(Src);

    // Copy sign bit to all bits using the sraiw pattern.
    SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
                                   DAG.getValueType(MVT::i32));
    SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
                           DAG.getConstant(31, DL, MVT::i64));

    SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
    NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);

    // NOTE: The result is only required to be anyextended, but sext is
    // consistent with type legalization of sub.
    NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
                         DAG.getValueType(MVT::i32));
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    return;
  }
  case ISD::BITCAST: {
    EVT VT = N->getValueType(0);
    assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::i16 && Op0VT == MVT::f16 &&
        Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
               Subtarget.hasStdExtZfbfmin()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtFOrZfinx()) {
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
               Subtarget.hasStdExtZfa()) {
      SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), Op0);
      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                   NewReg.getValue(0), NewReg.getValue(1));
      Results.push_back(RetReg);
    } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
               isTypeLegal(Op0VT)) {
      // Custom-legalize bitcasts from fixed-length vector types to illegal
      // scalar types in order to improve codegen. Bitcast the vector to a
      // one-element vector type whose element type is the same as the result
      // type, and extract the first element.
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (isTypeLegal(BVT)) {
        SDValue BVec = DAG.getBitcast(BVT, Op0);
        Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                                      DAG.getConstant(0, DL, XLenVT)));
      }
    }
    break;
  }
  case RISCVISD::BREV8: {
    MVT VT = N->getSimpleValueType(0);
    MVT XLenVT = Subtarget.getXLenVT();
    assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalisation");
    assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
    SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
    SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
    // type is illegal (currently only vXi64 RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper- and lower- halves of the SEW-bit vector element, slid down to the
    // first element.
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getUNDEF(ContainerVT),
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 =
        DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
                    DAG.getUNDEF(ContainerVT), Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::experimental_get_vector_length: {
      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_orc_b:
    case Intrinsic::riscv_brev8:
    case Intrinsic::riscv_sha256sig0:
    case Intrinsic::riscv_sha256sig1:
    case Intrinsic::riscv_sha256sum0:
    case Intrinsic::riscv_sha256sum1:
    case Intrinsic::riscv_sm3p0:
    case Intrinsic::riscv_sm3p1: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;
      unsigned Opc;
      switch (IntNo) {
      case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
      case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
      case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
      case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
      case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
      case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
      case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
      case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
      }

      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_sm4ks:
    case Intrinsic::riscv_sm4ed: {
      unsigned Opc =
          IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmul: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmulh:
    case Intrinsic::riscv_clmulr: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      // Extend inputs to XLen, and shift by 32. This will add 64 trailing
      // zeros to the full 128-bit clmul result of multiplying two xlen values.
      // Perform clmulr or clmulh on the shifted values. Finally, extract the
      // upper 32 bits.
      //
      // The alternative is to mask the inputs to 32 bits and use clmul, but
      // that requires two shifts to mask each input without zext.w.
      // FIXME: If the inputs are known zero extended or could be freely
      // zero extended, the mask form would be better.
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
                           DAG.getConstant(32, DL, MVT::i64));
      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
                           DAG.getConstant(32, DL, MVT::i64));
      unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
                                                      : RISCVISD::CLMULR;
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
                        DAG.getConstant(32, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      MVT XLenVT = Subtarget.getXLenVT();
      if (VT.bitsLT(XLenVT)) {
        // Simple case just extract using vmv.x.s and truncate.
        SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                      Subtarget.getXLenVT(), N->getOperand(1));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
        return;
      }

      assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected custom legalization");

      // We need to do the move in two steps.
      SDValue Vec = N->getOperand(1);
      MVT VecVT = Vec.getSimpleValueType();

      // First extract the lower XLEN bits of the element.
      SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

      // To extract the upper XLEN bits of the vector element, shift the first
      // element right by 32 bits and re-extract the lower XLEN bits.
      auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);

      SDValue ThirtyTwoV =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
                      DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
                                   DAG.getUNDEF(VecVT), Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMIN:
    if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::GET_ROUNDING: {
    SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
    SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
    Results.push_back(Res.getValue(0));
    Results.push_back(Res.getValue(1));
    break;
  }
  }
}

/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
/// which corresponds to it.
static unsigned getVecReduceOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled binary to transform reduction");
  case ISD::ADD:
    return ISD::VECREDUCE_ADD;
  case ISD::UMAX:
    return ISD::VECREDUCE_UMAX;
  case ISD::SMAX:
    return ISD::VECREDUCE_SMAX;
  case ISD::UMIN:
    return ISD::VECREDUCE_UMIN;
  case ISD::SMIN:
    return ISD::VECREDUCE_SMIN;
  case ISD::AND:
    return ISD::VECREDUCE_AND;
  case ISD::OR:
    return ISD::VECREDUCE_OR;
  case ISD::XOR:
    return ISD::VECREDUCE_XOR;
  case ISD::FADD:
    // Note: This is the associative form of the generic reduction opcode.
    return ISD::VECREDUCE_FADD;
  }
}

/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes. In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
static SDValue
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {

  // This transform needs to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  if (DAG.NewNodesMustHaveLegalTypes)
    return SDValue();

  // Without V, this transform isn't useful. We could form the (illegal)
  // operations and let them be scalarized again, but there's really no point.
  if (!Subtarget.hasVInstructions())
    return SDValue();

  SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  const unsigned Opc = N->getOpcode();

  // For FADD, we only handle the case with reassociation allowed. We
  // could handle strict reduction order, but at the moment, there's no
  // known reason to, and the complexity isn't worth it.
  // TODO: Handle fminnum and fmaxnum here
  if (!VT.isInteger() &&
      (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
    return SDValue();

  const unsigned ReduceOpc = getVecReduceOpcode(Opc);
  assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
         "Inconsistent mappings");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    std::swap(LHS, RHS);

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  //   reduce_op (extract_subvector [2 x VT] from V). This will form the
  //   root of our reduction tree. TODO: We could extend this to any two
  //   adjacent aligned constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t LHSIdx =
        cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
    if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
      EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
      SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                                DAG.getVectorIdxConstant(0, DL));
      return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
    }
  }

  // Match (binop (reduce (extract_subvector V, 0),
  //                      (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ReduceOpc)
    return SDValue();

  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
      isNullConstant(ReduceVec.getOperand(1)) &&
      ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
    // For illegal types (e.g. 3xi32), most will be combined again into a
    // wider (hopefully legal) type. If this is a terminal state, we are
    // relying on type legalization here to produce something reasonable
    // and this lowering quality could probably be improved. (TODO)
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
    SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                              DAG.getVectorIdxConstant(0, DL));
    auto Flags = ReduceVec->getFlags();
    Flags.intersectWith(N->getFlags());
    return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
  }

  return SDValue();
}

// Try to fold (<bop> x, (reduction.<bop> vec, start))
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip if FADD disallows reassociation but the combiner needs it.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check that the scalar of ScalarV is the neutral element.
  // TODO: Deal with value other than neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
                    NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));

  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
                     Extract.getOperand(1));
}

// Optimize (add (shl x, c0), (shl y, c1)) ->
//          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
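// For example, with Zba enabled:
//   (add (shl x, 5), (shl y, 7)) -> (shl (add (shl y, 2), x), 5)
// where the inner (add (shl y, 2), x) can be selected as a single sh2add.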
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba extension.
  if (!Subtarget.hasStdExtZba())
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();

  // The two operand nodes must be SHL and have no other use.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
      !N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // Check c0 and c1.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  if (C0 <= 0 || C1 <= 0)
    return SDValue();

  // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
  int64_t Bits = std::min(C0, C1);
  int64_t Diff = std::abs(C0 - C1);
  if (Diff != 1 && Diff != 2 && Diff != 3)
    return SDValue();

  // Build nodes.
  SDLoc DL(N);
  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NA0 =
      DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
  SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
  return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
}

// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))   [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasShortForwardBranchOpt()) {
    // (select cond, x, (and x, c)) has custom lowering with Zicond.
    if ((!Subtarget.hasStdExtZicond() &&
         !Subtarget.hasVendorXVentanaCondOps()) ||
        N->getOpcode() != ISD::AND)
      return SDValue();

    // Maybe harmful when the condition code has multiple uses.
    if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
      return SDValue();

    // Maybe harmful when VT is wider than XLen.
    if (VT.getSizeInBits() > Subtarget.getXLen())
      return SDValue();
  }

  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}

// Attempt combineSelectAndUse on each operand of a commutative operator N.
static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
                                              bool AllOnes,
                                              const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
    return Result;
  if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
    return Result;
  return SDValue();
}

// Transform (add (mul x, c0), c1) ->
//           (add (mul (add x, c1/c0), c0), c1%c0).
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (mul (add x, c1/c0), c0).
// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
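// For example, x * 100 + 4097 (4097 is not simm12) becomes
//   (add (mul (add x, 40), 100), 97)
// since 40 = 4097/100 and 97 = 4097%100 are both simm12.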
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();
  // The first operand node must be a MUL and have no other use.
  SDValue N0 = N->getOperand(0);
  if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
    return SDValue();
  // Check if c0 and c1 match the above conditions.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  // If N0C has multiple uses it's possible one of the cases in
  // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
  // in an infinite loop.
  if (!N0C->hasOneUse())
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  int64_t CA, CB;
  if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
    return SDValue();
  // Search for proper CA (non-zero) and CB that both are simm12.
  if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
      !isInt<12>(C0 * (C1 / C0))) {
    CA = C1 / C0;
    CB = C1 % C0;
  } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
             isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
    CA = C1 / C0 + 1;
    CB = C1 % C0 - C0;
  } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
             isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
    CA = C1 / C0 - 1;
    CB = C1 % C0 + C0;
  } else
    return SDValue();
  // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
  SDLoc DL(N);
  SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
                             DAG.getConstant(CA, DL, VT));
  SDValue New1 =
      DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
  return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}

// Try to turn (add (xor bool, 1) -1) into (neg bool).
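// When bool is known to be 0 or 1, (xor bool, 1) == 1 - bool, so adding -1
// gives -bool, i.e. (sub 0, bool).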
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // RHS should be -1.
  if (!isAllOnesConstant(N1))
    return SDValue();

  // Look for (xor X, 1).
  if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
    return SDValue();

  // First xor input should be 0 or 1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
    return SDValue();

  // Emit a negate of the setcc.
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     N0.getOperand(0));
}

static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineAddOfBooleanXor(N, DAG))
    return V;
  if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  // fold (add (select lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (add x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

// Try to turn a sub boolean RHS and constant LHS into an addi.
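// For example, (sub 4, (setcc x, y, eq)) becomes (add (setcc x, y, ne), 3):
// both expressions are 3 when x == y and 4 otherwise.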
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Require a constant LHS.
  auto *N0C = dyn_cast<ConstantSDNode>(N0);
  if (!N0C)
    return SDValue();

  // All our optimizations involve subtracting 1 from the immediate and forming
  // an ADDI. Make sure the new immediate is valid for an ADDI.
  APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
  if (!ImmValMinus1.isSignedIntN(12))
    return SDValue();

  SDValue NewLHS;
  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
    // (sub constant, (setcc x, y, eq/neq)) ->
    // (add (setcc x, y, neq/eq), constant - 1)
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    EVT SetCCOpVT = N1.getOperand(0).getValueType();
    if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
      return SDValue();
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    NewLHS =
        DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
  } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
             N1.getOperand(0).getOpcode() == ISD::SETCC) {
    // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
    // Since setcc returns a bool the xor is equivalent to 1-setcc.
    NewLHS = N1.getOperand(0);
  } else
    return SDValue();

  SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
}

static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineSubOfBoolean(N, DAG))
    return V;

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
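  // (setcc x, 0, setlt) is the sign bit of x, so its negation is 0 or -1;
  // (sra x, xlen - 1) broadcasts the sign bit and produces the same value
  // without materializing the setcc.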
  if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
      isNullConstant(N1.getOperand(1))) {
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    if (CCVal == ISD::SETLT) {
      EVT VT = N->getValueType(0);
      SDLoc DL(N);
      unsigned ShAmt = N0.getValueSizeInBits() - 1;
      return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
                         DAG.getConstant(ShAmt, DL, VT));
    }
  }

  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
  //      (select lhs, rhs, cc, x, (sub x, y))
  return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
}

// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
// Legalizing setcc can introduce xors like this. Doing this transform reduces
// the number of xors and may allow the xor to fold into a branch condition.
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool IsAnd = N->getOpcode() == ISD::AND;

  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue N01 = N0.getOperand(1);
  SDValue N11 = N1.getOperand(1);

  // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
  // (xor X, -1) based on the upper bits of the other operand being 0. If the
  // operation is And, allow one of the Xors to use -1.
  if (isOneConstant(N01)) {
    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
      return SDValue();
  } else if (isOneConstant(N11)) {
    // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
    if (!(IsAnd && isAllOnesConstant(N01)))
      return SDValue();
  } else
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);

  // The LHS of the xors needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
    return SDValue();

  // Invert the opcode and insert a new xor.
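  // For 0/1 values this is De Morgan's law, e.g. for AND:
  // (x ^ 1) & (y ^ 1) == (x | y) ^ 1.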
  SDLoc DL(N);
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}

performTRUNCATECombine(SDNode
*N
, SelectionDAG
&DAG
,
12069 const RISCVSubtarget
&Subtarget
) {
12070 SDValue N0
= N
->getOperand(0);
12071 EVT VT
= N
->getValueType(0);
12073 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
12074 // extending X. This is safe since we only need the LSB after the shift and
12075 // shift amounts larger than 31 would produce poison. If we wait until
12076 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12077 // to use a BEXT instruction.
12078 if (Subtarget
.is64Bit() && Subtarget
.hasStdExtZbs() && VT
== MVT::i1
&&
12079 N0
.getValueType() == MVT::i32
&& N0
.getOpcode() == ISD::SRL
&&
12080 !isa
<ConstantSDNode
>(N0
.getOperand(1)) && N0
.hasOneUse()) {
12082 SDValue Op0
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N0
.getOperand(0));
12083 SDValue Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N0
.getOperand(1));
12084 SDValue Srl
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, Op0
, Op1
);
12085 return DAG
.getNode(ISD::TRUNCATE
, SDLoc(N
), VT
, Srl
);
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
static SDValue performANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  SDValue N0 = N->getOperand(0);
  // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
  // extending X. This is safe since we only need the LSB after the shift and
  // shift amounts larger than 31 would produce poison. If we wait until
  // type legalization, we'll create RISCVISD::SRLW and we can't recover it
  // to use a BEXT instruction.
  if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
                              DAG.getConstant(1, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // fold (and (select lhs, rhs, cc, -1, y), x) ->
  //      (select lhs, rhs, cc, x, (and x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}

// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
// FIXME: Generalize to other binary operators with same operand.
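// or(czero_eqz(a ^ 1, c), czero_nez(b ^ 1, c)) computes (c ? a : b) ^ 1, so
// the xor with 1 can be applied once to the combined select result instead.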
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
                                SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::OR && "Unexpected opcode");

  if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
      !N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Should have the same condition.
  SDValue Cond = N0.getOperand(1);
  if (Cond != N1.getOperand(1))
    return SDValue();

  SDValue TrueV = N0.getOperand(0);
  SDValue FalseV = N1.getOperand(0);

  if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
      TrueV.getOperand(1) != FalseV.getOperand(1) ||
      !isOneConstant(TrueV.getOperand(1)) ||
      !TrueV.hasOneUse() || !FalseV.hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
                              Cond);
  SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
                              Cond);
  SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
  return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}

static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
  // We may be able to pull a common operation out of the true and false value.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
    return V;
  if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
    return V;

  // fold (or (select cond, 0, y), x) ->
  //   (select cond, x, (or x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
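// Worked example for the first fold in performXORCombine below: with X in
// [0, 31], (xor (sllw 1, X), -1) computes ~(1 << X), and rotating the
// constant ~1 (0xFFFFFFFE) left by X produces the same all-ones-except-bit-X
// value, so a single ROLW replaces the shift plus the NOT.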
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
  // NOTE: Assumes ROL being legal means ROLW is legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (N0.getOpcode() == RISCVISD::SLLW &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
      TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
    SDLoc DL(N);
    return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
                       DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
  }

  // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
  if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
    auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    if (ConstN00 && CC == ISD::SETLT) {
      EVT VT = N0.getValueType();
      SDLoc DL(N0);
      const APInt &Imm = ConstN00->getAPIntValue();
      if ((Imm + 1).isSignedIntN(12))
        return DAG.getSetCC(DL, VT, N0.getOperand(1),
                            DAG.getConstant(Imm + 1, DL, VT), CC);
    }
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  // fold (xor (select cond, 0, y), x) ->
  //   (select cond, x, (xor x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
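// Example of the index narrowing below: for a gather whose index is
// (shl (zext i8 X to i64), 2), only 8 + 2 = 10 significant bits are ever
// produced, so the index vector can be built with i16 elements instead
// (NewElen = PowerOf2Ceil(10) = 16), relying on indexed memory operations
// zero-extending their indices.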
/// According to the property that indexed load/store instructions zero-extend
/// their indices, try to narrow the type of index operand.
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType,
                        SelectionDAG &DAG) {
  if (isIndexTypeSigned(IndexType))
    return false;

  if (!N->hasOneUse())
    return false;

  EVT VT = N.getValueType();
  SDLoc DL(N);

  // In general, what we're doing here is seeing if we can sink a truncate to
  // a smaller element type into the expression tree building our index.
  // TODO: We can generalize this and handle a bunch more cases if useful.

  // Narrow a buildvector to the narrowest element type. This requires less
  // work and less register pressure at high LMUL, and creates smaller constants
  // which may be cheaper to materialize.
  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
    KnownBits Known = DAG.computeKnownBits(N);
    unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
    LLVMContext &C = *DAG.getContext();
    EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
    if (ResultVT.bitsLT(VT.getVectorElementType())) {
      N = DAG.getNode(ISD::TRUNCATE, DL,
                      VT.changeVectorElementType(ResultVT), N);
      return true;
    }
  }

  // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
  if (N.getOpcode() != ISD::SHL)
    return false;

  SDValue N0 = N.getOperand(0);
  if (N0.getOpcode() != ISD::ZERO_EXTEND &&
      N0.getOpcode() != RISCVISD::VZEXT_VL)
    return false;
  if (!N0->hasOneUse())
    return false;

  APInt ShAmt;
  SDValue N1 = N.getOperand(1);
  if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
    return false;

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  unsigned SrcElen = SrcVT.getScalarSizeInBits();
  unsigned ShAmtV = ShAmt.getZExtValue();
  unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
  NewElen = std::max(NewElen, 8U);

  // Skip if NewElen is not narrower than the original extended type.
  if (NewElen >= N0.getValueType().getScalarSizeInBits())
    return false;

  EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
  EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);

  SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
  SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
  N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
  return true;
}
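// Concrete instance of the combine below: (seteq (and X, 0xffffffff),
// 0x80000000) becomes (seteq (sext_inreg X, i32), 0xffffffff80000000); the
// sign-extended constant plus sext.w is typically cheaper to materialize than
// the zero-extension shift pair.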
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  if (OpVT != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  // RHS needs to be a constant.
  auto *N1C = dyn_cast<ConstantSDNode>(N1);
  if (!N1C)
    return SDValue();

  // LHS needs to be (and X, 0xffffffff).
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
      !isa<ConstantSDNode>(N0.getOperand(1)) ||
      N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
    return SDValue();

  // Looking for an equality compare.
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (!isIntEqualitySetCC(Cond))
    return SDValue();

  // Don't do this if the sign bit is provably zero, it will be turned back into
  // an AND.
  APInt SignMask = APInt::getOneBitSet(64, 31);
  if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
    return SDValue();

  const APInt &C1 = N1C->getAPIntValue();

  SDLoc dl(N);
  // If the constant is larger than 2^32 - 1 it is impossible for both sides
  // to be equal.
  if (C1.getActiveBits() > 32)
    return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

  SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
                               N0.getOperand(0), DAG.getValueType(MVT::i32));
  return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
                                                      dl, OpVT), Cond);
}
static SDValue
performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
  if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
    return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
                       Src.getOperand(0));

  return SDValue();
}
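// The machinery below drives the vector widening combines. Roughly, for a
// supported root such as add_vl, each operand is wrapped in a
// NodeExtensionHelper that records whether it behaves like a sign or zero
// extension, so that e.g. add_vl(vsext(a), vsext(b)) can be rebuilt as
// vwadd_vl(a, b) on the narrower operands.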
namespace {
// Forward declaration of the structure holding the necessary information to
// apply a combine.
struct CombineResult;

/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
///
/// An object of this class represents an operand of the operation we want to
/// combine.
/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
/// NodeExtensionHelper for `a` and one for `b`.
///
/// This class abstracts away how the extension is materialized and
/// how its Mask, VL, number of users affect the combines.
///
/// In particular:
/// - VWADD_W is conceptually == add(op0, sext(op1))
/// - VWADDU_W == add(op0, zext(op1))
/// - VWSUB_W == sub(op0, sext(op1))
/// - VWSUBU_W == sub(op0, zext(op1))
///
/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
/// zext|sext(smaller_value).
struct NodeExtensionHelper {
  /// Records if this operand is like being zero extended.
  bool SupportsZExt;
  /// Records if this operand is like being sign extended.
  /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
  /// instance, a splat constant (e.g., 3), would support being both sign and
  /// zero extended.
  bool SupportsSExt;
  /// This boolean captures whether we care if this operand would still be
  /// around after the folding happens.
  bool EnforceOneUse;
  /// Records if this operand's mask needs to match the mask of the operation
  /// that it will fold into.
  bool CheckMask;
  /// Value of the Mask for this operand.
  /// It may be SDValue().
  SDValue Mask;
  /// Value of the vector length operand.
  /// It may be SDValue().
  SDValue VL;
  /// Original value that this NodeExtensionHelper represents.
  SDValue OrigOperand;
  /// Get the value feeding the extension or the value itself.
  /// E.g., for zext(a), this would return a.
  SDValue getSource() const {
    switch (OrigOperand.getOpcode()) {
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return OrigOperand.getOperand(0);
    default:
      return OrigOperand;
    }
  }

  /// Check if this instance represents a splat.
  bool isSplat() const {
    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
  }

  /// Get or create a value that can feed \p Root with the given extension \p
  /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
  /// \see ::getSource().
  SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
                                std::optional<bool> SExt) const {
    if (!SExt.has_value())
      return OrigOperand;

    MVT NarrowVT = getNarrowType(Root);

    SDValue Source = getSource();
    if (Source.getValueType() == NarrowVT)
      return Source;

    unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;

    // If we need an extension, we should be changing the type.
    SDLoc DL(Root);
    auto [Mask, VL] = getMaskAndVL(Root);
    switch (OrigOperand.getOpcode()) {
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
    case RISCVISD::VMV_V_X_VL:
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
    default:
      // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
      // and that operand should already have the right NarrowVT so no
      // extension should be required at this point.
      llvm_unreachable("Unsupported opcode");
    }
  }
  /// Helper function to get the narrow type for \p Root.
  /// The narrow type is the type of \p Root where we divided the size of each
  /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
  /// \pre The size of the type of the elements of Root must be a multiple of 2
  /// and be at least 16.
  static MVT getNarrowType(const SDNode *Root) {
    MVT VT = Root->getSimpleValueType(0);

    // Determine the narrow size.
    unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
    assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
    MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
                                    VT.getVectorElementCount());
    return NarrowVT;
  }

  /// Return the opcode required to materialize the folding of the sign
  /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
  /// both operands for \p Opcode.
  /// Put differently, get the opcode to materialize:
  /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
  /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
  /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
  static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
    switch (Opcode) {
    case RISCVISD::ADD_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
      return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
    case RISCVISD::MUL_VL:
      return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getSUOpcode(unsigned Opcode) {
    assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
    return RISCVISD::VWMULSU_VL;
  }

  /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
    switch (Opcode) {
    case RISCVISD::ADD_VL:
      return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
    case RISCVISD::SUB_VL:
      return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }
  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
      const NodeExtensionHelper & /*RHS*/)>;

  /// Check if this node needs to be fully folded or extended for all users.
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }

  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
  void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
    SupportsZExt = false;
    SupportsSExt = false;
    EnforceOneUse = true;
    CheckMask = true;
    switch (OrigOperand.getOpcode()) {
    case RISCVISD::VZEXT_VL:
      SupportsZExt = true;
      Mask = OrigOperand.getOperand(1);
      VL = OrigOperand.getOperand(2);
      break;
    case RISCVISD::VSEXT_VL:
      SupportsSExt = true;
      Mask = OrigOperand.getOperand(1);
      VL = OrigOperand.getOperand(2);
      break;
    case RISCVISD::VMV_V_X_VL: {
      // Historically, we didn't care about splat values not disappearing during
      // combines.
      EnforceOneUse = false;
      CheckMask = false;
      VL = OrigOperand.getOperand(2);

      // The operand is a splat of a scalar.

      // The passthru must be undef for tail agnostic.
      if (!OrigOperand.getOperand(0).isUndef())
        break;

      // Get the scalar value.
      SDValue Op = OrigOperand.getOperand(1);

      // See if we have enough sign bits or zero bits in the scalar to use a
      // widening opcode by splatting to smaller element size.
      MVT VT = Root->getSimpleValueType(0);
      unsigned EltBits = VT.getScalarSizeInBits();
      unsigned ScalarBits = Op.getValueSizeInBits();
      // Make sure we're getting all element bits from the scalar register.
      // FIXME: Support implicit sign extension of vmv.v.x?
      if (ScalarBits < EltBits)
        break;

      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
      // If the narrow type cannot be expressed with a legal VMV,
      // this is not a valid candidate.
      if (NarrowSize < 8)
        break;

      if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
        SupportsSExt = true;
      if (DAG.MaskedValueIsZero(Op,
                                APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
        SupportsZExt = true;
      break;
    }
    default:
      break;
    }
  }
  /// Check if \p Root supports any extension folding combines.
  static bool isSupportedRoot(const SDNode *Root) {
    switch (Root->getOpcode()) {
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return true;
    default:
      return false;
    }
  }

  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
  NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
    assert(isSupportedRoot(Root) && "Trying to build a helper with an "
                                    "unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
    OrigOperand = Root->getOperand(OperandIdx);

    unsigned Opc = Root->getOpcode();
    switch (Opc) {
    // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
    // <ADD|SUB>(LHS, S|ZEXT(RHS))
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      if (OperandIdx == 1) {
        SupportsZExt =
            Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
        SupportsSExt = !SupportsZExt;
        std::tie(Mask, VL) = getMaskAndVL(Root);
        CheckMask = true;
        // There's no existing extension here, so we don't have to worry about
        // making sure it gets removed.
        EnforceOneUse = false;
        break;
      }
      [[fallthrough]];
    default:
      fillUpExtensionSupport(Root, DAG);
      break;
    }
  }
  /// Check if this operand is compatible with the given vector length \p VL.
  bool isVLCompatible(SDValue VL) const {
    return this->VL != SDValue() && this->VL == VL;
  }

  /// Check if this operand is compatible with the given \p Mask.
  bool isMaskCompatible(SDValue Mask) const {
    return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
  }

  /// Helper function to get the Mask and VL from \p Root.
  static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
    assert(isSupportedRoot(Root) && "Unexpected root");
    return std::make_pair(Root->getOperand(3), Root->getOperand(4));
  }

  /// Check if the Mask and VL of this operand are compatible with \p Root.
  bool areVLAndMaskCompatible(const SDNode *Root) const {
    auto [Mask, VL] = getMaskAndVL(Root);
    return isMaskCompatible(Mask) && isVLCompatible(VL);
  }

  /// Helper function to check if \p N is commutative with respect to the
  /// foldings that are supported by this class.
  static bool isCommutative(const SDNode *N) {
    switch (N->getOpcode()) {
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
      return true;
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return false;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get a list of combines to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that, if not
  /// None, needs to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.
  static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
};
/// Helper structure that holds all the necessary information to materialize a
/// combine that does some extension folding.
struct CombineResult {
  /// Opcode to be generated when materializing the combine.
  unsigned TargetOpcode;
  // No value means no extension is needed. If extension is needed, the value
  // indicates if it needs to be sign extended.
  std::optional<bool> SExtLHS;
  std::optional<bool> SExtRHS;
  /// Root of the combine.
  SDNode *Root;
  /// LHS of the TargetOpcode.
  NodeExtensionHelper LHS;
  /// RHS of the TargetOpcode.
  NodeExtensionHelper RHS;

  CombineResult(unsigned TargetOpcode, SDNode *Root,
                const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
                const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
      : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
        Root(Root), LHS(LHS), RHS(RHS) {}

  /// Return a value that uses TargetOpcode and that can be used to replace
  /// Root.
  /// The actual replacement is *not* done in that method.
  SDValue materialize(SelectionDAG &DAG) const {
    SDValue Mask, VL, Merge;
    std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
    Merge = Root->getOperand(2);
    return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
                       LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
                       RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
                       Mask, VL);
  }
};
/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
/// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
///
/// \note If the pattern can match with both zext and sext, the returned
/// CombineResult will feature the zext result.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
                                 const NodeExtensionHelper &RHS, bool AllowSExt,
                                 bool AllowZExt) {
  assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
  if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
    return std::nullopt;
  if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
    return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
                             Root->getOpcode(), /*IsSExt=*/false),
                         Root, LHS, /*SExtLHS=*/false, RHS,
                         /*SExtRHS=*/false);
  if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
    return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
                             Root->getOpcode(), /*IsSExt=*/true),
                         Root, LHS, /*SExtLHS=*/true, RHS,
                         /*SExtRHS=*/true);
  return std::nullopt;
}
/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
                             const NodeExtensionHelper &RHS) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
                                          /*AllowZExt=*/true);
}
/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
              const NodeExtensionHelper &RHS) {
  if (!RHS.areVLAndMaskCompatible(Root))
    return std::nullopt;

  // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
  // splat?
  // Control this behavior behind an option (AllowSplatInVW_W) for testing
  // purposes.
  if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
    return CombineResult(
        NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
        Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
  if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
    return CombineResult(
        NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
        Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
  return std::nullopt;
}
/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
                    const NodeExtensionHelper &RHS) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
                                          /*AllowZExt=*/false);
}

/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
                    const NodeExtensionHelper &RHS) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
                                          /*AllowZExt=*/true);
}
/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
               const NodeExtensionHelper &RHS) {
  if (!LHS.SupportsSExt || !RHS.SupportsZExt)
    return std::nullopt;
  if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
    return std::nullopt;
  return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
                       Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
}
SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
    // add|sub -> vwadd(u)|vwsub(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub -> vwadd(u)_w|vwsub(u)_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWSUB_W_VL:
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUBU_W_VL:
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
} // End anonymous namespace.
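// Note on the driver below: it walks the web of users rooted at N (bounded by
// ExtensionMaxWebSize), collects one CombineResult per visited root, and only
// then materializes and RAUWs them, so the fold is applied all-or-nothing
// across the web.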
/// Combine a binary operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
static SDValue
combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;

  assert(NodeExtensionHelper::isSupportedRoot(N) &&
         "Shouldn't have called this method");
  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();
    if (!NodeExtensionHelper::isSupportedRoot(Root))
      return SDValue();

    NodeExtensionHelper LHS(N, 0, DAG);
    NodeExtensionHelper RHS(N, 1, DAG);
    auto AppendUsersIfNeeded = [&Worklist,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDNode *TheUse : Op.OrigOperand->uses()) {
          if (Inserted.insert(TheUse).second)
            Worklist.push_back(TheUse);
        }
      }
    };

    // Control the compile time by limiting the number of nodes we look at in
    // total.
    if (Inserted.size() > ExtensionMaxWebSize)
      return SDValue();

    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
        NodeExtensionHelper::getSupportedFoldings(N);

    assert(!FoldingStrategies.empty() && "Nothing to be folded");
    bool Matched = false;
    for (int Attempt = 0;
         (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
         ++Attempt) {

      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
           FoldingStrategies) {
        std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
        if (Res) {
          Matched = true;
          CombinesToApply.push_back(*Res);
          // All the inputs that are extended need to be folded, otherwise
          // we would be leaving the old input (since it may still be used)
          // and the new one.
          if (Res->SExtLHS.has_value())
            AppendUsersIfNeeded(LHS);
          if (Res->SExtRHS.has_value())
            AppendUsersIfNeeded(RHS);
          break;
        }
      }
      std::swap(LHS, RHS);
    }
    // Right now we do an all or nothing approach.
    if (!Matched)
      return SDValue();
  }
  // Store the value for the replacement of the input node separately.
  SDValue InputRootReplacement;
  // We do the RAUW after we materialize all the combines, because some replaced
  // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
  // some of these nodes may appear in the NodeExtensionHelpers of some of the
  // yet-to-be-visited CombinesToApply roots.
  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
  ValuesToReplace.reserve(CombinesToApply.size());
  for (CombineResult Res : CombinesToApply) {
    SDValue NewValue = Res.materialize(DAG);
    if (!InputRootReplacement) {
      assert(Res.Root == N &&
             "First element is expected to be the current node");
      InputRootReplacement = NewValue;
    } else {
      ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
    }
  }
  for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
    DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
    DCI.AddToWorklist(OldNewValues.second.getNode());
  }
  return InputRootReplacement;
}
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
                                 uint64_t Imm) {
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};

  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
    return SDValue();

  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // The new operation has twice the width.
  MVT XLenVT = Subtarget.getXLenVT();
  EVT MemVT = LSNode1->getMemoryVT();
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
  MachineMemOperand *MMO = LSNode1->getMemOperand();
  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);

  if (LSNode1->getOpcode() == ISD::LOAD) {
    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
    unsigned Opcode;
    if (MemVT == MVT::i32)
      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
    else
      Opcode = RISCVISD::TH_LDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
        {LSNode1->getChain(), BasePtr,
         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    SDValue Node1 =
        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
    SDValue Node2 =
        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));

    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
    return Node1;
  } else {
    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
    return Res;
  }
}
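// Encoding reminder for the offset checks below: isShiftedUInt<2, 3>(Off)
// accepts Off in {0, 8, 16, 24} (a 2-bit index scaled by 8 for i32 pairs) and
// isShiftedUInt<2, 4>(Off) accepts {0, 16, 32, 48} for i64 pairs, matching the
// XTHeadMemPair immediate.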
// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension.
static SDValue performMemPairCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())
    return SDValue();

  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
  EVT MemVT = LSNode1->getMemoryVT();
  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD)
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
        return {Ptr->getOperand(0), C1->getZExtValue()};
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  SDValue Chain = N->getOperand(0);
  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
       UI != UE; ++UI) {
    SDUse &Use = UI.getUse();
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {
      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
            cast<LoadSDNode>(LSNode1)->getExtensionType())
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instructions.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints.
      bool Valid = false;
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index.
        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
          Valid = true;
      } else if (MemVT == MVT::i64) {
        // Check for adjacent i64 values and a 2-bit index.
        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
          Valid = true;
      }

      if (!Valid)
        continue;

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}
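// Example of the scalar path below (RV64, f64 source): (fp_to_sint (ffloor X))
// becomes an FCVT_X node with the static RDN rounding mode, i.e. a single
// fcvt.l.d with rounding mode rdn, rather than a separate floor followed by a
// conversion.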
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X))     -> fcvt X, rtz
// (fp_to_int (ffloor X))     -> fcvt X, rdn
// (fp_to_int (fceil X))      -> fcvt X, rup
// (fp_to_int (fround X))     -> fcvt X, rmm
static SDValue performFP_TO_INTCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  // If the result is invalid, we didn't find a foldable instruction.
  // If the result is dynamic, then we found an frint which we don't yet
  // support. It will cause 7 to be written to the FRM CSR for vector.
  // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below.
  if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN)
    return SDValue();

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  EVT VT = N->getValueType(0);

  if (VT.isVector() && TLI.isTypeLegal(VT)) {
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcContainerVT = SrcVT;
    MVT ContainerVT = VT.getSimpleVT();
    SDValue XVal = Src.getOperand(0);

    // For widening and narrowing conversions we just combine it into a
    // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
    // end up getting lowered to their appropriate pseudo instructions based on
    // their operand types.
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
        VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
      return SDValue();

    // Make fixed-length vectors scalable first
    if (SrcVT.isFixedLengthVector()) {
      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
      ContainerVT =
          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
    }

    auto [Mask, VL] =
        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);

    SDValue FpToInt;
    if (FRM == RISCVFPRndMode::RTZ) {
      // Use the dedicated trunc static rounding mode if we're truncating so we
      // don't need to generate calls to fsrmi/fsrm
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else {
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    }

    // If converted from fixed-length to scalable, convert back
    if (VT.isFixedLengthVector())
      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);

    return FpToInt;
  }

  // Only handle XLen or i32 types. Other types narrower than XLen will
  // eventually be legalized to XLenVT.
  if (VT != MVT::i32 && VT != XLenVT)
    return SDValue();

  unsigned Opc;
  if (VT == XLenVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;

  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                DAG.getTargetConstant(FRM, DL, XLenVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}
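// Rationale for the SETUO select below: the scalar fcvt instructions already
// saturate out-of-range inputs, but they convert NaN to the largest value
// rather than to 0, while FP_TO_*INT_SAT requires 0 for NaN; comparing Src
// with itself (unordered) supplies that case.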
// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
// (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
// (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
static SDValue performFP_TO_INT_SATCombine(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  // Only handle XLen types. Other types narrower than XLen will eventually be
  // legalized to XLenVT.
  EVT DstVT = N->getValueType(0);
  if (DstVT != XLenVT)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is also legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;

  unsigned Opc;
  if (SatVT == DstVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  Src = Src.getOperand(0);

  SDLoc DL(N);
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
                                DAG.getTargetConstant(FRM, DL, XLenVT));

  // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
  // extend.
  if (Opc == RISCVISD::FCVT_WU_RV64)
    FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

  // RISC-V FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan.
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
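// Why bitreverse(bswap X) is brev8: bswap reverses the byte order, and a full
// bitreverse reverses the byte order again while also reversing the bits
// inside each byte, so the composition reduces to reversing bits within each
// byte, which is exactly the BREV8 (GREVI) encoding used below.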
// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
// smaller than XLenVT.
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != ISD::BSWAP)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
      !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
}
// Convert from one FMA opcode to another based on whether we are negating the
// multiply result and/or the accumulator.
// NOTE: Only supports RVV operations with VL.
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
  // Negating the multiply result changes ADD<->SUB and toggles 'N'.
  if (NegMul) {
    // clang-format off
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
    case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
    case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
    }
    // clang-format on
  }

  // Negating the accumulator changes ADD<->SUB.
  if (NegAcc) {
    // clang-format off
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
    case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
    case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
    }
    // clang-format on
  }

  return Opcode;
}
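// Example of negateFMAOpcode above: vfmadd computes (a * b) + c, so negating
// the product yields -(a * b) + c, i.e. vfnmsub, while negating only the
// accumulator yields (a * b) - c, i.e. vfmsub; the two switches compose to
// cover the remaining cases.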
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
  // Fold FNEG_VL into FMA opcodes.
  // The first operand of strict-fp is chain.
  unsigned Offset = N->isTargetStrictFPOpcode();
  SDValue A = N->getOperand(0 + Offset);
  SDValue B = N->getOperand(1 + Offset);
  SDValue C = N->getOperand(2 + Offset);
  SDValue Mask = N->getOperand(3 + Offset);
  SDValue VL = N->getOperand(4 + Offset);

  auto invertIfNegative = [&Mask, &VL](SDValue &V) {
    if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
        V.getOperand(2) == VL) {
      // Return the negated input.
      V = V.getOperand(0);
      return true;
    }

    return false;
  };

  bool NegA = invertIfNegative(A);
  bool NegB = invertIfNegative(B);
  bool NegC = invertIfNegative(C);

  // If no operands are negated, we're done.
  if (!NegA && !NegB && !NegC)
    return SDValue();

  unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
  if (N->isTargetStrictFPOpcode())
    return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
                       {N->getOperand(0), A, B, C, Mask, VL});
  return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
                     VL);
}
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
    return V;

  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16())) {
    return SDValue();
  }

  // FIXME: Ignore strict opcodes for now.
  if (N->isTargetStrictFPOpcode())
    return SDValue();

  // Try to form widening FMA.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
      Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
    return SDValue();

  // TODO: Refactor to handle more complex cases similar to
  // combineBinOp_VLToVWBinOp_VL.
  if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
      (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
    return SDValue();

  // Check the mask and VL are the same.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
      Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
    return SDValue();

  unsigned NewOpc;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCVISD::VFMADD_VL:
    NewOpc = RISCVISD::VFWMADD_VL;
    break;
  case RISCVISD::VFNMSUB_VL:
    NewOpc = RISCVISD::VFWNMSUB_VL;
    break;
  case RISCVISD::VFNMADD_VL:
    NewOpc = RISCVISD::VFWNMADD_VL;
    break;
  case RISCVISD::VFMSUB_VL:
    NewOpc = RISCVISD::VFWMSUB_VL;
    break;
  }

  Op0 = Op0.getOperand(0);
  Op1 = Op1.getOperand(0);

  return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
                     N->getOperand(2), Mask, VL);
}
static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16())) {
    return SDValue();
  }

  // FIXME: Ignore strict opcodes for now.
  assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");

  // Try to form widening multiply.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Merge = N->getOperand(2);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
      Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
    return SDValue();

  // TODO: Refactor to handle more complex cases similar to
  // combineBinOp_VLToVWBinOp_VL.
  if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
      (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
    return SDValue();

  // Check the mask and VL are the same.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
      Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
    return SDValue();

  Op0 = Op0.getOperand(0);
  Op1 = Op1.getOperand(0);

  return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
                     Op1, Merge, Mask, VL);
}
static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16())) {
    return SDValue();
  }

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Merge = N->getOperand(2);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;

  // Look for foldable FP_EXTENDS.
  bool Op0IsExtend =
      Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
      (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
  bool Op1IsExtend =
      (Op0 == Op1 && Op0IsExtend) ||
      (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());

  // Check the mask and VL.
  if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
    Op0IsExtend = false;
  if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
    Op1IsExtend = false;

  if (!Op1IsExtend) {
    // Sub requires at least operand 1 to be an extend.
    if (!IsAdd)
      return SDValue();

    // Add is commutable, if the other operand is foldable, swap them.
    if (!Op0IsExtend)
      return SDValue();

    std::swap(Op0, Op1);
    std::swap(Op0IsExtend, Op1IsExtend);
  }

  // Op1 is a foldable extend. Op0 might be foldable.
  Op1 = Op1.getOperand(0);
  if (Op0IsExtend)
    Op0 = Op0.getOperand(0);

  unsigned Opc;
  if (IsAdd)
    Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
  else
    Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;

  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
                     VL);
}
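// Note on the first fold in performSRACombine below: rewriting
// (sra (sext_inreg (shl X, C1), i32), C2) as (sra (shl X, C1+32), C2+32)
// trades SLLIW+SRAIW for SLLI+SRAI, which have 16-bit compressed encodings
// (c.slli/c.srai); the W-form shifts do not.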
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");

  if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();
  uint64_t ShAmt = N->getConstantOperandVal(1);
  if (ShAmt > 32)
    return SDValue();

  SDValue N0 = N->getOperand(0);

  // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
  // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
  // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
  if (ShAmt < 32 &&
      N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
      cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
      N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
    if (LShAmt < 32) {
      SDLoc ShlDL(N0.getOperand(0));
      SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
                                N0.getOperand(0).getOperand(0),
                                DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
                         DAG.getConstant(ShAmt + 32, DL, MVT::i64));
    }
  }

  // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
  // FIXME: Should this be a generic combine? There's a similar combine on X86.
  //
  // Also try these folds where an add or sub is in the middle.
  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
  SDValue Shl;
  ConstantSDNode *AddC = nullptr;

  // We might have an ADD or SUB between the SRA and SHL.
  bool IsAdd = N0.getOpcode() == ISD::ADD;
  if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
    // Other operand needs to be a constant we can modify.
    AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
    if (!AddC)
      return SDValue();

    // AddC needs to have at least 32 trailing zeros.
    if (AddC->getAPIntValue().countr_zero() < 32)
      return SDValue();

    // All users should be a shift by constant less than or equal to 32. This
    // ensures we'll do this optimization for each of them to produce an
    // add/sub+sext_inreg they can all share.
    for (SDNode *U : N0->uses()) {
      if (U->getOpcode() != ISD::SRA ||
          !isa<ConstantSDNode>(U->getOperand(1)) ||
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
        return SDValue();
    }

    Shl = N0.getOperand(IsAdd ? 0 : 1);
  } else {
    // Not an ADD or SUB.
    Shl = N0;
  }

  // Look for a shift left by 32.
  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
      Shl.getConstantOperandVal(1) != 32)
    return SDValue();

  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  if (!AddC && !Shl.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  SDValue In = Shl.getOperand(0);

  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
  // constant.
  if (AddC) {
    SDValue ShiftedAddC =
        DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
    if (IsAdd)
      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
    else
      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
  }

  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                             DAG.getValueType(MVT::i32));
  if (ShAmt == 32)
    return SExt;

  return DAG.getNode(
      ISD::SHL, DL, MVT::i64, SExt,
      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}
// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
// br_cc/select_cc.
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // Invert (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}
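// Example for the single-bit test fold in combine_CC below: a branch on
// ((srl (and X, 1 << C), C) == 0) becomes a branch on
// ((shl X, XLen-1-C) >= 0), i.e. the tested bit is moved into the sign
// position so the comparison needs no mask and no extra shifted result.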
// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // Because an arithmetic right shift always preserves the sign bit, the
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  if (isNullConstant(RHS)) {
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}
// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
// (select C, (or Y, X), Y)  -> (or Y, (select C, X, 0)).
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
                                   SDValue TrueVal, SDValue FalseVal,
                                   bool Swapped) {
  bool Commutative = true;
  unsigned Opc = TrueVal.getOpcode();
  switch (Opc) {
  default:
    return SDValue();
  case ISD::SUB:
    Commutative = false;
    break;
  case ISD::ADD:
  case ISD::OR:
  case ISD::XOR:
    break;
  }

  if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
    return SDValue();

  unsigned OpToFold;
  if (FalseVal == TrueVal.getOperand(0))
    OpToFold = 0;
  else if (Commutative && FalseVal == TrueVal.getOperand(1))
    OpToFold = 1;
  else
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
  EVT OtherOpVT = OtherOp->getValueType(0);
  SDValue IdentityOperand =
      DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
  if (!Commutative)
    IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
  assert(IdentityOperand && "No identity operand!");
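
  // Swapped indicates the caller passed the select's false arm as TrueVal; in
  // that case the identity value must be picked when the condition is true,
  // e.g. (select c, y, (add y, x)) -> (add y, (select c, 0, x)).
  // (Illustrative note.)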
  if (Swapped)
    std::swap(OtherOp, IdentityOperand);
  SDValue NewSel =
      DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
  return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
}
// This tries to get rid of `select` and `icmp` that are being used to handle
// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);

  // This represents either CTTZ or CTLZ instruction.
  SDValue CountZeroes;

  SDValue ValOnZero;

  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();

  if (!isNullConstant(Cond->getOperand(1)))
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
  if (CCVal == ISD::CondCode::SETEQ) {
    CountZeroes = N->getOperand(2);
    ValOnZero = N->getOperand(1);
  } else if (CCVal == ISD::CondCode::SETNE) {
    CountZeroes = N->getOperand(1);
    ValOnZero = N->getOperand(2);
  } else {
    return SDValue();
  }

  if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
      CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
    CountZeroes = CountZeroes.getOperand(0);

  if (CountZeroes.getOpcode() != ISD::CTTZ &&
      CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
      CountZeroes.getOpcode() != ISD::CTLZ &&
      CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
    return SDValue();

  if (!isNullConstant(ValOnZero))
    return SDValue();

  SDValue CountZeroesArgument = CountZeroes->getOperand(0);
  if (Cond->getOperand(0) != CountZeroesArgument)
    return SDValue();

  if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  }
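
  // A zero input makes cttz/ctlz return the bit width, which is a power of
  // two; masking the count with BitWidth - 1 therefore maps that case to 0
  // (the value the select would have produced) and leaves every other count,
  // all of which are less than BitWidth, unchanged.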
  unsigned BitWidth = CountZeroes.getValueSizeInBits();
  SDValue BitWidthMinusOne =
      DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());

  auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
                             CountZeroes, BitWidthMinusOne);
  return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
    return Folded;

  if (Subtarget.hasShortForwardBranchOpt())
    return SDValue();

  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
    return V;
  return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget,
                                          const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  assert(!VT.isScalableVector() && "unexpected build vector");

  if (VT.getVectorNumElements() == 1)
    return SDValue();

  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
  if (!TLI.isBinOp(Opcode))
    return SDValue();

  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
    return SDValue();

  SmallVector<SDValue> LHSOps;
  SmallVector<SDValue> RHSOps;
  for (SDValue Op : N->ops()) {
    if (Op.isUndef()) {
      // We can't form a divide or remainder from undef.
      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
        return SDValue();

      LHSOps.push_back(Op);
      RHSOps.push_back(Op);
      continue;
    }

    // TODO: We can handle operations which have a neutral rhs value
    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
    // of profit in a more explicit manner.
    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
      return SDValue();

    LHSOps.push_back(Op.getOperand(0));
    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
        !isa<ConstantFPSDNode>(Op.getOperand(1)))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
      return SDValue();
    RHSOps.push_back(Op.getOperand(1));
  }
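
  // Illustrative example: (build_vector (add x0, 1), (add x1, 2)) becomes
  // (add (build_vector x0, x1), (build_vector 1, 2)), trading one binop per
  // lane for a single vector binop plus a constant build_vector.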
  return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
                     DAG.getBuildVector(VT, DL, RHSOps));
}
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
// vlse32 p, stride=n
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget,
                                            const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only perform this combine on legal MVTs.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // TODO: Potentially extend this to scalable vectors
  if (VT.isScalableVector())
    return SDValue();

  auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
      !SDValue(BaseLd, 0).hasOneUse())
    return SDValue();

  EVT BaseLdVT = BaseLd->getValueType(0);

  // Go through the loads and check that they're strided
  SmallVector<LoadSDNode *> Lds;
  Lds.push_back(BaseLd);
  Align Align = BaseLd->getAlign();
  for (SDValue Op : N->ops().drop_front()) {
    auto *Ld = dyn_cast<LoadSDNode>(Op);
    if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
        Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
        Ld->getValueType(0) != BaseLdVT)
      return SDValue();

    Lds.push_back(Ld);

    // The common alignment is the most restrictive (smallest) of all the loads
    Align = std::min(Align, Ld->getAlign());
  }
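
  // Illustrative example: for loads at p, p+8 and p+16 the pointer difference
  // computed below is 8, which becomes the byte stride of the widened strided
  // load.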
  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
    // If the load ptrs can be decomposed into a common (Base + Index) with a
    // common constant stride, then return the constant stride.
    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
    if (BIO1.equalBaseIndex(BIO2, DAG))
      return {{BIO2.getOffset() - BIO1.getOffset(), false}};

    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
    SDValue P1 = Ld1->getBasePtr();
    SDValue P2 = Ld2->getBasePtr();
    if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
      return {{P2.getOperand(1), false}};
    if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
      return {{P1.getOperand(1), true}};

    return std::nullopt;
  };

  // Get the distance between the first and second loads
  auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
  if (!BaseDiff)
    return SDValue();

  // Check all the loads are the same distance apart
  for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
    if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
      return SDValue();

  // TODO: At this point, we've successfully matched a generalized gather
  // load. Maybe we should emit that, and then move the specialized
  // matchers above and below into a DAG combine?

  // Get the widened scalar type, e.g. v4i8 -> i64
  unsigned WideScalarBitWidth =
      BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
  MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);

  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
  MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
  if (!TLI.isTypeLegal(WideVecVT))
    return SDValue();

  // Check that the operation is legal
  if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
    return SDValue();

  auto [StrideVariant, MustNegateStride] = *BaseDiff;
  SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
                       ? std::get<SDValue>(StrideVariant)
                       : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
                                         Lds[0]->getOffset().getValueType());
  if (MustNegateStride)
    Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

  SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
  SDValue IntID =
      DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
                            Subtarget.getXLenVT());

  SDValue AllOneMask =
      DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                   DAG.getConstant(1, DL, MVT::i1));

  SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
                   BaseLd->getBasePtr(), Stride, AllOneMask};

  uint64_t MemSize;
  if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
      ConstStride && ConstStride->getSExtValue() >= 0)
    // total size = (elsize * n) + (stride - elsize) * (n-1)
    //            = elsize + stride * (n-1)
    MemSize = WideScalarVT.getSizeInBits() +
              ConstStride->getSExtValue() * (N->getNumOperands() - 1);
  else
    // If Stride isn't constant, then we can't know how much it will load
    MemSize = MemoryLocation::UnknownSize;

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
      Align);

  SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
                                                Ops, WideVecVT, MMO);

  for (SDValue Ld : N->ops())
    DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

  return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == RISCVISD::ADD_VL);
  SDValue Addend = N->getOperand(0);
  SDValue MulOp = N->getOperand(1);
  SDValue AddMergeOp = N->getOperand(2);

  if (!AddMergeOp.isUndef())
    return SDValue();

  auto IsVWMulOpc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VWMUL_VL:
    case RISCVISD::VWMULU_VL:
    case RISCVISD::VWMULSU_VL:
      return true;
    default:
      return false;
    }
  };

  if (!IsVWMulOpc(MulOp.getOpcode()))
    std::swap(Addend, MulOp);

  if (!IsVWMulOpc(MulOp.getOpcode()))
    return SDValue();

  SDValue MulMergeOp = MulOp.getOperand(2);

  if (!MulMergeOp.isUndef())
    return SDValue();

  SDValue AddMask = N->getOperand(3);
  SDValue AddVL = N->getOperand(4);
  SDValue MulMask = MulOp.getOperand(3);
  SDValue MulVL = MulOp.getOperand(4);

  if (AddMask != MulMask || AddVL != MulVL)
    return SDValue();
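
  // Map each VWMUL*_VL opcode to the matching VWMACC*_VL opcode by exploiting
  // the relative ordering of the enum values; the static_asserts below verify
  // that assumption.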
  unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
  static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
                "Unexpected opcode after VWMACC_VL");
  static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
                "Unexpected opcode after VWMACC_VL!");
  static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
                "Unexpected opcode after VWMUL_VL!");
  static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
                "Unexpected opcode after VWMUL_VL!");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
                   AddVL};
  return DAG.getNode(Opc, DL, VT, Ops);
}
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
                                           ISD::MemIndexType &IndexType,
                                           RISCVTargetLowering::DAGCombinerInfo &DCI) {
  if (!DCI.isBeforeLegalize())
    return false;

  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();

  const EVT IndexVT = Index.getValueType();

  // RISC-V indexed loads only support the "unsigned unscaled" addressing
  // mode, so anything else must be manually legalized.
  if (!isIndexTypeSigned(IndexType))
    return false;

  if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
                        IndexVT.changeVectorElementType(XLenVT), Index);
  }

  IndexType = ISD::UNSIGNED_SCALED;
  return true;
}
/// Match the index vector of a scatter or gather node as the shuffle mask
/// which performs the rearrangement if possible. Will only match if
/// all lanes are touched, and thus replacing the scatter or gather with
/// a unit strided access and shuffle is legal.
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
                                SmallVector<int> &ShuffleMask) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned NumElems = VT.getVectorNumElements();

  // Create the shuffle mask and check all bits active
  assert(ShuffleMask.empty());
  BitVector ActiveLanes(NumElems);
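
  // Illustrative example: a v4i32 gather with byte offsets [4, 0, 12, 8]
  // touches every lane and is equivalent to a unit-strided load followed by
  // the shuffle mask <1, 0, 3, 2>.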
  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    uint64_t C = Index->getConstantOperandVal(i);
    if (C % ElementSize != 0)
      return false;
    C = C / ElementSize;
    if (C >= NumElems)
      return false;
    ShuffleMask.push_back(C);
    ActiveLanes.set(C);
  }
  return ActiveLanes.all();
}
/// Match the index of a gather or scatter operation as an operation
/// with twice the element width and half the number of elements. This is
/// generally profitable (if legal) because these operations are linear
/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
/// come out ahead.
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
                                Align BaseAlign, const RISCVSubtarget &ST) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  // Attempt a doubling. If we can use an element type 4x or 8x in
  // size, this will happen via multiple iterations of the transform.
  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems % 2 != 0)
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned WiderElementSize = ElementSize * 2;
  if (WiderElementSize > ST.getELen()/8)
    return false;

  if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
    return false;
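
  // Illustrative example: a v8i8 gather with byte offsets
  // [0, 1, 8, 9, 16, 17, 24, 25] pairs adjacent bytes and can instead be
  // treated as a v4i16 gather with offsets [0, 8, 16, 24].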
  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    // TODO: This offset check is too strict if we support fully
    // misaligned memory operations.
    uint64_t C = Index->getConstantOperandVal(i);
    if (i % 2 == 0) {
      if (C % WiderElementSize != 0)
        return false;
      continue;
    }
    uint64_t Last = Index->getConstantOperandVal(i-1);
    if (C != Last + ElementSize)
      return false;
  }

  return true;
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT = Subtarget.getXLenVT();

  // Helper to call SimplifyDemandedBits on an operand of N where only some low
  // bits are demanded. N will be added to the Worklist if it was not deleted.
  // Caller should return SDValue(N, 0) if this returns true.
  auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
    SDValue Op = N->getOperand(OpNo);
    APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
    if (!SimplifyDemandedBits(Op, Mask, DCI))
      return false;

    if (N->getOpcode() != ISD::DELETED_NODE)
      DCI.AddToWorklist(N);
    return true;
  };

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    if (Op0->isUndef()) {
      SDValue Lo = DAG.getUNDEF(MVT::i32);
      SDValue Hi = DAG.getUNDEF(MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::RORW:
  case RISCVISD::ROLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    if (SimplifyDemandedLowBitsHelper(0, 32) ||
        SimplifyDemandedLowBitsHelper(1, 5))
      return SDValue(N, 0);

    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read
    if (SimplifyDemandedLowBitsHelper(0, 32))
      return SDValue(N, 0);
    break;
  }
  case RISCVISD::FMV_W_X_RV64: {
    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
    // conversion is unnecessary and can be replaced with the
    // FMV_X_ANYEXTW_RV64 operand.
    SDValue Op0 = N->getOperand(0);
    if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
      return Op0.getOperand(0);
    break;
  }
  case RISCVISD::FMV_X_ANYEXTH:
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    MVT VT = N->getSimpleValueType(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
    // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
    if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
         Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
        (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
         Op0->getOpcode() == RISCVISD::FMV_H_X)) {
      assert(Op0.getOperand(0).getValueType() == VT &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
    unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
    APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
                         DAG.getConstant(SignBit, DL, VT));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT, NewFMV,
                       DAG.getConstant(~SignBit, DL, VT));
  }
  case ISD::ADD:
    return performADDCombine(N, DAG, Subtarget);
  case ISD::SUB:
    return performSUBCombine(N, DAG, Subtarget);
  case ISD::AND:
    return performANDCombine(N, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::XOR:
    return performXORCombine(N, DAG, Subtarget);
  case ISD::FADD:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::FMAXNUM:
  case ISD::FMINNUM: {
    if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
      return V;
    if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
      return V;
    return SDValue();
  }
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, Subtarget);
  case ISD::SIGN_EXTEND_INREG:
    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
  case ISD::ZERO_EXTEND:
    // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
    // type legalization. This is safe because fp_to_uint produces poison if
    // the value can't fit.
    if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
      SDValue Src = N->getOperand(0);
      if (Src.getOpcode() == ISD::FP_TO_UINT &&
          isTypeLegal(Src.getOperand(0).getValueType()))
        return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
                           Src.getOperand(0));
      if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
          isTypeLegal(Src.getOperand(1).getValueType())) {
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
                                  Src.getOperand(0), Src.getOperand(1));
        DCI.CombineTo(N, Res);
        DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(Src.getNode());
        return SDValue(N, 0); // Return N so it doesn't get rechecked.
      }
    }
    break;
  case RISCVISD::TRUNCATE_VECTOR_VL: {
    // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
    // This would benefit the cases where X and Y are both the same value
    // type of low precision vectors. Since the truncate would be lowered into
    // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
    // restriction, such a pattern would be expanded into a series of "vsetvli"
    // and "vnsrl" instructions later to reach this point.
    auto IsTruncNode = [](SDValue V) {
      if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
        return false;
      SDValue VL = V.getOperand(2);
      auto *C = dyn_cast<ConstantSDNode>(VL);
      // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
      bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
                             (isa<RegisterSDNode>(VL) &&
                              cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
      return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
             IsVLMAXForVMSET;
    };

    SDValue Op = N->getOperand(0);

    // We need to first find the inner level of TRUNCATE_VECTOR_VL node
    // to distinguish such pattern.
    while (IsTruncNode(Op)) {
      if (!Op.hasOneUse())
        return SDValue();
      Op = Op.getOperand(0);
    }

    if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
      SDValue N0 = Op.getOperand(0);
      SDValue N1 = Op.getOperand(1);
      if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
          N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
        SDValue N00 = N0.getOperand(0);
        SDValue N10 = N1.getOperand(0);
        if (N00.getValueType().isVector() &&
            N00.getValueType() == N10.getValueType() &&
            N->getValueType(0) == N10.getValueType()) {
          unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
          SDValue SMin = DAG.getNode(
              ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
              DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
          return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
        }
      }
    }
    break;
  }
  case ISD::TRUNCATE:
    return performTRUNCATECombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return performSELECTCombine(N, DAG, Subtarget);
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
    // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
    if (N->getOperand(1).getOpcode() == ISD::XOR &&
        isOneConstant(N->getOperand(1).getOperand(1))) {
      SDValue Cond = N->getOperand(1).getOperand(0);
      APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
      if (DAG.MaskedValueIsZero(Cond, Mask)) {
        unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
                              ? RISCVISD::CZERO_NEZ
                              : RISCVISD::CZERO_EQZ;
        return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
                           N->getOperand(0), Cond);
      }
    }
    return SDValue();
  case RISCVISD::SELECT_CC: {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    SDValue CC = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
    SDValue TrueV = N->getOperand(3);
    SDValue FalseV = N->getOperand(4);
    SDLoc DL(N);
    EVT VT = N->getValueType(0);

    // If the True and False values are the same, we don't need a select_cc.
    if (TrueV == FalseV)
      return TrueV;

    // (select (x < 0), y, z)  -> x >> (XLEN - 1) & (y - z) + z
    // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
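    // Illustrative example on RV64: (select (x < 0), 3, 1) becomes
    // ((x >> 63) & 2) + 1, since the arithmetic shift yields -1 or 0.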
    if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
        isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
        (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
      if (CCVal == ISD::CondCode::SETGE)
        std::swap(TrueV, FalseV);

      int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
      int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
      // Only handle simm12, if it is not in this range, it can be considered as
      // register.
      if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
          isInt<12>(TrueSImm - FalseSImm)) {
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, VT, LHS,
                        DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
        SDValue AND =
            DAG.getNode(ISD::AND, DL, VT, SRA,
                        DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
        return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
      }

      if (CCVal == ISD::CondCode::SETGE)
        std::swap(TrueV, FalseV);
    }

    if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
      return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
                         {LHS, RHS, CC, TrueV, FalseV});

    if (!Subtarget.hasShortForwardBranchOpt()) {
      // (select c, -1, y) -> -c | y
      if (isAllOnesConstant(TrueV)) {
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
      }
      // (select c, y, -1) -> -!c | y
      if (isAllOnesConstant(FalseV)) {
        SDValue C =
            DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
      }

      // (select c, 0, y) -> -!c & y
      if (isNullConstant(TrueV)) {
        SDValue C =
            DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
      }
      // (select c, y, 0) -> -c & y
      if (isNullConstant(FalseV)) {
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
      }
      // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
      // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
      if (((isOneConstant(FalseV) && LHS == TrueV &&
            CCVal == ISD::CondCode::SETNE) ||
           (isOneConstant(TrueV) && LHS == FalseV &&
            CCVal == ISD::CondCode::SETEQ)) &&
          isNullConstant(RHS)) {
        // Freeze it to be safe.
        LHS = DAG.getFreeze(LHS);
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
        return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
      }
    }

    // If both true/false are an xor with 1, pull through the select.
    // This can occur after op legalization if both operands are setccs that
    // require an xor to invert.
    // FIXME: Generalize to other binary ops with identical operand?
    if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
        TrueV.getOperand(1) == FalseV.getOperand(1) &&
        isOneConstant(TrueV.getOperand(1)) &&
        TrueV.hasOneUse() && FalseV.hasOneUse()) {
      SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
                                   TrueV.getOperand(0), FalseV.getOperand(0));
      return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
    }

    return SDValue();
  }
  case RISCVISD::BR_CC: {
    SDValue LHS = N->getOperand(1);
    SDValue RHS = N->getOperand(2);
    SDValue CC = N->getOperand(3);
    SDLoc DL(N);

    if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, CC, N->getOperand(4));

    return SDValue();
  }
  case ISD::BITREVERSE:
    return performBITREVERSECombine(N, DAG, Subtarget);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return performFP_TO_INTCombine(N, DCI, Subtarget);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
  case ISD::FCOPYSIGN: {
    EVT VT = N->getValueType(0);
    if (!VT.isVector())
      break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern.
    SDValue In2 = N->getOperand(1);
    // Avoid cases where the extend/round has multiple uses, as duplicating
    // those is typically more expensive than removing a fneg.
    if (!In2.hasOneUse())
      break;
    if (In2.getOpcode() != ISD::FP_EXTEND &&
        (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
      break;
    In2 = In2.getOperand(0);
    if (In2.getOpcode() != ISD::FNEG)
      break;
    SDLoc DL(N);
    SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                       DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
  }
  case ISD::MGATHER: {
    const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
    const EVT VT = N->getValueType(0);
    SDValue Index = MGN->getIndex();
    SDValue ScaleOp = MGN->getScale();
    ISD::MemIndexType IndexType = MGN->getIndexType();
    assert(!MGN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    SDLoc DL(N);
    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getMaskedGather(
          N->getVTList(), MGN->getMemoryVT(), DL,
          {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
           MGN->getBasePtr(), Index, ScaleOp},
          MGN->getMemOperand(), IndexType, MGN->getExtensionType());

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getMaskedGather(
          N->getVTList(), MGN->getMemoryVT(), DL,
          {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
           MGN->getBasePtr(), Index, ScaleOp},
          MGN->getMemOperand(), IndexType, MGN->getExtensionType());

    if (Index.getOpcode() == ISD::BUILD_VECTOR &&
        MGN->getExtensionType() == ISD::NON_EXTLOAD) {
      if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
          SimpleVID && SimpleVID->StepDenominator == 1) {
        const int64_t StepNumerator = SimpleVID->StepNumerator;
        const int64_t Addend = SimpleVID->Addend;

        // Note: We don't need to check alignment here since (by assumption
        // from the existence of the gather), our offsets must be sufficiently
        // aligned.

        const EVT PtrVT = getPointerTy(DAG.getDataLayout());
        assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
        assert(IndexType == ISD::UNSIGNED_SCALED);
        SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
                                      DAG.getConstant(Addend, DL, PtrVT));

        SDVTList VTs = DAG.getVTList({VT, MVT::Other});
        SDValue IntID =
            DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
                                  XLenVT);
        SDValue Ops[] =
            {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
             DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
        return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
                                       Ops, VT, MGN->getMemOperand());
      }
    }

    SmallVector<int> ShuffleMask;
    if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
        matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
      SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
                                       MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
                                       MGN->getMask(), DAG.getUNDEF(VT),
                                       MGN->getMemoryVT(), MGN->getMemOperand(),
                                       ISD::UNINDEXED, ISD::NON_EXTLOAD);
      SDValue Shuffle =
          DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
      return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
    }

    if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
        matchIndexAsWiderOp(VT, Index, MGN->getMask(),
                            MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
      SmallVector<SDValue> NewIndices;
      for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
        NewIndices.push_back(Index.getOperand(i));
      EVT IndexVT = Index.getValueType()
                        .getHalfNumVectorElementsVT(*DAG.getContext());
      Index = DAG.getBuildVector(IndexVT, DL, NewIndices);

      unsigned ElementSize = VT.getScalarStoreSize();
      EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
      auto EltCnt = VT.getVectorElementCount();
      assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
      EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
                                    EltCnt.divideCoefficientBy(2));
      SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
      EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                    EltCnt.divideCoefficientBy(2));
      SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));

      SDValue Gather =
          DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
                              {MGN->getChain(), Passthru, Mask,
                               MGN->getBasePtr(), Index, ScaleOp},
                              MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
      SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
      return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
    }
    break;
  }
  case ISD::MSCATTER: {
    const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
    SDValue Index = MSN->getIndex();
    SDValue ScaleOp = MSN->getScale();
    ISD::MemIndexType IndexType = MSN->getIndexType();
    assert(!MSN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    SDLoc DL(N);
    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getMaskedScatter(
          N->getVTList(), MSN->getMemoryVT(), DL,
          {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
           Index, ScaleOp},
          MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getMaskedScatter(
          N->getVTList(), MSN->getMemoryVT(), DL,
          {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
           Index, ScaleOp},
          MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());

    EVT VT = MSN->getValue()->getValueType(0);
    SmallVector<int> ShuffleMask;
    if (!MSN->isTruncatingStore() &&
        matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
      SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
                                             DAG.getUNDEF(VT), ShuffleMask);
      return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
                                DAG.getUNDEF(XLenVT), MSN->getMask(),
                                MSN->getMemoryVT(), MSN->getMemOperand(),
                                ISD::UNINDEXED, false);
    }
    break;
  }
  case ISD::VP_GATHER: {
    const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
    SDValue Index = VPGN->getIndex();
    SDValue ScaleOp = VPGN->getScale();
    ISD::MemIndexType IndexType = VPGN->getIndexType();
    assert(!VPGN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    SDLoc DL(N);
    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
                             {VPGN->getChain(), VPGN->getBasePtr(), Index,
                              ScaleOp, VPGN->getMask(),
                              VPGN->getVectorLength()},
                             VPGN->getMemOperand(), IndexType);

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
                             {VPGN->getChain(), VPGN->getBasePtr(), Index,
                              ScaleOp, VPGN->getMask(),
                              VPGN->getVectorLength()},
                             VPGN->getMemOperand(), IndexType);

    break;
  }
  case ISD::VP_SCATTER: {
    const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
    SDValue Index = VPSN->getIndex();
    SDValue ScaleOp = VPSN->getScale();
    ISD::MemIndexType IndexType = VPSN->getIndexType();
    assert(!VPSN->isIndexScaled() &&
           "Scaled gather/scatter should not be formed");

    SDLoc DL(N);
    if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
      return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
                              {VPSN->getChain(), VPSN->getValue(),
                               VPSN->getBasePtr(), Index, ScaleOp,
                               VPSN->getMask(), VPSN->getVectorLength()},
                              VPSN->getMemOperand(), IndexType);

    if (narrowIndex(Index, IndexType, DAG))
      return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
                              {VPSN->getChain(), VPSN->getValue(),
                               VPSN->getBasePtr(), Index, ScaleOp,
                               VPSN->getMask(), VPSN->getVectorLength()},
                              VPSN->getMemOperand(), IndexType);
    break;
  }
  case RISCVISD::SRA_VL:
  case RISCVISD::SRL_VL:
  case RISCVISD::SHL_VL: {
    SDValue ShAmt = N->getOperand(1);
    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
      SDLoc DL(N);
      SDValue VL = N->getOperand(4);
      EVT VT = N->getValueType(0);
      ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                          ShAmt.getOperand(1), VL);
      return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
                         N->getOperand(2), N->getOperand(3), N->getOperand(4));
    }
    break;
  }
  case ISD::SRA:
    if (SDValue V = performSRACombine(N, DAG, Subtarget))
      return V;
    [[fallthrough]];
  case ISD::SRL:
  case ISD::SHL: {
    SDValue ShAmt = N->getOperand(1);
    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
      SDLoc DL(N);
      EVT VT = N->getValueType(0);
      ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                          ShAmt.getOperand(1),
                          DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
      return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
    }
    break;
  }
  case RISCVISD::ADD_VL:
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
      return V;
    return combineToVWMACC(N, DAG, Subtarget);
  case RISCVISD::SUB_VL:
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUB_W_VL:
  case RISCVISD::VWSUBU_W_VL:
  case RISCVISD::MUL_VL:
    return combineBinOp_VLToVWBinOp_VL(N, DCI);
  case RISCVISD::VFMADD_VL:
  case RISCVISD::VFNMADD_VL:
  case RISCVISD::VFMSUB_VL:
  case RISCVISD::VFNMSUB_VL:
  case RISCVISD::STRICT_VFMADD_VL:
  case RISCVISD::STRICT_VFNMADD_VL:
  case RISCVISD::STRICT_VFMSUB_VL:
  case RISCVISD::STRICT_VFNMSUB_VL:
    return performVFMADD_VLCombine(N, DAG, Subtarget);
  case RISCVISD::FMUL_VL:
    return performVFMUL_VLCombine(N, DAG, Subtarget);
  case RISCVISD::FADD_VL:
  case RISCVISD::FSUB_VL:
    return performFADDSUB_VLCombine(N, DAG, Subtarget);
  case ISD::LOAD:
  case ISD::STORE: {
    if (DCI.isAfterLegalizeDAG())
      if (SDValue V = performMemPairCombine(N, DCI))
        return V;

    if (N->getOpcode() != ISD::STORE)
      break;

    auto *Store = cast<StoreSDNode>(N);
    SDValue Chain = Store->getChain();
    EVT MemVT = Store->getMemoryVT();
    SDValue Val = Store->getValue();
    SDLoc DL(N);

    bool IsScalarizable =
        MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
        Store->isSimple() &&
        MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
        isPowerOf2_64(MemVT.getSizeInBits()) &&
        MemVT.getSizeInBits() <= Subtarget.getXLen();

    // If sufficiently aligned we can scalarize stores of constant vectors of
    // any power-of-two size up to XLen bits, provided that they aren't too
    // expensive to materialize. For example,
    //   vsetivli   zero, 2, e8, m1, ta, ma
    //   vmv.v.i    v8, 4
    //   vse64.v    v8, (a0)
    // can instead be lowered as a single scalar store of the materialized
    // constant (e.g. li + sh).
    if (DCI.isBeforeLegalize() && IsScalarizable &&
        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
      // Get the constant vector bits
      APInt NewC(Val.getValueSizeInBits(), 0);
      uint64_t EltSize = Val.getScalarValueSizeInBits();
      for (unsigned i = 0; i < Val.getNumOperands(); i++) {
        if (Val.getOperand(i).isUndef())
          continue;
        NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
                        i * EltSize);
      }
      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());

      if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
                                     Subtarget.getFeatureBits(), true) <= 2 &&
          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                         NewVT, *Store->getMemOperand())) {
        SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
        return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
                            Store->getPointerInfo(), Store->getOriginalAlign(),
                            Store->getMemOperand()->getFlags());
      }
    }

    // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
    //   vsetivli   zero, 2, e16, m1, ta, ma
    //   vle16.v    v8, (a0)
    //   vse16.v    v8, (a1)
    if (auto *L = dyn_cast<LoadSDNode>(Val);
        L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
        L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
        Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
        L->getMemoryVT() == MemVT) {
      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                         NewVT, *Store->getMemOperand()) &&
          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                         NewVT, *L->getMemOperand())) {
        SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
                                   L->getPointerInfo(), L->getOriginalAlign(),
                                   L->getMemOperand()->getFlags());
        return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
                            Store->getPointerInfo(), Store->getOriginalAlign(),
                            Store->getMemOperand()->getFlags());
      }
    }

    // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
    // vfmv.f.s is represented as extract element from 0. Match it late to avoid
    // any illegal types.
    if (Val.getOpcode() == RISCVISD::VMV_X_S ||
        (DCI.isAfterLegalizeDAG() &&
         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
         isNullConstant(Val.getOperand(1)))) {
      SDValue Src = Val.getOperand(0);
      MVT VecVT = Src.getSimpleValueType();
      // VecVT should be scalable and memory VT should match the element type.
      if (VecVT.isScalableVector() &&
          MemVT == VecVT.getVectorElementType()) {
        MVT MaskVT = getMaskTypeFor(VecVT);
        return DAG.getStoreVP(
            Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
            DAG.getConstant(1, DL, MaskVT),
            DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
            Store->getMemOperand(), Store->getAddressingMode(),
            Store->isTruncatingStore(), /*IsCompress*/ false);
      }
    }

    break;
  }
  case ISD::SPLAT_VECTOR: {
    EVT VT = N->getValueType(0);
    // Only perform this combine on legal MVT types.
    if (!isTypeLegal(VT))
      break;
    if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
                                         DAG, Subtarget))
      return Gather;
    break;
  }
  case ISD::BUILD_VECTOR:
    if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case ISD::CONCAT_VECTORS:
    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case RISCVISD::VFMV_V_F_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Scalar = N->getOperand(1);
    SDValue VL = N->getOperand(2);
    SDLoc DL(N);

    // If VL is 1, we can use vfmv.s.f.
    if (isOneConstant(VL))
      return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
    break;
  }
  case RISCVISD::VMV_V_X_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Scalar = N->getOperand(1);
    SDValue VL = N->getOperand(2);
    SDLoc DL(N);

    // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
    // scalar input.
    unsigned ScalarSize = Scalar.getValueSizeInBits();
    unsigned EltWidth = VT.getScalarSizeInBits();
    if (ScalarSize > EltWidth && Passthru.isUndef())
      if (SimplifyDemandedLowBitsHelper(1, EltWidth))
        return SDValue(N, 0);

    // If VL is 1 and the scalar value won't benefit from immediate, we can
    // use vmv.s.x.
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
    if (isOneConstant(VL) &&
        (!Const || Const->isZero() ||
         !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);

    break;
  }
  case RISCVISD::VFMV_S_F_VL: {
    SDValue Src = N->getOperand(1);
    // Try to remove vector->scalar->vector if the scalar->vector is inserting
    // into an undef vector.
    // TODO: Could use a vslide or vmv.v.v for non-undef.
    if (N->getOperand(0).isUndef() &&
        Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        isNullConstant(Src.getOperand(1)) &&
        Src.getOperand(0).getValueType().isScalableVector()) {
      EVT VT = N->getValueType(0);
      EVT SrcVT = Src.getOperand(0).getValueType();
      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
      // Widths match, just return the original vector.
      if (SrcVT == VT)
        return Src.getOperand(0);
      // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
    }
    break;
  }
  case RISCVISD::VMV_S_X_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Scalar = N->getOperand(1);
    SDValue VL = N->getOperand(2);
    SDLoc DL(N);

    // Use M1 or smaller to avoid over constraining register allocation
    const MVT M1VT = getLMUL1VT(VT);
    if (M1VT.bitsLT(VT)) {
      SDValue M1Passthru =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
                      DAG.getVectorIdxConstant(0, DL));
      SDValue Result =
          DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
                           DAG.getConstant(0, DL, XLenVT));
      return Result;
    }

    // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
    // higher would involve overly constraining the register allocator for
    // no purpose.
    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
        Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
        VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);

    break;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
    unsigned IntNo = N->getConstantOperandVal(IntOpNo);
    switch (IntNo) {
      // By default we do not combine any intrinsic.
    default:
      return SDValue();
    case Intrinsic::riscv_masked_strided_load: {
      MVT VT = N->getSimpleValueType(0);
      auto *Load = cast<MemIntrinsicSDNode>(N);
      SDValue PassThru = N->getOperand(2);
      SDValue Base = N->getOperand(3);
      SDValue Stride = N->getOperand(4);
      SDValue Mask = N->getOperand(5);
      SDLoc DL(N);

      // If the stride is equal to the element size in bytes, we can use
      // a masked.load.
      const unsigned ElementSize = VT.getScalarStoreSize();
      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
          StrideC && StrideC->getZExtValue() == ElementSize)
        return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
                                 DAG.getUNDEF(XLenVT), Mask, PassThru,
                                 Load->getMemoryVT(), Load->getMemOperand(),
                                 ISD::UNINDEXED, ISD::NON_EXTLOAD);
      return SDValue();
    }
    case Intrinsic::riscv_masked_strided_store: {
      auto *Store = cast<MemIntrinsicSDNode>(N);
      SDValue Value = N->getOperand(2);
      SDValue Base = N->getOperand(3);
      SDValue Stride = N->getOperand(4);
      SDValue Mask = N->getOperand(5);
      SDLoc DL(N);

      // If the stride is equal to the element size in bytes, we can use
      // a masked.store.
      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
          StrideC && StrideC->getZExtValue() == ElementSize)
        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
                                  DAG.getUNDEF(XLenVT), Mask,
                                  Store->getMemoryVT(), Store->getMemOperand(),
                                  ISD::UNINDEXED, false);
      return SDValue();
    }
    case Intrinsic::riscv_vcpop:
    case Intrinsic::riscv_vcpop_mask:
    case Intrinsic::riscv_vfirst:
    case Intrinsic::riscv_vfirst_mask: {
      SDValue VL = N->getOperand(2);
      if (IntNo == Intrinsic::riscv_vcpop_mask ||
          IntNo == Intrinsic::riscv_vfirst_mask)
        VL = N->getOperand(3);
      if (!isNullConstant(VL))
        return SDValue();
      // If VL is 0, vcpop -> li 0, vfirst -> li -1.
      SDLoc DL(N);
      EVT VT = N->getValueType(0);
      if (IntNo == Intrinsic::riscv_vfirst ||
          IntNo == Intrinsic::riscv_vfirst_mask)
        return DAG.getConstant(-1, DL, VT);
      return DAG.getConstant(0, DL, VT);
    }
    }
  }
  case ISD::BITCAST: {
    assert(Subtarget.useRVVForFixedLengthVectors());
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    EVT SrcVT = N0.getValueType();
    // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
    // type, widen both sides to avoid a trip through memory.
    if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
        VT.isScalarInteger()) {
      unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
      SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
      Ops[0] = N0;
      SDLoc DL(N);
      N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
      N0 = DAG.getBitcast(MVT::i8, N0);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
    }

    return SDValue();
  }
  }

  return SDValue();
}
bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
    EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
  if (XVT.isVector())
    return false;

  if (XVT != MVT::i32 && XVT != MVT::i64)
    return false;

  // We can use sext.w for RV64 or an srai 31 on RV32.
  if (KeptBits == 32 || KeptBits == 64)
    return true;

  // With Zbb we can use sext.h/sext.b.
  return Subtarget.hasStdExtZbb() &&
         ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
          KeptBits == 16);
}

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) &&
         "Expected shift op");

  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2)  -> (or  (shl x, c2), c1 << c2)
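  //
  // Illustrative example: for (shl (add x, 100), 3) the shifted constant is
  // 800, which still fits in an addi immediate, so commuting the shift past
  // the add remains desirable.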
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getSignificantBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getSignificantBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.getFeatureBits(),
                                              /*CompressionCost*/true);
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
          /*CompressionCost*/true);

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Opcode = Op.getOpcode();
  if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.
  APInt ExpandedMask = Mask | ~DemandedBits;

  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
    SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                                    Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // And has a few special cases for zext.
  if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff), if zext.h exists use zext.h,
    // otherwise use SLLI + SRLI.
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);

    // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
    if (VT == MVT::i64) {
      APInt NewMask = APInt(64, 0xffffffff);
      if (IsLegalMask(NewMask))
        return UseMask(NewMask);
    }
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getSignificantBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
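// Editor's note: an illustrative example, not taken from the original source.
// If only the low 16 bits of (and X, 0x00ffffff) are demanded, ShrunkMask is
// 0xffff and ExpandedMask has every bit above bit 15 set, so the constant can
// be rewritten to 0xffff and the AND lowered with zext.h (Zbb) or SLLI+SRLI
// instead of materialising 0x00ffffff separately.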
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1 << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = GREVMasks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
      if (IsGORC)
        Res |= x;
      x = Res;
    }
  }

  return x;
}
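// Editor's note (illustrative): with a control value of 7 the loop above only
// applies the stages for shifts 1, 2 and 4, i.e. it operates within each byte.
// For example:
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80  // brev8: reverse bits per byte
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/true)  == 0xFF  // orc.b: OR-combine each byte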
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.
    Known.One.clearAllBits();
    break;
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // To compute zeros, we need to invert the value and invert it back after.
    Known.Zero =
        ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
    break;
  }
  case RISCVISD::READ_VLENB: {
    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
    const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    Known.Zero.setLowBits(Log2_32(MinVLenB));
    Known.Zero.setBitsFrom(Log2_32(MaxVLenB) + 1);
    if (MaxVLenB == MinVLenB)
      Known.One.setBit(Log2_32(MinVLenB));
    break;
  }
  case RISCVISD::FPCLASS: {
    // fclass will only set one of the low 10 bits.
    Known.Zero.setBitsFrom(10);
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      // Assume that VL output is <= 65536.
      // TODO: Take SEW and LMUL into account.
      if (BitWidth > 17)
        Known.Zero.setBitsFrom(17);
      break;
    }
    break;
  }
  }
}
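// Editor's note (illustrative, assuming getRealMinVLen() == 128 and
// getRealMaxVLen() == 65536): READ_VLENB is then a power of two between 16 and
// 8192 bytes, so the handling above marks bits [0,3] and all bits from bit 14
// upward as known zero.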
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // Output is either all zero or operand 0. We can propagate sign bit count
    // from operand 0.
    return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
  case RISCVISD::ABSW: {
    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp < 33) return 1;
    return 33;
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
    case Intrinsic::riscv_masked_atomicrmw_add_i64:
    case Intrinsic::riscv_masked_atomicrmw_sub_i64:
    case Intrinsic::riscv_masked_atomicrmw_nand_i64:
    case Intrinsic::riscv_masked_atomicrmw_max_i64:
    case Intrinsic::riscv_masked_atomicrmw_min_i64:
    case Intrinsic::riscv_masked_atomicrmw_umax_i64:
    case Intrinsic::riscv_masked_atomicrmw_umin_i64:
    case Intrinsic::riscv_masked_cmpxchg_i64:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.getXLen() == 64);
      assert(getMinCmpXchgSizeInBits() == 32);
      assert(Subtarget.hasStdExtA());
      return 33;
    }
    break;
  }
  }

  return 1;
}
const Constant *
RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
  assert(Ld && "Unexpected null LoadSDNode");
  if (!ISD::isNormalLoad(Ld))
    return nullptr;

  SDValue Ptr = Ld->getBasePtr();

  // Only constant pools with no offset are supported.
  auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
    if (!CNode || CNode->isMachineConstantPoolEntry() ||
        CNode->getOffset() != 0)
      return nullptr;

    return CNode;
  };

  // Simple case, LLA.
  if (Ptr.getOpcode() == RISCVISD::LLA) {
    auto *CNode = GetSupportedConstantPool(Ptr);
    if (!CNode || CNode->getTargetFlags() != 0)
      return nullptr;

    return CNode->getConstVal();
  }

  // Look for a HI and ADD_LO pair.
  if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
      Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
    return nullptr;

  auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
  auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));

  if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
      !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
    return nullptr;

  if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
    return nullptr;

  return CNodeLo->getConstVal();
}
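// Editor's note (illustrative): the two address forms accepted above
// correspond to the usual constant-pool addressing sequences, roughly
//   lla a0, .LCPI0_0            ; RISCVISD::LLA of the constant pool
//   fld fa0, 0(a0)
// and
//   lui a0, %hi(.LCPI0_0)       ; RISCVISD::HI
//   fld fa0, %lo(.LCPI0_0)(a0)  ; folded RISCVISD::ADD_LO
// Register names and labels here are only for exposition.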
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  //
  //   rdcycleh x3 # load high word of cycle
  //   rdcycle  x2 # load low word of cycle
  //   rdcycleh x4 # load high word of cycle
  //   bne x3, x4, read # check if high word reads match, otherwise try again

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB,
                                             const RISCVSubtarget &Subtarget) {
  assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
          MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();

  const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
                                         ? &RISCV::GPRPF64RegClass
                                         : &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI, Register());
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB,
                                                 const RISCVSubtarget &Subtarget) {
  assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
          MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();

  const TargetRegisterClass *DstRC =
      MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
                                                      : &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
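// Editor's note (illustrative): on RV32 with the D extension, the pseudo above
// turns into a stack round-trip of roughly the following shape (register names
// are examples only, not taken from the original source):
//   sw  a0, 0(frame-slot)   # low half
//   sw  a1, 4(frame-slot)   # high half
//   fld fa0, 0(frame-slot)  # reload the pair as one f64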
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return true;
  }
}
static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned RelOpcode, unsigned EqOpcode,
                                        const RISCVSubtarget &Subtarget) {
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  // Save the current FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);

  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
                 .addReg(Src1Reg)
                 .addReg(Src2Reg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFlags, RegState::Kill);

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
static MachineBasicBlock *
EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
                          MachineBasicBlock *ThisMBB,
                          const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
  // Without this, custom-inserter would have generated:
  //
  //   A: X = ...; Y = ...
  //   C: Z = PHI [X, A], [Y, B]
  //   E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A: X = ...; Y = ...
  //   E: PHI [X, A], [X, C], [Y, D]
  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = First.getDebugLoc();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(First)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstMBB);
  // Fallthrough block for FirstMBB.
  FirstMBB->addSuccessor(SecondMBB);
  ThisMBB->addSuccessor(SinkMBB);
  FirstMBB->addSuccessor(SinkMBB);
  // This is fallthrough.
  SecondMBB->addSuccessor(SinkMBB);

  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
  Register FLHS = First.getOperand(1).getReg();
  Register FRHS = First.getOperand(2).getReg();
  // Insert appropriate branch.
  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
      .addReg(FLHS)
      .addReg(FRHS)
      .addMBB(SinkMBB);

  Register SLHS = Second.getOperand(1).getReg();
  Register SRHS = Second.getOperand(2).getReg();
  Register Op1Reg4 = First.getOperand(4).getReg();
  Register Op1Reg5 = First.getOperand(5).getReg();

  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
  // Insert appropriate branch.
  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
      .addReg(SLHS)
      .addReg(SRHS)
      .addMBB(SinkMBB);

  Register DestReg = Second.getOperand(0).getReg();
  Register Op2Reg4 = Second.getOperand(4).getReg();
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
      .addReg(Op2Reg4)
      .addMBB(ThisMBB)
      .addReg(Op1Reg4)
      .addMBB(FirstMBB)
      .addReg(Op1Reg5)
      .addMBB(SecondMBB);

  // Now remove the Select_FPRX_s.
  First.eraseFromParent();
  Second.eraseFromParent();
  return SinkMBB;
}
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;
  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
  if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
      Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill()) {
    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
  }

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
                                       unsigned Opcode) {
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
  unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;

  // Update FRM and save the old value.
  BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
      .addImm(MI.getOperand(FRMIdx).getImm());

  // Emit an VFCVT with the FRM == DYN
  auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));

  for (unsigned I = 0; I < MI.getNumOperands(); I++)
    if (I != FRMIdx)
      MIB = MIB.add(MI.getOperand(I));
    else
      MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN

  MIB.add(MachineOperand::CreateReg(RISCV::FRM,
                                    /*IsDef*/ false,
                                    /*IsImp*/ true));

  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore FRM.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
      .addReg(SavedFRM, RegState::Kill);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
                                                    MachineBasicBlock *BB,
                                                    unsigned CVTXOpc,
                                                    unsigned CVTFOpc) {
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the old value of FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);

  assert(MI.getNumOperands() == 7);

  // Emit a VFCVT_X_F
  const TargetRegisterInfo *TRI =
      BB->getParent()->getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
  Register Tmp = MRI.createVirtualRegister(RC);
  BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
      .add(MI.getOperand(1))
      .add(MI.getOperand(2))
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Emit a VFCVT_F_X
  BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .addReg(Tmp)
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Restore FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFLAGS, RegState::Kill);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}

static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
                                     const RISCVSubtarget &Subtarget) {
  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
  const TargetRegisterClass *RC;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::PseudoFROUND_H:
    CmpOpc = RISCV::FLT_H;
    F2IOpc = RISCV::FCVT_W_H;
    I2FOpc = RISCV::FCVT_H_W;
    FSGNJOpc = RISCV::FSGNJ_H;
    FSGNJXOpc = RISCV::FSGNJX_H;
    RC = &RISCV::FPR16RegClass;
    break;
  case RISCV::PseudoFROUND_H_INX:
    CmpOpc = RISCV::FLT_H_INX;
    F2IOpc = RISCV::FCVT_W_H_INX;
    I2FOpc = RISCV::FCVT_H_W_INX;
    FSGNJOpc = RISCV::FSGNJ_H_INX;
    FSGNJXOpc = RISCV::FSGNJX_H_INX;
    RC = &RISCV::GPRF16RegClass;
    break;
  case RISCV::PseudoFROUND_S:
    CmpOpc = RISCV::FLT_S;
    F2IOpc = RISCV::FCVT_W_S;
    I2FOpc = RISCV::FCVT_S_W;
    FSGNJOpc = RISCV::FSGNJ_S;
    FSGNJXOpc = RISCV::FSGNJX_S;
    RC = &RISCV::FPR32RegClass;
    break;
  case RISCV::PseudoFROUND_S_INX:
    CmpOpc = RISCV::FLT_S_INX;
    F2IOpc = RISCV::FCVT_W_S_INX;
    I2FOpc = RISCV::FCVT_S_W_INX;
    FSGNJOpc = RISCV::FSGNJ_S_INX;
    FSGNJXOpc = RISCV::FSGNJX_S_INX;
    RC = &RISCV::GPRF32RegClass;
    break;
  case RISCV::PseudoFROUND_D:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D;
    F2IOpc = RISCV::FCVT_L_D;
    I2FOpc = RISCV::FCVT_D_L;
    FSGNJOpc = RISCV::FSGNJ_D;
    FSGNJXOpc = RISCV::FSGNJX_D;
    RC = &RISCV::FPR64RegClass;
    break;
  case RISCV::PseudoFROUND_D_INX:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D_INX;
    F2IOpc = RISCV::FCVT_L_D_INX;
    I2FOpc = RISCV::FCVT_D_L_INX;
    FSGNJOpc = RISCV::FSGNJ_D_INX;
    FSGNJXOpc = RISCV::FSGNJX_D_INX;
    RC = &RISCV::GPRRegClass;
    break;
  }

  const BasicBlock *BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++MBB->getIterator();

  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);
  // Move all instructions after the sequence to DoneMBB.
  DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
                  MBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
  // Set the successors for MBB.
  MBB->addSuccessor(CvtMBB);
  MBB->addSuccessor(DoneMBB);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register MaxReg = MI.getOperand(2).getReg();
  int64_t FRM = MI.getOperand(3).getImm();

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  Register FabsReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto MIB =
      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Insert branch.
  BuildMI(MBB, DL, TII.get(RISCV::BEQ))
      .addReg(CmpReg)
      .addReg(RISCV::X0)
      .addMBB(DoneMBB);

  CvtMBB->addSuccessor(DoneMBB);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Convert back to FP.
  Register I2FReg = MRI.createVirtualRegister(RC);
  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the sign bit.
  Register CvtReg = MRI.createVirtualRegister(RC);
  BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);

  // Merge the results.
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(MBB)
      .addReg(CvtReg)
      .addMBB(CvtMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
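// Editor's note (illustrative): for PseudoFROUND_S the expansion above is
// roughly the following sequence; register names are examples only and not
// taken from the original source:
//   fsgnjx.s ft0, fa0, fa0      # |x|
//   flt.s    t0, ft0, fa1      # |x| < max (values that may still have a fraction)
//   beq      t0, zero, done    # already integral, keep the input
//   fcvt.w.s t1, fa0, <frm>
//   fcvt.s.w ft1, t1, <frm>
//   fsgnj.s  ft1, ft1, fa0     # restore the sign of the input
// done:
//   # PHI of the original and the converted value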
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWrite is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case RISCV::BuildPairF64Pseudo:
  case RISCV::BuildPairF64Pseudo_INX:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case RISCV::SplitF64Pseudo:
  case RISCV::SplitF64Pseudo_INX:
    return emitSplitF64Pseudo(MI, BB, Subtarget);
  case RISCV::PseudoQuietFLE_H:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoQuietFLT_D:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);

#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL)                             \
  case RISCV::RMOpc##_##LMUL:                                                  \
    return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL);                          \
  case RISCV::RMOpc##_##LMUL##_MASK:                                           \
    return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);

#define PseudoVFCVT_RM_CASE(RMOpc, Opc)                                        \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1)                                     \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2)                                     \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4)                                     \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2)                                    \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)

#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc)                                     \
  PseudoVFCVT_RM_CASE(RMOpc, Opc)                                              \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)

#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc)                                    \
  PseudoVFCVT_RM_CASE(RMOpc, Opc)                                              \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)

  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)

  PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
  PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);

  PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
  PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
  PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
  PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);

  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M1_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M4_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M8_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
    return emitFROUND(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    if (!Subtarget.is64Bit())
      report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
                         "supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
                                                        SDNode *Node) const {
  // Add FRM dependency to any instructions with dynamic rounding mode.
  int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
  if (Idx < 0) {
    // Vector pseudos have FRM index indicated by TSFlags.
    Idx = RISCVII::getFRMOpNum(MI.getDesc());
    if (Idx < 0)
      return;
  }
  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
    return;
  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM))
    return;
  MI.addOperand(
      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or FPRs
//   are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference, the
//   frontend must modify the prototype so a pointer with the sret annotation is
//   passed as the first argument. This is not necessary for large scalar
//   returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.
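// Editor's note: an illustrative example of the rules above; the types are
// hypothetical and the coercion itself is performed by the frontend (e.g.
// Clang). On RV64 with the lp64d ABI, a struct such as { double d; int32_t i; }
// is passed as two separate arguments (one FPR, one GPR) while both register
// classes still have free registers; a struct of two 64-bit integers is
// coerced to a two-element i64 array; and a struct larger than 2*XLEN that
// gains nothing from the floating-point rules is passed byval via a pointer.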
static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
                                    RISCV::X13, RISCV::X14, RISCV::X15,
                                    RISCV::X16, RISCV::X17};
static const MCPhysReg ArgFPR16s[] = {RISCV::F10_H, RISCV::F11_H, RISCV::F12_H,
                                      RISCV::F13_H, RISCV::F14_H, RISCV::F15_H,
                                      RISCV::F16_H, RISCV::F17_H};
static const MCPhysReg ArgFPR32s[] = {RISCV::F10_F, RISCV::F11_F, RISCV::F12_F,
                                      RISCV::F13_F, RISCV::F14_F, RISCV::F15_F,
                                      RISCV::F16_F, RISCV::F17_F};
static const MCPhysReg ArgFPR64s[] = {RISCV::F10_D, RISCV::F11_D, RISCV::F12_D,
                                      RISCV::F13_D, RISCV::F14_D, RISCV::F15_D,
                                      RISCV::F16_D, RISCV::F17_D};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}
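// Editor's note (illustrative): for an i64 argument split on RV32, the helper
// above yields one of three placements depending on how many argument GPRs
// remain: both halves in registers (e.g. a0/a1), the low half in the last free
// register and the high half on the stack, or both halves on the stack with
// the required alignment.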
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
                               std::optional<unsigned> FirstMaskArgument,
                               CCState &State, const RISCVTargetLowering &TLI) {
  const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
  if (RC == &RISCV::VRRegClass) {
    // Assign the first mask argument to V0.
    // This is an interim calling convention and it may be changed in the
    // future.
    if (FirstMaskArgument && ValNo == *FirstMaskArgument)
      return State.AllocateReg(RISCV::V0);
    return State.AllocateReg(ArgVRs);
  }
  if (RC == &RISCV::VRM2RegClass)
    return State.AllocateReg(ArgVRM2s);
  if (RC == &RISCV::VRM4RegClass)
    return State.AllocateReg(ArgVRM4s);
  if (RC == &RISCV::VRM8RegClass)
    return State.AllocateReg(ArgVRM8s);
  llvm_unreachable("Unhandled register class for ValueType");
}
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     std::optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Static chain parameter must not be passed in normal argument registers,
  // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
  if (ArgFlags.isNest()) {
    if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 &&
      (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(PendingLocs.empty() && "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    LocVT = MVT::i32;
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    Register HiReg = State.AllocateReg(ArgGPRs);
    if (HiReg) {
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
    } else {
      unsigned StackOffset = State.AllocateStack(4, Align(4));
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
    }
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a scalar floating-point value is passed on the stack, no
  // bit-conversion is needed.
  if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
    assert(!ValVT.isVector());
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
template <typename ArgTy>
static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
      return ArgIdx.index();
  }
  return std::nullopt;
}
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Outs.size();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
    } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      if (RV64LegalI32) {
        Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
      } else {
        Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      }
    } else {
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    }
    break;
  }
  return Val;
}
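
// Unpack an argument that was assigned to a register: create a virtual
// register of the appropriate class, mark the physical register live-in,
// and copy the incoming value out of it.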
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const RISCVTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
        RVFI->addSExt32Register(VReg);
      }
    }
  }

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (LocVT.isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
    } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
      if (RV64LegalI32) {
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
      } else {
        Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
      }
    } else {
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    }
    break;
  }
  return Val;
}
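
// Unpack an argument that was assigned to a stack slot: create a fixed
// frame object at the incoming offset and load the value from it.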
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, we save the pointer which points to
    // the scalable vector value in the stack. The ValVT will be the pointer
    // type, instead of the scalable vector type.
    ValVT = LocVT;
  }
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
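
// On RV32 with a soft-float f64 ABI, an f64 argument is split into two i32
// halves: the low half always arrives in a GPR, while the high half may be
// in a second GPR or on the stack. Reassemble it with BuildPairF64.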
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA,
                                       const CCValAssign &HiVA,
                                       const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefits for some cases.
bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            std::optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget = TLI.getSubtarget();

  if (LocVT == MVT::f16 &&
      (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Check if there is an available GPR before hitting the stack.
  if ((LocVT == MVT::f16 &&
       (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
      (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.is64Bit() &&
       Subtarget.hasStdExtZdinx())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    unsigned Offset2 = State.AllocateStack(2, Align(2));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
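
// Calling convention used for functions using the GHC (Glasgow Haskell
// Compiler) convention: arguments live in a fixed set of STG registers and
// are never passed on the stack.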
bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (ArgFlags.isNest()) {
    report_fatal_error(
        "Attribute 'nest' is not supported in GHC calling convention");
  }

  static const MCPhysReg GPRList[] = {
      RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
      RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget =
      State.getMachineFunction().getSubtarget<RISCVSubtarget>();

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    // Pass in STG registers: F1, ..., F6
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    // Pass in STG registers: D1, ..., D6
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
       Subtarget.is64Bit())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::SPIR_KERNEL:
    break;
  case CallingConv::GHC:
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
                         "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                   : RISCV::CC_RISCV);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                    : RISCV::CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
         "Unexpected CFI type for a direct call");

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
    if (CLI.CFIType)
      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  if (CLI.CFIType)
    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    } else {
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
    }

    InVals.push_back(RetValue);
  }

  return Chain;
}
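
// Return true if all return values can be assigned to registers by
// RISCV::CC_RISCV; otherwise SelectionDAG demotes the return to an
// sret-style argument.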
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                        ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                        *this, FirstMaskArgument))
      return false;
  }
  return true;
}
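
// Lower outgoing return values: split f64 into a register pair on RV32
// soft-float ABIs, copy values into their ABI-assigned registers, and emit
// the appropriate return node (interrupt handlers return with sret or mret).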
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, RISCV::CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      Register RegHi = RVLocs[++i].getLocReg();

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  if (any_of(RVLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  unsigned RetOpc = RISCVISD::RET_GLUE;
  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_GLUE;
    else
      RetOpc = RISCVISD::MRET_GLUE;
  }

  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
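
// Emit a diagnostic if any of the registers chosen for argument passing has
// been reserved by the user and therefore cannot be used.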
void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}
// Check if the result of the node is only used as a return value, as
// otherwise we can't perform a tail-call.
bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();

  if (Copy->getOpcode() == ISD::BITCAST) {
    return isUsedByReturnOnly(Copy, Chain);
  }

  // TODO: Handle additional opcodes in order to support tail-calling libcalls
  // with soft float ABIs.
  if (Copy->getOpcode() != ISD::CopyToReg) {
    return false;
  }

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
    return false;

  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != RISCVISD::RET_GLUE)
      return false;
    HasRet = true;
  }
  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
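
// Map RISCVISD opcodes to human-readable names for debug output.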
17939 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode
) const {
17940 #define NODE_NAME_CASE(NODE) \
17941 case RISCVISD::NODE: \
17942 return "RISCVISD::" #NODE;
17943 // clang-format off
17944 switch ((RISCVISD::NodeType
)Opcode
) {
17945 case RISCVISD::FIRST_NUMBER
:
17947 NODE_NAME_CASE(RET_GLUE
)
17948 NODE_NAME_CASE(SRET_GLUE
)
17949 NODE_NAME_CASE(MRET_GLUE
)
17950 NODE_NAME_CASE(CALL
)
17951 NODE_NAME_CASE(SELECT_CC
)
17952 NODE_NAME_CASE(BR_CC
)
17953 NODE_NAME_CASE(BuildPairF64
)
17954 NODE_NAME_CASE(SplitF64
)
17955 NODE_NAME_CASE(TAIL
)
17956 NODE_NAME_CASE(ADD_LO
)
17958 NODE_NAME_CASE(LLA
)
17959 NODE_NAME_CASE(ADD_TPREL
)
17960 NODE_NAME_CASE(MULHSU
)
17961 NODE_NAME_CASE(SLLW
)
17962 NODE_NAME_CASE(SRAW
)
17963 NODE_NAME_CASE(SRLW
)
17964 NODE_NAME_CASE(DIVW
)
17965 NODE_NAME_CASE(DIVUW
)
17966 NODE_NAME_CASE(REMUW
)
17967 NODE_NAME_CASE(ROLW
)
17968 NODE_NAME_CASE(RORW
)
17969 NODE_NAME_CASE(CLZW
)
17970 NODE_NAME_CASE(CTZW
)
17971 NODE_NAME_CASE(ABSW
)
17972 NODE_NAME_CASE(FMV_H_X
)
17973 NODE_NAME_CASE(FMV_X_ANYEXTH
)
17974 NODE_NAME_CASE(FMV_X_SIGNEXTH
)
17975 NODE_NAME_CASE(FMV_W_X_RV64
)
17976 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64
)
17977 NODE_NAME_CASE(FCVT_X
)
17978 NODE_NAME_CASE(FCVT_XU
)
17979 NODE_NAME_CASE(FCVT_W_RV64
)
17980 NODE_NAME_CASE(FCVT_WU_RV64
)
17981 NODE_NAME_CASE(STRICT_FCVT_W_RV64
)
17982 NODE_NAME_CASE(STRICT_FCVT_WU_RV64
)
17983 NODE_NAME_CASE(FP_ROUND_BF16
)
17984 NODE_NAME_CASE(FP_EXTEND_BF16
)
17985 NODE_NAME_CASE(FROUND
)
17986 NODE_NAME_CASE(FPCLASS
)
17987 NODE_NAME_CASE(FMAX
)
17988 NODE_NAME_CASE(FMIN
)
17989 NODE_NAME_CASE(READ_CYCLE_WIDE
)
17990 NODE_NAME_CASE(BREV8
)
17991 NODE_NAME_CASE(ORC_B
)
17992 NODE_NAME_CASE(ZIP
)
17993 NODE_NAME_CASE(UNZIP
)
17994 NODE_NAME_CASE(CLMUL
)
17995 NODE_NAME_CASE(CLMULH
)
17996 NODE_NAME_CASE(CLMULR
)
17997 NODE_NAME_CASE(SHA256SIG0
)
17998 NODE_NAME_CASE(SHA256SIG1
)
17999 NODE_NAME_CASE(SHA256SUM0
)
18000 NODE_NAME_CASE(SHA256SUM1
)
18001 NODE_NAME_CASE(SM4KS
)
18002 NODE_NAME_CASE(SM4ED
)
18003 NODE_NAME_CASE(SM3P0
)
18004 NODE_NAME_CASE(SM3P1
)
18005 NODE_NAME_CASE(TH_LWD
)
18006 NODE_NAME_CASE(TH_LWUD
)
18007 NODE_NAME_CASE(TH_LDD
)
18008 NODE_NAME_CASE(TH_SWD
)
18009 NODE_NAME_CASE(TH_SDD
)
18010 NODE_NAME_CASE(VMV_V_V_VL
)
18011 NODE_NAME_CASE(VMV_V_X_VL
)
18012 NODE_NAME_CASE(VFMV_V_F_VL
)
18013 NODE_NAME_CASE(VMV_X_S
)
18014 NODE_NAME_CASE(VMV_S_X_VL
)
18015 NODE_NAME_CASE(VFMV_S_F_VL
)
18016 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL
)
18017 NODE_NAME_CASE(READ_VLENB
)
18018 NODE_NAME_CASE(TRUNCATE_VECTOR_VL
)
18019 NODE_NAME_CASE(VSLIDEUP_VL
)
18020 NODE_NAME_CASE(VSLIDE1UP_VL
)
18021 NODE_NAME_CASE(VSLIDEDOWN_VL
)
18022 NODE_NAME_CASE(VSLIDE1DOWN_VL
)
18023 NODE_NAME_CASE(VFSLIDE1UP_VL
)
18024 NODE_NAME_CASE(VFSLIDE1DOWN_VL
)
18025 NODE_NAME_CASE(VID_VL
)
18026 NODE_NAME_CASE(VFNCVT_ROD_VL
)
18027 NODE_NAME_CASE(VECREDUCE_ADD_VL
)
18028 NODE_NAME_CASE(VECREDUCE_UMAX_VL
)
18029 NODE_NAME_CASE(VECREDUCE_SMAX_VL
)
18030 NODE_NAME_CASE(VECREDUCE_UMIN_VL
)
18031 NODE_NAME_CASE(VECREDUCE_SMIN_VL
)
18032 NODE_NAME_CASE(VECREDUCE_AND_VL
)
18033 NODE_NAME_CASE(VECREDUCE_OR_VL
)
18034 NODE_NAME_CASE(VECREDUCE_XOR_VL
)
18035 NODE_NAME_CASE(VECREDUCE_FADD_VL
)
18036 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL
)
18037 NODE_NAME_CASE(VECREDUCE_FMIN_VL
)
18038 NODE_NAME_CASE(VECREDUCE_FMAX_VL
)
18039 NODE_NAME_CASE(ADD_VL
)
18040 NODE_NAME_CASE(AND_VL
)
18041 NODE_NAME_CASE(MUL_VL
)
18042 NODE_NAME_CASE(OR_VL
)
18043 NODE_NAME_CASE(SDIV_VL
)
18044 NODE_NAME_CASE(SHL_VL
)
18045 NODE_NAME_CASE(SREM_VL
)
18046 NODE_NAME_CASE(SRA_VL
)
18047 NODE_NAME_CASE(SRL_VL
)
18048 NODE_NAME_CASE(ROTL_VL
)
18049 NODE_NAME_CASE(ROTR_VL
)
18050 NODE_NAME_CASE(SUB_VL
)
18051 NODE_NAME_CASE(UDIV_VL
)
18052 NODE_NAME_CASE(UREM_VL
)
18053 NODE_NAME_CASE(XOR_VL
)
18054 NODE_NAME_CASE(SADDSAT_VL
)
18055 NODE_NAME_CASE(UADDSAT_VL
)
18056 NODE_NAME_CASE(SSUBSAT_VL
)
18057 NODE_NAME_CASE(USUBSAT_VL
)
18058 NODE_NAME_CASE(FADD_VL
)
18059 NODE_NAME_CASE(FSUB_VL
)
18060 NODE_NAME_CASE(FMUL_VL
)
18061 NODE_NAME_CASE(FDIV_VL
)
18062 NODE_NAME_CASE(FNEG_VL
)
18063 NODE_NAME_CASE(FABS_VL
)
18064 NODE_NAME_CASE(FSQRT_VL
)
18065 NODE_NAME_CASE(FCLASS_VL
)
18066 NODE_NAME_CASE(VFMADD_VL
)
18067 NODE_NAME_CASE(VFNMADD_VL
)
18068 NODE_NAME_CASE(VFMSUB_VL
)
18069 NODE_NAME_CASE(VFNMSUB_VL
)
18070 NODE_NAME_CASE(VFWMADD_VL
)
18071 NODE_NAME_CASE(VFWNMADD_VL
)
18072 NODE_NAME_CASE(VFWMSUB_VL
)
18073 NODE_NAME_CASE(VFWNMSUB_VL
)
18074 NODE_NAME_CASE(FCOPYSIGN_VL
)
18075 NODE_NAME_CASE(SMIN_VL
)
18076 NODE_NAME_CASE(SMAX_VL
)
18077 NODE_NAME_CASE(UMIN_VL
)
18078 NODE_NAME_CASE(UMAX_VL
)
18079 NODE_NAME_CASE(BITREVERSE_VL
)
18080 NODE_NAME_CASE(BSWAP_VL
)
18081 NODE_NAME_CASE(CTLZ_VL
)
18082 NODE_NAME_CASE(CTTZ_VL
)
18083 NODE_NAME_CASE(CTPOP_VL
)
18084 NODE_NAME_CASE(VFMIN_VL
)
18085 NODE_NAME_CASE(VFMAX_VL
)
18086 NODE_NAME_CASE(MULHS_VL
)
18087 NODE_NAME_CASE(MULHU_VL
)
18088 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL
)
18089 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL
)
18090 NODE_NAME_CASE(VFCVT_RM_X_F_VL
)
18091 NODE_NAME_CASE(VFCVT_RM_XU_F_VL
)
18092 NODE_NAME_CASE(VFCVT_X_F_VL
)
18093 NODE_NAME_CASE(VFCVT_XU_F_VL
)
18094 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL
)
18095 NODE_NAME_CASE(SINT_TO_FP_VL
)
18096 NODE_NAME_CASE(UINT_TO_FP_VL
)
18097 NODE_NAME_CASE(VFCVT_RM_F_XU_VL
)
18098 NODE_NAME_CASE(VFCVT_RM_F_X_VL
)
18099 NODE_NAME_CASE(FP_EXTEND_VL
)
18100 NODE_NAME_CASE(FP_ROUND_VL
)
18101 NODE_NAME_CASE(STRICT_FADD_VL
)
18102 NODE_NAME_CASE(STRICT_FSUB_VL
)
18103 NODE_NAME_CASE(STRICT_FMUL_VL
)
18104 NODE_NAME_CASE(STRICT_FDIV_VL
)
18105 NODE_NAME_CASE(STRICT_FSQRT_VL
)
18106 NODE_NAME_CASE(STRICT_VFMADD_VL
)
18107 NODE_NAME_CASE(STRICT_VFNMADD_VL
)
18108 NODE_NAME_CASE(STRICT_VFMSUB_VL
)
18109 NODE_NAME_CASE(STRICT_VFNMSUB_VL
)
18110 NODE_NAME_CASE(STRICT_FP_ROUND_VL
)
18111 NODE_NAME_CASE(STRICT_FP_EXTEND_VL
)
18112 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL
)
18113 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL
)
18114 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL
)
18115 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL
)
18116 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL
)
18117 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL
)
18118 NODE_NAME_CASE(STRICT_FSETCC_VL
)
18119 NODE_NAME_CASE(STRICT_FSETCCS_VL
)
18120 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL
)
18121 NODE_NAME_CASE(VWMUL_VL
)
18122 NODE_NAME_CASE(VWMULU_VL
)
18123 NODE_NAME_CASE(VWMULSU_VL
)
18124 NODE_NAME_CASE(VWADD_VL
)
18125 NODE_NAME_CASE(VWADDU_VL
)
18126 NODE_NAME_CASE(VWSUB_VL
)
18127 NODE_NAME_CASE(VWSUBU_VL
)
18128 NODE_NAME_CASE(VWADD_W_VL
)
18129 NODE_NAME_CASE(VWADDU_W_VL
)
18130 NODE_NAME_CASE(VWSUB_W_VL
)
18131 NODE_NAME_CASE(VWSUBU_W_VL
)
18132 NODE_NAME_CASE(VWSLL_VL
)
18133 NODE_NAME_CASE(VFWMUL_VL
)
18134 NODE_NAME_CASE(VFWADD_VL
)
18135 NODE_NAME_CASE(VFWSUB_VL
)
18136 NODE_NAME_CASE(VFWADD_W_VL
)
18137 NODE_NAME_CASE(VFWSUB_W_VL
)
18138 NODE_NAME_CASE(VWMACC_VL
)
18139 NODE_NAME_CASE(VWMACCU_VL
)
18140 NODE_NAME_CASE(VWMACCSU_VL
)
18141 NODE_NAME_CASE(VNSRL_VL
)
18142 NODE_NAME_CASE(SETCC_VL
)
18143 NODE_NAME_CASE(VSELECT_VL
)
18144 NODE_NAME_CASE(VP_MERGE_VL
)
18145 NODE_NAME_CASE(VMAND_VL
)
18146 NODE_NAME_CASE(VMOR_VL
)
18147 NODE_NAME_CASE(VMXOR_VL
)
18148 NODE_NAME_CASE(VMCLR_VL
)
18149 NODE_NAME_CASE(VMSET_VL
)
18150 NODE_NAME_CASE(VRGATHER_VX_VL
)
18151 NODE_NAME_CASE(VRGATHER_VV_VL
)
18152 NODE_NAME_CASE(VRGATHEREI16_VV_VL
)
18153 NODE_NAME_CASE(VSEXT_VL
)
18154 NODE_NAME_CASE(VZEXT_VL
)
18155 NODE_NAME_CASE(VCPOP_VL
)
18156 NODE_NAME_CASE(VFIRST_VL
)
18157 NODE_NAME_CASE(READ_CSR
)
18158 NODE_NAME_CASE(WRITE_CSR
)
18159 NODE_NAME_CASE(SWAP_CSR
)
18160 NODE_NAME_CASE(CZERO_EQZ
)
18161 NODE_NAME_CASE(CZERO_NEZ
)
18165 #undef NODE_NAME_CASE
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    case 'S': // A symbolic address
      return C_Other;
    }
  } else {
    if (Constraint == "vr" || Constraint == "vm")
      return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(Constraint);
}
18194 std::pair
<unsigned, const TargetRegisterClass
*>
18195 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
18196 StringRef Constraint
,
18198 // First, see if this is a constraint that directly corresponds to a RISC-V
18200 if (Constraint
.size() == 1) {
18201 switch (Constraint
[0]) {
18203 // TODO: Support fixed vectors up to XLen for P extension?
18206 return std::make_pair(0U, &RISCV::GPRNoX0RegClass
);
18208 if (Subtarget
.hasStdExtZfhOrZfhmin() && VT
== MVT::f16
)
18209 return std::make_pair(0U, &RISCV::FPR16RegClass
);
18210 if (Subtarget
.hasStdExtF() && VT
== MVT::f32
)
18211 return std::make_pair(0U, &RISCV::FPR32RegClass
);
18212 if (Subtarget
.hasStdExtD() && VT
== MVT::f64
)
18213 return std::make_pair(0U, &RISCV::FPR64RegClass
);
18218 } else if (Constraint
== "vr") {
18219 for (const auto *RC
: {&RISCV::VRRegClass
, &RISCV::VRM2RegClass
,
18220 &RISCV::VRM4RegClass
, &RISCV::VRM8RegClass
}) {
18221 if (TRI
->isTypeLegalForClass(*RC
, VT
.SimpleTy
))
18222 return std::make_pair(0U, RC
);
18224 } else if (Constraint
== "vm") {
18225 if (TRI
->isTypeLegalForClass(RISCV::VMV0RegClass
, VT
.SimpleTy
))
18226 return std::make_pair(0U, &RISCV::VMV0RegClass
);
18229 // Clang will correctly decode the usage of register name aliases into their
18230 // official names. However, other frontends like `rustc` do not. This allows
18231 // users of these frontends to use the ABI names for registers in LLVM-style
18232 // register constraints.
18233 unsigned XRegFromAlias
= StringSwitch
<unsigned>(Constraint
.lower())
18234 .Case("{zero}", RISCV::X0
)
18235 .Case("{ra}", RISCV::X1
)
18236 .Case("{sp}", RISCV::X2
)
18237 .Case("{gp}", RISCV::X3
)
18238 .Case("{tp}", RISCV::X4
)
18239 .Case("{t0}", RISCV::X5
)
18240 .Case("{t1}", RISCV::X6
)
18241 .Case("{t2}", RISCV::X7
)
18242 .Cases("{s0}", "{fp}", RISCV::X8
)
18243 .Case("{s1}", RISCV::X9
)
18244 .Case("{a0}", RISCV::X10
)
18245 .Case("{a1}", RISCV::X11
)
18246 .Case("{a2}", RISCV::X12
)
18247 .Case("{a3}", RISCV::X13
)
18248 .Case("{a4}", RISCV::X14
)
18249 .Case("{a5}", RISCV::X15
)
18250 .Case("{a6}", RISCV::X16
)
18251 .Case("{a7}", RISCV::X17
)
18252 .Case("{s2}", RISCV::X18
)
18253 .Case("{s3}", RISCV::X19
)
18254 .Case("{s4}", RISCV::X20
)
18255 .Case("{s5}", RISCV::X21
)
18256 .Case("{s6}", RISCV::X22
)
18257 .Case("{s7}", RISCV::X23
)
18258 .Case("{s8}", RISCV::X24
)
18259 .Case("{s9}", RISCV::X25
)
18260 .Case("{s10}", RISCV::X26
)
18261 .Case("{s11}", RISCV::X27
)
18262 .Case("{t3}", RISCV::X28
)
18263 .Case("{t4}", RISCV::X29
)
18264 .Case("{t5}", RISCV::X30
)
18265 .Case("{t6}", RISCV::X31
)
18266 .Default(RISCV::NoRegister
);
18267 if (XRegFromAlias
!= RISCV::NoRegister
)
18268 return std::make_pair(XRegFromAlias
, &RISCV::GPRRegClass
);
18270 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
18271 // TableGen record rather than the AsmName to choose registers for InlineAsm
18272 // constraints, plus we want to match those names to the widest floating point
18273 // register type available, manually select floating point registers here.
18275 // The second case is the ABI name of the register, so that frontends can also
18276 // use the ABI names in register constraint lists.
18277 if (Subtarget
.hasStdExtF()) {
18278 unsigned FReg
= StringSwitch
<unsigned>(Constraint
.lower())
18279 .Cases("{f0}", "{ft0}", RISCV::F0_F
)
18280 .Cases("{f1}", "{ft1}", RISCV::F1_F
)
18281 .Cases("{f2}", "{ft2}", RISCV::F2_F
)
18282 .Cases("{f3}", "{ft3}", RISCV::F3_F
)
18283 .Cases("{f4}", "{ft4}", RISCV::F4_F
)
18284 .Cases("{f5}", "{ft5}", RISCV::F5_F
)
18285 .Cases("{f6}", "{ft6}", RISCV::F6_F
)
18286 .Cases("{f7}", "{ft7}", RISCV::F7_F
)
18287 .Cases("{f8}", "{fs0}", RISCV::F8_F
)
18288 .Cases("{f9}", "{fs1}", RISCV::F9_F
)
18289 .Cases("{f10}", "{fa0}", RISCV::F10_F
)
18290 .Cases("{f11}", "{fa1}", RISCV::F11_F
)
18291 .Cases("{f12}", "{fa2}", RISCV::F12_F
)
18292 .Cases("{f13}", "{fa3}", RISCV::F13_F
)
18293 .Cases("{f14}", "{fa4}", RISCV::F14_F
)
18294 .Cases("{f15}", "{fa5}", RISCV::F15_F
)
18295 .Cases("{f16}", "{fa6}", RISCV::F16_F
)
18296 .Cases("{f17}", "{fa7}", RISCV::F17_F
)
18297 .Cases("{f18}", "{fs2}", RISCV::F18_F
)
18298 .Cases("{f19}", "{fs3}", RISCV::F19_F
)
18299 .Cases("{f20}", "{fs4}", RISCV::F20_F
)
18300 .Cases("{f21}", "{fs5}", RISCV::F21_F
)
18301 .Cases("{f22}", "{fs6}", RISCV::F22_F
)
18302 .Cases("{f23}", "{fs7}", RISCV::F23_F
)
18303 .Cases("{f24}", "{fs8}", RISCV::F24_F
)
18304 .Cases("{f25}", "{fs9}", RISCV::F25_F
)
18305 .Cases("{f26}", "{fs10}", RISCV::F26_F
)
18306 .Cases("{f27}", "{fs11}", RISCV::F27_F
)
18307 .Cases("{f28}", "{ft8}", RISCV::F28_F
)
18308 .Cases("{f29}", "{ft9}", RISCV::F29_F
)
18309 .Cases("{f30}", "{ft10}", RISCV::F30_F
)
18310 .Cases("{f31}", "{ft11}", RISCV::F31_F
)
18311 .Default(RISCV::NoRegister
);
18312 if (FReg
!= RISCV::NoRegister
) {
18313 assert(RISCV::F0_F
<= FReg
&& FReg
<= RISCV::F31_F
&& "Unknown fp-reg");
18314 if (Subtarget
.hasStdExtD() && (VT
== MVT::f64
|| VT
== MVT::Other
)) {
18315 unsigned RegNo
= FReg
- RISCV::F0_F
;
18316 unsigned DReg
= RISCV::F0_D
+ RegNo
;
18317 return std::make_pair(DReg
, &RISCV::FPR64RegClass
);
18319 if (VT
== MVT::f32
|| VT
== MVT::Other
)
18320 return std::make_pair(FReg
, &RISCV::FPR32RegClass
);
18321 if (Subtarget
.hasStdExtZfhOrZfhmin() && VT
== MVT::f16
) {
18322 unsigned RegNo
= FReg
- RISCV::F0_F
;
18323 unsigned HReg
= RISCV::F0_H
+ RegNo
;
18324 return std::make_pair(HReg
, &RISCV::FPR16RegClass
);
18329 if (Subtarget
.hasVInstructions()) {
18330 Register VReg
= StringSwitch
<Register
>(Constraint
.lower())
18331 .Case("{v0}", RISCV::V0
)
18332 .Case("{v1}", RISCV::V1
)
18333 .Case("{v2}", RISCV::V2
)
18334 .Case("{v3}", RISCV::V3
)
18335 .Case("{v4}", RISCV::V4
)
18336 .Case("{v5}", RISCV::V5
)
18337 .Case("{v6}", RISCV::V6
)
18338 .Case("{v7}", RISCV::V7
)
18339 .Case("{v8}", RISCV::V8
)
18340 .Case("{v9}", RISCV::V9
)
18341 .Case("{v10}", RISCV::V10
)
18342 .Case("{v11}", RISCV::V11
)
18343 .Case("{v12}", RISCV::V12
)
18344 .Case("{v13}", RISCV::V13
)
18345 .Case("{v14}", RISCV::V14
)
18346 .Case("{v15}", RISCV::V15
)
18347 .Case("{v16}", RISCV::V16
)
18348 .Case("{v17}", RISCV::V17
)
18349 .Case("{v18}", RISCV::V18
)
18350 .Case("{v19}", RISCV::V19
)
18351 .Case("{v20}", RISCV::V20
)
18352 .Case("{v21}", RISCV::V21
)
18353 .Case("{v22}", RISCV::V22
)
18354 .Case("{v23}", RISCV::V23
)
18355 .Case("{v24}", RISCV::V24
)
18356 .Case("{v25}", RISCV::V25
)
18357 .Case("{v26}", RISCV::V26
)
18358 .Case("{v27}", RISCV::V27
)
18359 .Case("{v28}", RISCV::V28
)
18360 .Case("{v29}", RISCV::V29
)
18361 .Case("{v30}", RISCV::V30
)
18362 .Case("{v31}", RISCV::V31
)
18363 .Default(RISCV::NoRegister
);
18364 if (VReg
!= RISCV::NoRegister
) {
18365 if (TRI
->isTypeLegalForClass(RISCV::VMRegClass
, VT
.SimpleTy
))
18366 return std::make_pair(VReg
, &RISCV::VMRegClass
);
18367 if (TRI
->isTypeLegalForClass(RISCV::VRRegClass
, VT
.SimpleTy
))
18368 return std::make_pair(VReg
, &RISCV::VRRegClass
);
18369 for (const auto *RC
:
18370 {&RISCV::VRM2RegClass
, &RISCV::VRM4RegClass
, &RISCV::VRM8RegClass
}) {
18371 if (TRI
->isTypeLegalForClass(*RC
, VT
.SimpleTy
)) {
18372 VReg
= TRI
->getMatchingSuperReg(VReg
, RISCV::sub_vrm1_0
, RC
);
18373 return std::make_pair(VReg
, RC
);
18379 std::pair
<Register
, const TargetRegisterClass
*> Res
=
18380 TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
18382 // If we picked one of the Zfinx register classes, remap it to the GPR class.
18383 // FIXME: When Zfinx is supported in CodeGen this will need to take the
18384 // Subtarget into account.
18385 if (Res
.second
== &RISCV::GPRF16RegClass
||
18386 Res
.second
== &RISCV::GPRF32RegClass
||
18387 Res
.second
== &RISCV::GPRPF64RegClass
)
18388 return std::make_pair(Res
.first
, &RISCV::GPRRegClass
);
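
// Illustrative use of the mapping above: an inline-asm operand constrained as
// "{f10}" (or its ABI alias "{fa0}") resolves to F10_F and the FPR32 register
// class, and is remapped to the FPR64 / FPR16 view when the operand type is
// f64 or f16; likewise "{v8}" resolves to V8 and a suitable vector class.
// (Descriptive note, not from the original source.)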
InlineAsm::ConstraintCode
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::ConstraintCode::A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
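
// The 'A' constraint denotes a memory operand whose address is held in a
// single general-purpose register, e.g. (illustrative snippet, not from this
// file):
//   asm volatile("lr.w %0, %1" : "=&r"(Old) : "A"(*Ptr));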
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (isNullConstant(Op))
        Ops.push_back(
            DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'S':
      if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0)));
      } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
                                                BA->getValueType(0)));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
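
// For example (illustrative, not from this file), the "I" constraint accepts
// a 12-bit signed immediate:
//   asm volatile("addi %0, %1, %2" : "=r"(d) : "r"(s), "I"(42));
// while "K" accepts a 5-bit unsigned immediate such as a CSR or vsetvli
// immediate operand, and "J" only matches the constant zero.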
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (Subtarget.hasStdExtZtso()) {
    if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
      return Builder.CreateFence(Ord);
    return nullptr;
  }

  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (Subtarget.hasStdExtZtso()) {
    if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
      return Builder.CreateFence(Ord);
    return nullptr;
  }

  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
      Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  return nullptr;
}
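
// Rough summary (descriptive, not normative): under RVWMO a seq_cst load gets
// a leading "fence rw,rw" and acquire-or-stronger loads a trailing
// "fence r,rw", while release-or-stronger stores get a leading "fence rw,w".
// Under Ztso only the seq_cst cases still require an explicit fence.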
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;

  return AtomicExpansionKind::None;
}
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LR/SC loop emitted by
  // int_riscv_masked_atomicrmw_xchg.
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
                                     Builder.CreateNot(Mask, "Inv_Mask"),
                                     AI->getAlign(), Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
                                     AI->getAlign(), Ord);
  }

  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend it.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
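
// For illustration only: an `atomicrmw add i8` is first rewritten by the
// generic AtomicExpand pass into an aligned XLen-wide access with Mask and
// ShiftAmt operands, and the call built above then looks roughly like
//   %res = call i32 @llvm.riscv.masked.atomicrmw.add.i32.p0(
//              ptr %aligned, i32 %incr, i32 %mask, i32 %ordering)
// which is later selected to an LR/SC loop operating on the masked sub-word.
// (The exact mangled intrinsic name depends on the pointer type overload.)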
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;

  return AtomicExpansionKind::None;
}
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
                                                        EVT DataVT) const {
  // We have indexed loads for all legal index types. Indices are always
  // zero-extended.
  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
         isTypeLegal(Extend.getValueType()) &&
         isTypeLegal(Extend.getOperand(0).getValueType());
}
bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
                                               EVT VT) const {
  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
    return false;

  switch (FPVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfhOrZfhmin();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    return false;
  }
}
unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce the size of a jump
  // table entry to 4 bytes.
  if (Subtarget.is64Bit() && !isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Small) {
    return MachineJumpTableInfo::EK_Custom32;
  }
  return TargetLowering::getJumpTableEncoding();
}
const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned uid, MCContext &Ctx) const {
  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
         getTargetMachine().getCodeModel() == CodeModel::Small);
  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}
bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
  // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
  // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
  // a power of two as well.
  // FIXME: This doesn't work for zve32, but that's already broken
  // elsewhere for the same reason.
  assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
  static_assert(RISCV::RVVBitsPerBlock == 64,
                "RVVBitsPerBlock changed, audit needed");
  return true;
}
bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
                                                 SDValue &Offset,
                                                 ISD::MemIndexedMode &AM,
                                                 bool &IsInc,
                                                 SelectionDAG &DAG) const {
  // Target does not support indexed loads.
  if (!Subtarget.hasVendorXTHeadMemIdx())
    return false;

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
    return false;

  Base = Op->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -(uint64_t)RHSC;

    // The constants that can be encoded in the THeadMemIdx instructions
    // are of the form (sign_extend(imm5) << imm2).
    bool isLegalIndexedOffset = false;
    for (unsigned i = 0; i < 4; i++)
      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
        isLegalIndexedOffset = true;
        break;
      }

    if (!isLegalIndexedOffset)
      return false;

    IsInc = (Op->getOpcode() == ISD::ADD);
    Offset = Op->getOperand(1);
    return true;
  }

  return false;
}
bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                    SDValue &Offset,
                                                    ISD::MemIndexedMode &AM,
                                                    SelectionDAG &DAG) const {
  EVT VT;
  SDValue Ptr;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else {
    return false;
  }

  bool IsInc;
  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
    return false;

  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}
bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     ISD::MemIndexedMode &AM,
                                                     SelectionDAG &DAG) const {
  EVT VT;
  SDValue Ptr;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else {
    return false;
  }

  bool IsInc;
  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
    return false;
  // Post-indexing updates the base, so it's not a valid transform
  // if that's not the same as the load's pointer.
  if (Ptr != Base)
    return false;

  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  EVT SVT = VT.getScalarType();

  if (!SVT.isSimple())
    return false;

  switch (SVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return VT.isVector() ? Subtarget.hasVInstructionsF16()
                         : Subtarget.hasStdExtZfhOrZhinx();
  case MVT::f32:
    return Subtarget.hasStdExtFOrZfinx();
  case MVT::f64:
    return Subtarget.hasStdExtDOrZdinx();
  default:
    break;
  }

  return false;
}
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  // a0 holds the exception pointer.
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  // a1 holds the exception selector.
  return RISCV::X11;
}
bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than XLEN on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getXLen()))
    return false;

  return true;
}
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  const bool HasExtMOrZmmul =
      Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the sub target has the M extension and the data
  // size exceeds XLen.
  if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    // Break the MUL to a SLLI and an ADD/SUB.
    const APInt &Imm = ConstNode->getAPIntValue();
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;

    // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
    if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2()))
      return true;

    // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
    // a pair of LUI/ADDI.
    if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
        ConstNode->hasOneUse()) {
      APInt ImmS = Imm.ashr(Imm.countr_zero());
      if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
          (1 - ImmS).isPowerOf2())
        return true;
    }
  }

  return false;
}
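
// For example, x * 65 is decomposed here since 65 - 1 is a power of two
// (yielding roughly (x << 6) + x), and x * 10240 (= 5 << 11) qualifies via
// the last check, becoming roughly ((x << 2) + x) << 11 instead of an
// LUI/ADDI pair plus a MUL. (Illustrative arithmetic, not an exhaustive list.)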
bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
                                                      SDValue ConstNode) const {
  // Let the DAGCombiner decide for vectors.
  EVT VT = AddNode.getValueType();
  if (VT.isVector())
    return true;

  // Let the DAGCombiner decide for larger types.
  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
    return true;

  // It is worse if c1 is simm12 while c1*c2 is not.
  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
  const APInt &C1 = C1Node->getAPIntValue();
  const APInt &C2 = C2Node->getAPIntValue();
  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (!VT.isVector()) {
    if (Fast)
      *Fast = Subtarget.enableUnalignedScalarMem();
    return Subtarget.enableUnalignedScalarMem();
  }

  // All vector implementations must support element alignment.
  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed, and considered fast.
  if (Fast)
    *Fast = Subtarget.enableUnalignedVectorMem();
  return Subtarget.enableUnalignedVectorMem();
}
EVT RISCVTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!Subtarget.hasVInstructions())
    return MVT::Other;

  if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
    return MVT::Other;

  // We use LMUL1 memory operations here for a non-obvious reason. Our caller
  // has an expansion threshold, and we want the number of hardware memory
  // operations to correspond roughly to that threshold. LMUL>1 operations
  // are typically expanded linearly internally, and thus correspond to more
  // than one actual memory operation. Note that store merging and load
  // combining will typically form larger LMUL operations from the LMUL1
  // operations emitted here, and that's okay because combining isn't
  // introducing new memory operations; it's just merging existing ones.
  const unsigned MinVLenInBytes = Subtarget.getRealMinVLen() / 8;

  if (Op.size() < MinVLenInBytes)
    // TODO: Figure out short memops. For the moment, do the default thing
    // which ends up using scalar sequences.
    return MVT::Other;

  // Prefer i8 for non-zero memset as it allows us to avoid materializing
  // a large scalar constant and instead use vmv.v.x/i to do the
  // broadcast. For everything else, prefer ELenVT to minimize VL and thus
  // maximize the chance we can encode the size in the vsetvli.
  MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
  MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;

  // Do we have sufficient alignment for our preferred VT? If not, revert
  // to largest size allowed by our alignment criteria.
  if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
    Align RequiredAlign(PreferredVT.getStoreSize());
    if (Op.isFixedDstAlign())
      RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
    if (Op.isMemcpy())
      RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
    PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
  }
  return MVT::getVectorVT(PreferredVT,
                          MinVLenInBytes / PreferredVT.getStoreSize());
}
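
// For example, on a subtarget with Zvl128b (MinVLenInBytes = 16) and ELEN=64,
// a sufficiently aligned 64-byte memcpy gets v2i64 back from this hook (one
// LMUL1 operation per 16 bytes), while a non-zero memset gets v16i8 so the
// fill value can be splatted with vmv.v.x. (Illustrative numbers.)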
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value into
      // <vscale x 4 x i16>, we first insert the <vscale x 1 x i8> into a
      // <vscale x 8 x i8> via INSERT_SUBVECTOR, then bitcast that to
      // <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of elements should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16>, we first bitcast the <vscale x 4 x i16> to
      // <vscale x 8 x i8>, then extract the <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}
bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing zero_ext and sign_ext might prevent matching widening
  // instructions in the DAG.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}
static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}
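
// On Fuchsia this produces IR along the lines of (illustrative):
//   %tp = call ptr @llvm.thread.pointer()
//   %guard.slot = getelementptr i8, ptr %tp, i32 -16
// i.e. the stack cookie is read from tp + ZX_TLS_STACK_GUARD_OFFSET.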
bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  }

  // Need to make sure that EMUL * NFIELDS ≤ 8
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}
bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.enableUnalignedVectorMem() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}
[] = {
19167 Intrinsic::riscv_seg2_load
, Intrinsic::riscv_seg3_load
,
19168 Intrinsic::riscv_seg4_load
, Intrinsic::riscv_seg5_load
,
19169 Intrinsic::riscv_seg6_load
, Intrinsic::riscv_seg7_load
,
19170 Intrinsic::riscv_seg8_load
};
19172 /// Lower an interleaved load into a vlsegN intrinsic.
19174 /// E.g. Lower an interleaved load (Factor = 2):
19175 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
19176 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
19177 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
19180 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
19182 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
19183 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
19184 bool RISCVTargetLowering::lowerInterleavedLoad(
19185 LoadInst
*LI
, ArrayRef
<ShuffleVectorInst
*> Shuffles
,
19186 ArrayRef
<unsigned> Indices
, unsigned Factor
) const {
19187 IRBuilder
<> Builder(LI
);
19189 auto *VTy
= cast
<FixedVectorType
>(Shuffles
[0]->getType());
19190 if (!isLegalInterleavedAccessType(VTy
, Factor
, LI
->getAlign(),
19191 LI
->getPointerAddressSpace(),
19192 LI
->getModule()->getDataLayout()))
19195 auto *XLenTy
= Type::getIntNTy(LI
->getContext(), Subtarget
.getXLen());
19197 Function
*VlsegNFunc
=
19198 Intrinsic::getDeclaration(LI
->getModule(), FixedVlsegIntrIds
[Factor
- 2],
19199 {VTy
, LI
->getPointerOperandType(), XLenTy
});
19201 Value
*VL
= ConstantInt::get(XLenTy
, VTy
->getNumElements());
19204 Builder
.CreateCall(VlsegNFunc
, {LI
->getPointerOperand(), VL
});
19206 for (unsigned i
= 0; i
< Shuffles
.size(); i
++) {
19207 Value
*SubVec
= Builder
.CreateExtractValue(VlsegN
, Indices
[i
]);
19208 Shuffles
[i
]->replaceAllUsesWith(SubVec
);
static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};

/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                              %ptr, i32 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  IRBuilder<> Builder(SI);
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>.
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  Function *VssegNFunc =
      Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
                                {VTy, SI->getPointerOperandType(), XLenTy});

  auto Mask = SVI->getShuffleMask();
  SmallVector<Value *, 10> Ops;

  for (unsigned i = 0; i < Factor; i++) {
    Value *Shuffle = Builder.CreateShuffleVector(
        SVI->getOperand(0), SVI->getOperand(1),
        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
    Ops.push_back(Shuffle);
  }
  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});

  Builder.CreateCall(VssegNFunc, Ops);

  return true;
}
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                           LoadInst *LI) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

  // Only deinterleave2 supported at present.
  if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  Function *VlsegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
  SmallVector<Value *, 10> Ops;

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
  }

  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);

  return true;
}
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                                          StoreInst *SI) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

  // Only interleave2 supported at present.
  if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);

  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 for default value; works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}
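
// For example (illustrative), a load annotated with
//   !riscv-nontemporal-domain !{i32 3}   ; __RISCV_NTLH_ALL_PRIVATE
// maps to level 3, so after the -2 adjustment only MONontemporalBit0 is set.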
MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);
  return TargetFlags;
}

bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}
bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}

unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}
bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
  // We don't support scalable vectors in GISel.
  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy())
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}
namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable