//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
31 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/DiagnosticInfo.h"
34 #include "llvm/IR/DiagnosticPrinter.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/InstructionCost.h"
43 #include "llvm/Support/KnownBits.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Support/raw_ostream.h"
50 #define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");
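
// Note: the option names below are concatenated with DEBUG_TYPE
// ("riscv-lower"), so they are spelled e.g. -riscv-lower-ext-max-web-size on
// the command line.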

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

static cl::opt<bool>
    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
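
// RV64LegalI32 is an experimental mode: when enabled, i32 remains a legal
// SelectionDAG type on RV64, and the MVT::i32 actions guarded by
// "RV64LegalI32 && Subtarget.is64Bit()" below take effect instead of relying
// on type legalization to i64.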

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
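
  // The arrays above enumerate the scalable (nxv) vector MVTs by element type;
  // they are used below both to add the RVV register classes and to configure
  // per-type operation actions.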

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SETCC, MVT::i32, Promote);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    if (!RV64LegalI32) {
      setOperationAction(ISD::LOAD, MVT::i32, Custom);
      setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                         MVT::i32, Custom);
      setOperationAction(ISD::SADDO, MVT::i32, Custom);
      setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                         MVT::i32, Custom);
    }
  } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::MUL, MVT::i32, Promote);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::MUL, MVT::i128, Custom);
    if (!RV64LegalI32)
      setOperationAction(ISD::MUL, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                         Promote);
  } else if (Subtarget.is64Bit()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);
  }

  if (RV64LegalI32 && Subtarget.is64Bit()) {
    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
    setOperationAction(
        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
        Expand);
  }

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (!RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
  }

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
                         ? Legal
                         : Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BSWAP, MVT::i32,
                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                        Subtarget.hasVendorXTHeadBb())
                           ? Legal
                           : Expand);

  if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
  } else {
    // Zbkb can use rev8+brev8 to implement bitreverse.
    setOperationAction(ISD::BITREVERSE, XLenVT,
                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
                         Legal);

    if (Subtarget.is64Bit()) {
      if (RV64LegalI32)
        setOperationAction(ISD::CTTZ, MVT::i32, Legal);
      else
        setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
    }
  } else if (!Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
  }

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      Subtarget.hasVendorXCVbitmanip()) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit()) {
      if (RV64LegalI32) {
        setOperationAction(ISD::CTLZ, MVT::i32,
                           Subtarget.hasStdExtZbb() ? Legal : Promote);
        if (!Subtarget.hasStdExtZbb())
          setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
      } else
        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  }

  if (!RV64LegalI32 && Subtarget.is64Bit() &&
      !Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  // We can use PseudoCCSUB to implement ABS.
  if (Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, XLenVT, Legal);

  if (!Subtarget.hasVendorXTHeadCondMov())
    setOperationAction(ISD::SELECT, XLenVT, Custom);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT, MVT::i32, Promote);

  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
      ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
      ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
      ISD::FREM};

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN};

  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  static const unsigned ZfhminZfbfminPromoteOps[] = {
      ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FADD,
      ISD::FSUB,         ISD::FMUL,          ISD::FMA,
      ISD::FDIV,         ISD::FSQRT,         ISD::FABS,
      ISD::FNEG,         ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
      ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
      ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
      ISD::FROUNDEVEN,   ISD::SELECT};

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
    setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
    setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
    setOperationAction(ISD::FREM, MVT::bf16, Promote);
    // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
    // DAGCombiner::visitFP_ROUND probably needs improvements first.
    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
  }

  if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    } else {
      setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
                         MVT::f16, Legal);
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
                        ISD::FLOG10},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
                        ISD::STRICT_FTRUNC},
                       MVT::f16, Promote);

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    if (!Subtarget.hasStdExtZfa())
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa())
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    else
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    } else {
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);

    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                         MVT::i32, Legal);

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
  }

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);
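
  // ZeroOrOneBooleanContent: scalar i1 results are materialized as 0 or 1 in
  // an XLen register, matching what slt/sltu/seqz and friends produce.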

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::VSCALE, MVT::i32, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                         MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
        ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
        ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
        ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
        ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
        ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
        ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
        ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::EXPERIMENTAL_VP_REVERSE,
        ISD::EXPERIMENTAL_VP_SPLICE};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX};
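
    // The VECREDUCE_* opcodes above get Custom actions on the RVV vector types
    // below so they can be lowered to the corresponding vector reduction
    // instructions.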

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
          Expand);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
      setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
                          ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
                         VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvkb()) {
        setOperationAction(ISD::BSWAP, VT, Legal);
        setOperationAction(ISD::VP_BSWAP, VT, Custom);
      } else {
        setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
      }

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction(ISD::BITREVERSE, VT, Legal);
        setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Custom);
      } else {
        setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Expand);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
        // range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
                             VT, Custom);
        }
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // TODO: support more ops.
    static const unsigned ZvfhminPromoteOps[] = {
        ISD::FMINNUM,     ISD::FMAXNUM,      ISD::FADD,        ISD::FSUB,
        ISD::FMUL,        ISD::FMA,          ISD::FDIV,        ISD::FSQRT,
        ISD::FABS,        ISD::FNEG,         ISD::FCOPYSIGN,   ISD::FCEIL,
        ISD::FFLOOR,      ISD::FROUND,       ISD::FROUNDEVEN,  ISD::FRINT,
        ISD::FNEARBYINT,  ISD::IS_FPCLASS,   ISD::SETCC,       ISD::FMAXIMUM,
        ISD::FMINIMUM,    ISD::STRICT_FADD,  ISD::STRICT_FSUB, ISD::STRICT_FMUL,
        ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};

    // TODO: support more vp ops.
    static const unsigned ZvfhminPromoteVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,         ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,         ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD,  ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,  ISD::VP_SQRT,
        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,      ISD::VP_FCEIL,
        ISD::VP_FFLOOR,      ISD::VP_FROUND,       ISD::VP_FROUNDEVEN,
        ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
        ISD::VP_FNEARBYINT,  ISD::VP_SETCC,        ISD::VP_FMINIMUM,
        ISD::VP_FMAXIMUM};
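
    // With Zvfhmin but not full Zvfh, f16 vector arithmetic is not natively
    // available: the two arrays above list the opcodes that are promoted to
    // the equivalent f32 vector operations (or custom-split for nxv32f16,
    // whose f32 counterpart nxv32f32 is not a legal type).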

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                           Custom);
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                           Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                           VT, Custom);
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                           VT, Custom);
        setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16 since nxv32f32 is not legal.
        if (VT == MVT::nxv32f16) {
          setOperationAction(ZvfhminPromoteOps, VT, Custom);
          setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
          continue;
        }
        // Add more promote ops.
        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
        setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                           OtherVT, Expand);
        }

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
                           Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                            ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                            ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                            ISD::STRICT_FP_TO_UINT},
                           VT, Custom);
        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                           Custom);

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below are different between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
                              ISD::OR, ISD::XOR},
                             VT, Custom);

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},
                             VT, Custom);

          setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
          setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
          continue;
        }

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(
            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
                           VT, Custom);

        setOperationAction(
            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

        setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
                            ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
                           VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(
            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes.
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},
                           VT, Custom);

        setOperationAction(IntegerVPOps, VT, Custom);

        if (Subtarget.hasStdExtZvkb())
          setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);

        if (Subtarget.hasStdExtZvbb()) {
          setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
                             VT, Custom);
        } else {
          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
          // range of f32.
          EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
          if (isTypeLegal(FloatVT))
            setOperationAction(
                {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                Custom);
        }
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        // There are no extending loads or truncating stores.
        for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
          setTruncStoreAction(VT, InnerVT, Expand);
        }

        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        if (VT.getVectorElementType() == MVT::f16 &&
            !Subtarget.hasVInstructionsF16()) {
          setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
          setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                             Custom);
          setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
          setOperationAction(
              {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
              Custom);
          setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                              ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                             VT, Custom);
          setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                              ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                             VT, Custom);
          setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
          MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
          // Don't promote f16 vector operations to f32 if f32 vector type is
          // not legal.
          // TODO: could split the f16 vector into two vectors and do promotion.
          if (!isTypeLegal(F32VecVT))
            continue;
          setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
          setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
          continue;
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
                            ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                            ISD::MGATHER, ISD::MSCATTER},
                           VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
                            ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
                           VT, Custom);

        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);

        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                            ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
                           VT, Custom);

        setCondCodeAction(VFPCCToExpand, VT, Expand);

        setOperationAction(ISD::SETCC, VT, Custom);
        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(FloatingPointVecReduceOps, VT, Custom);

        setOperationAction(FloatingPointVPOps, VT, Custom);

        setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                           Custom);
        setOperationAction(
            {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
             ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
             ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
             ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
             ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
            VT, Custom);
      }

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
                         Custom);
      if (Subtarget.hasStdExtZfhminOrZhinxmin())
        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      if (Subtarget.hasStdExtFOrZfinx())
        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      if (Subtarget.hasStdExtDOrZdinx())
        setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    }
  }

  if (Subtarget.hasStdExtA()) {
    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
  }

  if (Subtarget.hasForcedAtomics()) {
    // Force __sync libcalls to be emitted for atomic rmw/cas operations.
    setOperationAction(
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
        XLenVT, LibCall);
  }

  if (Subtarget.hasVendorXTHeadMemIdx()) {
    for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);

      if (Subtarget.is64Bit()) {
        setIndexedLoadAction(im, MVT::i64, Legal);
        setIndexedStoreAction(im, MVT::i64, Legal);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
                       ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
    setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                         ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
                         ISD::INSERT_VECTOR_ELT});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      unsigned VF,
                                                      bool IsScalable) const {
  if (!Subtarget.hasVInstructions())
    return true;

  if (!IsScalable)
    return true;

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
    return true;

  // Don't allow VF=1 if those types aren't legal.
  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
    return true;

  // VLEN=32 support is incomplete.
  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    return true;

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.
  unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
  return VF > MaxVF || !isPowerOf2_32(VF);
}
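
// getTgtMemIntrinsic describes the memory behaviour of target intrinsics so
// that SelectionDAG can attach accurate MachineMemOperands to them.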
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();

  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided) {
    Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(PtrOp);
    Type *MemTy;
    if (IsStore) {
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
    } else {
      // Use return type. If it's segment load, return type is a struct.
      MemTy = I.getType();
      if (MemTy->isStructTy())
        MemTy = MemTy->getStructElementType(0);
    }
    if (!IsUnitStrided)
      MemTy = MemTy->getScalarType();

    Info.memVT = getValueType(DL, MemTy);
    Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |=
        IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
    return true;
  };

  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    Info.flags |= MachineMemOperand::MONonTemporal;

  Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::riscv_masked_strided_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_masked_strided_store:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  }
}
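// Illustrative note (a sketch of the operand layout, not exhaustive): for an
// unmasked segment store roughly of the form
//   llvm.riscv.vsseg2(%v0, %v1, ptr %p, i64 %vl)
// the operands are (vec, ..., vec, ptr, vl), so the pointer is found at
// I.arg_size() - 2; the masked variants carry extra mask (and, for loads,
// policy) operands, which is why their pointer index sits further from the
// end of the argument list.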
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // RVV instructions only support register addressing.
  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false; // disallow anything else.
  }

  return true;
}
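// For example (informal): a base register plus a 12-bit signed immediate,
// as in "ld a0, 16(a1)", is a legal mode; reg+reg or any scaled index is
// rejected so it gets materialised with separate address arithmetic, and
// RVV memory accesses accept only a bare base register.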
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
// isTruncateFree?
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  // We consider i64->i32 free on RV64 since we have good selection of W
  // instructions that make promoting operations back to i64 free in many cases.
  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
      !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}
bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
         Subtarget.hasVendorXCVbitmanip();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
    return false;
  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!Mask)
    return false;
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
}
bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.
  if (VT.isVector())
    return false;

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);
}

bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant.
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  return C && C->getAPIntValue().ule(10);
}
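// Example (rough sketch of the selection this enables): testing bit N of X
// can become "bexti; seqz/snez" with Zbs for any N, "th.tst" with XTheadBs
// when N is a constant, or "andi x, x, (1 << N); seqz/snez" in the base ISA,
// which only works while (1 << N) fits the 12-bit ANDI immediate (N <= 10).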
bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
                                                               EVT VT) const {
  // Only enable for rvv.
  if (!VT.isVector() || !Subtarget.hasVInstructions())
    return false;

  if (VT.isFixedLengthVector() && !isTypeLegal(VT))
    return false;

  return true;
}
bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                            Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.hasFastUnalignedAccess())
    return true;

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a
  // constant pool?
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}
bool RISCVTargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.
  return !XC;
}
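// Worked example (informal): given IR along the lines of
//   %b = lshr i32 1, %y
//   %r = and i32 %b, 1
// the DAG already has the bit-extract shape, so hoisting the AND constant
// through the shift is refused; the transform is only encouraged when it
// would create that shape rather than destroy it.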
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
  switch (Opcode) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;
  default:
    return false;
  }
}
bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  if (canSplatOperand(I->getOpcode(), Operand))
    return true;

  auto *II = dyn_cast<IntrinsicInst>(I);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  // These intrinsics are commutative.
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  // These intrinsics have 'vr' versions.
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  default:
    return false;
  }
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  for (auto OpIdx : enumerate(I->operands())) {
    if (!canSplatOperand(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand.
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // Don't sink i1 splats.
    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers.
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!canSplatOperand(Insn, U.getOperandNo()))
        return false;
    }

    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}
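// Illustrative IR (a hedged sketch): a splat feeding a vector op, e.g.
//   %ins   = insertelement <4 x i32> poison, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
//   %sum   = add <4 x i32> %v, %splat
// is sunk next to its user so instruction selection can fold it into a
// .vx form such as vadd.vx, keeping %x in a scalar register instead of
// materialising the splat in a vector register.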
bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  // Permit a vector binary operation to be converted to a scalar binary
  // operation which is custom lowered with an illegal type.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
         isOperationCustom(Opc, ScalarVT);
}
bool RISCVTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}
// Return one of the following:
// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
// positive counterpart, which will be materialized from the first returned
// element. The second returned element indicates that a FNEG should follow.
// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
                                                           EVT VT) const {
  if (!Subtarget.hasStdExtZfa())
    return std::make_pair(-1, false);

  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
  }

  if (!IsSupportedVT)
    return std::make_pair(-1, false);

  int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
  if (Index < 0 && Imm.isNegative())
    // Try the combination of its positive counterpart + FNEG.
    return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);

  return std::make_pair(Index, false);
}
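// Example (informal, assuming Zfa): +2.0f has an FLI encoding and can be
// materialised with "fli.s", whereas -2.0f does not, so the index of +2.0
// is returned together with 'true' to request a trailing FNEG.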
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
  if (VT == MVT::f16)
    IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
  else if (VT == MVT::f32)
    IsLegalVT = Subtarget.hasStdExtFOrZfinx();
  else if (VT == MVT::f64)
    IsLegalVT = Subtarget.hasStdExtDOrZdinx();
  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  if (!IsLegalVT)
    return false;

  if (getLegalZfaFPImm(Imm, VT).first >= 0)
    return true;

  // Cannot create a 64 bit floating-point immediate value for rv32.
  if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
    // td can handle +0.0 or -0.0 already.
    // -0.0 can be created by fmv + fneg.
    return Imm.isZero();
  }

  // Special case: fmv + fneg
  if (Imm.isNegZero())
    return true;

  // Building an integer and then converting requires a fmv at the end of
  // the integer sequence.
  const int Cost =
      1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
                                     Subtarget);
  return Cost <= FPImmCost;
}
// TODO: This is very conservative.
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Only support extracting a fixed from a fixed vector for now.
  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
    return false;

  unsigned ResElts = ResVT.getVectorNumElements();
  unsigned SrcElts = SrcVT.getVectorNumElements();

  // Conservatively only handle extracting half of a vector.
  // TODO: Relax this.
  if ((ResElts * 2) != SrcElts)
    return false;

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (ResVT.getVectorElementType() == MVT::i1)
    return false;

  // Slide can support arbitrary index, but we only treat vslidedown.vi as
  // cheap.
  if (Index >= 32)
    return false;

  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  return Index == 0 || Index == ResElts;
}
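// Example (rough): extracting the upper <4 x i32> half of a <8 x i32> value
// (Index == ResElts == 4) maps to a single vslidedown.vi by 4, and
// Index == 0 is a plain subregister-style extract, so both are reported as
// cheap; other offsets are possible but not advertised yet.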
2171 MVT
RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext
&Context
,
2174 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2175 // We might still end up using a GPR but that will be decided based on ABI.
2176 if (VT
== MVT::f16
&& Subtarget
.hasStdExtFOrZfinx() &&
2177 !Subtarget
.hasStdExtZfhminOrZhinxmin())
2180 MVT PartVT
= TargetLowering::getRegisterTypeForCallingConv(Context
, CC
, VT
);
2182 if (RV64LegalI32
&& Subtarget
.is64Bit() && PartVT
== MVT::i32
)
2188 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext
&Context
,
2191 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2192 // We might still end up using a GPR but that will be decided based on ABI.
2193 if (VT
== MVT::f16
&& Subtarget
.hasStdExtFOrZfinx() &&
2194 !Subtarget
.hasStdExtZfhminOrZhinxmin())
2197 return TargetLowering::getNumRegistersForCallingConv(Context
, CC
, VT
);
2200 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2201 LLVMContext
&Context
, CallingConv::ID CC
, EVT VT
, EVT
&IntermediateVT
,
2202 unsigned &NumIntermediates
, MVT
&RegisterVT
) const {
2203 unsigned NumRegs
= TargetLowering::getVectorTypeBreakdownForCallingConv(
2204 Context
, CC
, VT
, IntermediateVT
, NumIntermediates
, RegisterVT
);
2206 if (RV64LegalI32
&& Subtarget
.is64Bit() && IntermediateVT
== MVT::i32
)
2207 IntermediateVT
= MVT::i64
;
2209 if (RV64LegalI32
&& Subtarget
.is64Bit() && RegisterVT
== MVT::i32
)
2210 RegisterVT
= MVT::i64
;
2215 // Changes the condition code and swaps operands if necessary, so the SetCC
2216 // operation matches one of the comparisons supported directly by branches
2217 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2219 static void translateSetCCForBranch(const SDLoc
&DL
, SDValue
&LHS
, SDValue
&RHS
,
2220 ISD::CondCode
&CC
, SelectionDAG
&DAG
) {
2221 // If this is a single bit test that can't be handled by ANDI, shift the
2222 // bit to be tested to the MSB and perform a signed compare with 0.
2223 if (isIntEqualitySetCC(CC
) && isNullConstant(RHS
) &&
2224 LHS
.getOpcode() == ISD::AND
&& LHS
.hasOneUse() &&
2225 isa
<ConstantSDNode
>(LHS
.getOperand(1))) {
2226 uint64_t Mask
= LHS
.getConstantOperandVal(1);
2227 if ((isPowerOf2_64(Mask
) || isMask_64(Mask
)) && !isInt
<12>(Mask
)) {
2229 if (isPowerOf2_64(Mask
)) {
2230 CC
= CC
== ISD::SETEQ
? ISD::SETGE
: ISD::SETLT
;
2231 ShAmt
= LHS
.getValueSizeInBits() - 1 - Log2_64(Mask
);
2233 ShAmt
= LHS
.getValueSizeInBits() - llvm::bit_width(Mask
);
2236 LHS
= LHS
.getOperand(0);
2238 LHS
= DAG
.getNode(ISD::SHL
, DL
, LHS
.getValueType(), LHS
,
2239 DAG
.getConstant(ShAmt
, DL
, LHS
.getValueType()));
2244 if (auto *RHSC
= dyn_cast
<ConstantSDNode
>(RHS
)) {
2245 int64_t C
= RHSC
->getSExtValue();
2249 // Convert X > -1 to X >= 0.
2251 RHS
= DAG
.getConstant(0, DL
, RHS
.getValueType());
2257 // Convert X < 1 to 0 >= X.
2260 LHS
= DAG
.getConstant(0, DL
, RHS
.getValueType());
2275 CC
= ISD::getSetCCSwappedOperands(CC
);
2276 std::swap(LHS
, RHS
);
2281 RISCVII::VLMUL
RISCVTargetLowering::getLMUL(MVT VT
) {
2282 assert(VT
.isScalableVector() && "Expecting a scalable vector type");
2283 unsigned KnownSize
= VT
.getSizeInBits().getKnownMinValue();
2284 if (VT
.getVectorElementType() == MVT::i1
)
2287 switch (KnownSize
) {
2289 llvm_unreachable("Invalid LMUL.");
2291 return RISCVII::VLMUL::LMUL_F8
;
2293 return RISCVII::VLMUL::LMUL_F4
;
2295 return RISCVII::VLMUL::LMUL_F2
;
2297 return RISCVII::VLMUL::LMUL_1
;
2299 return RISCVII::VLMUL::LMUL_2
;
2301 return RISCVII::VLMUL::LMUL_4
;
2303 return RISCVII::VLMUL::LMUL_8
;
2307 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul
) {
2310 llvm_unreachable("Invalid LMUL.");
2311 case RISCVII::VLMUL::LMUL_F8
:
2312 case RISCVII::VLMUL::LMUL_F4
:
2313 case RISCVII::VLMUL::LMUL_F2
:
2314 case RISCVII::VLMUL::LMUL_1
:
2315 return RISCV::VRRegClassID
;
2316 case RISCVII::VLMUL::LMUL_2
:
2317 return RISCV::VRM2RegClassID
;
2318 case RISCVII::VLMUL::LMUL_4
:
2319 return RISCV::VRM4RegClassID
;
2320 case RISCVII::VLMUL::LMUL_8
:
2321 return RISCV::VRM8RegClassID
;
2325 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT
, unsigned Index
) {
2326 RISCVII::VLMUL LMUL
= getLMUL(VT
);
2327 if (LMUL
== RISCVII::VLMUL::LMUL_F8
||
2328 LMUL
== RISCVII::VLMUL::LMUL_F4
||
2329 LMUL
== RISCVII::VLMUL::LMUL_F2
||
2330 LMUL
== RISCVII::VLMUL::LMUL_1
) {
2331 static_assert(RISCV::sub_vrm1_7
== RISCV::sub_vrm1_0
+ 7,
2332 "Unexpected subreg numbering");
2333 return RISCV::sub_vrm1_0
+ Index
;
2335 if (LMUL
== RISCVII::VLMUL::LMUL_2
) {
2336 static_assert(RISCV::sub_vrm2_3
== RISCV::sub_vrm2_0
+ 3,
2337 "Unexpected subreg numbering");
2338 return RISCV::sub_vrm2_0
+ Index
;
2340 if (LMUL
== RISCVII::VLMUL::LMUL_4
) {
2341 static_assert(RISCV::sub_vrm4_1
== RISCV::sub_vrm4_0
+ 1,
2342 "Unexpected subreg numbering");
2343 return RISCV::sub_vrm4_0
+ Index
;
2345 llvm_unreachable("Invalid vector type.");
2348 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT
) {
2349 if (VT
.getVectorElementType() == MVT::i1
)
2350 return RISCV::VRRegClassID
;
2351 return getRegClassIDForLMUL(getLMUL(VT
));
2354 // Attempt to decompose a subvector insert/extract between VecVT and
2355 // SubVecVT via subregister indices. Returns the subregister index that
2356 // can perform the subvector insert/extract with the given element index, as
2357 // well as the index corresponding to any leftover subvectors that must be
2358 // further inserted/extracted within the register class for SubVecVT.
2359 std::pair
<unsigned, unsigned>
2360 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2361 MVT VecVT
, MVT SubVecVT
, unsigned InsertExtractIdx
,
2362 const RISCVRegisterInfo
*TRI
) {
2363 static_assert((RISCV::VRM8RegClassID
> RISCV::VRM4RegClassID
&&
2364 RISCV::VRM4RegClassID
> RISCV::VRM2RegClassID
&&
2365 RISCV::VRM2RegClassID
> RISCV::VRRegClassID
),
2366 "Register classes not ordered");
2367 unsigned VecRegClassID
= getRegClassIDForVecVT(VecVT
);
2368 unsigned SubRegClassID
= getRegClassIDForVecVT(SubVecVT
);
2369 // Try to compose a subregister index that takes us from the incoming
2370 // LMUL>1 register class down to the outgoing one. At each step we half
2372 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2373 // Note that this is not guaranteed to find a subregister index, such as
2374 // when we are extracting from one VR type to another.
2375 unsigned SubRegIdx
= RISCV::NoSubRegister
;
2376 for (const unsigned RCID
:
2377 {RISCV::VRM4RegClassID
, RISCV::VRM2RegClassID
, RISCV::VRRegClassID
})
2378 if (VecRegClassID
> RCID
&& SubRegClassID
<= RCID
) {
2379 VecVT
= VecVT
.getHalfNumVectorElementsVT();
2381 InsertExtractIdx
>= VecVT
.getVectorElementCount().getKnownMinValue();
2382 SubRegIdx
= TRI
->composeSubRegIndices(SubRegIdx
,
2383 getSubregIndexByMVT(VecVT
, IsHi
));
2385 InsertExtractIdx
-= VecVT
.getVectorElementCount().getKnownMinValue();
2387 return {SubRegIdx
, InsertExtractIdx
};
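// Worked example (informal, assuming the usual 64-bit RVV block): extracting
// nxv4i32 (LMUL=2) at element index 4 from nxv16i32 (LMUL=8) first picks
// sub_vrm4_0 (index 4 lies in the low VRM4 half), then sub_vrm2_1 inside
// that half, and leaves a remaining element index of 0.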
2390 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2391 // stores for those types.
2392 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT
) const {
2393 return !Subtarget
.useRVVForFixedLengthVectors() ||
2394 (VT
.isFixedLengthVector() && VT
.getVectorElementType() == MVT::i1
);
2397 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy
) const {
2398 if (!ScalarTy
.isSimple())
2400 switch (ScalarTy
.getSimpleVT().SimpleTy
) {
2402 return Subtarget
.is64Bit() ? Subtarget
.hasVInstructionsI64() : true;
2408 return Subtarget
.hasVInstructionsI64();
2410 return Subtarget
.hasVInstructionsF16();
2412 return Subtarget
.hasVInstructionsF32();
2414 return Subtarget
.hasVInstructionsF64();
2421 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2422 return NumRepeatedDivisors
;
2425 static SDValue
getVLOperand(SDValue Op
) {
2426 assert((Op
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
||
2427 Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
) &&
2428 "Unexpected opcode");
2429 bool HasChain
= Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
;
2430 unsigned IntNo
= Op
.getConstantOperandVal(HasChain
? 1 : 0);
2431 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo
*II
=
2432 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo
);
2435 return Op
.getOperand(II
->VLOperand
+ 1 + HasChain
);
2438 static bool useRVVForFixedLengthVectorVT(MVT VT
,
2439 const RISCVSubtarget
&Subtarget
) {
2440 assert(VT
.isFixedLengthVector() && "Expected a fixed length vector type!");
2441 if (!Subtarget
.useRVVForFixedLengthVectors())
2444 // We only support a set of vector types with a consistent maximum fixed size
2445 // across all supported vector element types to avoid legalization issues.
2446 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2447 // fixed-length vector type we support is 1024 bytes.
2448 if (VT
.getFixedSizeInBits() > 1024 * 8)
2451 unsigned MinVLen
= Subtarget
.getRealMinVLen();
2453 MVT EltVT
= VT
.getVectorElementType();
2455 // Don't use RVV for vectors we cannot scalarize if required.
2456 switch (EltVT
.SimpleTy
) {
2457 // i1 is supported but has different rules.
2461 // Masks can only use a single register.
2462 if (VT
.getVectorNumElements() > MinVLen
)
2471 if (!Subtarget
.hasVInstructionsI64())
2475 if (!Subtarget
.hasVInstructionsF16Minimal())
2479 if (!Subtarget
.hasVInstructionsF32())
2483 if (!Subtarget
.hasVInstructionsF64())
2488 // Reject elements larger than ELEN.
2489 if (EltVT
.getSizeInBits() > Subtarget
.getELen())
2492 unsigned LMul
= divideCeil(VT
.getSizeInBits(), MinVLen
);
2493 // Don't use RVV for types that don't fit.
2494 if (LMul
> Subtarget
.getMaxLMULForFixedLengthVectors())
2497 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2498 // the base fixed length RVV support in place.
2499 if (!VT
.isPow2VectorType())
2505 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT
) const {
2506 return ::useRVVForFixedLengthVectorVT(VT
, Subtarget
);
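// Example of the LMUL limit above (informal): with a minimum VLEN of 128
// (e.g. Zvl128b), a 2048-bit v64i32 needs divideCeil(2048, 128) = 16 vector
// registers' worth of data, which exceeds the LMUL=8 ceiling, so such a type
// is legalised by splitting instead of being mapped onto one RVV container.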
2509 // Return the largest legal scalable vector type that matches VT's element type.
2510 static MVT
getContainerForFixedLengthVector(const TargetLowering
&TLI
, MVT VT
,
2511 const RISCVSubtarget
&Subtarget
) {
2512 // This may be called before legal types are setup.
2513 assert(((VT
.isFixedLengthVector() && TLI
.isTypeLegal(VT
)) ||
2514 useRVVForFixedLengthVectorVT(VT
, Subtarget
)) &&
2515 "Expected legal fixed length vector!");
2517 unsigned MinVLen
= Subtarget
.getRealMinVLen();
2518 unsigned MaxELen
= Subtarget
.getELen();
2520 MVT EltVT
= VT
.getVectorElementType();
2521 switch (EltVT
.SimpleTy
) {
2523 llvm_unreachable("unexpected element type for RVV container");
2532 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2533 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2534 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2536 (VT
.getVectorNumElements() * RISCV::RVVBitsPerBlock
) / MinVLen
;
2537 NumElts
= std::max(NumElts
, RISCV::RVVBitsPerBlock
/ MaxELen
);
2538 assert(isPowerOf2_32(NumElts
) && "Expected power of 2 NumElts");
2539 return MVT::getScalableVectorVT(EltVT
, NumElts
);
2544 static MVT
getContainerForFixedLengthVector(SelectionDAG
&DAG
, MVT VT
,
2545 const RISCVSubtarget
&Subtarget
) {
2546 return getContainerForFixedLengthVector(DAG
.getTargetLoweringInfo(), VT
,
2550 MVT
RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT
) const {
2551 return ::getContainerForFixedLengthVector(*this, VT
, getSubtarget());
2554 // Grow V to consume an entire RVV register.
2555 static SDValue
convertToScalableVector(EVT VT
, SDValue V
, SelectionDAG
&DAG
,
2556 const RISCVSubtarget
&Subtarget
) {
2557 assert(VT
.isScalableVector() &&
2558 "Expected to convert into a scalable vector!");
2559 assert(V
.getValueType().isFixedLengthVector() &&
2560 "Expected a fixed length vector operand!");
2562 SDValue Zero
= DAG
.getConstant(0, DL
, Subtarget
.getXLenVT());
2563 return DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, VT
, DAG
.getUNDEF(VT
), V
, Zero
);
2566 // Shrink V so it's just big enough to maintain a VT's worth of data.
2567 static SDValue
convertFromScalableVector(EVT VT
, SDValue V
, SelectionDAG
&DAG
,
2568 const RISCVSubtarget
&Subtarget
) {
2569 assert(VT
.isFixedLengthVector() &&
2570 "Expected to convert into a fixed length vector!");
2571 assert(V
.getValueType().isScalableVector() &&
2572 "Expected a scalable vector operand!");
2574 SDValue Zero
= DAG
.getConstant(0, DL
, Subtarget
.getXLenVT());
2575 return DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, VT
, V
, Zero
);
2578 /// Return the type of the mask type suitable for masking the provided
2579 /// vector type. This is simply an i1 element type vector of the same
2580 /// (possibly scalable) length.
2581 static MVT
getMaskTypeFor(MVT VecVT
) {
2582 assert(VecVT
.isVector());
2583 ElementCount EC
= VecVT
.getVectorElementCount();
2584 return MVT::getVectorVT(MVT::i1
, EC
);
2587 /// Creates an all ones mask suitable for masking a vector of type VecTy with
2588 /// vector length VL. .
2589 static SDValue
getAllOnesMask(MVT VecVT
, SDValue VL
, const SDLoc
&DL
,
2590 SelectionDAG
&DAG
) {
2591 MVT MaskVT
= getMaskTypeFor(VecVT
);
2592 return DAG
.getNode(RISCVISD::VMSET_VL
, DL
, MaskVT
, VL
);
2595 static SDValue
getVLOp(uint64_t NumElts
, MVT ContainerVT
, const SDLoc
&DL
,
2596 SelectionDAG
&DAG
, const RISCVSubtarget
&Subtarget
) {
2597 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2598 // canonicalize the representation. InsertVSETVLI will pick the immediate
2599 // encoding later if profitable.
2600 const auto [MinVLMAX
, MaxVLMAX
] =
2601 RISCVTargetLowering::computeVLMAXBounds(ContainerVT
, Subtarget
);
2602 if (MinVLMAX
== MaxVLMAX
&& NumElts
== MinVLMAX
)
2603 return DAG
.getRegister(RISCV::X0
, Subtarget
.getXLenVT());
2605 return DAG
.getConstant(NumElts
, DL
, Subtarget
.getXLenVT());
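// Example (hedged): if the exact VLEN is known, say min == max == 128, a
// v4i32 value lives in an nxv2i32 container whose VLMAX is 4, so an AVL of
// 4 is canonicalised to the X0 (VLMAX) form here, leaving InsertVSETVLI free
// to pick the VLMAX or immediate encoding later.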
2608 static std::pair
<SDValue
, SDValue
>
2609 getDefaultScalableVLOps(MVT VecVT
, const SDLoc
&DL
, SelectionDAG
&DAG
,
2610 const RISCVSubtarget
&Subtarget
) {
2611 assert(VecVT
.isScalableVector() && "Expecting a scalable vector");
2612 SDValue VL
= DAG
.getRegister(RISCV::X0
, Subtarget
.getXLenVT());
2613 SDValue Mask
= getAllOnesMask(VecVT
, VL
, DL
, DAG
);
2617 static std::pair
<SDValue
, SDValue
>
2618 getDefaultVLOps(uint64_t NumElts
, MVT ContainerVT
, const SDLoc
&DL
,
2619 SelectionDAG
&DAG
, const RISCVSubtarget
&Subtarget
) {
2620 assert(ContainerVT
.isScalableVector() && "Expecting scalable container type");
2621 SDValue VL
= getVLOp(NumElts
, ContainerVT
, DL
, DAG
, Subtarget
);
2622 SDValue Mask
= getAllOnesMask(ContainerVT
, VL
, DL
, DAG
);
2626 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2627 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2628 // the vector type that the fixed-length vector is contained in. Otherwise if
2629 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2630 static std::pair
<SDValue
, SDValue
>
2631 getDefaultVLOps(MVT VecVT
, MVT ContainerVT
, const SDLoc
&DL
, SelectionDAG
&DAG
,
2632 const RISCVSubtarget
&Subtarget
) {
2633 if (VecVT
.isFixedLengthVector())
2634 return getDefaultVLOps(VecVT
.getVectorNumElements(), ContainerVT
, DL
, DAG
,
2636 assert(ContainerVT
.isScalableVector() && "Expecting scalable container type");
2637 return getDefaultScalableVLOps(ContainerVT
, DL
, DAG
, Subtarget
);
2640 SDValue
RISCVTargetLowering::computeVLMax(MVT VecVT
, const SDLoc
&DL
,
2641 SelectionDAG
&DAG
) const {
2642 assert(VecVT
.isScalableVector() && "Expected scalable vector");
2643 return DAG
.getElementCount(DL
, Subtarget
.getXLenVT(),
2644 VecVT
.getVectorElementCount());
2647 std::pair
<unsigned, unsigned>
2648 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT
,
2649 const RISCVSubtarget
&Subtarget
) {
2650 assert(VecVT
.isScalableVector() && "Expected scalable vector");
2652 unsigned EltSize
= VecVT
.getScalarSizeInBits();
2653 unsigned MinSize
= VecVT
.getSizeInBits().getKnownMinValue();
2655 unsigned VectorBitsMax
= Subtarget
.getRealMaxVLen();
2657 RISCVTargetLowering::computeVLMAX(VectorBitsMax
, EltSize
, MinSize
);
2659 unsigned VectorBitsMin
= Subtarget
.getRealMinVLen();
2661 RISCVTargetLowering::computeVLMAX(VectorBitsMin
, EltSize
, MinSize
);
2663 return std::make_pair(MinVLMAX
, MaxVLMAX
);
2666 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2667 // of either is (currently) supported. This can get us into an infinite loop
2668 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2670 // Until either (or both) of these can reliably lower any node, reporting that
2671 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2672 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2673 // which is not desirable.
2674 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2675 EVT VT
, unsigned DefinedValues
) const {
2679 InstructionCost
RISCVTargetLowering::getLMULCost(MVT VT
) const {
2680 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2681 // implementation-defined.
2683 return InstructionCost::getInvalid();
2684 unsigned DLenFactor
= Subtarget
.getDLenFactor();
2686 if (VT
.isScalableVector()) {
2689 std::tie(LMul
, Fractional
) =
2690 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT
));
2692 Cost
= LMul
<= DLenFactor
? (DLenFactor
/ LMul
) : 1;
2694 Cost
= (LMul
* DLenFactor
);
2696 Cost
= divideCeil(VT
.getSizeInBits(), Subtarget
.getRealMinVLen() / DLenFactor
);
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vreg implied by LMUL. Note that
/// operand (index and possibly mask) are handled separately.
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
  return getLMULCost(VT) * getLMULCost(VT);
}

/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
  return getLMULCost(VT);
}

/// Return the cost of a vslidedown.vx or vslideup.vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
  return getLMULCost(VT);
}

/// Return the cost of a vslidedown.vi or vslideup.vi instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
  return getLMULCost(VT);
}
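// Example of the quadratic vs. linear costing (informal, assuming
// DLEN == VLEN so an LMUL=4 type has getLMULCost == 4): vrgather.vv at
// LMUL=4 is costed at 4 * 4 = 16, while a single slide or vrgather.vi at
// the same LMUL is costed at 4.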
2732 static SDValue
lowerFP_TO_INT_SAT(SDValue Op
, SelectionDAG
&DAG
,
2733 const RISCVSubtarget
&Subtarget
) {
2734 // RISC-V FP-to-int conversions saturate to the destination register size, but
2735 // don't produce 0 for nan. We can use a conversion instruction and fix the
2736 // nan case with a compare and a select.
2737 SDValue Src
= Op
.getOperand(0);
2739 MVT DstVT
= Op
.getSimpleValueType();
2740 EVT SatVT
= cast
<VTSDNode
>(Op
.getOperand(1))->getVT();
2742 bool IsSigned
= Op
.getOpcode() == ISD::FP_TO_SINT_SAT
;
2744 if (!DstVT
.isVector()) {
2745 // For bf16 or for f16 in absense of Zfh, promote to f32, then saturate
2747 if ((Src
.getValueType() == MVT::f16
&& !Subtarget
.hasStdExtZfhOrZhinx()) ||
2748 Src
.getValueType() == MVT::bf16
) {
2749 Src
= DAG
.getNode(ISD::FP_EXTEND
, SDLoc(Op
), MVT::f32
, Src
);
2754 Opc
= IsSigned
? RISCVISD::FCVT_X
: RISCVISD::FCVT_XU
;
2755 else if (DstVT
== MVT::i64
&& SatVT
== MVT::i32
)
2756 Opc
= IsSigned
? RISCVISD::FCVT_W_RV64
: RISCVISD::FCVT_WU_RV64
;
2759 // FIXME: Support other SatVTs by clamping before or after the conversion.
2762 SDValue FpToInt
= DAG
.getNode(
2763 Opc
, DL
, DstVT
, Src
,
2764 DAG
.getTargetConstant(RISCVFPRndMode::RTZ
, DL
, Subtarget
.getXLenVT()));
2766 if (Opc
== RISCVISD::FCVT_WU_RV64
)
2767 FpToInt
= DAG
.getZeroExtendInReg(FpToInt
, DL
, MVT::i32
);
2769 SDValue ZeroInt
= DAG
.getConstant(0, DL
, DstVT
);
2770 return DAG
.getSelectCC(DL
, Src
, Src
, ZeroInt
, FpToInt
,
2771 ISD::CondCode::SETUO
);
2776 MVT DstEltVT
= DstVT
.getVectorElementType();
2777 MVT SrcVT
= Src
.getSimpleValueType();
2778 MVT SrcEltVT
= SrcVT
.getVectorElementType();
2779 unsigned SrcEltSize
= SrcEltVT
.getSizeInBits();
2780 unsigned DstEltSize
= DstEltVT
.getSizeInBits();
2782 // Only handle saturating to the destination type.
2783 if (SatVT
!= DstEltVT
)
2786 // FIXME: Don't support narrowing by more than 1 steps for now.
2787 if (SrcEltSize
> (2 * DstEltSize
))
2790 MVT DstContainerVT
= DstVT
;
2791 MVT SrcContainerVT
= SrcVT
;
2792 if (DstVT
.isFixedLengthVector()) {
2793 DstContainerVT
= getContainerForFixedLengthVector(DAG
, DstVT
, Subtarget
);
2794 SrcContainerVT
= getContainerForFixedLengthVector(DAG
, SrcVT
, Subtarget
);
2795 assert(DstContainerVT
.getVectorElementCount() ==
2796 SrcContainerVT
.getVectorElementCount() &&
2797 "Expected same element count");
2798 Src
= convertToScalableVector(SrcContainerVT
, Src
, DAG
, Subtarget
);
2803 auto [Mask
, VL
] = getDefaultVLOps(DstVT
, DstContainerVT
, DL
, DAG
, Subtarget
);
2805 SDValue IsNan
= DAG
.getNode(RISCVISD::SETCC_VL
, DL
, Mask
.getValueType(),
2806 {Src
, Src
, DAG
.getCondCode(ISD::SETNE
),
2807 DAG
.getUNDEF(Mask
.getValueType()), Mask
, VL
});
2809 // Need to widen by more than 1 step, promote the FP type, then do a widening
2811 if (DstEltSize
> (2 * SrcEltSize
)) {
2812 assert(SrcContainerVT
.getVectorElementType() == MVT::f16
&& "Unexpected VT!");
2813 MVT InterVT
= SrcContainerVT
.changeVectorElementType(MVT::f32
);
2814 Src
= DAG
.getNode(RISCVISD::FP_EXTEND_VL
, DL
, InterVT
, Src
, Mask
, VL
);
2818 IsSigned
? RISCVISD::VFCVT_RTZ_X_F_VL
: RISCVISD::VFCVT_RTZ_XU_F_VL
;
2819 SDValue Res
= DAG
.getNode(RVVOpc
, DL
, DstContainerVT
, Src
, Mask
, VL
);
2821 SDValue SplatZero
= DAG
.getNode(
2822 RISCVISD::VMV_V_X_VL
, DL
, DstContainerVT
, DAG
.getUNDEF(DstContainerVT
),
2823 DAG
.getConstant(0, DL
, Subtarget
.getXLenVT()), VL
);
2824 Res
= DAG
.getNode(RISCVISD::VMERGE_VL
, DL
, DstContainerVT
, IsNan
, SplatZero
,
2825 Res
, DAG
.getUNDEF(DstContainerVT
), VL
);
2827 if (DstVT
.isFixedLengthVector())
2828 Res
= convertFromScalableVector(DstVT
, Res
, DAG
, Subtarget
);
2833 static RISCVFPRndMode::RoundingMode
matchRoundingOp(unsigned Opc
) {
2835 case ISD::FROUNDEVEN
:
2836 case ISD::STRICT_FROUNDEVEN
:
2837 case ISD::VP_FROUNDEVEN
:
2838 return RISCVFPRndMode::RNE
;
2840 case ISD::STRICT_FTRUNC
:
2841 case ISD::VP_FROUNDTOZERO
:
2842 return RISCVFPRndMode::RTZ
;
2844 case ISD::STRICT_FFLOOR
:
2845 case ISD::VP_FFLOOR
:
2846 return RISCVFPRndMode::RDN
;
2848 case ISD::STRICT_FCEIL
:
2850 return RISCVFPRndMode::RUP
;
2852 case ISD::STRICT_FROUND
:
2853 case ISD::VP_FROUND
:
2854 return RISCVFPRndMode::RMM
;
2856 return RISCVFPRndMode::DYN
;
2859 return RISCVFPRndMode::Invalid
;
2862 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2863 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2864 // the integer domain and back. Taking care to avoid converting values that are
2865 // nan or already correct.
2867 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op
, SelectionDAG
&DAG
,
2868 const RISCVSubtarget
&Subtarget
) {
2869 MVT VT
= Op
.getSimpleValueType();
2870 assert(VT
.isVector() && "Unexpected type");
2874 SDValue Src
= Op
.getOperand(0);
2876 MVT ContainerVT
= VT
;
2877 if (VT
.isFixedLengthVector()) {
2878 ContainerVT
= getContainerForFixedLengthVector(DAG
, VT
, Subtarget
);
2879 Src
= convertToScalableVector(ContainerVT
, Src
, DAG
, Subtarget
);
2883 if (Op
->isVPOpcode()) {
2884 Mask
= Op
.getOperand(1);
2885 if (VT
.isFixedLengthVector())
2886 Mask
= convertToScalableVector(getMaskTypeFor(ContainerVT
), Mask
, DAG
,
2888 VL
= Op
.getOperand(2);
2890 std::tie(Mask
, VL
) = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
2893 // Freeze the source since we are increasing the number of uses.
2894 Src
= DAG
.getFreeze(Src
);
2896 // We do the conversion on the absolute value and fix the sign at the end.
2897 SDValue Abs
= DAG
.getNode(RISCVISD::FABS_VL
, DL
, ContainerVT
, Src
, Mask
, VL
);
2899 // Determine the largest integer that can be represented exactly. This and
2900 // values larger than it don't have any fractional bits so don't need to
2902 const fltSemantics
&FltSem
= DAG
.EVTToAPFloatSemantics(ContainerVT
);
2903 unsigned Precision
= APFloat::semanticsPrecision(FltSem
);
2904 APFloat MaxVal
= APFloat(FltSem
);
2905 MaxVal
.convertFromAPInt(APInt::getOneBitSet(Precision
, Precision
- 1),
2906 /*IsSigned*/ false, APFloat::rmNearestTiesToEven
);
2907 SDValue MaxValNode
=
2908 DAG
.getConstantFP(MaxVal
, DL
, ContainerVT
.getVectorElementType());
2909 SDValue MaxValSplat
= DAG
.getNode(RISCVISD::VFMV_V_F_VL
, DL
, ContainerVT
,
2910 DAG
.getUNDEF(ContainerVT
), MaxValNode
, VL
);
2912 // If abs(Src) was larger than MaxVal or nan, keep it.
2913 MVT SetccVT
= MVT::getVectorVT(MVT::i1
, ContainerVT
.getVectorElementCount());
2915 DAG
.getNode(RISCVISD::SETCC_VL
, DL
, SetccVT
,
2916 {Abs
, MaxValSplat
, DAG
.getCondCode(ISD::SETOLT
),
2919 // Truncate to integer and convert back to FP.
2920 MVT IntVT
= ContainerVT
.changeVectorElementTypeToInteger();
2921 MVT XLenVT
= Subtarget
.getXLenVT();
2924 switch (Op
.getOpcode()) {
2926 llvm_unreachable("Unexpected opcode");
2930 case ISD::VP_FFLOOR
:
2932 case ISD::FROUNDEVEN
:
2933 case ISD::VP_FROUND
:
2934 case ISD::VP_FROUNDEVEN
:
2935 case ISD::VP_FROUNDTOZERO
: {
2936 RISCVFPRndMode::RoundingMode FRM
= matchRoundingOp(Op
.getOpcode());
2937 assert(FRM
!= RISCVFPRndMode::Invalid
);
2938 Truncated
= DAG
.getNode(RISCVISD::VFCVT_RM_X_F_VL
, DL
, IntVT
, Src
, Mask
,
2939 DAG
.getTargetConstant(FRM
, DL
, XLenVT
), VL
);
2943 Truncated
= DAG
.getNode(RISCVISD::VFCVT_RTZ_X_F_VL
, DL
, IntVT
, Src
,
2948 Truncated
= DAG
.getNode(RISCVISD::VFCVT_X_F_VL
, DL
, IntVT
, Src
, Mask
, VL
);
2950 case ISD::FNEARBYINT
:
2951 case ISD::VP_FNEARBYINT
:
2952 Truncated
= DAG
.getNode(RISCVISD::VFROUND_NOEXCEPT_VL
, DL
, ContainerVT
, Src
,
2957 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2958 if (Truncated
.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL
)
2959 Truncated
= DAG
.getNode(RISCVISD::SINT_TO_FP_VL
, DL
, ContainerVT
, Truncated
,
2962 // Restore the original sign so that -0.0 is preserved.
2963 Truncated
= DAG
.getNode(RISCVISD::FCOPYSIGN_VL
, DL
, ContainerVT
, Truncated
,
2964 Src
, Src
, Mask
, VL
);
2966 if (!VT
.isFixedLengthVector())
2969 return convertFromScalableVector(VT
, Truncated
, DAG
, Subtarget
);
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
// qNaN and converting the new source to integer and back to FP.
2976 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op
, SelectionDAG
&DAG
,
2977 const RISCVSubtarget
&Subtarget
) {
2979 MVT VT
= Op
.getSimpleValueType();
2980 SDValue Chain
= Op
.getOperand(0);
2981 SDValue Src
= Op
.getOperand(1);
2983 MVT ContainerVT
= VT
;
2984 if (VT
.isFixedLengthVector()) {
2985 ContainerVT
= getContainerForFixedLengthVector(DAG
, VT
, Subtarget
);
2986 Src
= convertToScalableVector(ContainerVT
, Src
, DAG
, Subtarget
);
2989 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
2991 // Freeze the source since we are increasing the number of uses.
2992 Src
= DAG
.getFreeze(Src
);
2994 // Covert sNan to qNan by executing x + x for all unordered elemenet x in Src.
2995 MVT MaskVT
= Mask
.getSimpleValueType();
2996 SDValue Unorder
= DAG
.getNode(RISCVISD::STRICT_FSETCC_VL
, DL
,
2997 DAG
.getVTList(MaskVT
, MVT::Other
),
2998 {Chain
, Src
, Src
, DAG
.getCondCode(ISD::SETUNE
),
2999 DAG
.getUNDEF(MaskVT
), Mask
, VL
});
3000 Chain
= Unorder
.getValue(1);
3001 Src
= DAG
.getNode(RISCVISD::STRICT_FADD_VL
, DL
,
3002 DAG
.getVTList(ContainerVT
, MVT::Other
),
3003 {Chain
, Src
, Src
, DAG
.getUNDEF(ContainerVT
), Unorder
, VL
});
3004 Chain
= Src
.getValue(1);
3006 // We do the conversion on the absolute value and fix the sign at the end.
3007 SDValue Abs
= DAG
.getNode(RISCVISD::FABS_VL
, DL
, ContainerVT
, Src
, Mask
, VL
);
3009 // Determine the largest integer that can be represented exactly. This and
3010 // values larger than it don't have any fractional bits so don't need to
3012 const fltSemantics
&FltSem
= DAG
.EVTToAPFloatSemantics(ContainerVT
);
3013 unsigned Precision
= APFloat::semanticsPrecision(FltSem
);
3014 APFloat MaxVal
= APFloat(FltSem
);
3015 MaxVal
.convertFromAPInt(APInt::getOneBitSet(Precision
, Precision
- 1),
3016 /*IsSigned*/ false, APFloat::rmNearestTiesToEven
);
3017 SDValue MaxValNode
=
3018 DAG
.getConstantFP(MaxVal
, DL
, ContainerVT
.getVectorElementType());
3019 SDValue MaxValSplat
= DAG
.getNode(RISCVISD::VFMV_V_F_VL
, DL
, ContainerVT
,
3020 DAG
.getUNDEF(ContainerVT
), MaxValNode
, VL
);
3022 // If abs(Src) was larger than MaxVal or nan, keep it.
3024 RISCVISD::SETCC_VL
, DL
, MaskVT
,
3025 {Abs
, MaxValSplat
, DAG
.getCondCode(ISD::SETOLT
), Mask
, Mask
, VL
});
3027 // Truncate to integer and convert back to FP.
3028 MVT IntVT
= ContainerVT
.changeVectorElementTypeToInteger();
3029 MVT XLenVT
= Subtarget
.getXLenVT();
3032 switch (Op
.getOpcode()) {
3034 llvm_unreachable("Unexpected opcode");
3035 case ISD::STRICT_FCEIL
:
3036 case ISD::STRICT_FFLOOR
:
3037 case ISD::STRICT_FROUND
:
3038 case ISD::STRICT_FROUNDEVEN
: {
3039 RISCVFPRndMode::RoundingMode FRM
= matchRoundingOp(Op
.getOpcode());
3040 assert(FRM
!= RISCVFPRndMode::Invalid
);
3041 Truncated
= DAG
.getNode(
3042 RISCVISD::STRICT_VFCVT_RM_X_F_VL
, DL
, DAG
.getVTList(IntVT
, MVT::Other
),
3043 {Chain
, Src
, Mask
, DAG
.getTargetConstant(FRM
, DL
, XLenVT
), VL
});
3046 case ISD::STRICT_FTRUNC
:
3048 DAG
.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL
, DL
,
3049 DAG
.getVTList(IntVT
, MVT::Other
), Chain
, Src
, Mask
, VL
);
3051 case ISD::STRICT_FNEARBYINT
:
3052 Truncated
= DAG
.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL
, DL
,
3053 DAG
.getVTList(ContainerVT
, MVT::Other
), Chain
, Src
,
3057 Chain
= Truncated
.getValue(1);
3059 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3060 if (Op
.getOpcode() != ISD::STRICT_FNEARBYINT
) {
3061 Truncated
= DAG
.getNode(RISCVISD::STRICT_SINT_TO_FP_VL
, DL
,
3062 DAG
.getVTList(ContainerVT
, MVT::Other
), Chain
,
3063 Truncated
, Mask
, VL
);
3064 Chain
= Truncated
.getValue(1);
3067 // Restore the original sign so that -0.0 is preserved.
3068 Truncated
= DAG
.getNode(RISCVISD::FCOPYSIGN_VL
, DL
, ContainerVT
, Truncated
,
3069 Src
, Src
, Mask
, VL
);
3071 if (VT
.isFixedLengthVector())
3072 Truncated
= convertFromScalableVector(VT
, Truncated
, DAG
, Subtarget
);
3073 return DAG
.getMergeValues({Truncated
, Chain
}, DL
);
3077 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op
, SelectionDAG
&DAG
,
3078 const RISCVSubtarget
&Subtarget
) {
3079 MVT VT
= Op
.getSimpleValueType();
3081 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op
, DAG
, Subtarget
);
3083 if (DAG
.shouldOptForSize())
3087 SDValue Src
= Op
.getOperand(0);
3089 // Create an integer the size of the mantissa with the MSB set. This and all
3090 // values larger than it don't have any fractional bits so don't need to be
3092 const fltSemantics
&FltSem
= DAG
.EVTToAPFloatSemantics(VT
);
3093 unsigned Precision
= APFloat::semanticsPrecision(FltSem
);
3094 APFloat MaxVal
= APFloat(FltSem
);
3095 MaxVal
.convertFromAPInt(APInt::getOneBitSet(Precision
, Precision
- 1),
3096 /*IsSigned*/ false, APFloat::rmNearestTiesToEven
);
3097 SDValue MaxValNode
= DAG
.getConstantFP(MaxVal
, DL
, VT
);
3099 RISCVFPRndMode::RoundingMode FRM
= matchRoundingOp(Op
.getOpcode());
3100 return DAG
.getNode(RISCVISD::FROUND
, DL
, VT
, Src
, MaxValNode
,
3101 DAG
.getTargetConstant(FRM
, DL
, Subtarget
.getXLenVT()));
3104 // Expand vector LRINT and LLRINT by converting to the integer domain.
3105 static SDValue
lowerVectorXRINT(SDValue Op
, SelectionDAG
&DAG
,
3106 const RISCVSubtarget
&Subtarget
) {
3107 MVT VT
= Op
.getSimpleValueType();
3108 assert(VT
.isVector() && "Unexpected type");
3111 SDValue Src
= Op
.getOperand(0);
3112 MVT ContainerVT
= VT
;
3114 if (VT
.isFixedLengthVector()) {
3115 ContainerVT
= getContainerForFixedLengthVector(DAG
, VT
, Subtarget
);
3116 Src
= convertToScalableVector(ContainerVT
, Src
, DAG
, Subtarget
);
3119 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
3121 DAG
.getNode(RISCVISD::VFCVT_X_F_VL
, DL
, ContainerVT
, Src
, Mask
, VL
);
3123 if (!VT
.isFixedLengthVector())
3126 return convertFromScalableVector(VT
, Truncated
, DAG
, Subtarget
);
3130 getVSlidedown(SelectionDAG
&DAG
, const RISCVSubtarget
&Subtarget
,
3131 const SDLoc
&DL
, EVT VT
, SDValue Merge
, SDValue Op
,
3132 SDValue Offset
, SDValue Mask
, SDValue VL
,
3133 unsigned Policy
= RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED
) {
3134 if (Merge
.isUndef())
3135 Policy
= RISCVII::TAIL_AGNOSTIC
| RISCVII::MASK_AGNOSTIC
;
3136 SDValue PolicyOp
= DAG
.getTargetConstant(Policy
, DL
, Subtarget
.getXLenVT());
3137 SDValue Ops
[] = {Merge
, Op
, Offset
, Mask
, VL
, PolicyOp
};
3138 return DAG
.getNode(RISCVISD::VSLIDEDOWN_VL
, DL
, VT
, Ops
);
3142 getVSlideup(SelectionDAG
&DAG
, const RISCVSubtarget
&Subtarget
, const SDLoc
&DL
,
3143 EVT VT
, SDValue Merge
, SDValue Op
, SDValue Offset
, SDValue Mask
,
3145 unsigned Policy
= RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED
) {
3146 if (Merge
.isUndef())
3147 Policy
= RISCVII::TAIL_AGNOSTIC
| RISCVII::MASK_AGNOSTIC
;
3148 SDValue PolicyOp
= DAG
.getTargetConstant(Policy
, DL
, Subtarget
.getXLenVT());
3149 SDValue Ops
[] = {Merge
, Op
, Offset
, Mask
, VL
, PolicyOp
};
3150 return DAG
.getNode(RISCVISD::VSLIDEUP_VL
, DL
, VT
, Ops
);
3153 static MVT
getLMUL1VT(MVT VT
) {
3154 assert(VT
.getVectorElementType().getSizeInBits() <= 64 &&
3155 "Unexpected vector MVT");
3156 return MVT::getScalableVectorVT(
3157 VT
.getVectorElementType(),
3158 RISCV::RVVBitsPerBlock
/ VT
.getVectorElementType().getSizeInBits());
3161 struct VIDSequence
{
3162 int64_t StepNumerator
;
3163 unsigned StepDenominator
;
3167 static std::optional
<uint64_t> getExactInteger(const APFloat
&APF
,
3168 uint32_t BitWidth
) {
3169 APSInt
ValInt(BitWidth
, !APF
.isNegative());
3170 // We use an arbitrary rounding mode here. If a floating-point is an exact
3171 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3172 // the rounding mode changes the output value, then it is not an exact
3174 RoundingMode ArbitraryRM
= RoundingMode::TowardZero
;
3176 // If it is out of signed integer range, it will return an invalid operation.
3177 // If it is not an exact integer, IsExact is false.
3178 if ((APF
.convertToInteger(ValInt
, ArbitraryRM
, &IsExact
) ==
3179 APFloatBase::opInvalidOp
) ||
3181 return std::nullopt
;
3182 return ValInt
.extractBitsAsZExtValue(BitWidth
, 0);
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
                                                      unsigned EltSizeInBits) {
  unsigned NumElts = Op.getNumOperands();
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
  assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.
    if (Op.getOperand(Idx).isUndef())
      continue;

    uint64_t Val;
    if (IsInteger) {
      // The BUILD_VECTOR must be all constants.
      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
    } else {
      // The BUILD_VECTOR must be all constants.
      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      if (auto ExactInteger = getExactInteger(
              cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
              Op.getScalarValueSizeInBits()))
        Val = *ExactInteger;
      else
        return std::nullopt;
    }

    if (PrevElt) {
      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.
      if (ValDiff == 0)
        continue;

      int64_t Remainder = ValDiff % IdxDiff;
      // Normalize the step if it's greater than 1.
      if (Remainder != ValDiff) {
        // The difference must cleanly divide the element span.
        if (Remainder != 0)
          return std::nullopt;
        ValDiff /= IdxDiff;
        IdxDiff = 1;
      }

      if (!SeqStepNum)
        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return std::nullopt;

      if (!SeqStepDenom)
        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return std::nullopt;
    }

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }

  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)
    return std::nullopt;

  // Loop back through the sequence and validate elements we might have skipped
  // while waiting for a valid step. While doing this, log any sequence addend.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      continue;
    uint64_t Val;
    if (IsInteger) {
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
    } else {
      Val = *getExactInteger(
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
          Op.getScalarValueSizeInBits());
    }
    uint64_t ExpectedVal =
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    if (!SeqAddend)
      SeqAddend = Addend;
    else if (Addend != SeqAddend)
      return std::nullopt;
  }

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
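// For example, the BUILD_VECTOR <i32 1, i32 3, i32 5, i32 7> is recognised as
// VIDSequence{/*StepNumerator=*/2, /*StepDenominator=*/1, /*Addend=*/1}, and
// <i32 0, i32 0, i32 1, i32 1> as VIDSequence{1, 2, 0}.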
// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
// and lower it as a VRGATHER_VX_VL from the source vector.
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = SplatVal.getOperand(0);
  // Only perform this optimization on vectors of the same size for simplicity.
  // Don't perform this optimization for i1 vectors.
  // FIXME: Support i1 vectors, maybe by promoting to i8?
  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
    return SDValue();
  SDValue Idx = SplatVal.getOperand(1);
  // The index must be a legal type.
  if (Idx.getValueType() != Subtarget.getXLenVT())
    return SDValue();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
                               Idx, DAG.getUNDEF(ContainerVT), Mask, VL);

  if (!VT.isFixedLengthVector())
    return Gather;

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
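// For example, a BUILD_VECTOR that splats (extract_vector_elt %v, %idx), where
// %v has the same vector type as the result, becomes a single
// RISCVISD::VRGATHER_VX_VL of %v indexed by %idx.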
/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
/// INSERT_VECTOR_ELTs. Returns a null SDValue if not profitable.
/// Note that this includes vectors of 2 elements by association. The
/// upper-most element is the "dominant" one, allowing us to use a splat to
/// "insert" the upper element, and an insert of the lower element at position
/// 0, which improves codegen.
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
                                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  SDValue DominantValue;
  unsigned MostCommonCount = 0;
  DenseMap<SDValue, unsigned> ValueCounts;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  // Track the number of scalar loads we know we'd be inserting, estimated as
  // any non-zero floating-point constant. Other kinds of element are either
  // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materializations and
  // vector-insertion instructions is not known.
  unsigned NumScalarLoads = 0;

  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];
    if (0 == Count)
      if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
        NumScalarLoads += !CFP->isExactlyValue(+0.0);

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    // end.
    if (++Count >= MostCommonCount) {
      DominantValue = V;
      MostCommonCount = Count;
    }
  }

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
      (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
      ((MostCommonCount > DominantValueCountThreshold) ||
       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
    // Start by splatting the most common element.
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    DenseSet<SDValue> Processed{DominantValue};

    // We can handle an insert into the last element (of a splat) via
    // v(f)slide1down. This is slightly better than the vslideup insert
    // lowering as it avoids the need for a vector group temporary. It
    // is also better than using vmerge.vx as it avoids the need to
    // materialize the mask in a vector register.
    if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
        !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
        LastOp != DominantValue) {
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      auto OpCode =
        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
      if (!VT.isFloatingPoint())
        LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
      Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                        LastOp, Mask, VL);
      Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
      Processed.insert(LastOp);
    }

    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
        continue;
      if (ValueCounts[V] == 1) {
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
      } else {
        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
        // we're after.
        SmallVector<SDValue> Ops;
        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
        });
        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
                          DAG.getBuildVector(SelMaskTy, DL, Ops),
                          DAG.getSplatBuildVector(VT, DL, V), Vec);
      }
    }

    return Vec;
  }

  return SDValue();
}
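// For example, a v4f32 BUILD_VECTOR <x, x, x, y> of non-constant values is
// handled above as a splat of x followed by a single vfslide1down that brings
// y into the last element, instead of one insert per element.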
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // bits and XLEN.
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
    // If we have to use more than one INSERT_VECTOR_ELT then this
    // optimization is likely to increase code size; avoid performing it in
    // such a case. We can use a load from a constant pool in this case.
    if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
      return SDValue();
    // Now we can create our integer vector type. Note that it may be larger
    // than the resulting mask type: v4i1 would use v1i8 as its integer type.
    unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
    MVT IntegerViaVecVT =
        MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                         IntegerViaVecElts);

    uint64_t Bits = 0;
    unsigned BitPos = 0, IntegerEltIdx = 0;
    SmallVector<SDValue, 8> Elts(IntegerViaVecElts);

    for (unsigned I = 0; I < NumElts;) {
      SDValue V = Op.getOperand(I);
      bool BitValue = !V.isUndef() && V->getAsZExtVal();
      Bits |= ((uint64_t)BitValue << BitPos);
      ++BitPos;
      ++I;

      // Once we accumulate enough bits to fill our scalar type or process the
      // last element, insert into our vector and clear our accumulated data.
      if (I % NumViaIntegerBits == 0 || I == NumElts) {
        if (NumViaIntegerBits <= 32)
          Bits = SignExtend64<32>(Bits);
        SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
        Elts[IntegerEltIdx] = Elt;
        Bits = 0;
        BitPos = 0;
        IntegerEltIdx++;
      }
    }

    SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);

    if (NumElts < NumViaIntegerBits) {
      // If we're producing a smaller vector than our minimum legal integer
      // type, bitcast to the equivalent (known-legal) mask type, and extract
      // our final mask.
      assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
      Vec = DAG.getBitcast(MVT::v8i1, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    } else {
      // Else we must have produced an integer type with the same size as the
      // mask type; bitcast for the final result.
      assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
      Vec = DAG.getBitcast(VT, Vec);
    }

    return Vec;
  }
  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match index sequences, which we can lower to the vid instruction
  // with optional modifications. An all-undef vector is matched by
  // getSplatValue, above.
  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;

    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;
    unsigned StepOpcode = ISD::MUL;
    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
    // anyway as the shift of 63 won't fit in uimm5.
    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
        isPowerOf2_64(std::abs(StepNumerator))) {
      Negate = StepNumerator < 0;
      StepOpcode = ISD::SHL;
      SplatStepVal = Log2_64(std::abs(StepNumerator));
    }

    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
    // threshold since it's the immediate value many RVV instructions accept.
    // There is no vmul.vi instruction so ensure multiply constant can fit in
    // a single addi instruction.
    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        isPowerOf2_32(StepDenominator) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
      MVT VIDVT =
          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
      MVT VIDContainerVT =
          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
      // Convert right out of the scalable type so we can use standard ISD
      // nodes for the rest of the computation. If we used scalable types with
      // these, we'd lose the fixed-length vector info and generate worse
      // vsetvli code.
      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
        SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
      }
      if (StepDenominator != 1) {
        SDValue SplatStep =
            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
      }
      if (Addend != 0 || Negate) {
        SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
                          VID);
      }
      if (VT.isFloatingPoint()) {
        // TODO: Use vfwcvt to reduce register pressure.
        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
      }
      return VID;
    }
  }
  // For very small build_vectors, use a single scalar insert of a constant.
  // TODO: Base this on constant rematerialization cost, not size.
  const unsigned EltBitSize = VT.getScalarSizeInBits();
  if (VT.getSizeInBits() <= 32 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
           "Unexpected sequence type");
    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value at this larger vector type.
    for (const auto &OpIdx : enumerate(Op->op_values())) {
      const auto &SeqV = OpIdx.value();
      if (!SeqV.isUndef())
        SplatValue |=
            ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
                              DAG.getUNDEF(ViaVecVT),
                              DAG.getConstant(SplatValue, DL, XLenVT),
                              DAG.getConstant(0, DL, XLenVT));
    if (ViaVecLen != 1)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                        MVT::getVectorVT(ViaIntVT, 1), Vec,
                        DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(VT, Vec);
  }
  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could be instead splat as
  //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
    unsigned SeqLen = Sequence.size();
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    const unsigned RequiredVL = NumElts / SeqLen;
    const unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
        NumElts : RequiredVL;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |=
            ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value. That
    // way we can use RVV instructions to splat.
    assert((ViaIntVT.bitsLE(XLenVT) ||
            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      SDValue ViaVL =
          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
      MVT ViaContainerVT =
          getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
      SDValue Splat =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
                      DAG.getUNDEF(ViaContainerVT),
                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
      if (ViaVecLen != RequiredVL)
        Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                            MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
                            DAG.getConstant(0, DL, XLenVT));
      return DAG.getBitcast(VT, Splat);
    }
  }
  // If the number of signbits allows, see if we can lower as a <N x i8>.
  // Our main goal here is to reduce LMUL (and thus work) required to
  // build the constant, but we will also narrow if the resulting
  // narrow vector is known to materialize cheaply.
  // TODO: We really should be costing the smaller vector. There are
  // profitable cases this misses.
  if (EltBitSize > 8 && VT.isInteger() &&
      (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
    unsigned SignBits = DAG.ComputeNumSignBits(Op);
    if (EltBitSize - SignBits < 8) {
      SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
                                          DL, Op->ops());
      Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
                                       Source, DAG, Subtarget);
      SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
      return convertFromScalableVector(VT, Res, DAG, Subtarget);
    }
  }

  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
    return Res;

  // For constant vectors, use generic constant pool lowering. Otherwise,
  // we'd have to materialize constants in GPRs just to move them into the
  // vector.
  return SDValue();
}
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();

  if (VT.getVectorElementType() == MVT::i1) {
    // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
    // vector type, we have a legal equivalently-sized i8 type, so we can use
    // that.
    MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
    SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);

    SDValue WideVec;
    if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
      // For a splat, perform a scalar truncate before creating the wider
      // vector.
      Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
                          DAG.getConstant(1, DL, Splat.getValueType()));
      WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
    } else {
      SmallVector<SDValue, 8> Ops(Op->op_values());
      WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
      SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
      WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
    }

    return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
      return Gather;
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }
  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
    return Res;

  // If we're compiling for an exact VLEN value, we can split our work per
  // register in the register group.
  const unsigned MinVLen = Subtarget.getRealMinVLen();
  const unsigned MaxVLen = Subtarget.getRealMaxVLen();
  if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
    MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
    assert(M1VT == getLMUL1VT(M1VT));

    // The following semantically builds up a fixed length concat_vector
    // of the component build_vectors. We eagerly lower to scalable and
    // insert_subvector here to avoid DAG combining it back to a large
    // build_vector.
    SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
    unsigned NumOpElts = M1VT.getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(ContainerVT);
    for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
      auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
      SDValue SubBV =
          DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
      SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
      unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
                        DAG.getVectorIdxConstant(InsertIdx, DL));
    }
    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
  }
  // Cap the cost at a value linear to the number of elements in the vector.
  // The default lowering is to use the stack. The vector store + scalar loads
  // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
  // being (at least) linear in LMUL. As a result, using the vslidedown
  // lowering for every element ends up being VL*LMUL.
  // TODO: Should we be directly costing the stack alternative? Doing so might
  // give us a more accurate upper bound.
  InstructionCost LinearBudget = VT.getVectorNumElements() * 2;

  // TODO: unify with TTI getSlideCost.
  InstructionCost PerSlideCost = 1;
  switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
  default: break;
  case RISCVII::VLMUL::LMUL_2:
    PerSlideCost = 2;
    break;
  case RISCVII::VLMUL::LMUL_4:
    PerSlideCost = 4;
    break;
  case RISCVII::VLMUL::LMUL_8:
    PerSlideCost = 8;
    break;
  }

  // TODO: Should we be using the build instseq then cost + evaluate scheme
  // we use for integer constants here?
  unsigned UndefCount = 0;
  for (const SDValue &V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }
    if (UndefCount) {
      LinearBudget -= PerSlideCost;
      UndefCount = 0;
    }
    LinearBudget -= PerSlideCost;
  }
  if (UndefCount)
    LinearBudget -= PerSlideCost;

  if (LinearBudget < 0)
    return SDValue();
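  // For example, a fully-defined 16-element vector has a budget of 32: at
  // LMUL_2 (PerSlideCost == 2) the sixteen slides exactly fit, while at
  // LMUL_4 or LMUL_8 the budget goes negative and we fall back to the
  // default (stack-based) lowering.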
  assert((!VT.isFloatingPoint() ||
          VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
         "Illegal type which will result in reserved encoding");

  const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;

  SDValue Vec;
  UndefCount = 0;
  for (SDValue V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }

    // Start our sequence with a TA splat in the hopes that hardware is able to
    // recognize there's no dependency on the prior value of our temporary
    // register.
    if (!Vec) {
      Vec = DAG.getSplatVector(VT, DL, V);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      UndefCount = 0;
      continue;
    }

    if (UndefCount) {
      const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                          Vec, Offset, Mask, VL, Policy);
      UndefCount = 0;
    }
    auto OpCode =
        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
    if (!VT.isFloatingPoint())
      V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
    Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                      V, Mask, VL);
  }
  if (UndefCount) {
    const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                        Vec, Offset, Mask, VL, Policy);
  }
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Lo, SDValue Hi, SDValue VL,
                                   SelectionDAG &DAG) {
  if (!Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

    // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
    // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
    // vlmax vsetvli or vsetivli to change the VL.
    // FIXME: Support larger constants?
    // FIXME: Support non-constant VLs by saturating?
    if (LoC == HiC) {
      SDValue NewVL;
      if (isAllOnesConstant(VL) ||
          (isa<RegisterSDNode>(VL) &&
           cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
      else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);

      if (NewVL) {
        MVT InterVT =
            MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
        auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
                                    DAG.getUNDEF(InterVT), Lo,
                                    DAG.getRegister(RISCV::X0, MVT::i32));
        return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
      }
    }
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
  // even if it might be sign extended.
  if (Hi.isUndef())
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // Fall back to a stack store and stride x0 vector load.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
                     Hi, VL);
}
// Called by type legalization to handle splat of i64 on RV32.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Scalar, SDValue VL,
                                   SelectionDAG &DAG) {
  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
}
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
                                MVT VT, const SDLoc &DL, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);

  MVT XLenVT = Subtarget.getXLenVT();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (Scalar.getValueType().bitsLE(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
  }

  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
         "Unexpected scalar for splat lowering!");

  if (isOneConstant(VL) && isNullConstant(Scalar))
    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
                       DAG.getConstant(0, DL, XLenVT), VL);

  // Otherwise use the more complicated splatting algorithm.
  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
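// For example, lowerScalarSplat of the constant 3 into an i32-element vector
// sign-extends the scalar to XLenVT first, so isel can still form a vmv.v.i
// with a simm5 immediate instead of materializing 3 in a GPR.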
// This function lowers an insert of a scalar operand Scalar into lane
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
// to be non-zero.
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                                 const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() && "Expect VT is scalable vector type.");

  const MVT XLenVT = Subtarget.getXLenVT();
  SDValue Passthru = DAG.getUNDEF(VT);

  if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isNullConstant(Scalar.getOperand(1))) {
    SDValue ExtractedVal = Scalar.getOperand(0);
    // The element types must be the same.
    if (ExtractedVal.getValueType().getVectorElementType() ==
        VT.getVectorElementType()) {
      MVT ExtractedVT = ExtractedVal.getSimpleValueType();
      MVT ExtractedContainerVT = ExtractedVT;
      if (ExtractedContainerVT.isFixedLengthVector()) {
        ExtractedContainerVT = getContainerForFixedLengthVector(
            DAG, ExtractedContainerVT, Subtarget);
        ExtractedVal = convertToScalableVector(ExtractedContainerVT,
                                               ExtractedVal, DAG, Subtarget);
      }
      if (ExtractedContainerVT.bitsLE(VT))
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
                           ExtractedVal, DAG.getConstant(0, DL, XLenVT));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
                         DAG.getConstant(0, DL, XLenVT));
    }
  }

  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
                       DAG.getUNDEF(VT), Scalar, VL);

  // Avoid the tricky legalization cases by falling back to using the
  // splat code which already handles it gracefully.
  if (!Scalar.getValueType().bitsLE(XLenVT))
    return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
                            DAG.getConstant(1, DL, XLenVT),
                            VT, DL, DAG, Subtarget);

  // If the operand is a constant, sign extend to increase our chances
  // of being able to use a .vi instruction. ANY_EXTEND would become a
  // zero extend and the simm5 check in isel would fail.
  // FIXME: Should we ignore the upper bits in isel instead?
  unsigned ExtOpc =
      isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
                     DAG.getUNDEF(VT), Scalar, VL);
}
// Is this a shuffle that extracts either the even or the odd elements of a
// vector? That is, specifically, either (a) or (b) below.
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns {Src Vector, Even Elements} on success
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const RISCVSubtarget &Subtarget) {
  // Need to be able to widen the vector.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  // Both input must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return false;

  // Src needs to have twice the number of elements.
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
    return false;

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != Mask.size())
    return false;

  // First index must be the first even or odd element from V1.
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;

  // The others must increase by 2 each time.
  // TODO: Support undef elements?
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;

  return true;
}
/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements. \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element. These can be the first element in a source or the element half
/// way through the source.
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                int &OddSrc, const RISCVSubtarget &Subtarget) {
  // We need to be able to widen elements to the next larger integer type.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  int Size = Mask.size();
  int NumElts = VT.getVectorNumElements();
  assert(Size == (int)NumElts && "Unexpected mask size");

  SmallVector<unsigned, 2> StartIndexes;
  if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
    return false;

  EvenSrc = StartIndexes[0];
  OddSrc = StartIndexes[1];

  // One source should be low half of first vector.
  if (EvenSrc != 0 && OddSrc != 0)
    return false;

  // Subvectors will be extracted from either the start of the two input
  // vectors, or at the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts will be extracted.
  // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
  // we'll create an illegal extract_subvector.
  // FIXME: We could support other values using a slidedown first.
  int HalfNumElts = NumElts / 2;
  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}
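// For example, the v8i8 mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low
// half of the first source with the low half of the second, giving
// EvenSrc == 0 and OddSrc == 8.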
/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.
/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
/// returned rotation amount is for a rotate right, where elements move from
/// higher elements to lower elements. \p LoSrc indicates the first source
/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
/// 0 or 1 if a rotation is found.
///
/// NOTE: We talk about rotate to the right which matches how bit shift and
/// rotate instructions are described where LSBs are on the right, but LLVM IR
/// and the table below write vectors with the lowest elements on the left.
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
  int Size = Mask.size();

  // We need to detect various ways of spelling a rotation:
  //   [11, 12, 13, 14, 15,  0,  1,  2]
  //   [-1, 12, 13, 14, -1, -1,  1, -1]
  //   [-1, -1, -1, -1, -1, -1,  1,  2]
  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
  //   [-1,  4,  5,  6, -1, -1,  9, -1]
  //   [-1,  4,  5,  6, -1, -1, -1, -1]
  int Rotation = 0;
  LoSrc = -1;
  HiSrc = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;

    // Determine where a rotate vector would have started.
    int StartIdx = i - (M % Size);
    // The identity rotation isn't interesting, stop.
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      // The rotations don't match, so we can't match this mask.
      return -1;

    // Compute which value this mask is pointing at.
    int MaskSrc = M < Size ? 0 : 1;

    // Compute which of the two target values this index should be assigned to.
    // This reflects whether the high elements are remaining or the low elements
    // are remaining.
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (TargetSrc < 0)
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
      // This may be a rotation, but it pulls from the inputs in some
      // unsupported interleaving.
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((LoSrc >= 0 || HiSrc >= 0) &&
         "Failed to find a rotated input vector!");

  return Rotation;
}
// Lower a deinterleave shuffle to vnsrl.
//   [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
//                            -> [p, q, r, s] (EvenElts == false)
// VT is the type of the vector to return, <[vscale x ]n x ty>
// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
                                       bool EvenElts,
                                       const RISCVSubtarget &Subtarget,
                                       SelectionDAG &DAG) {
  // The result is a vector of type <m x n x ty>
  MVT ContainerVT = VT;
  // Convert fixed vectors to scalable if needed
  if (ContainerVT.isFixedLengthVector()) {
    assert(Src.getSimpleValueType().isFixedLengthVector());
    ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);

    // The source is a vector of type <m x n*2 x ty>
    MVT SrcContainerVT =
        MVT::getVectorVT(ContainerVT.getVectorElementType(),
                         ContainerVT.getVectorElementCount() * 2);
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
  // This also converts FP to int.
  unsigned EltBits = ContainerVT.getScalarSizeInBits();
  MVT WideSrcContainerVT = MVT::getVectorVT(
      MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
  Src = DAG.getBitcast(WideSrcContainerVT, Src);

  // The integer version of the container type.
  MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();

  // If we want even elements, then the shift amount is 0. Otherwise, shift by
  // the original element size.
  unsigned Shift = EvenElts ? 0 : EltBits;
  SDValue SplatShift = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
      DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
  SDValue Res =
      DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
                  DAG.getUNDEF(IntContainerVT), TrueMask, VL);
  // Cast back to FP if needed.
  Res = DAG.getBitcast(ContainerVT, Res);

  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}
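// For example, deinterleaving a v8i16 source into a v4i16 result bitcasts the
// source to 32-bit elements and narrows with a shift of 0 (even lanes) or 16
// (odd lanes), which should select to a single vnsrl.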
// Lower the following shuffle to vslidedown.
// a)
// t49: v8i8 = extract_subvector t13, Constant:i64<0>
// t109: v8i8 = extract_subvector t13, Constant:i64<8>
// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
// b)
// t69: v16i16 = extract_subvector t68, Constant:i64<0>
// t23: v8i16 = extract_subvector t69, Constant:i64<0>
// t29: v4i16 = extract_subvector t23, Constant:i64<4>
// t26: v8i16 = extract_subvector t69, Constant:i64<8>
// t30: v4i16 = extract_subvector t26, Constant:i64<0>
// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
static SDValue
lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
                                SDValue V1, SDValue V2,
                                ArrayRef<int> Mask,
                                const RISCVSubtarget &Subtarget,
                                SelectionDAG &DAG) {
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
    uint64_t Offset = 0;
    while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector. But we don't want to match the case.
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask is also continuous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}
// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue
lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                              SDValue V1, SDValue V2,
                              ArrayRef<int> Mask,
                              const RISCVSubtarget &Subtarget,
                              SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e. inserting right
  // up to the very end of it, then we don't actually care about the tail.
  if (NumSubElts + Index >= (int)NumElts)
    Policy |= RISCVII::TAIL_AGNOSTIC;

  InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
  ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
  SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);

  SDValue Res;
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (Index == 0)
    Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
                      VL);
  else
    Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
                      DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
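// For example, in the second pattern above only VL == 5 elements are written,
// so the tail-undisturbed policy preserves elements 5..7 of the in-place
// operand v8.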
/// Match v(f)slide1up/down idioms. These operations involve sliding
/// N-1 elements to make room for an inserted scalar at one end.
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
                                            SDValue V1, SDValue V2,
                                            ArrayRef<int> Mask,
                                            const RISCVSubtarget &Subtarget,
                                            SelectionDAG &DAG) {
  bool OpsSwapped = false;
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))
      return SDValue();
    std::swap(V1, V2);
    OpsSwapped = true;
  }
  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
  if (!Splat)
    return SDValue();

  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
    const unsigned S = (Offset > 0) ? 0 : -Offset;
    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
        return false;
    return true;
  };

  const unsigned NumElts = VT.getVectorNumElements();
  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
    return SDValue();

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // Inserted lane must come from splat, undef scalar is legal but not profitable.
  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
    return SDValue();

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  auto OpCode = IsVSlidedown ?
    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
  if (!VT.isFloatingPoint())
    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT),
                         convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
                         Splat, TrueMask, VL);
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
                                     const SDLoc &DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  MVT VecVT = EvenV.getSimpleValueType();
  MVT VecContainerVT = VecVT; // <vscale x n x ty>
  // Convert fixed vectors to scalable if needed
  if (VecContainerVT.isFixedLengthVector()) {
    VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
    OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
  }

  assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());

  // We're working with a vector of the same size as the resulting
  // interleaved vector, but with half the number of elements and
  // twice the SEW (Hence the restriction on not using the maximum
  // ELEN)
  MVT WideVT =
      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
                       VecVT.getVectorElementCount());
  MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
  if (WideContainerVT.isFixedLengthVector())
    WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);

  // Bitcast the input vectors to integers in case they are FP
  VecContainerVT = VecContainerVT.changeTypeToInteger();
  EvenV = DAG.getBitcast(VecContainerVT, EvenV);
  OddV = DAG.getBitcast(VecContainerVT, OddV);

  auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(WideContainerVT);

  SDValue Interleaved;
  if (Subtarget.hasStdExtZvbb()) {
    // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
    SDValue OffsetVec =
        DAG.getSplatVector(VecContainerVT, DL,
                           DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
                                           Subtarget.getXLenVT()));
    Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                              OffsetVec, Passthru, Mask, VL);
    Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
                              Interleaved, EvenV, Passthru, Mask, VL);
  } else {
    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
    // vwaddu.vv
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1)
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    // = (OddV * 0x100...00) + EvenV
    // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use original type
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed
  MVT ResultVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}
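// For example, for i8 elements without Zvbb: vwaddu.vv produces
// zext(EvenV) + zext(OddV), and the vwmaccu.vx with scalar 0xff adds
// OddV * 255, so each 16-bit lane holds OddV * 256 + EvenV, i.e. OddV in the
// high byte and EvenV in the low byte.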

// If we have a vector of bits that we want to reverse, we can use a vbrev on a
// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
                                      SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  assert(VT.getVectorElementType() == MVT::i1);

  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
                                        SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have zvbb or the larger element type > ELEN, the operation
  // will be illegal.
  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
                                                               ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we
  // need to insert it into the larger vector and then shift the reversed bits
  // back down afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
                    V, DAG.getVectorIdxConstant(0, DL));

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift the reversed bits back down if the vector didn't exactly fit into
  // the larger element type.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
                      DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
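  // For example, reversing a v4i1 goes via an i8 bitreverse: the four mask
  // bits start in bits [3:0], the bitreverse leaves them (in reversed order)
  // in bits [7:4], and the shift right by ViaEltSize - NumElts = 4 moves them
  // back down to bits [3:0].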

  Res = DAG.getBitcast(ViaBitVT, Res);

  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                      DAG.getVectorIdxConstant(0, DL));
  return Res;
}

// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower
// this as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
                                           SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned NumSubElts, RotateAmt;
  if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
                                          NumElts, NumSubElts, RotateAmt))
    return SDValue();
  MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
                                  NumElts / NumSubElts);

  // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
  if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
    return SDValue();

  SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));

  SDValue Rotate;
  // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
  // so canonicalize to vrev8.
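  // For example, a v4i8 shuffle mask of <1, 0, 3, 2> viewed as v2i16 is a
  // rotate by 8, which is simply a byteswap of each i16 element.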
  if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
    Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
  else
    Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
                         DAG.getConstant(RotateAmt, DL, RotateVT));

  return DAG.getBitcast(VT, Rotate);
}

// If compiling with an exactly known VLEN, see if we can split a
// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination register exactly once.
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V1 = SVN->getOperand(0);
  SDValue V2 = SVN->getOperand(1);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();

  // If we don't know exact data layout, not much we can do. If this
  // is already m1 or smaller, no point in splitting further.
  const unsigned MinVLen = Subtarget.getRealMinVLen();
  const unsigned MaxVLen = Subtarget.getRealMaxVLen();
  if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen)
    return SDValue();

  MVT ElemVT = VT.getVectorElementType();
  unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
  unsigned VRegsPerSrc = NumElts / ElemsPerVReg;

  SmallVector<std::pair<int, SmallVector<int>>>
      OutMasks(VRegsPerSrc, {-1, {}});

  // Check if our mask can be done as a 1-to-1 mapping from source
  // to destination registers in the group without needing to
  // write each destination more than once.
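  // For example, with VLEN=128 a v8i64 source spans four vector registers
  // (two i64 elements per register); the mask is only accepted if every
  // destination register can be filled from a single source register.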
  for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
    int DstVecIdx = DstIdx / ElemsPerVReg;
    int DstSubIdx = DstIdx % ElemsPerVReg;
    int SrcIdx = Mask[DstIdx];
    if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
      continue;
    int SrcVecIdx = SrcIdx / ElemsPerVReg;
    int SrcSubIdx = SrcIdx % ElemsPerVReg;
    if (OutMasks[DstVecIdx].first == -1)
      OutMasks[DstVecIdx].first = SrcVecIdx;
    if (OutMasks[DstVecIdx].first != SrcVecIdx)
      // Note: This case could easily be handled by keeping track of a chain
      // of source values and generating two element shuffles below. This is
      // less an implementation question, and more a profitability one.
      return SDValue();

    OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
    OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
  }

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
  MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
  assert(M1VT == getLMUL1VT(M1VT));
  unsigned NumOpElts = M1VT.getVectorMinNumElements();
  SDValue Vec = DAG.getUNDEF(ContainerVT);
  // The following semantically builds up a fixed length concat_vector
  // of the component shuffle_vectors. We eagerly lower to scalable here
  // to avoid DAG combining it back to a large shuffle_vector again.
  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
  for (unsigned DstVecIdx = 0; DstVecIdx < OutMasks.size(); DstVecIdx++) {
    auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
    if (SrcVecIdx == -1)
      continue;
    unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
    SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
    SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
                                 DAG.getVectorIdxConstant(ExtractIdx, DL));
    SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
    SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
    SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
    unsigned InsertIdx = DstVecIdx * NumOpElts;
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
                      DAG.getVectorIdxConstant(InsertIdx, DL));
  }
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (VT.getVectorElementType() == MVT::i1) {
    // Lower to a vror.vi of a larger element type if possible before we
    // promote i1s to i8s.
    if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
      return V;
    if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
      return V;

    // Promote i1 shuffle to i8 shuffle.
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
    V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
    V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
                      : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
    SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
    return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
                        ISD::SETNE);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(
            Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          SDValue Ops[] = {Ld->getChain(),
                           IntID,
                           DAG.getUNDEF(ContainerVT),
                           NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT),
                           VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it
        // into the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat =
            DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
                                   V1, DAG.getConstant(Lane, DL, XLenVT),
                                   DAG.getUNDEF(ContainerVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  // For exact VLEN m2 or greater, try to split to m1 operations if we
  // can split cleanly.
  if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
    return V;

  ArrayRef<int> Mask = SVN->getMask();

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // A bitrotate will be one instruction on Zvkb, so try to lower to it first
  // if possible.
  if (Subtarget.hasStdExtZvkb())
    if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
      return V;

  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors
  // may be undef which can be handled with a single SLIDEDOWN/UP.
  int LoSrc, HiSrc;
  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
  if (Rotation > 0) {
    SDValue LoV, HiV;
    if (LoSrc >= 0) {
      LoV = LoSrc == 0 ? V1 : V2;
      LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
    }
    if (HiSrc >= 0) {
      HiV = HiSrc == 0 ? V1 : V2;
      HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
    }

    // We found a rotation. We need to slide HiV down by Rotation. Then we need
    // to slide LoV up by (NumElts - Rotation).
    unsigned InvRotate = NumElts - Rotation;

    SDValue Res = DAG.getUNDEF(ContainerVT);
    if (HiV) {
      // Even though we could use a smaller VL, don't to avoid a vsetivli
      // toggle.
      Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
                          DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
    }
    if (LoV)
      Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
                        DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
                        RISCVII::TAIL_AGNOSTIC);

    return convertFromScalableVector(VT, Res, DAG, Subtarget);
  }

  // If this is a deinterleave and we can widen the vector, then we can use
  // vnsrl to deinterleave.
  if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
    return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
                                   DAG, Subtarget);
  }

  if (SDValue V =
          lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Detect an interleave shuffle and lower to
  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
  int EvenSrc, OddSrc;
  if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
    // Extract the halves of the vectors.
    MVT HalfVT = VT.getHalfNumVectorElementsVT();

    int Size = Mask.size();
    SDValue EvenV, OddV;
    assert(EvenSrc >= 0 && "Undef source?");
    EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
    EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
                        DAG.getConstant(EvenSrc % Size, DL, XLenVT));

    assert(OddSrc >= 0 && "Undef source?");
    OddV = (OddSrc / Size) == 0 ? V1 : V2;
    OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
                       DAG.getConstant(OddSrc % Size, DL, XLenVT));

    return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
  }

  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an
  // element at the corresponding index in either source vector.
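  // For example, <0, 5, 2, 7> over two v4i32 sources is a select: elements 0
  // and 2 come from V1 and elements 1 and 3 come from V2, each at its own
  // index, so the shuffle can become a vselect with mask <1, 0, 1, 0>.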
  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);

  if (IsSelect) {
    // Now construct the mask that will be used by the vselect operation.
    SmallVector<SDValue> MaskVals;
    for (int MaskIndex : Mask) {
      bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    }

    if (SwapOps)
      std::swap(V1, V2);

    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
    SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
  }

  // We might be able to express the shuffle as a bitrotate. But even if we
  // don't have Zvkb and have to expand, the expanded sequence of approx. 2
  // shifts and a vor will have a higher throughput than a vrgather.
  if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
    return V;

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
  // half.
  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;

  SmallVector<SDValue> MaskVals;

  // Now construct the mask that will be used by the blended vrgather operation
  // and construct the appropriate indices into each vector.
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
    GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                                   ? DAG.getConstant(MaskIndex, DL, XLenVT)
                                   : DAG.getUNDEF(XLenVT));
    GatherIndicesRHS.push_back(
        IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
                          : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
    if (IsLHSOrUndefIndex && MaskIndex >= 0)
      ++LHSIndexCounts[MaskIndex];
    if (!IsLHSOrUndefIndex)
      ++RHSIndexCounts[MaskIndex - NumElts];
  }

  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  // If the mask allows, we can do all the index computation in 16 bits. This
  // requires less work and less register pressure at high LMUL, and creates
  // smaller constants which may be cheaper to materialize.
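  // For example, an i64 gather at LMUL=8 would otherwise need an i64 index
  // vector also occupying LMUL=8; vrgatherei16 with i16 indices needs only
  // LMUL=2 for the index operand.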
  if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
      (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
                              Subtarget);
  } else {
    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                           DAG.getConstant(SplatIndex, DL, XLenVT),
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    } else {
      SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
      LHSIndices =
          convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    }
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);

    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
                           DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
                           SelectMask, VL);
    } else {
      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
      RHSIndices =
          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
                           SelectMask, VL);
    }
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}

bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Support splats for any type. These should type legalize well.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  // Only support legal VTs for other shuffles for now.
  if (!isTypeLegal(VT))
    return false;

  MVT SVT = VT.getSimpleVT();

  // Not for i1 vectors.
  if (SVT.getScalarType() == MVT::i1)
    return false;

  int Dummy1, Dummy2;
  return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
         isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
}

// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
// the exponent.
SDValue
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
                                               SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  SDValue Src = Op.getOperand(0);
  SDLoc DL(Op);
  MVT ContainerVT = VT;

  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  }

  // We choose an FP type that can represent the value if possible. Otherwise,
  // we use a rounding-to-zero conversion so the exponent of the result is
  // still correct.
  // TODO: Use f16 for i8 when possible?
  MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
  if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
    FloatEltVT = MVT::f32;
  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

  // Legal types should have been checked in the RISCVTargetLowering
  // constructor.
  // TODO: Splitting may make sense in some cases.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
         "Expected legal float type!");

  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
  // The trailing zero count is equal to log2 of this single bit value.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNegative(Src, DL, VT);
    Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
                              Src, Mask, VL);
    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
  }

  // We have a legal FP type, convert to it.
  SDValue FloatVal;
  if (FloatVT.bitsGT(VT)) {
    if (Op->isVPOpcode())
      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
    else
      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
  } else {
    // Use RTZ to avoid rounding influencing exponent of FloatVal.
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }
    if (!Op->isVPOpcode())
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
    SDValue RTZRM =
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
    MVT ContainerFloatVT =
        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
                           Src, Mask, RTZRM, VL);
    if (VT.isFixedLengthVector())
      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
  }

  // Bitcast to integer and shift the exponent to the LSB.
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  SDValue Exp;
  // Restore back to original type. Truncation after SRL is to generate vnsrl.
  if (Op->isVPOpcode()) {
    Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
  } else {
    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT));
    if (IntVT.bitsLT(VT))
      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
    else if (IntVT.bitsGT(VT))
      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
  }

  // The exponent contains log2 of the value in biased form.
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  // For trailing zeros, we just need to subtract the bias.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT));
  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);

  // For leading zeros, we need to remove the bias and convert from log2 to
  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
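  // For example, with i32 elements converted to f32, Adjust = 127 + 31 = 158;
  // an input of 1 converts to 1.0f with a biased exponent of 127, giving
  // 158 - 127 = 31 leading zeros, as expected.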
  unsigned Adjust = ExponentBias + (EltSize - 1);
  SDValue Res;
  if (Op->isVPOpcode())
    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
                      Mask, VL);
  else
    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);

  // The above result with zero input equals Adjust, which is greater than
  // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
  if (Op.getOpcode() == ISD::CTLZ)
    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
  else if (Op.getOpcode() == ISD::VP_CTLZ)
    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
  return Res;
}

// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
// the load is already correctly-aligned, it returns SDValue().
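// For example, a v4i16 load with alignment 1 can instead be performed as a
// v8i8 load of the same bytes and the result bitcast back to v4i16.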
SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Load = cast<LoadSDNode>(Op);
  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");

  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                     Load->getMemoryVT(),
                                     *Load->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
                          Load->getPointerInfo(), Load->getOriginalAlign(),
                          Load->getMemOperand()->getFlags());
  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
}

// While RVV has alignment restrictions, we should always be able to store as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
// returns SDValue() if the store is already correctly aligned.
SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);
  assert(Store && Store->getValue().getValueType().isVector() &&
         "Expected vector store");

  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                     Store->getMemoryVT(),
                                     *Store->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  SDValue StoredVal = Store->getValue();
  MVT VT = StoredVal.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV store type");
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  StoredVal = DAG.getBitcast(NewVT, StoredVal);
  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
                      Store->getPointerInfo(), Store->getOriginalAlign(),
                      Store->getMemOperand()->getFlags());
}

static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");

  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();

  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return Op;

  // We only need to cost the immediate if constant pool lowering is enabled.
  if (!Subtarget.useConstantPoolForLargeInts())
    return Op;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
    return Op;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (DAG.shouldOptForSize())
    return SDValue();

  // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool. It will require an extra
  // temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
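  // For example (illustrative value), 0x1234567812345678 can be built by
  // materializing 0x12345678 once and adding it to itself shifted left by 32,
  // avoiding a constant pool load.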
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
  if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
    return Op;

  return SDValue();
}

static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDLoc dl(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  if (Subtarget.hasStdExtZtso()) {
    // The only fence that needs an instruction is a sequentially-consistent
    // cross-thread fence.
    if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
        FenceSSID == SyncScope::System)
      return Op;

    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
  }

  // Single-thread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned Check = Op.getConstantOperandVal(1);
  unsigned TDCMask = 0;
  if (Check & fcSNan)
    TDCMask |= RISCV::FPMASK_Signaling_NaN;
  if (Check & fcQNan)
    TDCMask |= RISCV::FPMASK_Quiet_NaN;
  if (Check & fcPosInf)
    TDCMask |= RISCV::FPMASK_Positive_Infinity;
  if (Check & fcNegInf)
    TDCMask |= RISCV::FPMASK_Negative_Infinity;
  if (Check & fcPosNormal)
    TDCMask |= RISCV::FPMASK_Positive_Normal;
  if (Check & fcNegNormal)
    TDCMask |= RISCV::FPMASK_Negative_Normal;
  if (Check & fcPosSubnormal)
    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
  if (Check & fcNegSubnormal)
    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
  if (Check & fcPosZero)
    TDCMask |= RISCV::FPMASK_Positive_Zero;
  if (Check & fcNegZero)
    TDCMask |= RISCV::FPMASK_Negative_Zero;
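  // The FPMASK_* values mirror the result bit layout of the fclass/vfclass
  // instructions, so the class test below reduces to checking whether
  // (fclass(x) & TDCMask) is non-zero (or equal to TDCMask when only a single
  // bit is set).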

  bool IsOneBitMask = isPowerOf2_32(TDCMask);

  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);

  if (VT.isVector()) {
    SDValue Op0 = Op.getOperand(0);
    MVT VT0 = Op.getOperand(0).getSimpleValueType();

    if (VT.isScalableVector()) {
      MVT DstVT = VT0.changeVectorElementTypeToInteger();
      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
      if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
        Mask = Op.getOperand(2);
        VL = Op.getOperand(3);
      }
      SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
                                    VL, Op->getFlags());
      if (IsOneBitMask)
        return DAG.getSetCC(DL, VT, FPCLASS,
                            DAG.getConstant(TDCMask, DL, DstVT),
                            ISD::CondCode::SETEQ);
      SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
                                DAG.getConstant(TDCMask, DL, DstVT));
      return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
                          ISD::CondCode::SETNE);
    }

    MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);
    MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
    auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
    if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
      Mask = Op.getOperand(2);
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
      VL = Op.getOperand(3);
    }

    Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);

    SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
                                  Mask, VL, Op->getFlags());

    TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                           DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
    if (IsOneBitMask) {
      SDValue VMSEQ =
          DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                      {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
                       DAG.getUNDEF(ContainerVT), Mask, VL});
      return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
    }
    SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
                              TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);

    SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
    SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                            DAG.getUNDEF(ContainerDstVT), SplatZero, VL);

    SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                                {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
                                 DAG.getUNDEF(ContainerVT), Mask, VL});
    return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
  }

  SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
  SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
                             ISD::CondCode::SETNE);
  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}

// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
// operations propagate nans.
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);

  if (!VT.isVector()) {
    MVT XLenVT = Subtarget.getXLenVT();

    // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
    // ensures that when one input is a nan, the other will also be a nan
    // allowing the nan to propagate. If both inputs are nan, this will swap
    // the inputs, which is harmless.
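    // For example, fmaximum(NaN, 3.0): X fails the ordered self-compare, so
    // NewY becomes NaN as well, and fmax(NaN, NaN) yields NaN rather than 3.0.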

    SDValue NewY = Y;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
      SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
      NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
    }

    SDValue NewX = X;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
      SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
      NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
    }

    unsigned Opc =
        Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
    return DAG.getNode(Opc, DL, VT, NewX, NewY);
  }

  // Determine whether X and Y are known to be NaN-free before any fixed-length
  // vectors are converted to scalable form.
  bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
  bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
    Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(2);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(3);
  } else
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue NewY = Y;
  if (!XIsNeverNan) {
    SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {X, X, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
                       DAG.getUNDEF(ContainerVT), VL);
  }

  SDValue NewX = X;
  if (!YIsNeverNan) {
    SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {Y, Y, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
                       DAG.getUNDEF(ContainerVT), VL);
  }

  unsigned Opc =
      Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
          ? RISCVISD::VFMAX_VL
          : RISCVISD::VFMIN_VL;
  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
                            DAG.getUNDEF(ContainerVT), Mask, VL);
  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}

/// Get a RISC-V target-specific VL op for a given SDNode.
static unsigned getRISCVVLOp(SDValue Op) {
#define OP_CASE(NODE)                                                          \
  case ISD::NODE:                                                              \
    return RISCVISD::NODE##_VL;
#define VP_CASE(NODE)                                                          \
  case ISD::VP_##NODE:                                                         \
    return RISCVISD::NODE##_VL;

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
  OP_CASE(STRICT_FADD)
  OP_CASE(STRICT_FSUB)
  OP_CASE(STRICT_FMUL)
  OP_CASE(STRICT_FDIV)
  OP_CASE(STRICT_FSQRT)
  VP_CASE(ADD)        // VP_ADD
  VP_CASE(SUB)        // VP_SUB
  VP_CASE(MUL)        // VP_MUL
  VP_CASE(SDIV)       // VP_SDIV
  VP_CASE(SREM)       // VP_SREM
  VP_CASE(UDIV)       // VP_UDIV
  VP_CASE(UREM)       // VP_UREM
  VP_CASE(SHL)        // VP_SHL
  VP_CASE(FADD)       // VP_FADD
  VP_CASE(FSUB)       // VP_FSUB
  VP_CASE(FMUL)       // VP_FMUL
  VP_CASE(FDIV)       // VP_FDIV
  VP_CASE(FNEG)       // VP_FNEG
  VP_CASE(FABS)       // VP_FABS
  VP_CASE(SMIN)       // VP_SMIN
  VP_CASE(SMAX)       // VP_SMAX
  VP_CASE(UMIN)       // VP_UMIN
  VP_CASE(UMAX)       // VP_UMAX
  VP_CASE(FCOPYSIGN)  // VP_FCOPYSIGN
  VP_CASE(SETCC)      // VP_SETCC
  VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
  VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
  VP_CASE(BITREVERSE) // VP_BITREVERSE
  VP_CASE(BSWAP)      // VP_BSWAP
  VP_CASE(CTLZ)       // VP_CTLZ
  VP_CASE(CTTZ)       // VP_CTTZ
  VP_CASE(CTPOP)      // VP_CTPOP
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    return RISCVISD::CTLZ_VL;
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    return RISCVISD::CTTZ_VL;
  case ISD::FMA:
  case ISD::VP_FMA:
    return RISCVISD::VFMADD_VL;
  case ISD::STRICT_FMA:
    return RISCVISD::STRICT_VFMADD_VL;
  case ISD::AND:
  case ISD::VP_AND:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMAND_VL;
    return RISCVISD::AND_VL;
  case ISD::OR:
  case ISD::VP_OR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMOR_VL;
    return RISCVISD::OR_VL;
  case ISD::XOR:
  case ISD::VP_XOR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMXOR_VL;
    return RISCVISD::XOR_VL;
  case ISD::VP_SELECT:
  case ISD::VP_MERGE:
    return RISCVISD::VMERGE_VL;
  case ISD::VP_ASHR:
    return RISCVISD::SRA_VL;
  case ISD::VP_LSHR:
    return RISCVISD::SRL_VL;
  case ISD::VP_SQRT:
    return RISCVISD::FSQRT_VL;
  case ISD::VP_SIGN_EXTEND:
    return RISCVISD::VSEXT_VL;
  case ISD::VP_ZERO_EXTEND:
    return RISCVISD::VZEXT_VL;
  case ISD::VP_FP_TO_SINT:
    return RISCVISD::VFCVT_RTZ_X_F_VL;
  case ISD::VP_FP_TO_UINT:
    return RISCVISD::VFCVT_RTZ_XU_F_VL;
  case ISD::FMINNUM:
  case ISD::VP_FMINNUM:
    return RISCVISD::VFMIN_VL;
  case ISD::FMAXNUM:
  case ISD::VP_FMAXNUM:
    return RISCVISD::VFMAX_VL;
  }
#undef OP_CASE
#undef VP_CASE
}

/// Return true if a RISC-V target-specific op has a merge operand.
static bool hasMergeOp(unsigned Opcode) {
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
         "not a RISC-V target specific op");
  static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
                RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                    ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                "adding target specific op should update this function");
  if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
    return true;
  if (Opcode == RISCVISD::FCOPYSIGN_VL)
    return true;
  if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
    return true;
  if (Opcode == RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
    return true;
  if (Opcode == RISCVISD::VMERGE_VL)
    return true;
  return false;
}

/// Return true if a RISC-V target-specific op has a mask operand.
static bool hasMaskOp(unsigned Opcode) {
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
         "not a RISC-V target specific op");
  static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
                RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                    ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                "adding target specific op should update this function");
  if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL &&
      Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
    return true;
  return false;
}

static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);

  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}

static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);

  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
      std::tie(LoOperands[j], HiOperands[j]) =
          DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
      continue;
    }
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}

static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);

  auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
  auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
  auto [EVLLo, EVLHi] =
      DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);

  SDValue ResLo =
      DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                  {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
  return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                     {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
}

static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
  assert(Op->isStrictFPOpcode());

  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));

  SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
  SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));

  SDLoc DL(Op);

  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());

  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
  HiOperands[0] = LoRes.getValue(1);
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());

  SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
                          LoRes.getValue(0), HiRes.getValue(0));
  return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::ATOMIC_FENCE:
    return LowerATOMIC_FENCE(Op, DAG, Subtarget);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::Constant:
    return lowerConstant(Op, DAG, Subtarget);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ROTL:
  case ISD::ROTR:
    if (Op.getValueType().isFixedLengthVector()) {
      assert(Subtarget.hasStdExtZvkb());
      return lowerToScalableOp(Op, DAG);
    }
    assert(Subtarget.hasVendorXTHeadBb() &&
           !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
           "Unexpected custom legalization");
    // XTHeadBb only supports rotate by constant.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    return Op;
  case ISD::BITCAST: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::f16 && Op0VT == MVT::i16 &&
        Subtarget.hasStdExtZfhminOrZhinxmin()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
        Subtarget.hasStdExtZfbfmin()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
        Subtarget.hasStdExtFOrZfinx()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
        Subtarget.hasStdExtZfa()) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
      SDValue RetReg =
          DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
      return RetReg;
    }

    // Consider other scalar<->scalar casts as legal if the types are legal.
    // Otherwise expand them.
    if (!VT.isVector() && !Op0VT.isVector()) {
      if (isTypeLegal(VT) && isTypeLegal(Op0VT))
        return Op;
      return SDValue();
    }

    assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
           "Unexpected types");

    if (VT.isFixedLengthVector()) {
      // We can handle fixed length vector bitcasts with a simple replacement
      // in isel.
      if (Op0VT.isFixedLengthVector())
        return Op;
      // When bitcasting from scalar to fixed-length vector, insert the scalar
      // into a one-element vector of the result type, and perform a vector
      // bitcast.
      if (!Op0VT.isVector()) {
        EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
        if (!isTypeLegal(BVT))
          return SDValue();
        return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
                                              DAG.getUNDEF(BVT), Op0,
                                              DAG.getConstant(0, DL, XLenVT)));
      }
      return SDValue();
    }
    // Custom-legalize bitcasts from fixed-length vector types to scalar types
    // thus: bitcast the vector to a one-element vector type whose element type
    // is the same as the result type, and extract the first element.
    if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (!isTypeLegal(BVT))
        return SDValue();
      SDValue BVec = DAG.getBitcast(BVT, Op0);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                         DAG.getConstant(0, DL, XLenVT));
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::BITREVERSE: {
    MVT VT = Op.getSimpleValueType();
    if (VT.isFixedLengthVector()) {
      assert(Subtarget.hasStdExtZvbb());
      return lowerToScalableOp(Op, DAG);
    }
    SDLoc DL(Op);
    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
    assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
    // Expand bitreverse to a bswap(rev8) followed by brev8.
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
    return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
  }
  case ISD::TRUNCATE:
    // Only custom-lower vector truncates.
    if (!Op.getSimpleValueType().isVector())
      return Op;
    return lowerVectorTruncLike(Op, DAG);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue Scalar = Op.getOperand(0);
    if (VT.getVectorElementType() == MVT::i1) {
      MVT WideVT = VT.changeVectorElementType(MVT::i8);
      SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
    }
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
    SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
                            DAG.getUNDEF(ContainerVT), Scalar, VL);
    if (VT.isFixedLengthVector())
      V = convertFromScalableVector(VT, V, DAG, Subtarget);
    return V;
  }
  case ISD::VSCALE: {
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
    if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
      report_fatal_error("Support for VLEN==32 is incomplete.");
    // We assume VLENB is a multiple of 8. We manually choose the best shift
    // here because SimplifyDemandedBits isn't always able to simplify it.
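    // For example, vscale x 2 becomes VLENB >> 2, vscale x 8 is VLENB itself,
    // and vscale x 24 becomes VLENB * 3 (the multiplier is scaled down by 8
    // rather than shifting VLENB first).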
    uint64_t Val = Op.getConstantOperandVal(0);
    if (isPowerOf2_64(Val)) {
      uint64_t Log2 = Log2_64(Val);
      if (Log2 < 3)
        Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
                          DAG.getConstant(3 - Log2, DL, VT));
      else if (Log2 > 3)
        Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
                          DAG.getConstant(Log2 - 3, DL, XLenVT));
    } else if ((Val % 8) == 0) {
      // If the multiplier is a multiple of 8, scale it down to avoid needing
      // to shift the VLENB value.
      Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
                        DAG.getConstant(Val / 8, DL, XLenVT));
    } else {
      SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
                                   DAG.getConstant(3, DL, XLenVT));
      Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
                        DAG.getConstant(Val, DL, XLenVT));
    }
    return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
  }
  case ISD::FPOWI: {
    // Custom promote f16 powi with illegal i32 integer type on RV64. Once
    // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
    if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
        Op.getOperand(1).getValueType() == MVT::i32) {
      SDLoc DL(Op);
      SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
      SDValue Powi =
          DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
      return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    return SDValue();
  }
  case ISD::FMAXIMUM:
  case ISD::FMINIMUM:
    if (Op.getValueType() == MVT::nxv32f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16()))
      return SplitVectorOp(Op, DAG);
    return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
  case ISD::FP_EXTEND: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
      return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
    if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
      SDValue FloatVal =
          DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
      return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
    }

    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  }
  case ISD::FP_ROUND: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
      return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
    if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
        Subtarget.hasStdExtDOrZdinx()) {
      SDValue FloatVal =
          DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
                      DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
      return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
    }

    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  }
  case ISD::STRICT_FP_ROUND:
  case ISD::STRICT_FP_EXTEND:
    return lowerStrictFPExtendOrRoundLike(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        Op.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      // int -> f32
      SDLoc DL(Op);
      MVT NVT =
          MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
      SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
      // f32 -> f16
      return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
    }
    [[fallthrough]];
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    if (SDValue Op1 = Op.getOperand(0);
        Op1.getValueType().isVector() &&
        Op1.getValueType().getScalarType() == MVT::f16 &&
        (Subtarget.hasVInstructionsF16Minimal() &&
         !Subtarget.hasVInstructionsF16())) {
      if (Op1.getValueType() == MVT::nxv32f16)
        return SplitVectorOp(Op, DAG);
      // f16 -> f32
      SDLoc DL(Op);
      MVT NVT = MVT::getVectorVT(MVT::f32,
                                 Op1.getValueType().getVectorElementCount());
      SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
      // f32 -> int
      return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
    }
    [[fallthrough]];
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // two steps.
    MVT VT = Op.getSimpleValueType();

    SDLoc DL(Op);

    bool IsStrict = Op->isStrictFPOpcode();
    SDValue Src = Op.getOperand(0 + IsStrict);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
6175 if (EltSize
> (2 * SrcEltSize
)) {
6177 // Do a regular integer sign/zero extension then convert to float.
6178 MVT IVecVT
= MVT::getVectorVT(MVT::getIntegerVT(EltSize
/ 2),
6179 VT
.getVectorElementCount());
6180 unsigned ExtOpcode
= (Op
.getOpcode() == ISD::UINT_TO_FP
||
6181 Op
.getOpcode() == ISD::STRICT_UINT_TO_FP
)
6184 SDValue Ext
= DAG
.getNode(ExtOpcode
, DL
, IVecVT
, Src
);
6186 return DAG
.getNode(Op
.getOpcode(), DL
, Op
->getVTList(),
6187 Op
.getOperand(0), Ext
);
6188 return DAG
.getNode(Op
.getOpcode(), DL
, VT
, Ext
);
6191 assert(SrcEltVT
== MVT::f16
&& "Unexpected FP_TO_[US]INT lowering");
6192 // Do one doubling fp_extend then complete the operation by converting
6194 MVT InterimFVT
= MVT::getVectorVT(MVT::f32
, VT
.getVectorElementCount());
6196 auto [FExt
, Chain
] =
6197 DAG
.getStrictFPExtendOrRound(Src
, Op
.getOperand(0), DL
, InterimFVT
);
6198 return DAG
.getNode(Op
.getOpcode(), DL
, Op
->getVTList(), Chain
, FExt
);
6200 SDValue FExt
= DAG
.getFPExtendOrRound(Src
, DL
, InterimFVT
);
6201 return DAG
.getNode(Op
.getOpcode(), DL
, VT
, FExt
);
6204 // Narrowing conversions
6205 if (SrcEltSize
> (2 * EltSize
)) {
6207 // One narrowing int_to_fp, then an fp_round.
6208 assert(EltVT
== MVT::f16
&& "Unexpected [US]_TO_FP lowering");
6209 MVT InterimFVT
= MVT::getVectorVT(MVT::f32
, VT
.getVectorElementCount());
6211 SDValue Int2FP
= DAG
.getNode(Op
.getOpcode(), DL
,
6212 DAG
.getVTList(InterimFVT
, MVT::Other
),
6213 Op
.getOperand(0), Src
);
6214 SDValue Chain
= Int2FP
.getValue(1);
6215 return DAG
.getStrictFPExtendOrRound(Int2FP
, Chain
, DL
, VT
).first
;
6217 SDValue Int2FP
= DAG
.getNode(Op
.getOpcode(), DL
, InterimFVT
, Src
);
6218 return DAG
.getFPExtendOrRound(Int2FP
, DL
, VT
);
6221 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6222 // representable by the integer, the result is poison.
6223 MVT IVecVT
= MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize
/ 2),
6224 VT
.getVectorElementCount());
6227 DAG
.getNode(Op
.getOpcode(), DL
, DAG
.getVTList(IVecVT
, MVT::Other
),
6228 Op
.getOperand(0), Src
);
6229 SDValue Res
= DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, FP2Int
);
6230 return DAG
.getMergeValues({Res
, FP2Int
.getValue(1)}, DL
);
6232 SDValue FP2Int
= DAG
.getNode(Op
.getOpcode(), DL
, IVecVT
, Src
);
6233 return DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, FP2Int
);
6236 // Scalable vectors can exit here. Patterns will handle equally-sized
6237 // conversions halving/doubling ones.
6238 if (!VT
.isFixedLengthVector())
6241 // For fixed-length vectors we lower to a custom "VL" node.
6242 unsigned RVVOpc
= 0;
6243 switch (Op
.getOpcode()) {
6245 llvm_unreachable("Impossible opcode");
6246 case ISD::FP_TO_SINT
:
6247 RVVOpc
= RISCVISD::VFCVT_RTZ_X_F_VL
;
6249 case ISD::FP_TO_UINT
:
6250 RVVOpc
= RISCVISD::VFCVT_RTZ_XU_F_VL
;
6252 case ISD::SINT_TO_FP
:
6253 RVVOpc
= RISCVISD::SINT_TO_FP_VL
;
6255 case ISD::UINT_TO_FP
:
6256 RVVOpc
= RISCVISD::UINT_TO_FP_VL
;
6258 case ISD::STRICT_FP_TO_SINT
:
6259 RVVOpc
= RISCVISD::STRICT_VFCVT_RTZ_X_F_VL
;
6261 case ISD::STRICT_FP_TO_UINT
:
6262 RVVOpc
= RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL
;
6264 case ISD::STRICT_SINT_TO_FP
:
6265 RVVOpc
= RISCVISD::STRICT_SINT_TO_FP_VL
;
6267 case ISD::STRICT_UINT_TO_FP
:
6268 RVVOpc
= RISCVISD::STRICT_UINT_TO_FP_VL
;
6272 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
6273 MVT SrcContainerVT
= getContainerForFixedLengthVector(SrcVT
);
6274 assert(ContainerVT
.getVectorElementCount() == SrcContainerVT
.getVectorElementCount() &&
6275 "Expected same element count");
6277 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
6279 Src
= convertToScalableVector(SrcContainerVT
, Src
, DAG
, Subtarget
);
6281 Src
= DAG
.getNode(RVVOpc
, DL
, DAG
.getVTList(ContainerVT
, MVT::Other
),
6282 Op
.getOperand(0), Src
, Mask
, VL
);
6283 SDValue SubVec
= convertFromScalableVector(VT
, Src
, DAG
, Subtarget
);
6284 return DAG
.getMergeValues({SubVec
, Src
.getValue(1)}, DL
);
6286 Src
= DAG
.getNode(RVVOpc
, DL
, ContainerVT
, Src
, Mask
, VL
);
6287 return convertFromScalableVector(VT
, Src
, DAG
, Subtarget
);
6289 case ISD::FP_TO_SINT_SAT
:
6290 case ISD::FP_TO_UINT_SAT
:
6291 return lowerFP_TO_INT_SAT(Op
, DAG
, Subtarget
);
6292 case ISD::FP_TO_BF16
: {
6293 // Custom lower to ensure the libcall return is passed in an FPR on hard
6295 assert(!Subtarget
.isSoftFPABI() && "Unexpected custom legalization");
6297 MakeLibCallOptions CallOptions
;
6299 RTLIB::getFPROUND(Op
.getOperand(0).getValueType(), MVT::bf16
);
6301 makeLibCall(DAG
, LC
, MVT::f32
, Op
.getOperand(0), CallOptions
, DL
).first
;
6302 if (Subtarget
.is64Bit() && !RV64LegalI32
)
6303 return DAG
.getNode(RISCVISD::FMV_X_ANYEXTW_RV64
, DL
, MVT::i64
, Res
);
6304 return DAG
.getBitcast(MVT::i32
, Res
);
6306 case ISD::BF16_TO_FP
: {
6307 assert(Subtarget
.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6308 MVT VT
= Op
.getSimpleValueType();
6311 ISD::SHL
, DL
, Op
.getOperand(0).getValueType(), Op
.getOperand(0),
6312 DAG
.getShiftAmountConstant(16, Op
.getOperand(0).getValueType(), DL
));
6313 SDValue Res
= Subtarget
.is64Bit()
6314 ? DAG
.getNode(RISCVISD::FMV_W_X_RV64
, DL
, MVT::f32
, Op
)
6315 : DAG
.getBitcast(MVT::f32
, Op
);
6316 // fp_extend if the target VT is bigger than f32.
6318 return DAG
.getNode(ISD::FP_EXTEND
, DL
, VT
, Res
);
6321 case ISD::FP_TO_FP16
: {
6322 // Custom lower to ensure the libcall return is passed in an FPR on hard
6324 assert(Subtarget
.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6326 MakeLibCallOptions CallOptions
;
6328 RTLIB::getFPROUND(Op
.getOperand(0).getValueType(), MVT::f16
);
6330 makeLibCall(DAG
, LC
, MVT::f32
, Op
.getOperand(0), CallOptions
, DL
).first
;
6331 if (Subtarget
.is64Bit() && !RV64LegalI32
)
6332 return DAG
.getNode(RISCVISD::FMV_X_ANYEXTW_RV64
, DL
, MVT::i64
, Res
);
6333 return DAG
.getBitcast(MVT::i32
, Res
);
6335 case ISD::FP16_TO_FP
: {
6336 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6338 assert(Subtarget
.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6340 MakeLibCallOptions CallOptions
;
6341 SDValue Arg
= Subtarget
.is64Bit()
6342 ? DAG
.getNode(RISCVISD::FMV_W_X_RV64
, DL
, MVT::f32
,
6344 : DAG
.getBitcast(MVT::f32
, Op
.getOperand(0));
6346 makeLibCall(DAG
, RTLIB::FPEXT_F16_F32
, MVT::f32
, Arg
, CallOptions
, DL
)
6353 case ISD::FNEARBYINT
:
6356 case ISD::FROUNDEVEN
:
6357 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op
, DAG
, Subtarget
);
6360 return lowerVectorXRINT(Op
, DAG
, Subtarget
);
6361 case ISD::VECREDUCE_ADD
:
6362 case ISD::VECREDUCE_UMAX
:
6363 case ISD::VECREDUCE_SMAX
:
6364 case ISD::VECREDUCE_UMIN
:
6365 case ISD::VECREDUCE_SMIN
:
6366 return lowerVECREDUCE(Op
, DAG
);
6367 case ISD::VECREDUCE_AND
:
6368 case ISD::VECREDUCE_OR
:
6369 case ISD::VECREDUCE_XOR
:
6370 if (Op
.getOperand(0).getValueType().getVectorElementType() == MVT::i1
)
6371 return lowerVectorMaskVecReduction(Op
, DAG
, /*IsVP*/ false);
6372 return lowerVECREDUCE(Op
, DAG
);
6373 case ISD::VECREDUCE_FADD
:
6374 case ISD::VECREDUCE_SEQ_FADD
:
6375 case ISD::VECREDUCE_FMIN
:
6376 case ISD::VECREDUCE_FMAX
:
6377 return lowerFPVECREDUCE(Op
, DAG
);
6378 case ISD::VP_REDUCE_ADD
:
6379 case ISD::VP_REDUCE_UMAX
:
6380 case ISD::VP_REDUCE_SMAX
:
6381 case ISD::VP_REDUCE_UMIN
:
6382 case ISD::VP_REDUCE_SMIN
:
6383 case ISD::VP_REDUCE_FADD
:
6384 case ISD::VP_REDUCE_SEQ_FADD
:
6385 case ISD::VP_REDUCE_FMIN
:
6386 case ISD::VP_REDUCE_FMAX
:
6387 if (Op
.getOperand(1).getValueType() == MVT::nxv32f16
&&
6388 (Subtarget
.hasVInstructionsF16Minimal() &&
6389 !Subtarget
.hasVInstructionsF16()))
6390 return SplitVectorReductionOp(Op
, DAG
);
6391 return lowerVPREDUCE(Op
, DAG
);
6392 case ISD::VP_REDUCE_AND
:
6393 case ISD::VP_REDUCE_OR
:
6394 case ISD::VP_REDUCE_XOR
:
6395 if (Op
.getOperand(1).getValueType().getVectorElementType() == MVT::i1
)
6396 return lowerVectorMaskVecReduction(Op
, DAG
, /*IsVP*/ true);
6397 return lowerVPREDUCE(Op
, DAG
);
6399 MVT ContainerVT
= getContainerForFixedLengthVector(Op
.getSimpleValueType());
6400 return convertFromScalableVector(Op
.getSimpleValueType(),
6401 DAG
.getUNDEF(ContainerVT
), DAG
, Subtarget
);
6403 case ISD::INSERT_SUBVECTOR
:
6404 return lowerINSERT_SUBVECTOR(Op
, DAG
);
6405 case ISD::EXTRACT_SUBVECTOR
:
6406 return lowerEXTRACT_SUBVECTOR(Op
, DAG
);
6407 case ISD::VECTOR_DEINTERLEAVE
:
6408 return lowerVECTOR_DEINTERLEAVE(Op
, DAG
);
6409 case ISD::VECTOR_INTERLEAVE
:
6410 return lowerVECTOR_INTERLEAVE(Op
, DAG
);
6411 case ISD::STEP_VECTOR
:
6412 return lowerSTEP_VECTOR(Op
, DAG
);
6413 case ISD::VECTOR_REVERSE
:
6414 return lowerVECTOR_REVERSE(Op
, DAG
);
6415 case ISD::VECTOR_SPLICE
:
6416 return lowerVECTOR_SPLICE(Op
, DAG
);
6417 case ISD::BUILD_VECTOR
:
6418 return lowerBUILD_VECTOR(Op
, DAG
, Subtarget
);
6419 case ISD::SPLAT_VECTOR
:
6420 if (Op
.getValueType().getScalarType() == MVT::f16
&&
6421 (Subtarget
.hasVInstructionsF16Minimal() &&
6422 !Subtarget
.hasVInstructionsF16())) {
6423 if (Op
.getValueType() == MVT::nxv32f16
)
6424 return SplitVectorOp(Op
, DAG
);
6427 DAG
.getNode(ISD::FP_EXTEND
, DL
, MVT::f32
, Op
.getOperand(0));
6428 SDValue NewSplat
= DAG
.getNode(
6429 ISD::SPLAT_VECTOR
, DL
,
6430 MVT::getVectorVT(MVT::f32
, Op
.getValueType().getVectorElementCount()),
6432 return DAG
.getNode(ISD::FP_ROUND
, DL
, Op
.getValueType(), NewSplat
,
6433 DAG
.getIntPtrConstant(0, DL
, /*isTarget=*/true));
6435 if (Op
.getValueType().getVectorElementType() == MVT::i1
)
6436 return lowerVectorMaskSplat(Op
, DAG
);
6438 case ISD::VECTOR_SHUFFLE
:
6439 return lowerVECTOR_SHUFFLE(Op
, DAG
, Subtarget
);
6440 case ISD::CONCAT_VECTORS
: {
6441 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6442 // better than going through the stack, as the default expansion does.
6444 MVT VT
= Op
.getSimpleValueType();
6445 unsigned NumOpElts
=
6446 Op
.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6447 SDValue Vec
= DAG
.getUNDEF(VT
);
6448 for (const auto &OpIdx
: enumerate(Op
->ops())) {
6449 SDValue SubVec
= OpIdx
.value();
6450 // Don't insert undef subvectors.
6451 if (SubVec
.isUndef())
6453 Vec
= DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, VT
, Vec
, SubVec
,
6454 DAG
.getIntPtrConstant(OpIdx
.index() * NumOpElts
, DL
));
6459 if (auto V
= expandUnalignedRVVLoad(Op
, DAG
))
6461 if (Op
.getValueType().isFixedLengthVector())
6462 return lowerFixedLengthVectorLoadToRVV(Op
, DAG
);
6465 if (auto V
= expandUnalignedRVVStore(Op
, DAG
))
6467 if (Op
.getOperand(1).getValueType().isFixedLengthVector())
6468 return lowerFixedLengthVectorStoreToRVV(Op
, DAG
);
6472 return lowerMaskedLoad(Op
, DAG
);
6475 return lowerMaskedStore(Op
, DAG
);
6476 case ISD::SELECT_CC
: {
6477 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6478 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6479 // into separate SETCC+SELECT just like LegalizeDAG.
6480 SDValue Tmp1
= Op
.getOperand(0);
6481 SDValue Tmp2
= Op
.getOperand(1);
6482 SDValue True
= Op
.getOperand(2);
6483 SDValue False
= Op
.getOperand(3);
6484 EVT VT
= Op
.getValueType();
6485 SDValue CC
= Op
.getOperand(4);
6486 EVT CmpVT
= Tmp1
.getValueType();
6488 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), CmpVT
);
6491 DAG
.getNode(ISD::SETCC
, DL
, CCVT
, Tmp1
, Tmp2
, CC
, Op
->getFlags());
6492 return DAG
.getSelect(DL
, VT
, Cond
, True
, False
);
6495 MVT OpVT
= Op
.getOperand(0).getSimpleValueType();
6496 if (OpVT
.isScalarInteger()) {
6497 MVT VT
= Op
.getSimpleValueType();
6498 SDValue LHS
= Op
.getOperand(0);
6499 SDValue RHS
= Op
.getOperand(1);
6500 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
6501 assert((CCVal
== ISD::SETGT
|| CCVal
== ISD::SETUGT
) &&
6502 "Unexpected CondCode");
6506 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6507 // convert this to the equivalent of (set(u)ge X, C+1) by using
6508 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6510 if (isa
<ConstantSDNode
>(RHS
)) {
6511 int64_t Imm
= cast
<ConstantSDNode
>(RHS
)->getSExtValue();
6512 if (Imm
!= 0 && isInt
<12>((uint64_t)Imm
+ 1)) {
6513 // If this is an unsigned compare and the constant is -1, incrementing
6514 // the constant would change behavior. The result should be false.
6515 if (CCVal
== ISD::SETUGT
&& Imm
== -1)
6516 return DAG
.getConstant(0, DL
, VT
);
6517 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6518 CCVal
= ISD::getSetCCSwappedOperands(CCVal
);
6519 SDValue SetCC
= DAG
.getSetCC(
6520 DL
, VT
, LHS
, DAG
.getConstant(Imm
+ 1, DL
, OpVT
), CCVal
);
6521 return DAG
.getLogicalNOT(DL
, SetCC
, VT
);
6525 // Not a constant we could handle, swap the operands and condition code to
6527 CCVal
= ISD::getSetCCSwappedOperands(CCVal
);
6528 return DAG
.getSetCC(DL
, VT
, RHS
, LHS
, CCVal
);
6531 if (Op
.getOperand(0).getSimpleValueType() == MVT::nxv32f16
&&
6532 (Subtarget
.hasVInstructionsF16Minimal() &&
6533 !Subtarget
.hasVInstructionsF16()))
6534 return SplitVectorOp(Op
, DAG
);
6536 return lowerFixedLengthVectorSetccToRVV(Op
, DAG
);
6552 return lowerToScalableOp(Op
, DAG
);
6556 if (Op
.getSimpleValueType().isFixedLengthVector())
6557 return lowerToScalableOp(Op
, DAG
);
6558 // This can be called for an i32 shift amount that needs to be promoted.
6559 assert(Op
.getOperand(1).getValueType() == MVT::i32
&& Subtarget
.is64Bit() &&
6560 "Unexpected custom legalisation");
6572 if (Op
.getValueType() == MVT::nxv32f16
&&
6573 (Subtarget
.hasVInstructionsF16Minimal() &&
6574 !Subtarget
.hasVInstructionsF16()))
6575 return SplitVectorOp(Op
, DAG
);
6577 case ISD::AVGFLOORU
:
6587 return lowerToScalableOp(Op
, DAG
);
6590 return lowerABS(Op
, DAG
);
6592 case ISD::CTLZ_ZERO_UNDEF
:
6594 case ISD::CTTZ_ZERO_UNDEF
:
6595 if (Subtarget
.hasStdExtZvbb())
6596 return lowerToScalableOp(Op
, DAG
);
6597 assert(Op
.getOpcode() != ISD::CTTZ
);
6598 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op
, DAG
);
6600 return lowerFixedLengthVectorSelectToRVV(Op
, DAG
);
6601 case ISD::FCOPYSIGN
:
6602 if (Op
.getValueType() == MVT::nxv32f16
&&
6603 (Subtarget
.hasVInstructionsF16Minimal() &&
6604 !Subtarget
.hasVInstructionsF16()))
6605 return SplitVectorOp(Op
, DAG
);
6606 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op
, DAG
);
6607 case ISD::STRICT_FADD
:
6608 case ISD::STRICT_FSUB
:
6609 case ISD::STRICT_FMUL
:
6610 case ISD::STRICT_FDIV
:
6611 case ISD::STRICT_FSQRT
:
6612 case ISD::STRICT_FMA
:
6613 if (Op
.getValueType() == MVT::nxv32f16
&&
6614 (Subtarget
.hasVInstructionsF16Minimal() &&
6615 !Subtarget
.hasVInstructionsF16()))
6616 return SplitStrictFPVectorOp(Op
, DAG
);
6617 return lowerToScalableOp(Op
, DAG
);
6618 case ISD::STRICT_FSETCC
:
6619 case ISD::STRICT_FSETCCS
:
6620 return lowerVectorStrictFSetcc(Op
, DAG
);
6621 case ISD::STRICT_FCEIL
:
6622 case ISD::STRICT_FRINT
:
6623 case ISD::STRICT_FFLOOR
:
6624 case ISD::STRICT_FTRUNC
:
6625 case ISD::STRICT_FNEARBYINT
:
6626 case ISD::STRICT_FROUND
:
6627 case ISD::STRICT_FROUNDEVEN
:
6628 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op
, DAG
, Subtarget
);
6630 case ISD::VP_GATHER
:
6631 return lowerMaskedGather(Op
, DAG
);
6633 case ISD::VP_SCATTER
:
6634 return lowerMaskedScatter(Op
, DAG
);
6635 case ISD::GET_ROUNDING
:
6636 return lowerGET_ROUNDING(Op
, DAG
);
6637 case ISD::SET_ROUNDING
:
6638 return lowerSET_ROUNDING(Op
, DAG
);
6639 case ISD::EH_DWARF_CFA
:
6640 return lowerEH_DWARF_CFA(Op
, DAG
);
6641 case ISD::VP_SELECT
:
6650 return lowerVPOp(Op
, DAG
);
6654 return lowerLogicVPOp(Op
, DAG
);
6663 case ISD::VP_FMINNUM
:
6664 case ISD::VP_FMAXNUM
:
6665 case ISD::VP_FCOPYSIGN
:
6666 if (Op
.getValueType() == MVT::nxv32f16
&&
6667 (Subtarget
.hasVInstructionsF16Minimal() &&
6668 !Subtarget
.hasVInstructionsF16()))
6669 return SplitVPOp(Op
, DAG
);
6674 return lowerVPOp(Op
, DAG
);
6675 case ISD::VP_IS_FPCLASS
:
6676 return LowerIS_FPCLASS(Op
, DAG
);
6677 case ISD::VP_SIGN_EXTEND
:
6678 case ISD::VP_ZERO_EXTEND
:
6679 if (Op
.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1
)
6680 return lowerVPExtMaskOp(Op
, DAG
);
6681 return lowerVPOp(Op
, DAG
);
6682 case ISD::VP_TRUNCATE
:
6683 return lowerVectorTruncLike(Op
, DAG
);
6684 case ISD::VP_FP_EXTEND
:
6685 case ISD::VP_FP_ROUND
:
6686 return lowerVectorFPExtendOrRoundLike(Op
, DAG
);
6687 case ISD::VP_SINT_TO_FP
:
6688 case ISD::VP_UINT_TO_FP
:
6689 if (Op
.getValueType().isVector() &&
6690 Op
.getValueType().getScalarType() == MVT::f16
&&
6691 (Subtarget
.hasVInstructionsF16Minimal() &&
6692 !Subtarget
.hasVInstructionsF16())) {
6693 if (Op
.getValueType() == MVT::nxv32f16
)
6694 return SplitVPOp(Op
, DAG
);
6698 MVT::getVectorVT(MVT::f32
, Op
.getValueType().getVectorElementCount());
6699 auto NC
= DAG
.getNode(Op
.getOpcode(), DL
, NVT
, Op
->ops());
6701 return DAG
.getNode(ISD::FP_ROUND
, DL
, Op
.getValueType(), NC
,
6702 DAG
.getIntPtrConstant(0, DL
, /*isTarget=*/true));
6705 case ISD::VP_FP_TO_SINT
:
6706 case ISD::VP_FP_TO_UINT
:
6707 if (SDValue Op1
= Op
.getOperand(0);
6708 Op1
.getValueType().isVector() &&
6709 Op1
.getValueType().getScalarType() == MVT::f16
&&
6710 (Subtarget
.hasVInstructionsF16Minimal() &&
6711 !Subtarget
.hasVInstructionsF16())) {
6712 if (Op1
.getValueType() == MVT::nxv32f16
)
6713 return SplitVPOp(Op
, DAG
);
6716 MVT NVT
= MVT::getVectorVT(MVT::f32
,
6717 Op1
.getValueType().getVectorElementCount());
6718 SDValue WidenVec
= DAG
.getNode(ISD::FP_EXTEND
, DL
, NVT
, Op1
);
6720 return DAG
.getNode(Op
.getOpcode(), DL
, Op
.getValueType(),
6721 {WidenVec
, Op
.getOperand(1), Op
.getOperand(2)});
6723 return lowerVPFPIntConvOp(Op
, DAG
);
6725 if (Op
.getOperand(0).getSimpleValueType() == MVT::nxv32f16
&&
6726 (Subtarget
.hasVInstructionsF16Minimal() &&
6727 !Subtarget
.hasVInstructionsF16()))
6728 return SplitVPOp(Op
, DAG
);
6729 if (Op
.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1
)
6730 return lowerVPSetCCMaskOp(Op
, DAG
);
6736 case ISD::VP_BITREVERSE
:
6738 return lowerVPOp(Op
, DAG
);
6740 case ISD::VP_CTLZ_ZERO_UNDEF
:
6741 if (Subtarget
.hasStdExtZvbb())
6742 return lowerVPOp(Op
, DAG
);
6743 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op
, DAG
);
6745 case ISD::VP_CTTZ_ZERO_UNDEF
:
6746 if (Subtarget
.hasStdExtZvbb())
6747 return lowerVPOp(Op
, DAG
);
6748 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op
, DAG
);
6750 return lowerVPOp(Op
, DAG
);
6751 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD
:
6752 return lowerVPStridedLoad(Op
, DAG
);
6753 case ISD::EXPERIMENTAL_VP_STRIDED_STORE
:
6754 return lowerVPStridedStore(Op
, DAG
);
6756 case ISD::VP_FFLOOR
:
6758 case ISD::VP_FNEARBYINT
:
6759 case ISD::VP_FROUND
:
6760 case ISD::VP_FROUNDEVEN
:
6761 case ISD::VP_FROUNDTOZERO
:
6762 if (Op
.getValueType() == MVT::nxv32f16
&&
6763 (Subtarget
.hasVInstructionsF16Minimal() &&
6764 !Subtarget
.hasVInstructionsF16()))
6765 return SplitVPOp(Op
, DAG
);
6766 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op
, DAG
, Subtarget
);
6767 case ISD::VP_FMAXIMUM
:
6768 case ISD::VP_FMINIMUM
:
6769 if (Op
.getValueType() == MVT::nxv32f16
&&
6770 (Subtarget
.hasVInstructionsF16Minimal() &&
6771 !Subtarget
.hasVInstructionsF16()))
6772 return SplitVPOp(Op
, DAG
);
6773 return lowerFMAXIMUM_FMINIMUM(Op
, DAG
, Subtarget
);
6774 case ISD::EXPERIMENTAL_VP_SPLICE
:
6775 return lowerVPSpliceExperimental(Op
, DAG
);
6776 case ISD::EXPERIMENTAL_VP_REVERSE
:
6777 return lowerVPReverseExperimental(Op
, DAG
);
6781 static SDValue
getTargetNode(GlobalAddressSDNode
*N
, const SDLoc
&DL
, EVT Ty
,
6782 SelectionDAG
&DAG
, unsigned Flags
) {
6783 return DAG
.getTargetGlobalAddress(N
->getGlobal(), DL
, Ty
, 0, Flags
);
6786 static SDValue
getTargetNode(BlockAddressSDNode
*N
, const SDLoc
&DL
, EVT Ty
,
6787 SelectionDAG
&DAG
, unsigned Flags
) {
6788 return DAG
.getTargetBlockAddress(N
->getBlockAddress(), Ty
, N
->getOffset(),
6792 static SDValue
getTargetNode(ConstantPoolSDNode
*N
, const SDLoc
&DL
, EVT Ty
,
6793 SelectionDAG
&DAG
, unsigned Flags
) {
6794 return DAG
.getTargetConstantPool(N
->getConstVal(), Ty
, N
->getAlign(),
6795 N
->getOffset(), Flags
);
6798 static SDValue
getTargetNode(JumpTableSDNode
*N
, const SDLoc
&DL
, EVT Ty
,
6799 SelectionDAG
&DAG
, unsigned Flags
) {
6800 return DAG
.getTargetJumpTable(N
->getIndex(), Ty
, Flags
);
6803 template <class NodeTy
>
6804 SDValue
RISCVTargetLowering::getAddr(NodeTy
*N
, SelectionDAG
&DAG
,
6805 bool IsLocal
, bool IsExternWeak
) const {
6807 EVT Ty
= getPointerTy(DAG
.getDataLayout());
6809 // When HWASAN is used and tagging of global variables is enabled
6810 // they should be accessed via the GOT, since the tagged address of a global
6811 // is incompatible with existing code models. This also applies to non-pic
6813 if (isPositionIndependent() || Subtarget
.allowTaggedGlobals()) {
6814 SDValue Addr
= getTargetNode(N
, DL
, Ty
, DAG
, 0);
6815 if (IsLocal
&& !Subtarget
.allowTaggedGlobals())
6816 // Use PC-relative addressing to access the symbol. This generates the
6817 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
6818 // %pcrel_lo(auipc)).
6819 return DAG
.getNode(RISCVISD::LLA
, DL
, Ty
, Addr
);
6821 // Use PC-relative addressing to access the GOT for this symbol, then load
6822 // the address from the GOT. This generates the pattern (PseudoLGA sym),
6823 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6825 SDValue(DAG
.getMachineNode(RISCV::PseudoLGA
, DL
, Ty
, Addr
), 0);
6826 MachineFunction
&MF
= DAG
.getMachineFunction();
6827 MachineMemOperand
*MemOp
= MF
.getMachineMemOperand(
6828 MachinePointerInfo::getGOT(MF
),
6829 MachineMemOperand::MOLoad
| MachineMemOperand::MODereferenceable
|
6830 MachineMemOperand::MOInvariant
,
6831 LLT(Ty
.getSimpleVT()), Align(Ty
.getFixedSizeInBits() / 8));
6832 DAG
.setNodeMemRefs(cast
<MachineSDNode
>(Load
.getNode()), {MemOp
});
6836 switch (getTargetMachine().getCodeModel()) {
6838 report_fatal_error("Unsupported code model for lowering");
6839 case CodeModel::Small
: {
6840 // Generate a sequence for accessing addresses within the first 2 GiB of
6841 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
6842 SDValue AddrHi
= getTargetNode(N
, DL
, Ty
, DAG
, RISCVII::MO_HI
);
6843 SDValue AddrLo
= getTargetNode(N
, DL
, Ty
, DAG
, RISCVII::MO_LO
);
6844 SDValue MNHi
= DAG
.getNode(RISCVISD::HI
, DL
, Ty
, AddrHi
);
6845 return DAG
.getNode(RISCVISD::ADD_LO
, DL
, Ty
, MNHi
, AddrLo
);
6847 case CodeModel::Medium
: {
6848 SDValue Addr
= getTargetNode(N
, DL
, Ty
, DAG
, 0);
6850 // An extern weak symbol may be undefined, i.e. have value 0, which may
6851 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
6852 // symbol. This generates the pattern (PseudoLGA sym), which expands to
6853 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6855 SDValue(DAG
.getMachineNode(RISCV::PseudoLGA
, DL
, Ty
, Addr
), 0);
6856 MachineFunction
&MF
= DAG
.getMachineFunction();
6857 MachineMemOperand
*MemOp
= MF
.getMachineMemOperand(
6858 MachinePointerInfo::getGOT(MF
),
6859 MachineMemOperand::MOLoad
| MachineMemOperand::MODereferenceable
|
6860 MachineMemOperand::MOInvariant
,
6861 LLT(Ty
.getSimpleVT()), Align(Ty
.getFixedSizeInBits() / 8));
6862 DAG
.setNodeMemRefs(cast
<MachineSDNode
>(Load
.getNode()), {MemOp
});
6866 // Generate a sequence for accessing addresses within any 2GiB range within
6867 // the address space. This generates the pattern (PseudoLLA sym), which
6868 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
6869 return DAG
.getNode(RISCVISD::LLA
, DL
, Ty
, Addr
);
6874 SDValue
RISCVTargetLowering::lowerGlobalAddress(SDValue Op
,
6875 SelectionDAG
&DAG
) const {
6876 GlobalAddressSDNode
*N
= cast
<GlobalAddressSDNode
>(Op
);
6877 assert(N
->getOffset() == 0 && "unexpected offset in global node");
6878 const GlobalValue
*GV
= N
->getGlobal();
6879 return getAddr(N
, DAG
, GV
->isDSOLocal(), GV
->hasExternalWeakLinkage());
6882 SDValue
RISCVTargetLowering::lowerBlockAddress(SDValue Op
,
6883 SelectionDAG
&DAG
) const {
6884 BlockAddressSDNode
*N
= cast
<BlockAddressSDNode
>(Op
);
6886 return getAddr(N
, DAG
);
6889 SDValue
RISCVTargetLowering::lowerConstantPool(SDValue Op
,
6890 SelectionDAG
&DAG
) const {
6891 ConstantPoolSDNode
*N
= cast
<ConstantPoolSDNode
>(Op
);
6893 return getAddr(N
, DAG
);
6896 SDValue
RISCVTargetLowering::lowerJumpTable(SDValue Op
,
6897 SelectionDAG
&DAG
) const {
6898 JumpTableSDNode
*N
= cast
<JumpTableSDNode
>(Op
);
6900 return getAddr(N
, DAG
);
6903 SDValue
RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode
*N
,
6905 bool UseGOT
) const {
6907 EVT Ty
= getPointerTy(DAG
.getDataLayout());
6908 const GlobalValue
*GV
= N
->getGlobal();
6909 MVT XLenVT
= Subtarget
.getXLenVT();
6912 // Use PC-relative addressing to access the GOT for this TLS symbol, then
6913 // load the address from the GOT and add the thread pointer. This generates
6914 // the pattern (PseudoLA_TLS_IE sym), which expands to
6915 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
6916 SDValue Addr
= DAG
.getTargetGlobalAddress(GV
, DL
, Ty
, 0, 0);
6918 SDValue(DAG
.getMachineNode(RISCV::PseudoLA_TLS_IE
, DL
, Ty
, Addr
), 0);
6919 MachineFunction
&MF
= DAG
.getMachineFunction();
6920 MachineMemOperand
*MemOp
= MF
.getMachineMemOperand(
6921 MachinePointerInfo::getGOT(MF
),
6922 MachineMemOperand::MOLoad
| MachineMemOperand::MODereferenceable
|
6923 MachineMemOperand::MOInvariant
,
6924 LLT(Ty
.getSimpleVT()), Align(Ty
.getFixedSizeInBits() / 8));
6925 DAG
.setNodeMemRefs(cast
<MachineSDNode
>(Load
.getNode()), {MemOp
});
6927 // Add the thread pointer.
6928 SDValue TPReg
= DAG
.getRegister(RISCV::X4
, XLenVT
);
6929 return DAG
.getNode(ISD::ADD
, DL
, Ty
, Load
, TPReg
);
6932 // Generate a sequence for accessing the address relative to the thread
6933 // pointer, with the appropriate adjustment for the thread pointer offset.
6934 // This generates the pattern
6935 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
6937 DAG
.getTargetGlobalAddress(GV
, DL
, Ty
, 0, RISCVII::MO_TPREL_HI
);
6939 DAG
.getTargetGlobalAddress(GV
, DL
, Ty
, 0, RISCVII::MO_TPREL_ADD
);
6941 DAG
.getTargetGlobalAddress(GV
, DL
, Ty
, 0, RISCVII::MO_TPREL_LO
);
6943 SDValue MNHi
= DAG
.getNode(RISCVISD::HI
, DL
, Ty
, AddrHi
);
6944 SDValue TPReg
= DAG
.getRegister(RISCV::X4
, XLenVT
);
6946 DAG
.getNode(RISCVISD::ADD_TPREL
, DL
, Ty
, MNHi
, TPReg
, AddrAdd
);
6947 return DAG
.getNode(RISCVISD::ADD_LO
, DL
, Ty
, MNAdd
, AddrLo
);
6950 SDValue
RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode
*N
,
6951 SelectionDAG
&DAG
) const {
6953 EVT Ty
= getPointerTy(DAG
.getDataLayout());
6954 IntegerType
*CallTy
= Type::getIntNTy(*DAG
.getContext(), Ty
.getSizeInBits());
6955 const GlobalValue
*GV
= N
->getGlobal();
6957 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6958 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
6959 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
6960 SDValue Addr
= DAG
.getTargetGlobalAddress(GV
, DL
, Ty
, 0, 0);
6962 SDValue(DAG
.getMachineNode(RISCV::PseudoLA_TLS_GD
, DL
, Ty
, Addr
), 0);
6964 // Prepare argument list to generate call.
6969 Args
.push_back(Entry
);
6971 // Setup call to __tls_get_addr.
6972 TargetLowering::CallLoweringInfo
CLI(DAG
);
6974 .setChain(DAG
.getEntryNode())
6975 .setLibCallee(CallingConv::C
, CallTy
,
6976 DAG
.getExternalSymbol("__tls_get_addr", Ty
),
6979 return LowerCallTo(CLI
).first
;
6982 SDValue
RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode
*N
,
6983 SelectionDAG
&DAG
) const {
6985 EVT Ty
= getPointerTy(DAG
.getDataLayout());
6986 const GlobalValue
*GV
= N
->getGlobal();
6988 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6989 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
6991 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
6992 // lw tY, tX, %tlsdesc_lo_load(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label)
6993 // addi a0, tX, %tlsdesc_lo_add(label) // R_RISCV_TLSDESC_ADD_LO12_I(label)
6994 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
6995 SDValue Addr
= DAG
.getTargetGlobalAddress(GV
, DL
, Ty
, 0, 0);
6996 return SDValue(DAG
.getMachineNode(RISCV::PseudoLA_TLSDESC
, DL
, Ty
, Addr
), 0);
6999 SDValue
RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op
,
7000 SelectionDAG
&DAG
) const {
7001 GlobalAddressSDNode
*N
= cast
<GlobalAddressSDNode
>(Op
);
7002 assert(N
->getOffset() == 0 && "unexpected offset in global node");
7004 if (DAG
.getTarget().useEmulatedTLS())
7005 return LowerToTLSEmulatedModel(N
, DAG
);
7007 TLSModel::Model Model
= getTargetMachine().getTLSModel(N
->getGlobal());
7009 if (DAG
.getMachineFunction().getFunction().getCallingConv() ==
7011 report_fatal_error("In GHC calling convention TLS is not supported");
7015 case TLSModel::LocalExec
:
7016 Addr
= getStaticTLSAddr(N
, DAG
, /*UseGOT=*/false);
7018 case TLSModel::InitialExec
:
7019 Addr
= getStaticTLSAddr(N
, DAG
, /*UseGOT=*/true);
7021 case TLSModel::LocalDynamic
:
7022 case TLSModel::GeneralDynamic
:
7023 Addr
= DAG
.getTarget().useTLSDESC() ? getTLSDescAddr(N
, DAG
)
7024 : getDynamicTLSAddr(N
, DAG
);
7031 // Return true if Val is equal to (setcc LHS, RHS, CC).
7032 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
7033 // Otherwise, return std::nullopt.
7034 static std::optional
<bool> matchSetCC(SDValue LHS
, SDValue RHS
,
7035 ISD::CondCode CC
, SDValue Val
) {
7036 assert(Val
->getOpcode() == ISD::SETCC
);
7037 SDValue LHS2
= Val
.getOperand(0);
7038 SDValue RHS2
= Val
.getOperand(1);
7039 ISD::CondCode CC2
= cast
<CondCodeSDNode
>(Val
.getOperand(2))->get();
7041 if (LHS
== LHS2
&& RHS
== RHS2
) {
7044 if (CC
== ISD::getSetCCInverse(CC2
, LHS2
.getValueType()))
7046 } else if (LHS
== RHS2
&& RHS
== LHS2
) {
7047 CC2
= ISD::getSetCCSwappedOperands(CC2
);
7050 if (CC
== ISD::getSetCCInverse(CC2
, LHS2
.getValueType()))
7054 return std::nullopt
;
7057 static SDValue
combineSelectToBinOp(SDNode
*N
, SelectionDAG
&DAG
,
7058 const RISCVSubtarget
&Subtarget
) {
7059 SDValue CondV
= N
->getOperand(0);
7060 SDValue TrueV
= N
->getOperand(1);
7061 SDValue FalseV
= N
->getOperand(2);
7062 MVT VT
= N
->getSimpleValueType(0);
7065 if (!Subtarget
.hasConditionalMoveFusion()) {
7066 // (select c, -1, y) -> -c | y
7067 if (isAllOnesConstant(TrueV
)) {
7068 SDValue Neg
= DAG
.getNegative(CondV
, DL
, VT
);
7069 return DAG
.getNode(ISD::OR
, DL
, VT
, Neg
, FalseV
);
7071 // (select c, y, -1) -> (c-1) | y
7072 if (isAllOnesConstant(FalseV
)) {
7073 SDValue Neg
= DAG
.getNode(ISD::ADD
, DL
, VT
, CondV
,
7074 DAG
.getAllOnesConstant(DL
, VT
));
7075 return DAG
.getNode(ISD::OR
, DL
, VT
, Neg
, TrueV
);
7078 // (select c, 0, y) -> (c-1) & y
7079 if (isNullConstant(TrueV
)) {
7080 SDValue Neg
= DAG
.getNode(ISD::ADD
, DL
, VT
, CondV
,
7081 DAG
.getAllOnesConstant(DL
, VT
));
7082 return DAG
.getNode(ISD::AND
, DL
, VT
, Neg
, FalseV
);
7084 // (select c, y, 0) -> -c & y
7085 if (isNullConstant(FalseV
)) {
7086 SDValue Neg
= DAG
.getNegative(CondV
, DL
, VT
);
7087 return DAG
.getNode(ISD::AND
, DL
, VT
, Neg
, TrueV
);
7091 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7092 // when both truev and falsev are also setcc.
7093 if (CondV
.getOpcode() == ISD::SETCC
&& TrueV
.getOpcode() == ISD::SETCC
&&
7094 FalseV
.getOpcode() == ISD::SETCC
) {
7095 SDValue LHS
= CondV
.getOperand(0);
7096 SDValue RHS
= CondV
.getOperand(1);
7097 ISD::CondCode CC
= cast
<CondCodeSDNode
>(CondV
.getOperand(2))->get();
7099 // (select x, x, y) -> x | y
7100 // (select !x, x, y) -> x & y
7101 if (std::optional
<bool> MatchResult
= matchSetCC(LHS
, RHS
, CC
, TrueV
)) {
7102 return DAG
.getNode(*MatchResult
? ISD::OR
: ISD::AND
, DL
, VT
, TrueV
,
7105 // (select x, y, x) -> x & y
7106 // (select !x, y, x) -> x | y
7107 if (std::optional
<bool> MatchResult
= matchSetCC(LHS
, RHS
, CC
, FalseV
)) {
7108 return DAG
.getNode(*MatchResult
? ISD::AND
: ISD::OR
, DL
, VT
, TrueV
,
7116 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7117 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7118 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7119 // being `0` or `-1`. In such cases we can replace `select` with `and`.
7120 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7123 foldBinOpIntoSelectIfProfitable(SDNode
*BO
, SelectionDAG
&DAG
,
7124 const RISCVSubtarget
&Subtarget
) {
7125 if (Subtarget
.hasShortForwardBranchOpt())
7128 unsigned SelOpNo
= 0;
7129 SDValue Sel
= BO
->getOperand(0);
7130 if (Sel
.getOpcode() != ISD::SELECT
|| !Sel
.hasOneUse()) {
7132 Sel
= BO
->getOperand(1);
7135 if (Sel
.getOpcode() != ISD::SELECT
|| !Sel
.hasOneUse())
7138 unsigned ConstSelOpNo
= 1;
7139 unsigned OtherSelOpNo
= 2;
7140 if (!dyn_cast
<ConstantSDNode
>(Sel
->getOperand(ConstSelOpNo
))) {
7144 SDValue ConstSelOp
= Sel
->getOperand(ConstSelOpNo
);
7145 ConstantSDNode
*ConstSelOpNode
= dyn_cast
<ConstantSDNode
>(ConstSelOp
);
7146 if (!ConstSelOpNode
|| ConstSelOpNode
->isOpaque())
7149 SDValue ConstBinOp
= BO
->getOperand(SelOpNo
^ 1);
7150 ConstantSDNode
*ConstBinOpNode
= dyn_cast
<ConstantSDNode
>(ConstBinOp
);
7151 if (!ConstBinOpNode
|| ConstBinOpNode
->isOpaque())
7155 EVT VT
= BO
->getValueType(0);
7157 SDValue NewConstOps
[2] = {ConstSelOp
, ConstBinOp
};
7159 std::swap(NewConstOps
[0], NewConstOps
[1]);
7161 SDValue NewConstOp
=
7162 DAG
.FoldConstantArithmetic(BO
->getOpcode(), DL
, VT
, NewConstOps
);
7166 const APInt
&NewConstAPInt
= NewConstOp
->getAsAPIntVal();
7167 if (!NewConstAPInt
.isZero() && !NewConstAPInt
.isAllOnes())
7170 SDValue OtherSelOp
= Sel
->getOperand(OtherSelOpNo
);
7171 SDValue NewNonConstOps
[2] = {OtherSelOp
, ConstBinOp
};
7173 std::swap(NewNonConstOps
[0], NewNonConstOps
[1]);
7174 SDValue NewNonConstOp
= DAG
.getNode(BO
->getOpcode(), DL
, VT
, NewNonConstOps
);
7176 SDValue NewT
= (ConstSelOpNo
== 1) ? NewConstOp
: NewNonConstOp
;
7177 SDValue NewF
= (ConstSelOpNo
== 1) ? NewNonConstOp
: NewConstOp
;
7178 return DAG
.getSelect(DL
, VT
, Sel
.getOperand(0), NewT
, NewF
);
7181 SDValue
RISCVTargetLowering::lowerSELECT(SDValue Op
, SelectionDAG
&DAG
) const {
7182 SDValue CondV
= Op
.getOperand(0);
7183 SDValue TrueV
= Op
.getOperand(1);
7184 SDValue FalseV
= Op
.getOperand(2);
7186 MVT VT
= Op
.getSimpleValueType();
7187 MVT XLenVT
= Subtarget
.getXLenVT();
7189 // Lower vector SELECTs to VSELECTs by splatting the condition.
7190 if (VT
.isVector()) {
7191 MVT SplatCondVT
= VT
.changeVectorElementType(MVT::i1
);
7192 SDValue CondSplat
= DAG
.getSplat(SplatCondVT
, DL
, CondV
);
7193 return DAG
.getNode(ISD::VSELECT
, DL
, VT
, CondSplat
, TrueV
, FalseV
);
7196 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7197 // nodes to implement the SELECT. Performing the lowering here allows for
7198 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7199 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7200 if ((Subtarget
.hasStdExtZicond() || Subtarget
.hasVendorXVentanaCondOps()) &&
7201 VT
.isScalarInteger()) {
7202 // (select c, t, 0) -> (czero_eqz t, c)
7203 if (isNullConstant(FalseV
))
7204 return DAG
.getNode(RISCVISD::CZERO_EQZ
, DL
, VT
, TrueV
, CondV
);
7205 // (select c, 0, f) -> (czero_nez f, c)
7206 if (isNullConstant(TrueV
))
7207 return DAG
.getNode(RISCVISD::CZERO_NEZ
, DL
, VT
, FalseV
, CondV
);
7209 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7210 if (TrueV
.getOpcode() == ISD::AND
&&
7211 (TrueV
.getOperand(0) == FalseV
|| TrueV
.getOperand(1) == FalseV
))
7213 ISD::OR
, DL
, VT
, TrueV
,
7214 DAG
.getNode(RISCVISD::CZERO_NEZ
, DL
, VT
, FalseV
, CondV
));
7215 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7216 if (FalseV
.getOpcode() == ISD::AND
&&
7217 (FalseV
.getOperand(0) == TrueV
|| FalseV
.getOperand(1) == TrueV
))
7219 ISD::OR
, DL
, VT
, FalseV
,
7220 DAG
.getNode(RISCVISD::CZERO_EQZ
, DL
, VT
, TrueV
, CondV
));
7222 // Try some other optimizations before falling back to generic lowering.
7223 if (SDValue V
= combineSelectToBinOp(Op
.getNode(), DAG
, Subtarget
))
7226 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7227 // Unless we have the short forward branch optimization.
7228 if (!Subtarget
.hasConditionalMoveFusion())
7231 DAG
.getNode(RISCVISD::CZERO_EQZ
, DL
, VT
, TrueV
, CondV
),
7232 DAG
.getNode(RISCVISD::CZERO_NEZ
, DL
, VT
, FalseV
, CondV
));
7235 if (SDValue V
= combineSelectToBinOp(Op
.getNode(), DAG
, Subtarget
))
7238 if (Op
.hasOneUse()) {
7239 unsigned UseOpc
= Op
->use_begin()->getOpcode();
7240 if (isBinOp(UseOpc
) && DAG
.isSafeToSpeculativelyExecute(UseOpc
)) {
7241 SDNode
*BinOp
= *Op
->use_begin();
7242 if (SDValue NewSel
= foldBinOpIntoSelectIfProfitable(*Op
->use_begin(),
7244 DAG
.ReplaceAllUsesWith(BinOp
, &NewSel
);
7245 return lowerSELECT(NewSel
, DAG
);
7250 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7251 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7252 const ConstantFPSDNode
*FPTV
= dyn_cast
<ConstantFPSDNode
>(TrueV
);
7253 const ConstantFPSDNode
*FPFV
= dyn_cast
<ConstantFPSDNode
>(FalseV
);
7255 if (FPTV
->isExactlyValue(1.0) && FPFV
->isExactlyValue(0.0))
7256 return DAG
.getNode(ISD::SINT_TO_FP
, DL
, VT
, CondV
);
7257 if (FPTV
->isExactlyValue(0.0) && FPFV
->isExactlyValue(1.0)) {
7258 SDValue XOR
= DAG
.getNode(ISD::XOR
, DL
, XLenVT
, CondV
,
7259 DAG
.getConstant(1, DL
, XLenVT
));
7260 return DAG
.getNode(ISD::SINT_TO_FP
, DL
, VT
, XOR
);
7264 // If the condition is not an integer SETCC which operates on XLenVT, we need
7265 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7266 // (select condv, truev, falsev)
7267 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7268 if (CondV
.getOpcode() != ISD::SETCC
||
7269 CondV
.getOperand(0).getSimpleValueType() != XLenVT
) {
7270 SDValue Zero
= DAG
.getConstant(0, DL
, XLenVT
);
7271 SDValue SetNE
= DAG
.getCondCode(ISD::SETNE
);
7273 SDValue Ops
[] = {CondV
, Zero
, SetNE
, TrueV
, FalseV
};
7275 return DAG
.getNode(RISCVISD::SELECT_CC
, DL
, VT
, Ops
);
7278 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7279 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7280 // advantage of the integer compare+branch instructions. i.e.:
7281 // (select (setcc lhs, rhs, cc), truev, falsev)
7282 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7283 SDValue LHS
= CondV
.getOperand(0);
7284 SDValue RHS
= CondV
.getOperand(1);
7285 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(CondV
.getOperand(2))->get();
7287 // Special case for a select of 2 constants that have a diffence of 1.
7288 // Normally this is done by DAGCombine, but if the select is introduced by
7289 // type legalization or op legalization, we miss it. Restricting to SETLT
7290 // case for now because that is what signed saturating add/sub need.
7291 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7292 // but we would probably want to swap the true/false values if the condition
7293 // is SETGE/SETLE to avoid an XORI.
7294 if (isa
<ConstantSDNode
>(TrueV
) && isa
<ConstantSDNode
>(FalseV
) &&
7295 CCVal
== ISD::SETLT
) {
7296 const APInt
&TrueVal
= TrueV
->getAsAPIntVal();
7297 const APInt
&FalseVal
= FalseV
->getAsAPIntVal();
7298 if (TrueVal
- 1 == FalseVal
)
7299 return DAG
.getNode(ISD::ADD
, DL
, VT
, CondV
, FalseV
);
7300 if (TrueVal
+ 1 == FalseVal
)
7301 return DAG
.getNode(ISD::SUB
, DL
, VT
, FalseV
, CondV
);
7304 translateSetCCForBranch(DL
, LHS
, RHS
, CCVal
, DAG
);
7305 // 1 < x ? x : 1 -> 0 < x ? x : 1
7306 if (isOneConstant(LHS
) && (CCVal
== ISD::SETLT
|| CCVal
== ISD::SETULT
) &&
7307 RHS
== TrueV
&& LHS
== FalseV
) {
7308 LHS
= DAG
.getConstant(0, DL
, VT
);
7309 // 0 <u x is the same as x != 0.
7310 if (CCVal
== ISD::SETULT
) {
7311 std::swap(LHS
, RHS
);
7316 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7317 if (isAllOnesConstant(RHS
) && CCVal
== ISD::SETLT
&& LHS
== TrueV
&&
7319 RHS
= DAG
.getConstant(0, DL
, VT
);
7322 SDValue TargetCC
= DAG
.getCondCode(CCVal
);
7324 if (isa
<ConstantSDNode
>(TrueV
) && !isa
<ConstantSDNode
>(FalseV
)) {
7325 // (select (setcc lhs, rhs, CC), constant, falsev)
7326 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7327 std::swap(TrueV
, FalseV
);
7328 TargetCC
= DAG
.getCondCode(ISD::getSetCCInverse(CCVal
, LHS
.getValueType()));
7331 SDValue Ops
[] = {LHS
, RHS
, TargetCC
, TrueV
, FalseV
};
7332 return DAG
.getNode(RISCVISD::SELECT_CC
, DL
, VT
, Ops
);
7335 SDValue
RISCVTargetLowering::lowerBRCOND(SDValue Op
, SelectionDAG
&DAG
) const {
7336 SDValue CondV
= Op
.getOperand(1);
7338 MVT XLenVT
= Subtarget
.getXLenVT();
7340 if (CondV
.getOpcode() == ISD::SETCC
&&
7341 CondV
.getOperand(0).getValueType() == XLenVT
) {
7342 SDValue LHS
= CondV
.getOperand(0);
7343 SDValue RHS
= CondV
.getOperand(1);
7344 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(CondV
.getOperand(2))->get();
7346 translateSetCCForBranch(DL
, LHS
, RHS
, CCVal
, DAG
);
7348 SDValue TargetCC
= DAG
.getCondCode(CCVal
);
7349 return DAG
.getNode(RISCVISD::BR_CC
, DL
, Op
.getValueType(), Op
.getOperand(0),
7350 LHS
, RHS
, TargetCC
, Op
.getOperand(2));
7353 return DAG
.getNode(RISCVISD::BR_CC
, DL
, Op
.getValueType(), Op
.getOperand(0),
7354 CondV
, DAG
.getConstant(0, DL
, XLenVT
),
7355 DAG
.getCondCode(ISD::SETNE
), Op
.getOperand(2));
7358 SDValue
RISCVTargetLowering::lowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const {
7359 MachineFunction
&MF
= DAG
.getMachineFunction();
7360 RISCVMachineFunctionInfo
*FuncInfo
= MF
.getInfo
<RISCVMachineFunctionInfo
>();
7363 SDValue FI
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(),
7364 getPointerTy(MF
.getDataLayout()));
7366 // vastart just stores the address of the VarArgsFrameIndex slot into the
7367 // memory location argument.
7368 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
7369 return DAG
.getStore(Op
.getOperand(0), DL
, FI
, Op
.getOperand(1),
7370 MachinePointerInfo(SV
));
7373 SDValue
RISCVTargetLowering::lowerFRAMEADDR(SDValue Op
,
7374 SelectionDAG
&DAG
) const {
7375 const RISCVRegisterInfo
&RI
= *Subtarget
.getRegisterInfo();
7376 MachineFunction
&MF
= DAG
.getMachineFunction();
7377 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
7378 MFI
.setFrameAddressIsTaken(true);
7379 Register FrameReg
= RI
.getFrameRegister(MF
);
7380 int XLenInBytes
= Subtarget
.getXLen() / 8;
7382 EVT VT
= Op
.getValueType();
7384 SDValue FrameAddr
= DAG
.getCopyFromReg(DAG
.getEntryNode(), DL
, FrameReg
, VT
);
7385 unsigned Depth
= Op
.getConstantOperandVal(0);
7387 int Offset
= -(XLenInBytes
* 2);
7388 SDValue Ptr
= DAG
.getNode(ISD::ADD
, DL
, VT
, FrameAddr
,
7389 DAG
.getIntPtrConstant(Offset
, DL
));
7391 DAG
.getLoad(VT
, DL
, DAG
.getEntryNode(), Ptr
, MachinePointerInfo());
7396 SDValue
RISCVTargetLowering::lowerRETURNADDR(SDValue Op
,
7397 SelectionDAG
&DAG
) const {
7398 const RISCVRegisterInfo
&RI
= *Subtarget
.getRegisterInfo();
7399 MachineFunction
&MF
= DAG
.getMachineFunction();
7400 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
7401 MFI
.setReturnAddressIsTaken(true);
7402 MVT XLenVT
= Subtarget
.getXLenVT();
7403 int XLenInBytes
= Subtarget
.getXLen() / 8;
7405 if (verifyReturnAddressArgumentIsConstant(Op
, DAG
))
7408 EVT VT
= Op
.getValueType();
7410 unsigned Depth
= Op
.getConstantOperandVal(0);
7412 int Off
= -XLenInBytes
;
7413 SDValue FrameAddr
= lowerFRAMEADDR(Op
, DAG
);
7414 SDValue Offset
= DAG
.getConstant(Off
, DL
, VT
);
7415 return DAG
.getLoad(VT
, DL
, DAG
.getEntryNode(),
7416 DAG
.getNode(ISD::ADD
, DL
, VT
, FrameAddr
, Offset
),
7417 MachinePointerInfo());
7420 // Return the value of the return address register, marking it an implicit
7422 Register Reg
= MF
.addLiveIn(RI
.getRARegister(), getRegClassFor(XLenVT
));
7423 return DAG
.getCopyFromReg(DAG
.getEntryNode(), DL
, Reg
, XLenVT
);
7426 SDValue
RISCVTargetLowering::lowerShiftLeftParts(SDValue Op
,
7427 SelectionDAG
&DAG
) const {
7429 SDValue Lo
= Op
.getOperand(0);
7430 SDValue Hi
= Op
.getOperand(1);
7431 SDValue Shamt
= Op
.getOperand(2);
7432 EVT VT
= Lo
.getValueType();
7434 // if Shamt-XLEN < 0: // Shamt < XLEN
7436 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7439 // Hi = Lo << (Shamt-XLEN)
7441 SDValue Zero
= DAG
.getConstant(0, DL
, VT
);
7442 SDValue One
= DAG
.getConstant(1, DL
, VT
);
7443 SDValue MinusXLen
= DAG
.getConstant(-(int)Subtarget
.getXLen(), DL
, VT
);
7444 SDValue XLenMinus1
= DAG
.getConstant(Subtarget
.getXLen() - 1, DL
, VT
);
7445 SDValue ShamtMinusXLen
= DAG
.getNode(ISD::ADD
, DL
, VT
, Shamt
, MinusXLen
);
7446 SDValue XLenMinus1Shamt
= DAG
.getNode(ISD::SUB
, DL
, VT
, XLenMinus1
, Shamt
);
7448 SDValue LoTrue
= DAG
.getNode(ISD::SHL
, DL
, VT
, Lo
, Shamt
);
7449 SDValue ShiftRight1Lo
= DAG
.getNode(ISD::SRL
, DL
, VT
, Lo
, One
);
7450 SDValue ShiftRightLo
=
7451 DAG
.getNode(ISD::SRL
, DL
, VT
, ShiftRight1Lo
, XLenMinus1Shamt
);
7452 SDValue ShiftLeftHi
= DAG
.getNode(ISD::SHL
, DL
, VT
, Hi
, Shamt
);
7453 SDValue HiTrue
= DAG
.getNode(ISD::OR
, DL
, VT
, ShiftLeftHi
, ShiftRightLo
);
7454 SDValue HiFalse
= DAG
.getNode(ISD::SHL
, DL
, VT
, Lo
, ShamtMinusXLen
);
7456 SDValue CC
= DAG
.getSetCC(DL
, VT
, ShamtMinusXLen
, Zero
, ISD::SETLT
);
7458 Lo
= DAG
.getNode(ISD::SELECT
, DL
, VT
, CC
, LoTrue
, Zero
);
7459 Hi
= DAG
.getNode(ISD::SELECT
, DL
, VT
, CC
, HiTrue
, HiFalse
);
7461 SDValue Parts
[2] = {Lo
, Hi
};
7462 return DAG
.getMergeValues(Parts
, DL
);
7465 SDValue
RISCVTargetLowering::lowerShiftRightParts(SDValue Op
, SelectionDAG
&DAG
,
7468 SDValue Lo
= Op
.getOperand(0);
7469 SDValue Hi
= Op
.getOperand(1);
7470 SDValue Shamt
= Op
.getOperand(2);
7471 EVT VT
= Lo
.getValueType();
7474 // if Shamt-XLEN < 0: // Shamt < XLEN
7475 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7476 // Hi = Hi >>s Shamt
7478 // Lo = Hi >>s (Shamt-XLEN);
7479 // Hi = Hi >>s (XLEN-1)
7482 // if Shamt-XLEN < 0: // Shamt < XLEN
7483 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7484 // Hi = Hi >>u Shamt
7486 // Lo = Hi >>u (Shamt-XLEN);
7489 unsigned ShiftRightOp
= IsSRA
? ISD::SRA
: ISD::SRL
;
7491 SDValue Zero
= DAG
.getConstant(0, DL
, VT
);
7492 SDValue One
= DAG
.getConstant(1, DL
, VT
);
7493 SDValue MinusXLen
= DAG
.getConstant(-(int)Subtarget
.getXLen(), DL
, VT
);
7494 SDValue XLenMinus1
= DAG
.getConstant(Subtarget
.getXLen() - 1, DL
, VT
);
7495 SDValue ShamtMinusXLen
= DAG
.getNode(ISD::ADD
, DL
, VT
, Shamt
, MinusXLen
);
7496 SDValue XLenMinus1Shamt
= DAG
.getNode(ISD::SUB
, DL
, VT
, XLenMinus1
, Shamt
);
7498 SDValue ShiftRightLo
= DAG
.getNode(ISD::SRL
, DL
, VT
, Lo
, Shamt
);
7499 SDValue ShiftLeftHi1
= DAG
.getNode(ISD::SHL
, DL
, VT
, Hi
, One
);
7500 SDValue ShiftLeftHi
=
7501 DAG
.getNode(ISD::SHL
, DL
, VT
, ShiftLeftHi1
, XLenMinus1Shamt
);
7502 SDValue LoTrue
= DAG
.getNode(ISD::OR
, DL
, VT
, ShiftRightLo
, ShiftLeftHi
);
7503 SDValue HiTrue
= DAG
.getNode(ShiftRightOp
, DL
, VT
, Hi
, Shamt
);
7504 SDValue LoFalse
= DAG
.getNode(ShiftRightOp
, DL
, VT
, Hi
, ShamtMinusXLen
);
7506 IsSRA
? DAG
.getNode(ISD::SRA
, DL
, VT
, Hi
, XLenMinus1
) : Zero
;
7508 SDValue CC
= DAG
.getSetCC(DL
, VT
, ShamtMinusXLen
, Zero
, ISD::SETLT
);
7510 Lo
= DAG
.getNode(ISD::SELECT
, DL
, VT
, CC
, LoTrue
, LoFalse
);
7511 Hi
= DAG
.getNode(ISD::SELECT
, DL
, VT
, CC
, HiTrue
, HiFalse
);
7513 SDValue Parts
[2] = {Lo
, Hi
};
7514 return DAG
.getMergeValues(Parts
, DL
);
7517 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
7518 // legal equivalently-sized i8 type, so we can use that as a go-between.
7519 SDValue
RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op
,
7520 SelectionDAG
&DAG
) const {
7522 MVT VT
= Op
.getSimpleValueType();
7523 SDValue SplatVal
= Op
.getOperand(0);
7524 // All-zeros or all-ones splats are handled specially.
7525 if (ISD::isConstantSplatVectorAllOnes(Op
.getNode())) {
7526 SDValue VL
= getDefaultScalableVLOps(VT
, DL
, DAG
, Subtarget
).second
;
7527 return DAG
.getNode(RISCVISD::VMSET_VL
, DL
, VT
, VL
);
7529 if (ISD::isConstantSplatVectorAllZeros(Op
.getNode())) {
7530 SDValue VL
= getDefaultScalableVLOps(VT
, DL
, DAG
, Subtarget
).second
;
7531 return DAG
.getNode(RISCVISD::VMCLR_VL
, DL
, VT
, VL
);
7533 MVT InterVT
= VT
.changeVectorElementType(MVT::i8
);
7534 SplatVal
= DAG
.getNode(ISD::AND
, DL
, SplatVal
.getValueType(), SplatVal
,
7535 DAG
.getConstant(1, DL
, SplatVal
.getValueType()));
7536 SDValue LHS
= DAG
.getSplatVector(InterVT
, DL
, SplatVal
);
7537 SDValue Zero
= DAG
.getConstant(0, DL
, InterVT
);
7538 return DAG
.getSetCC(DL
, VT
, LHS
, Zero
, ISD::SETNE
);
7541 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7542 // illegal (currently only vXi64 RV32).
7543 // FIXME: We could also catch non-constant sign-extended i32 values and lower
7544 // them to VMV_V_X_VL.
7545 SDValue
RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op
,
7546 SelectionDAG
&DAG
) const {
7548 MVT VecVT
= Op
.getSimpleValueType();
7549 assert(!Subtarget
.is64Bit() && VecVT
.getVectorElementType() == MVT::i64
&&
7550 "Unexpected SPLAT_VECTOR_PARTS lowering");
7552 assert(Op
.getNumOperands() == 2 && "Unexpected number of operands!");
7553 SDValue Lo
= Op
.getOperand(0);
7554 SDValue Hi
= Op
.getOperand(1);
7556 MVT ContainerVT
= VecVT
;
7557 if (VecVT
.isFixedLengthVector())
7558 ContainerVT
= getContainerForFixedLengthVector(VecVT
);
7560 auto VL
= getDefaultVLOps(VecVT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
7563 splatPartsI64WithVL(DL
, ContainerVT
, SDValue(), Lo
, Hi
, VL
, DAG
);
7565 if (VecVT
.isFixedLengthVector())
7566 Res
= convertFromScalableVector(VecVT
, Res
, DAG
, Subtarget
);
7571 // Custom-lower extensions from mask vectors by using a vselect either with 1
7572 // for zero/any-extension or -1 for sign-extension:
7573 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7574 // Note that any-extension is lowered identically to zero-extension.
7575 SDValue
RISCVTargetLowering::lowerVectorMaskExt(SDValue Op
, SelectionDAG
&DAG
,
7576 int64_t ExtTrueVal
) const {
7578 MVT VecVT
= Op
.getSimpleValueType();
7579 SDValue Src
= Op
.getOperand(0);
7580 // Only custom-lower extensions from mask types
7581 assert(Src
.getValueType().isVector() &&
7582 Src
.getValueType().getVectorElementType() == MVT::i1
);
7584 if (VecVT
.isScalableVector()) {
7585 SDValue SplatZero
= DAG
.getConstant(0, DL
, VecVT
);
7586 SDValue SplatTrueVal
= DAG
.getConstant(ExtTrueVal
, DL
, VecVT
);
7587 return DAG
.getNode(ISD::VSELECT
, DL
, VecVT
, Src
, SplatTrueVal
, SplatZero
);
7590 MVT ContainerVT
= getContainerForFixedLengthVector(VecVT
);
7592 MVT::getVectorVT(MVT::i1
, ContainerVT
.getVectorElementCount());
7594 SDValue CC
= convertToScalableVector(I1ContainerVT
, Src
, DAG
, Subtarget
);
7596 SDValue VL
= getDefaultVLOps(VecVT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
7598 MVT XLenVT
= Subtarget
.getXLenVT();
7599 SDValue SplatZero
= DAG
.getConstant(0, DL
, XLenVT
);
7600 SDValue SplatTrueVal
= DAG
.getConstant(ExtTrueVal
, DL
, XLenVT
);
7602 SplatZero
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
7603 DAG
.getUNDEF(ContainerVT
), SplatZero
, VL
);
7604 SplatTrueVal
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
7605 DAG
.getUNDEF(ContainerVT
), SplatTrueVal
, VL
);
7607 DAG
.getNode(RISCVISD::VMERGE_VL
, DL
, ContainerVT
, CC
, SplatTrueVal
,
7608 SplatZero
, DAG
.getUNDEF(ContainerVT
), VL
);
7610 return convertFromScalableVector(VecVT
, Select
, DAG
, Subtarget
);
7613 SDValue
RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7614 SDValue Op
, SelectionDAG
&DAG
, unsigned ExtendOpc
) const {
7615 MVT ExtVT
= Op
.getSimpleValueType();
7616 // Only custom-lower extensions from fixed-length vector types.
7617 if (!ExtVT
.isFixedLengthVector())
7619 MVT VT
= Op
.getOperand(0).getSimpleValueType();
7620 // Grab the canonical container type for the extended type. Infer the smaller
7621 // type from that to ensure the same number of vector elements, as we know
7622 // the LMUL will be sufficient to hold the smaller type.
7623 MVT ContainerExtVT
= getContainerForFixedLengthVector(ExtVT
);
7624 // Get the extended container type manually to ensure the same number of
7625 // vector elements between source and dest.
7626 MVT ContainerVT
= MVT::getVectorVT(VT
.getVectorElementType(),
7627 ContainerExtVT
.getVectorElementCount());
7630 convertToScalableVector(ContainerVT
, Op
.getOperand(0), DAG
, Subtarget
);
7633 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
7635 SDValue Ext
= DAG
.getNode(ExtendOpc
, DL
, ContainerExtVT
, Op1
, Mask
, VL
);
7637 return convertFromScalableVector(ExtVT
, Ext
, DAG
, Subtarget
);
7640 // Custom-lower truncations from vectors to mask vectors by using a mask and a
7642 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7643 SDValue
RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op
,
7644 SelectionDAG
&DAG
) const {
7645 bool IsVPTrunc
= Op
.getOpcode() == ISD::VP_TRUNCATE
;
7647 EVT MaskVT
= Op
.getValueType();
7648 // Only expect to custom-lower truncations to mask types
7649 assert(MaskVT
.isVector() && MaskVT
.getVectorElementType() == MVT::i1
&&
7650 "Unexpected type for vector mask lowering");
7651 SDValue Src
= Op
.getOperand(0);
7652 MVT VecVT
= Src
.getSimpleValueType();
7655 Mask
= Op
.getOperand(1);
7656 VL
= Op
.getOperand(2);
7658 // If this is a fixed vector, we need to convert it to a scalable vector.
7659 MVT ContainerVT
= VecVT
;
7661 if (VecVT
.isFixedLengthVector()) {
7662 ContainerVT
= getContainerForFixedLengthVector(VecVT
);
7663 Src
= convertToScalableVector(ContainerVT
, Src
, DAG
, Subtarget
);
7665 MVT MaskContainerVT
=
7666 getContainerForFixedLengthVector(Mask
.getSimpleValueType());
7667 Mask
= convertToScalableVector(MaskContainerVT
, Mask
, DAG
, Subtarget
);
7672 std::tie(Mask
, VL
) =
7673 getDefaultVLOps(VecVT
, ContainerVT
, DL
, DAG
, Subtarget
);
7676 SDValue SplatOne
= DAG
.getConstant(1, DL
, Subtarget
.getXLenVT());
7677 SDValue SplatZero
= DAG
.getConstant(0, DL
, Subtarget
.getXLenVT());
7679 SplatOne
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
7680 DAG
.getUNDEF(ContainerVT
), SplatOne
, VL
);
7681 SplatZero
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
7682 DAG
.getUNDEF(ContainerVT
), SplatZero
, VL
);
7684 MVT MaskContainerVT
= ContainerVT
.changeVectorElementType(MVT::i1
);
7685 SDValue Trunc
= DAG
.getNode(RISCVISD::AND_VL
, DL
, ContainerVT
, Src
, SplatOne
,
7686 DAG
.getUNDEF(ContainerVT
), Mask
, VL
);
7687 Trunc
= DAG
.getNode(RISCVISD::SETCC_VL
, DL
, MaskContainerVT
,
7688 {Trunc
, SplatZero
, DAG
.getCondCode(ISD::SETNE
),
7689 DAG
.getUNDEF(MaskContainerVT
), Mask
, VL
});
7690 if (MaskVT
.isFixedLengthVector())
7691 Trunc
= convertFromScalableVector(MaskVT
, Trunc
, DAG
, Subtarget
);
7695 SDValue
RISCVTargetLowering::lowerVectorTruncLike(SDValue Op
,
7696 SelectionDAG
&DAG
) const {
7697 bool IsVPTrunc
= Op
.getOpcode() == ISD::VP_TRUNCATE
;
7700 MVT VT
= Op
.getSimpleValueType();
7701 // Only custom-lower vector truncates
7702 assert(VT
.isVector() && "Unexpected type for vector truncate lowering");
7704 // Truncates to mask types are handled differently
7705 if (VT
.getVectorElementType() == MVT::i1
)
7706 return lowerVectorMaskTruncLike(Op
, DAG
);
7708 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7709 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7710 // truncate by one power of two at a time.
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (SrcVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  LLVMContext &Context = *DAG.getContext();
  const ElementCount Count = ContainerVT.getVectorElementCount();
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                         Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       SrcVT.getVectorElementType() == MVT::f16) ||
      (VT.getVectorElementType() == MVT::f16 &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have the same number of results.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
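  // Illustrative sketch of the indirect case: an f64 -> f16 round is split
  // into two narrowing conversions,
  //   vfncvt.rod.f.f.w  (f64 -> f32, round-to-odd)
  //   vfncvt.f.f.w      (f32 -> f16)
  // where the round-to-odd first step avoids double-rounding error.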
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                                     SrcVT.getVectorElementType() != MVT::f16);
  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
// Given a scalable vector type and an index into it, returns the type for the
// smallest subvector that the index fits in. This can be used to reduce LMUL
// for operations like vslidedown.
//
// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
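// As a further illustration (assuming the Zvl128b minimum VLEN of 128 and
// 32-bit elements, so MinVLMAX = 4): indices 0-3 fit in the LMUL1 type,
// indices 4-7 in the LMUL2 type, and indices 8-15 in the LMUL4 type.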
static std::optional<MVT>
getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector());
  const unsigned EltSize = VecVT.getScalarSizeInBits();
  const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
  const unsigned MinVLMAX = VectorBitsMin / EltSize;
  MVT SmallerVT;
  if (MaxIdx < MinVLMAX)
    SmallerVT = getLMUL1VT(VecVT);
  else if (MaxIdx < MinVLMAX * 2)
    SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
  else if (MaxIdx < MinVLMAX * 4)
    SmallerVT = getLMUL1VT(VecVT)
                    .getDoubleNumVectorElementsVT()
                    .getDoubleNumVectorElementsVT();
  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
    return std::nullopt;
  return SmallerVT;
}
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
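// Illustrative only (register names are placeholders), e.g. inserting a
// scalar into element 2 of a v4i32 ends up as roughly:
//   vsetivli    zero, 3, e32, m1, tu, ma   (VL = index + 1, tail undisturbed)
//   vmv.s.x     vTmp, a0
//   vslideup.vi vDest, vTmp, 2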
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  MVT OrigContainerVT = ContainerVT;
  SDValue OrigVec = Vec;
  SDValue AlignedIdx;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
    const unsigned OrigIdx = IdxC->getZExtValue();
    // Do we know an upper bound on LMUL?
    if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
                                              DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      AlignedIdx = DAG.getVectorIdxConstant(0, DL);
    }

    // If we're compiling for an exact VLEN value, we can always perform
    // the insert in m1 as we can determine the register corresponding to
    // the index in the register group.
    const unsigned MinVLen = Subtarget.getRealMinVLen();
    const unsigned MaxVLen = Subtarget.getRealMaxVLen();
    const MVT M1VT = getLMUL1VT(ContainerVT);
    if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) {
      EVT ElemVT = VecVT.getVectorElementType();
      unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
      unsigned RemIdx = OrigIdx % ElemsPerVReg;
      unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
      unsigned ExtractIdx =
          SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
      AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
      Idx = DAG.getVectorIdxConstant(RemIdx, DL);
      ContainerVT = M1VT;
    }

    if (AlignedIdx)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        AlignedIdx);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      if (!VecVT.isFloatingPoint())
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);

      if (AlignedIdx)
        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Vec, AlignedIdx);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1down instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue ValLo, ValHi;
    std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // If the Idx is 0 we can insert directly into the vector.
    if (isNullConstant(Idx)) {
      // First slide in the lo value, then the hi in above it. We use slide1down
      // to avoid the register group overlap constraint of vslide1up.
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
      // If the source vector is undef don't pass along the tail elements from
      // the previous slide1down.
      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
      // Bitcast back to the right container type.
      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);

      if (AlignedIdx)
        ValInVec =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                        ValInVec, AlignedIdx);
      if (!VecVT.isFixedLengthVector())
        return ValInVec;
      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
    }

    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT),
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));

  // Use tail agnostic policy if Idx is the last index of Vec.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
      Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
    Policy = RISCVII::TAIL_AGNOSTIC;
  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                Idx, Mask, InsertVL, Policy);

  if (AlignedIdx)
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Slideup, AlignedIdx);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
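// Illustrative only (register names are placeholders), e.g. extracting
// element 2 of a v4i32 becomes roughly:
//   vsetivli      zero, 1, e32, m1, ta, ma
//   vslidedown.vi vTmp, vSrc, 2
//   vmv.x.s       a0, vTmp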
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // Use vfirst.m to extract the first bit.
    if (isNullConstant(Idx)) {
      MVT ContainerVT = VecVT;
      if (VecVT.isFixedLengthVector()) {
        ContainerVT = getContainerForFixedLengthVector(VecVT);
        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      }
      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
      SDValue Vfirst =
          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
    }
    if (VecVT.isFixedLengthVector()) {
      unsigned NumElts = VecVT.getVectorNumElements();
      if (NumElts >= 8) {
        MVT WideEltVT;
        unsigned WidenVecLen;
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
        unsigned MaxEEW = Subtarget.getELen();
        MVT LargestEltVT = MVT::getIntegerVT(
            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
        if (NumElts <= LargestEltVT.getSizeInBits()) {
          assert(isPowerOf2_32(NumElts) &&
                 "the number of elements should be power of 2");
          WideEltVT = MVT::getIntegerVT(NumElts);
          WidenVecLen = 1;
          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
          ExtractBitIdx = Idx;
        } else {
          WideEltVT = LargestEltVT;
          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
          // extract element index = index / element width
          ExtractElementIdx = DAG.getNode(
              ISD::SRL, DL, XLenVT, Idx,
              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
          // mask bit index = index % element width
          ExtractBitIdx = DAG.getNode(
              ISD::AND, DL, XLenVT, Idx,
              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
        }
        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
                                         Vec, ExtractElementIdx);
        // Extract the bit from GPR.
        SDValue ShiftRight =
            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
                                  DAG.getConstant(1, DL, XLenVT));
        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
      }
    }
    // Otherwise, promote to an i8 vector and extract from that.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If we're compiling for an exact VLEN value and we have a known
  // constant index, we can always perform the extract in m1 (or
  // smaller) as we can determine the register corresponding to
  // the index in the register group.
  const unsigned MinVLen = Subtarget.getRealMinVLen();
  const unsigned MaxVLen = Subtarget.getRealMaxVLen();
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
      IdxC && MinVLen == MaxVLen &&
      VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
    MVT M1VT = getLMUL1VT(ContainerVT);
    unsigned OrigIdx = IdxC->getZExtValue();
    EVT ElemVT = VecVT.getVectorElementType();
    unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
    unsigned RemIdx = OrigIdx % ElemsPerVReg;
    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
    unsigned ExtractIdx =
        SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
                      DAG.getVectorIdxConstant(ExtractIdx, DL));
    Idx = DAG.getVectorIdxConstant(RemIdx, DL);
    ContainerVT = M1VT;
  }

  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
  // contains our index.
  std::optional<uint64_t> MaxIdx;
  if (VecVT.isFixedLengthVector())
    MaxIdx = VecVT.getVectorNumElements() - 1;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
    MaxIdx = IdxC->getZExtValue();
  if (MaxIdx) {
    if (auto SmallerVT =
            getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
      ContainerVT = *SmallerVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    }
  }

  // If after narrowing, the required slide is still greater than LMUL2,
  // fallback to generic expansion and go through the stack. This is done
  // for a subtle reason: extracting *all* elements out of a vector is
  // widely expected to be linear in vector size, but because vslidedown
  // is linear in LMUL, performing N extracts using vslidedown becomes
  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
  // seems to have the same problem (the store is linear in LMUL), but the
  // generic expansion *memoizes* the store, and thus for many extracts of
  // the same vector we end up with one store and a bunch of loads.
  // TODO: We don't have the same code for insert_vector_elt because we
  // have BUILD_VECTOR and handle the degenerate case there. Should we
  // consider adding an inverse BUILD_VECTOR node?
  MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
  if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
    return SDValue();

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
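// For example (illustrative): an i8 scalar operand of an llvm.riscv.vadd.vx
// intrinsic is not XLenVT, so it is any/sign-extended to XLen here, while an
// i64 scalar on RV32 instead takes the splat or vslide1up/vslide1down paths
// below.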
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);

  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
    SDValue ScalarLo, ScalarHi;
    std::tie(ScalarLo, ScalarHi) =
        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);

    // Double the VL since we halved SEW.
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      const auto [MinVLMAX, MaxVLMAX] =
          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);

      uint64_t AVLInt = AVL->getAsZExtVal();
      if (AVLInt <= MinVLMAX) {
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So let the following code handle it.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
      // Use a vsetvli instruction to get the actually used length, which is
      // related to the hardware implementation.
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume Policy operand is the last operand.
    uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                         DAG.getUNDEF(VT), AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care about mask policy.
    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       MaskedOff, AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
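// A worked example (illustrative): for a scalable VF of 2, LMul1VF is
// RVVBitsPerBlock / 8 = 8, so the LMUL is the fractional 8/2 = mf4; with SEW=8
// that gives VLMax = VLEN/32 = vscale * 2, and the lowering is essentially a
// single vsetvli with SEW=8 and LMUL=mf4 on the zero-extended AVL.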
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();

  unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");
  (void)MinVF;

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
  unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);

  SDLoc DL(N);

  SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
  SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);

  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));

  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
  SDValue Res =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
                            SmallVector<SDValue> &Ops) {
  SDLoc DL(Op);

  const RISCVSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
  for (const SDValue &V : Op->op_values()) {
    EVT ValType = V.getValueType();
    if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
      MVT InterimIVT =
          MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
                           ValType.getVectorElementCount());
      Ops.push_back(DAG.getBitcast(InterimIVT, V));
    } else if (ValType.isFixedLengthVector()) {
      MVT OpContainerVT = getContainerForFixedLengthVector(
          DAG, V.getSimpleValueType(), Subtarget);
      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
    } else
      Ops.push_back(V);
  }
}
// LMUL * VLEN should be greater than or equal to EGS * SEW
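// For example (illustrative): with a minimum VLEN of 128, an LMUL=1/2 vector
// of i32 gives 128 * 1/2 = 64 bits, which fails the EGS=4 check because
// 4 * 32 = 128 > 64, whereas the same check passes at LMUL=1.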
static inline bool isValidEGW(int EGS, EVT VT,
                              const RISCVSubtarget &Subtarget) {
  return (Subtarget.getRealMinVLen() *
          VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
         EGS * VT.getScalarSizeInBits();
}
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
  case Intrinsic::riscv_brev8:
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    unsigned Opc;
    switch (IntNo) {
    case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
    case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
    case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
    case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
    case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
    case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
    case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
    case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
    }

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::riscv_zip:
  case Intrinsic::riscv_unzip: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_clmul:
    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }
    return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::riscv_clmulh:
  case Intrinsic::riscv_clmulr: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
                           DAG.getConstant(32, DL, MVT::i64));
      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
                           DAG.getConstant(32, DL, MVT::i64));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
                        DAG.getConstant(32, DL, MVT::i64));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::experimental_get_vector_length:
    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
  case Intrinsic::riscv_vmv_x_s: {
    SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
  }
  case Intrinsic::riscv_vfmv_f_s:
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                       Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
  case Intrinsic::riscv_vmv_v_x:
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
                            Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   sw lo, (a0)
    //   sw hi, 4(a0)
    //   vlse vX, (a0)
    //
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = getVLOperand(Op);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
    if (Op.getOperand(1).isUndef())
      return SplattedVal;
    SDValue SplattedIdx =
        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                    DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = getMaskTypeFor(VT);
    SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                    {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
                     DAG.getUNDEF(MaskVT), Mask, VL});
    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, DAG.getUNDEF(VT), VL);
  }
  case Intrinsic::riscv_vfmv_s_f:
    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  // EGS * EEW >= 128 bits
  case Intrinsic::riscv_vaesdf_vv:
  case Intrinsic::riscv_vaesdf_vs:
  case Intrinsic::riscv_vaesdm_vv:
  case Intrinsic::riscv_vaesdm_vs:
  case Intrinsic::riscv_vaesef_vv:
  case Intrinsic::riscv_vaesef_vs:
  case Intrinsic::riscv_vaesem_vv:
  case Intrinsic::riscv_vaesem_vs:
  case Intrinsic::riscv_vaeskf1:
  case Intrinsic::riscv_vaeskf2:
  case Intrinsic::riscv_vaesz_vs:
  case Intrinsic::riscv_vsm4k:
  case Intrinsic::riscv_vsm4r_vv:
  case Intrinsic::riscv_vsm4r_vs: {
    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
    return Op;
  }
  // EGS * EEW >= 256 bits
  case Intrinsic::riscv_vsm3c:
  case Intrinsic::riscv_vsm3me: {
    if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
    return Op;
  }
  // zvknha(SEW=32)/zvknhb(SEW=[32|64])
  case Intrinsic::riscv_vsha2ch:
  case Intrinsic::riscv_vsha2cl:
  case Intrinsic::riscv_vsha2ms: {
    if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
        !Subtarget.hasStdExtZvknhb())
      report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
      report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
    return Op;
  }
  case Intrinsic::riscv_sf_vc_v_x:
  case Intrinsic::riscv_sf_vc_v_i:
  case Intrinsic::riscv_sf_vc_v_xv:
  case Intrinsic::riscv_sf_vc_v_iv:
  case Intrinsic::riscv_sf_vc_v_vv:
  case Intrinsic::riscv_sf_vc_v_fv:
  case Intrinsic::riscv_sf_vc_v_xvv:
  case Intrinsic::riscv_sf_vc_v_ivv:
  case Intrinsic::riscv_sf_vc_v_vvv:
  case Intrinsic::riscv_sf_vc_v_fvv:
  case Intrinsic::riscv_sf_vc_v_xvw:
  case Intrinsic::riscv_sf_vc_v_ivw:
  case Intrinsic::riscv_sf_vc_v_vvw:
  case Intrinsic::riscv_sf_vc_v_fvw: {
    MVT VT = Op.getSimpleValueType();

    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    MVT RetVT = VT;
    if (VT.isFixedLengthVector())
      RetVT = getContainerForFixedLengthVector(VT);
    else if (VT.isFloatingPoint())
      RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                               VT.getVectorElementCount());

    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);

    if (VT.isFixedLengthVector())
      NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
    else if (VT.isFloatingPoint())
      NewNode = DAG.getBitcast(VT, NewNode);

    if (Op == NewNode)
      break;

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_load: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector()) {
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
        PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
      }
    }

    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    SDValue Ptr = Op.getOperand(3);
    SDValue Stride = Op.getOperand(4);
    SDValue Result, Chain;

    // TODO: We restrict this to unmasked loads currently in consideration of
    // the complexity of handling all-false masks.
    if (IsUnmasked && isNullConstant(Stride)) {
      MVT ScalarVT = ContainerVT.getVectorElementType();
      SDValue ScalarLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
                         ScalarVT, Load->getMemOperand());
      Chain = ScalarLoad.getValue(1);
      Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
                                Subtarget);
    } else {
      SDValue IntID = DAG.getTargetConstant(
          IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
          XLenVT);

      SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
      if (IsUnmasked)
        Ops.push_back(DAG.getUNDEF(ContainerVT));
      else
        Ops.push_back(PassThru);
      Ops.push_back(Ptr);
      Ops.push_back(Stride);
      if (!IsUnmasked)
        Ops.push_back(Mask);
      Ops.push_back(VL);
      if (!IsUnmasked) {
        SDValue Policy =
            DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
        Ops.push_back(Policy);
      }

      SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
      Result =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                  Load->getMemoryVT(), Load->getMemOperand());
      Chain = Result.getValue(1);
    }
    if (VT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
    SDLoc DL(Op);
    static const Intrinsic::ID VlsegInts[7] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
    ContainerVTs.push_back(MVT::Other);
    SDVTList VTs = DAG.getVTList(ContainerVTs);
    SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
    Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
    Ops.push_back(Op.getOperand(2));
    Ops.push_back(VL);
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    SmallVector<SDValue, 9> Results;
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
      Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
                                                  DAG, Subtarget));
    Results.push_back(Result.getValue(NF));
    return DAG.getMergeValues(Results, DL);
  }
  case Intrinsic::riscv_sf_vc_v_x_se:
  case Intrinsic::riscv_sf_vc_v_i_se:
  case Intrinsic::riscv_sf_vc_v_xv_se:
  case Intrinsic::riscv_sf_vc_v_iv_se:
  case Intrinsic::riscv_sf_vc_v_vv_se:
  case Intrinsic::riscv_sf_vc_v_fv_se:
  case Intrinsic::riscv_sf_vc_v_xvv_se:
  case Intrinsic::riscv_sf_vc_v_ivv_se:
  case Intrinsic::riscv_sf_vc_v_vvv_se:
  case Intrinsic::riscv_sf_vc_v_fvv_se:
  case Intrinsic::riscv_sf_vc_v_xvw_se:
  case Intrinsic::riscv_sf_vc_v_ivw_se:
  case Intrinsic::riscv_sf_vc_v_vvw_se:
  case Intrinsic::riscv_sf_vc_v_fvw_se: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    MVT RetVT = VT;
    if (VT.isFixedLengthVector())
      RetVT = getContainerForFixedLengthVector(VT);
    else if (VT.isFloatingPoint())
      RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
                               RetVT.getVectorElementCount());

    SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);

    if (VT.isFixedLengthVector()) {
      SDValue FixedVector =
          convertFromScalableVector(VT, NewNode, DAG, Subtarget);
      NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
    } else if (VT.isFloatingPoint()) {
      SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
      NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
    }

    if (Op == NewNode)
      break;

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_store: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    SDValue Val = Op.getOperand(2);
    MVT VT = Val.getSimpleValueType();
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    }
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector())
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
        XLenVT);

    auto *Store = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
    Ops.push_back(Val);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);

    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
                                   Ops, Store->getMemoryVT(),
                                   Store->getMemOperand());
  }
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store: {
    SDLoc DL(Op);
    static const Intrinsic::ID VssegInts[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};
    // Operands are (chain, int_id, vec*, ptr, vl)
    unsigned NF = Op->getNumOperands() - 4;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getOperand(2).getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
    SDValue Ptr = Op->getOperand(NF + 2);

    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
    for (unsigned i = 0; i < NF; i++)
      Ops.push_back(convertToScalableVector(
          ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
    Ops.append({Ptr, VL});

    return DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
  }
  case Intrinsic::riscv_sf_vc_x_se_e8mf8:
  case Intrinsic::riscv_sf_vc_x_se_e8mf4:
  case Intrinsic::riscv_sf_vc_x_se_e8mf2:
  case Intrinsic::riscv_sf_vc_x_se_e8m1:
  case Intrinsic::riscv_sf_vc_x_se_e8m2:
  case Intrinsic::riscv_sf_vc_x_se_e8m4:
  case Intrinsic::riscv_sf_vc_x_se_e8m8:
  case Intrinsic::riscv_sf_vc_x_se_e16mf4:
  case Intrinsic::riscv_sf_vc_x_se_e16mf2:
  case Intrinsic::riscv_sf_vc_x_se_e16m1:
  case Intrinsic::riscv_sf_vc_x_se_e16m2:
  case Intrinsic::riscv_sf_vc_x_se_e16m4:
  case Intrinsic::riscv_sf_vc_x_se_e16m8:
  case Intrinsic::riscv_sf_vc_x_se_e32mf2:
  case Intrinsic::riscv_sf_vc_x_se_e32m1:
  case Intrinsic::riscv_sf_vc_x_se_e32m2:
  case Intrinsic::riscv_sf_vc_x_se_e32m4:
  case Intrinsic::riscv_sf_vc_x_se_e32m8:
  case Intrinsic::riscv_sf_vc_x_se_e64m1:
  case Intrinsic::riscv_sf_vc_x_se_e64m2:
  case Intrinsic::riscv_sf_vc_x_se_e64m4:
  case Intrinsic::riscv_sf_vc_x_se_e64m8:
  case Intrinsic::riscv_sf_vc_i_se_e8mf8:
  case Intrinsic::riscv_sf_vc_i_se_e8mf4:
  case Intrinsic::riscv_sf_vc_i_se_e8mf2:
  case Intrinsic::riscv_sf_vc_i_se_e8m1:
  case Intrinsic::riscv_sf_vc_i_se_e8m2:
  case Intrinsic::riscv_sf_vc_i_se_e8m4:
  case Intrinsic::riscv_sf_vc_i_se_e8m8:
  case Intrinsic::riscv_sf_vc_i_se_e16mf4:
  case Intrinsic::riscv_sf_vc_i_se_e16mf2:
  case Intrinsic::riscv_sf_vc_i_se_e16m1:
  case Intrinsic::riscv_sf_vc_i_se_e16m2:
  case Intrinsic::riscv_sf_vc_i_se_e16m4:
  case Intrinsic::riscv_sf_vc_i_se_e16m8:
  case Intrinsic::riscv_sf_vc_i_se_e32mf2:
  case Intrinsic::riscv_sf_vc_i_se_e32m1:
  case Intrinsic::riscv_sf_vc_i_se_e32m2:
  case Intrinsic::riscv_sf_vc_i_se_e32m4:
  case Intrinsic::riscv_sf_vc_i_se_e32m8:
  case Intrinsic::riscv_sf_vc_i_se_e64m1:
  case Intrinsic::riscv_sf_vc_i_se_e64m2:
  case Intrinsic::riscv_sf_vc_i_se_e64m4:
  case Intrinsic::riscv_sf_vc_i_se_e64m8:
  case Intrinsic::riscv_sf_vc_xv_se:
  case Intrinsic::riscv_sf_vc_iv_se:
  case Intrinsic::riscv_sf_vc_vv_se:
  case Intrinsic::riscv_sf_vc_fv_se:
  case Intrinsic::riscv_sf_vc_xvv_se:
  case Intrinsic::riscv_sf_vc_ivv_se:
  case Intrinsic::riscv_sf_vc_vvv_se:
  case Intrinsic::riscv_sf_vc_fvv_se:
  case Intrinsic::riscv_sf_vc_xvw_se:
  case Intrinsic::riscv_sf_vc_ivw_se:
  case Intrinsic::riscv_sf_vc_vvw_se:
  case Intrinsic::riscv_sf_vc_fvw_se: {
    SmallVector<SDValue> Ops;
    getVCIXOperands(Op, DAG, Ops);

    SDValue NewNode =
        DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);

    if (Op == NewNode)
      break;

    return NewNode;
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VP_REDUCE_ADD:
  case ISD::VECREDUCE_ADD:
    return RISCVISD::VECREDUCE_ADD_VL;
  case ISD::VP_REDUCE_UMAX:
  case ISD::VECREDUCE_UMAX:
    return RISCVISD::VECREDUCE_UMAX_VL;
  case ISD::VP_REDUCE_SMAX:
  case ISD::VECREDUCE_SMAX:
    return RISCVISD::VECREDUCE_SMAX_VL;
  case ISD::VP_REDUCE_UMIN:
  case ISD::VECREDUCE_UMIN:
    return RISCVISD::VECREDUCE_UMIN_VL;
  case ISD::VP_REDUCE_SMIN:
  case ISD::VECREDUCE_SMIN:
    return RISCVISD::VECREDUCE_SMIN_VL;
  case ISD::VP_REDUCE_AND:
  case ISD::VECREDUCE_AND:
    return RISCVISD::VECREDUCE_AND_VL;
  case ISD::VP_REDUCE_OR:
  case ISD::VECREDUCE_OR:
    return RISCVISD::VECREDUCE_OR_VL;
  case ISD::VP_REDUCE_XOR:
  case ISD::VECREDUCE_XOR:
    return RISCVISD::VECREDUCE_XOR_VL;
  case ISD::VP_REDUCE_FADD:
    return RISCVISD::VECREDUCE_FADD_VL;
  case ISD::VP_REDUCE_SEQ_FADD:
    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
  case ISD::VP_REDUCE_FMAX:
    return RISCVISD::VECREDUCE_FMAX_VL;
  case ISD::VP_REDUCE_FMIN:
    return RISCVISD::VECREDUCE_FMIN_VL;
  }
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  unsigned BaseOpc;
  ISD::CondCode CC;
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    BaseOpc = ISD::AND;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    BaseOpc = ISD::OR;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    BaseOpc = ISD::XOR;
    break;
  }
  }

  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
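  // Illustrative only: a vecreduce_or over a mask therefore becomes roughly
  //   vcpop.m a0, v0
  //   snez    a0, a0
  // while vecreduce_and inverts the mask first and tests vcpop for zero.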
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
static bool isNonZeroAVL(SDValue AVL) {
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
}
/// Helper to lower a reduction sequence of the form:
///   scalar = reduce_op vec, scalar_start
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
                                 SDValue StartValue, SDValue Vec, SDValue Mask,
                                 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  const MVT VecVT = Vec.getSimpleValueType();
  const MVT M1VT = getLMUL1VT(VecVT);
  const MVT XLenVT = Subtarget.getXLenVT();
  const bool NonZeroAVL = isNonZeroAVL(VL);

  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
                                           DAG, Subtarget);
  if (M1VT != InnerVT)
    InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
                               DAG.getUNDEF(M1VT),
                               InitialValue, DAG.getConstant(0, DL, XLenVT));
  SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
  SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
}
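
// Lower an integer VECREDUCE_* node by splitting illegal types down to a
// legal vector, converting to an RVV container type, and emitting the
// matching RISCVISD::VECREDUCE_*_VL reduction via lowerReductionSeq.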
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  switch (BaseOpc) {
  case ISD::AND:
  case ISD::OR:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
    MVT XLenVT = Subtarget.getXLenVT();
    StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
                         DAG.getConstant(0, DL, XLenVT));
  }
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
                           Mask, VL, DL, DAG, Subtarget);
}
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
                               const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_FADD: {
    // Use positive zero if we can. It is cheaper to materialize.
    SDValue Zero =
        DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
  }
  case ISD::VECREDUCE_SEQ_FADD:
    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                           Op.getOperand(0));
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX: {
    MVT XLenVT = Subtarget.getXLenVT();
    SDValue Front =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
                    DAG.getConstant(0, DL, XLenVT));
    unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
                          ? RISCVISD::VECREDUCE_FMIN_VL
                          : RISCVISD::VECREDUCE_FMAX_VL;
    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
  }
  }
}
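
// Lower a floating-point VECREDUCE_* node by selecting the matching RISCVISD
// reduction opcode and start value, then emitting the reduction sequence on
// the RVV container type.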
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
  MVT VecVT = VectorVal.getSimpleValueType();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
                           VectorVal, Mask, VL, DL, DAG, Subtarget);
}
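
// Lower a VP reduction by reusing lowerReductionSeq with the explicit mask
// and EVL operands of the VP node.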
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                           Vec, Mask, VL, DL, DAG, Subtarget);
}
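
// Lower INSERT_SUBVECTOR either through subregister manipulation (when the
// insertion point aligns to a vector register) or by sliding the subvector
// into place with vslideup/vmv.v.v.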
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), SubVec,
                           DAG.getConstant(0, DL, XLenVT));
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }

    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
    SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);

    // Use tail agnostic policy if we're inserting over Vec's tail.
    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
      Policy = RISCVII::TAIL_AGNOSTIC;

    // If we're inserting into the lowest elements, use a tail undisturbed
    // vmv.v.v instead of a vslideup.
    if (OrigIdx == 0) {
      SubVec =
          DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
    } else {
      SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
      SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                           SlideupAmt, Mask, VL, Policy);
    }

    if (VecVT.isFixedLengthVector())
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), SubVec);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  VL = computeVLMax(SubVecVT, DL, DAG);

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v instead of a vslideup.
  if (RemIdx == 0) {
    SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
                         SubVec, VL);
  } else {
    SDValue SlideupAmt =
        DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

    // Construct the vector length corresponding to RemIdx + length(SubVecVT).
    VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

    SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
                         SlideupAmt, Mask, VL);
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
                         DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
}
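
// Lower EXTRACT_SUBVECTOR either through subregister manipulation (when the
// extraction point aligns to a vector register) or by sliding the desired
// elements down to element 0 with vslidedown.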
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (OrigIdx == 0)
    return Op;

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
    if (auto ShrunkVT =
            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }

    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else SubVecVT is a fractional LMUL and may need to be slid down.
  assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(SubRegIdx != RISCV::NoSubRegister);
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt =
      DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
                    Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1. All operand and result types must be the same.
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
                                  SelectionDAG &DAG) {
  MVT VT = N.getSimpleValueType();
  MVT WideVT = VT.changeVectorElementType(MVT::i8);
  SmallVector<SDValue, 4> WideOps;
  for (SDValue Op : N->ops()) {
    assert(Op.getSimpleValueType() == VT &&
           "Operands and result must be same type");
    WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
  }

  unsigned NumVals = N->getNumValues();

  SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
  SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
  SmallVector<SDValue, 4> TruncVals;
  for (unsigned I = 0; I < NumVals; I++) {
    TruncVals.push_back(
        DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
                     DAG.getConstant(0, DL, WideVT), ISD::SETNE));
  }

  if (TruncVals.size() > 1)
    return DAG.getMergeValues(TruncVals, DL);
  return TruncVals.front();
}
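
// Lower VECTOR_DEINTERLEAVE by concatenating the operands and separating the
// even/odd elements, using vnsrl when the element type is narrower than ELEN
// and vrgather otherwise; LMUL=8 inputs are split and reassembled first.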
SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // 1 bit element vectors need to be widened to e8
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() ==
      (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);

    SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                               ResLo.getValue(0), ResHi.getValue(0));
    SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
                              ResHi.getValue(1));
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // Concatenate the two vectors as one vector to deinterleave
  MVT ConcatVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                               Op.getOperand(0), Op.getOperand(1));

  // We want to operate on all lanes, so get the mask and VL for it
  auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(ConcatVT);

  // We can deinterleave through vnsrl.wi if the element type is smaller than
  // ELEN
  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
    SDValue Even =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
    SDValue Odd =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // For the indices, use the same SEW to avoid an extra vsetvli
  MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
  // Create a vector of even indices {0, 2, 4, ...}
  SDValue EvenIdx =
      DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
  // Create a vector of odd indices {1, 3, 5, ... }
  SDValue OddIdx =
      DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));

  // Gather the even and odd elements into two separate vectors
  SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                 Concat, EvenIdx, Passthru, Mask, VL);
  SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                Concat, OddIdx, Passthru, Mask, VL);

  // Extract the result half of the gather for even and odd
  SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
                             DAG.getConstant(0, DL, XLenVT));
  SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
                            DAG.getConstant(0, DL, XLenVT));

  return DAG.getMergeValues({Even, Odd}, DL);
}
SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // i1 vectors need to be widened to i8
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);

    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResLo.getValue(0), ResLo.getValue(1));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResHi.getValue(0), ResHi.getValue(1));
    return DAG.getMergeValues({Lo, Hi}, DL);
  }

  SDValue Interleaved;

  // If the element type is smaller than ELEN, then we can interleave with
  // vwaddu.vv and vwmaccu.vx
  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
    Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
                                        DAG, Subtarget);
  } else {
    // Otherwise, fallback to using vrgatherei16.vv
    MVT ConcatVT =
        MVT::getVectorVT(VecVT.getVectorElementType(),
                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                                 Op.getOperand(0), Op.getOperand(1));

    MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);

    // 0 1 2 3 4 5 6 7 ...
    SDValue StepVec = DAG.getStepVector(DL, IdxVT);

    // 1 1 1 1 1 1 1 1 ...
    SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));

    // 1 0 1 0 1 0 1 0 ...
    SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
    OddMask = DAG.getSetCC(
        DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
        ISD::CondCode::SETNE);

    SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));

    // Build up the index vector for interleaving the concatenated vector
    // 0 0 1 1 2 2 3 3 ...
    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
    // 0 n 1 n+1 2 n+2 3 n+3 ...
    Idx =
        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);

    // Then perform the interleave
    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
  }

  // Extract the two halves from the interleaved result
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));

  return DAG.getMergeValues({Lo, Hi}, DL);
}
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  assert(VT.isScalableVector() && "Expected scalable vector");
  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    if (isPowerOf2_64(StepValImm)) {
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      SDValue StepVal = lowerScalarSplat(
          SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
          VL, VT, DL, DAG, Subtarget);
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}
9877 // Implement vector_reverse using vrgather.vv with indices determined by
9878 // subtracting the id of each element from (VLMAX-1). This will convert
9879 // the indices like so:
9880 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
9881 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
9882 SDValue
RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op
,
9883 SelectionDAG
&DAG
) const {
9885 MVT VecVT
= Op
.getSimpleValueType();
9886 if (VecVT
.getVectorElementType() == MVT::i1
) {
9887 MVT WidenVT
= MVT::getVectorVT(MVT::i8
, VecVT
.getVectorElementCount());
9888 SDValue Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, WidenVT
, Op
.getOperand(0));
9889 SDValue Op2
= DAG
.getNode(ISD::VECTOR_REVERSE
, DL
, WidenVT
, Op1
);
9890 return DAG
.getNode(ISD::TRUNCATE
, DL
, VecVT
, Op2
);
9892 unsigned EltSize
= VecVT
.getScalarSizeInBits();
9893 unsigned MinSize
= VecVT
.getSizeInBits().getKnownMinValue();
9894 unsigned VectorBitsMax
= Subtarget
.getRealMaxVLen();
9896 RISCVTargetLowering::computeVLMAX(VectorBitsMax
, EltSize
, MinSize
);
9898 unsigned GatherOpc
= RISCVISD::VRGATHER_VV_VL
;
9899 MVT IntVT
= VecVT
.changeVectorElementTypeToInteger();
9901 // If this is SEW=8 and VLMAX is potentially more than 256, we need
9902 // to use vrgatherei16.vv.
9903 // TODO: It's also possible to use vrgatherei16.vv for other types to
9904 // decrease register width for the index calculation.
9905 if (MaxVLMAX
> 256 && EltSize
== 8) {
9906 // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
9907 // Reverse each half, then reassemble them in reverse order.
9908 // NOTE: It's also possible that after splitting that VLMAX no longer
9909 // requires vrgatherei16.vv.
9910 if (MinSize
== (8 * RISCV::RVVBitsPerBlock
)) {
9911 auto [Lo
, Hi
] = DAG
.SplitVectorOperand(Op
.getNode(), 0);
9912 auto [LoVT
, HiVT
] = DAG
.GetSplitDestVTs(VecVT
);
9913 Lo
= DAG
.getNode(ISD::VECTOR_REVERSE
, DL
, LoVT
, Lo
);
9914 Hi
= DAG
.getNode(ISD::VECTOR_REVERSE
, DL
, HiVT
, Hi
);
9915 // Reassemble the low and high pieces reversed.
9916 // FIXME: This is a CONCAT_VECTORS.
9918 DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, VecVT
, DAG
.getUNDEF(VecVT
), Hi
,
9919 DAG
.getIntPtrConstant(0, DL
));
9921 ISD::INSERT_SUBVECTOR
, DL
, VecVT
, Res
, Lo
,
9922 DAG
.getIntPtrConstant(LoVT
.getVectorMinNumElements(), DL
));
9925 // Just promote the int type to i16 which will double the LMUL.
9926 IntVT
= MVT::getVectorVT(MVT::i16
, VecVT
.getVectorElementCount());
9927 GatherOpc
= RISCVISD::VRGATHEREI16_VV_VL
;
9930 MVT XLenVT
= Subtarget
.getXLenVT();
9931 auto [Mask
, VL
] = getDefaultScalableVLOps(VecVT
, DL
, DAG
, Subtarget
);
9933 // Calculate VLMAX-1 for the desired SEW.
9934 SDValue VLMinus1
= DAG
.getNode(ISD::SUB
, DL
, XLenVT
,
9935 computeVLMax(VecVT
, DL
, DAG
),
9936 DAG
.getConstant(1, DL
, XLenVT
));
9938 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
9940 !Subtarget
.is64Bit() && IntVT
.getVectorElementType() == MVT::i64
;
9943 SplatVL
= DAG
.getSplatVector(IntVT
, DL
, VLMinus1
);
9945 SplatVL
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, IntVT
, DAG
.getUNDEF(IntVT
),
9946 VLMinus1
, DAG
.getRegister(RISCV::X0
, XLenVT
));
9948 SDValue VID
= DAG
.getNode(RISCVISD::VID_VL
, DL
, IntVT
, Mask
, VL
);
9949 SDValue Indices
= DAG
.getNode(RISCVISD::SUB_VL
, DL
, IntVT
, SplatVL
, VID
,
9950 DAG
.getUNDEF(IntVT
), Mask
, VL
);
9952 return DAG
.getNode(GatherOpc
, DL
, VecVT
, Op
.getOperand(0), Indices
,
9953 DAG
.getUNDEF(VecVT
), Mask
, VL
);
9956 SDValue
RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op
,
9957 SelectionDAG
&DAG
) const {
9959 SDValue V1
= Op
.getOperand(0);
9960 SDValue V2
= Op
.getOperand(1);
9961 MVT XLenVT
= Subtarget
.getXLenVT();
9962 MVT VecVT
= Op
.getSimpleValueType();
9964 SDValue VLMax
= computeVLMax(VecVT
, DL
, DAG
);
9966 int64_t ImmValue
= cast
<ConstantSDNode
>(Op
.getOperand(2))->getSExtValue();
9967 SDValue DownOffset
, UpOffset
;
9968 if (ImmValue
>= 0) {
9969 // The operand is a TargetConstant, we need to rebuild it as a regular
9971 DownOffset
= DAG
.getConstant(ImmValue
, DL
, XLenVT
);
9972 UpOffset
= DAG
.getNode(ISD::SUB
, DL
, XLenVT
, VLMax
, DownOffset
);
9974 // The operand is a TargetConstant, we need to rebuild it as a regular
9975 // constant rather than negating the original operand.
9976 UpOffset
= DAG
.getConstant(-ImmValue
, DL
, XLenVT
);
9977 DownOffset
= DAG
.getNode(ISD::SUB
, DL
, XLenVT
, VLMax
, UpOffset
);
9980 SDValue TrueMask
= getAllOnesMask(VecVT
, VLMax
, DL
, DAG
);
9983 getVSlidedown(DAG
, Subtarget
, DL
, VecVT
, DAG
.getUNDEF(VecVT
), V1
,
9984 DownOffset
, TrueMask
, UpOffset
);
9985 return getVSlideup(DAG
, Subtarget
, DL
, VecVT
, SlideDown
, V2
, UpOffset
,
9986 TrueMask
, DAG
.getRegister(RISCV::X0
, XLenVT
),
9987 RISCVII::TAIL_AGNOSTIC
);
9991 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op
,
9992 SelectionDAG
&DAG
) const {
9994 auto *Load
= cast
<LoadSDNode
>(Op
);
9996 assert(allowsMemoryAccessForAlignment(*DAG
.getContext(), DAG
.getDataLayout(),
9997 Load
->getMemoryVT(),
9998 *Load
->getMemOperand()) &&
9999 "Expecting a correctly-aligned load");
10001 MVT VT
= Op
.getSimpleValueType();
10002 MVT XLenVT
= Subtarget
.getXLenVT();
10003 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
10005 // If we know the exact VLEN and our fixed length vector completely fills
10006 // the container, use a whole register load instead.
10007 const auto [MinVLMAX
, MaxVLMAX
] =
10008 RISCVTargetLowering::computeVLMAXBounds(ContainerVT
, Subtarget
);
10009 if (MinVLMAX
== MaxVLMAX
&& MinVLMAX
== VT
.getVectorNumElements() &&
10010 getLMUL1VT(ContainerVT
).bitsLE(ContainerVT
)) {
10012 DAG
.getLoad(ContainerVT
, DL
, Load
->getChain(), Load
->getBasePtr(),
10013 Load
->getMemOperand());
10014 SDValue Result
= convertFromScalableVector(VT
, NewLoad
, DAG
, Subtarget
);
10015 return DAG
.getMergeValues({Result
, NewLoad
.getValue(1)}, DL
);
10018 SDValue VL
= getVLOp(VT
.getVectorNumElements(), ContainerVT
, DL
, DAG
, Subtarget
);
10020 bool IsMaskOp
= VT
.getVectorElementType() == MVT::i1
;
10021 SDValue IntID
= DAG
.getTargetConstant(
10022 IsMaskOp
? Intrinsic::riscv_vlm
: Intrinsic::riscv_vle
, DL
, XLenVT
);
10023 SmallVector
<SDValue
, 4> Ops
{Load
->getChain(), IntID
};
10025 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10026 Ops
.push_back(Load
->getBasePtr());
10028 SDVTList VTs
= DAG
.getVTList({ContainerVT
, MVT::Other
});
10030 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, DL
, VTs
, Ops
,
10031 Load
->getMemoryVT(), Load
->getMemOperand());
10033 SDValue Result
= convertFromScalableVector(VT
, NewLoad
, DAG
, Subtarget
);
10034 return DAG
.getMergeValues({Result
, NewLoad
.getValue(1)}, DL
);
10038 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op
,
10039 SelectionDAG
&DAG
) const {
10041 auto *Store
= cast
<StoreSDNode
>(Op
);
10043 assert(allowsMemoryAccessForAlignment(*DAG
.getContext(), DAG
.getDataLayout(),
10044 Store
->getMemoryVT(),
10045 *Store
->getMemOperand()) &&
10046 "Expecting a correctly-aligned store");
10048 SDValue StoreVal
= Store
->getValue();
10049 MVT VT
= StoreVal
.getSimpleValueType();
10050 MVT XLenVT
= Subtarget
.getXLenVT();
10052 // If the size less than a byte, we need to pad with zeros to make a byte.
10053 if (VT
.getVectorElementType() == MVT::i1
&& VT
.getVectorNumElements() < 8) {
10055 StoreVal
= DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, VT
,
10056 DAG
.getConstant(0, DL
, VT
), StoreVal
,
10057 DAG
.getIntPtrConstant(0, DL
));
10060 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
10063 convertToScalableVector(ContainerVT
, StoreVal
, DAG
, Subtarget
);
10066 // If we know the exact VLEN and our fixed length vector completely fills
10067 // the container, use a whole register store instead.
10068 const auto [MinVLMAX
, MaxVLMAX
] =
10069 RISCVTargetLowering::computeVLMAXBounds(ContainerVT
, Subtarget
);
10070 if (MinVLMAX
== MaxVLMAX
&& MinVLMAX
== VT
.getVectorNumElements() &&
10071 getLMUL1VT(ContainerVT
).bitsLE(ContainerVT
))
10072 return DAG
.getStore(Store
->getChain(), DL
, NewValue
, Store
->getBasePtr(),
10073 Store
->getMemOperand());
10075 SDValue VL
= getVLOp(VT
.getVectorNumElements(), ContainerVT
, DL
, DAG
,
10078 bool IsMaskOp
= VT
.getVectorElementType() == MVT::i1
;
10079 SDValue IntID
= DAG
.getTargetConstant(
10080 IsMaskOp
? Intrinsic::riscv_vsm
: Intrinsic::riscv_vse
, DL
, XLenVT
);
10081 return DAG
.getMemIntrinsicNode(
10082 ISD::INTRINSIC_VOID
, DL
, DAG
.getVTList(MVT::Other
),
10083 {Store
->getChain(), IntID
, NewValue
, Store
->getBasePtr(), VL
},
10084 Store
->getMemoryVT(), Store
->getMemOperand());
10087 SDValue
RISCVTargetLowering::lowerMaskedLoad(SDValue Op
,
10088 SelectionDAG
&DAG
) const {
10090 MVT VT
= Op
.getSimpleValueType();
10092 const auto *MemSD
= cast
<MemSDNode
>(Op
);
10093 EVT MemVT
= MemSD
->getMemoryVT();
10094 MachineMemOperand
*MMO
= MemSD
->getMemOperand();
10095 SDValue Chain
= MemSD
->getChain();
10096 SDValue BasePtr
= MemSD
->getBasePtr();
10098 SDValue Mask
, PassThru
, VL
;
10099 if (const auto *VPLoad
= dyn_cast
<VPLoadSDNode
>(Op
)) {
10100 Mask
= VPLoad
->getMask();
10101 PassThru
= DAG
.getUNDEF(VT
);
10102 VL
= VPLoad
->getVectorLength();
10104 const auto *MLoad
= cast
<MaskedLoadSDNode
>(Op
);
10105 Mask
= MLoad
->getMask();
10106 PassThru
= MLoad
->getPassThru();
10109 bool IsUnmasked
= ISD::isConstantSplatVectorAllOnes(Mask
.getNode());
10111 MVT XLenVT
= Subtarget
.getXLenVT();
10113 MVT ContainerVT
= VT
;
10114 if (VT
.isFixedLengthVector()) {
10115 ContainerVT
= getContainerForFixedLengthVector(VT
);
10116 PassThru
= convertToScalableVector(ContainerVT
, PassThru
, DAG
, Subtarget
);
10118 MVT MaskVT
= getMaskTypeFor(ContainerVT
);
10119 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
10124 VL
= getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
10127 IsUnmasked
? Intrinsic::riscv_vle
: Intrinsic::riscv_vle_mask
;
10128 SmallVector
<SDValue
, 8> Ops
{Chain
, DAG
.getTargetConstant(IntID
, DL
, XLenVT
)};
10130 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10132 Ops
.push_back(PassThru
);
10133 Ops
.push_back(BasePtr
);
10135 Ops
.push_back(Mask
);
10138 Ops
.push_back(DAG
.getTargetConstant(RISCVII::TAIL_AGNOSTIC
, DL
, XLenVT
));
10140 SDVTList VTs
= DAG
.getVTList({ContainerVT
, MVT::Other
});
10143 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, DL
, VTs
, Ops
, MemVT
, MMO
);
10144 Chain
= Result
.getValue(1);
10146 if (VT
.isFixedLengthVector())
10147 Result
= convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
10149 return DAG
.getMergeValues({Result
, Chain
}, DL
);
10152 SDValue
RISCVTargetLowering::lowerMaskedStore(SDValue Op
,
10153 SelectionDAG
&DAG
) const {
10156 const auto *MemSD
= cast
<MemSDNode
>(Op
);
10157 EVT MemVT
= MemSD
->getMemoryVT();
10158 MachineMemOperand
*MMO
= MemSD
->getMemOperand();
10159 SDValue Chain
= MemSD
->getChain();
10160 SDValue BasePtr
= MemSD
->getBasePtr();
10161 SDValue Val
, Mask
, VL
;
10163 if (const auto *VPStore
= dyn_cast
<VPStoreSDNode
>(Op
)) {
10164 Val
= VPStore
->getValue();
10165 Mask
= VPStore
->getMask();
10166 VL
= VPStore
->getVectorLength();
10168 const auto *MStore
= cast
<MaskedStoreSDNode
>(Op
);
10169 Val
= MStore
->getValue();
10170 Mask
= MStore
->getMask();
10173 bool IsUnmasked
= ISD::isConstantSplatVectorAllOnes(Mask
.getNode());
10175 MVT VT
= Val
.getSimpleValueType();
10176 MVT XLenVT
= Subtarget
.getXLenVT();
10178 MVT ContainerVT
= VT
;
10179 if (VT
.isFixedLengthVector()) {
10180 ContainerVT
= getContainerForFixedLengthVector(VT
);
10182 Val
= convertToScalableVector(ContainerVT
, Val
, DAG
, Subtarget
);
10184 MVT MaskVT
= getMaskTypeFor(ContainerVT
);
10185 Mask
= convertToScalableVector(MaskVT
, Mask
, DAG
, Subtarget
);
10190 VL
= getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
10193 IsUnmasked
? Intrinsic::riscv_vse
: Intrinsic::riscv_vse_mask
;
10194 SmallVector
<SDValue
, 8> Ops
{Chain
, DAG
.getTargetConstant(IntID
, DL
, XLenVT
)};
10195 Ops
.push_back(Val
);
10196 Ops
.push_back(BasePtr
);
10198 Ops
.push_back(Mask
);
10201 return DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
, DL
,
10202 DAG
.getVTList(MVT::Other
), Ops
, MemVT
, MMO
);
10206 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op
,
10207 SelectionDAG
&DAG
) const {
10208 MVT InVT
= Op
.getOperand(0).getSimpleValueType();
10209 MVT ContainerVT
= getContainerForFixedLengthVector(InVT
);
10211 MVT VT
= Op
.getSimpleValueType();
10214 convertToScalableVector(ContainerVT
, Op
.getOperand(0), DAG
, Subtarget
);
10216 convertToScalableVector(ContainerVT
, Op
.getOperand(1), DAG
, Subtarget
);
10219 auto [Mask
, VL
] = getDefaultVLOps(VT
.getVectorNumElements(), ContainerVT
, DL
,
10221 MVT MaskVT
= getMaskTypeFor(ContainerVT
);
10224 DAG
.getNode(RISCVISD::SETCC_VL
, DL
, MaskVT
,
10225 {Op1
, Op2
, Op
.getOperand(2), DAG
.getUNDEF(MaskVT
), Mask
, VL
});
10227 return convertFromScalableVector(VT
, Cmp
, DAG
, Subtarget
);
10230 SDValue
RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op
,
10231 SelectionDAG
&DAG
) const {
10232 unsigned Opc
= Op
.getOpcode();
10234 SDValue Chain
= Op
.getOperand(0);
10235 SDValue Op1
= Op
.getOperand(1);
10236 SDValue Op2
= Op
.getOperand(2);
10237 SDValue CC
= Op
.getOperand(3);
10238 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(CC
)->get();
10239 MVT VT
= Op
.getSimpleValueType();
10240 MVT InVT
= Op1
.getSimpleValueType();
10242 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
10244 if (Opc
== ISD::STRICT_FSETCCS
) {
10245 // Expand strict_fsetccs(x, oeq) to
10246 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10247 SDVTList VTList
= Op
->getVTList();
10248 if (CCVal
== ISD::SETEQ
|| CCVal
== ISD::SETOEQ
) {
10249 SDValue OLECCVal
= DAG
.getCondCode(ISD::SETOLE
);
10250 SDValue Tmp1
= DAG
.getNode(ISD::STRICT_FSETCCS
, DL
, VTList
, Chain
, Op1
,
10252 SDValue Tmp2
= DAG
.getNode(ISD::STRICT_FSETCCS
, DL
, VTList
, Chain
, Op2
,
10254 SDValue OutChain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
,
10255 Tmp1
.getValue(1), Tmp2
.getValue(1));
10256 // Tmp1 and Tmp2 might be the same node.
10258 Tmp1
= DAG
.getNode(ISD::AND
, DL
, VT
, Tmp1
, Tmp2
);
10259 return DAG
.getMergeValues({Tmp1
, OutChain
}, DL
);
10262 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10263 if (CCVal
== ISD::SETNE
|| CCVal
== ISD::SETUNE
) {
10264 SDValue OEQCCVal
= DAG
.getCondCode(ISD::SETOEQ
);
10265 SDValue OEQ
= DAG
.getNode(ISD::STRICT_FSETCCS
, DL
, VTList
, Chain
, Op1
,
10267 SDValue Res
= DAG
.getNOT(DL
, OEQ
, VT
);
10268 return DAG
.getMergeValues({Res
, OEQ
.getValue(1)}, DL
);
10272 MVT ContainerInVT
= InVT
;
10273 if (InVT
.isFixedLengthVector()) {
10274 ContainerInVT
= getContainerForFixedLengthVector(InVT
);
10275 Op1
= convertToScalableVector(ContainerInVT
, Op1
, DAG
, Subtarget
);
10276 Op2
= convertToScalableVector(ContainerInVT
, Op2
, DAG
, Subtarget
);
10278 MVT MaskVT
= getMaskTypeFor(ContainerInVT
);
10280 auto [Mask
, VL
] = getDefaultVLOps(InVT
, ContainerInVT
, DL
, DAG
, Subtarget
);
10283 if (Opc
== ISD::STRICT_FSETCC
&&
10284 (CCVal
== ISD::SETLT
|| CCVal
== ISD::SETOLT
|| CCVal
== ISD::SETLE
||
10285 CCVal
== ISD::SETOLE
)) {
10286 // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
10287 // active when both input elements are ordered.
10288 SDValue True
= getAllOnesMask(ContainerInVT
, VL
, DL
, DAG
);
10289 SDValue OrderMask1
= DAG
.getNode(
10290 RISCVISD::STRICT_FSETCC_VL
, DL
, DAG
.getVTList(MaskVT
, MVT::Other
),
10291 {Chain
, Op1
, Op1
, DAG
.getCondCode(ISD::SETOEQ
), DAG
.getUNDEF(MaskVT
),
10293 SDValue OrderMask2
= DAG
.getNode(
10294 RISCVISD::STRICT_FSETCC_VL
, DL
, DAG
.getVTList(MaskVT
, MVT::Other
),
10295 {Chain
, Op2
, Op2
, DAG
.getCondCode(ISD::SETOEQ
), DAG
.getUNDEF(MaskVT
),
10298 DAG
.getNode(RISCVISD::VMAND_VL
, DL
, MaskVT
, OrderMask1
, OrderMask2
, VL
);
10299 // Use Mask as the merge operand to let the result be 0 if either of the
10300 // inputs is unordered.
10301 Res
= DAG
.getNode(RISCVISD::STRICT_FSETCCS_VL
, DL
,
10302 DAG
.getVTList(MaskVT
, MVT::Other
),
10303 {Chain
, Op1
, Op2
, CC
, Mask
, Mask
, VL
});
10305 unsigned RVVOpc
= Opc
== ISD::STRICT_FSETCC
? RISCVISD::STRICT_FSETCC_VL
10306 : RISCVISD::STRICT_FSETCCS_VL
;
10307 Res
= DAG
.getNode(RVVOpc
, DL
, DAG
.getVTList(MaskVT
, MVT::Other
),
10308 {Chain
, Op1
, Op2
, CC
, DAG
.getUNDEF(MaskVT
), Mask
, VL
});
10311 if (VT
.isFixedLengthVector()) {
10312 SDValue SubVec
= convertFromScalableVector(VT
, Res
, DAG
, Subtarget
);
10313 return DAG
.getMergeValues({SubVec
, Res
.getValue(1)}, DL
);
10318 // Lower vector ABS to smax(X, sub(0, X)).
10319 SDValue
RISCVTargetLowering::lowerABS(SDValue Op
, SelectionDAG
&DAG
) const {
10321 MVT VT
= Op
.getSimpleValueType();
10322 SDValue X
= Op
.getOperand(0);
10324 assert((Op
.getOpcode() == ISD::VP_ABS
|| VT
.isFixedLengthVector()) &&
10325 "Unexpected type for ISD::ABS");
10327 MVT ContainerVT
= VT
;
10328 if (VT
.isFixedLengthVector()) {
10329 ContainerVT
= getContainerForFixedLengthVector(VT
);
10330 X
= convertToScalableVector(ContainerVT
, X
, DAG
, Subtarget
);
10334 if (Op
->getOpcode() == ISD::VP_ABS
) {
10335 Mask
= Op
->getOperand(1);
10336 if (VT
.isFixedLengthVector())
10337 Mask
= convertToScalableVector(getMaskTypeFor(ContainerVT
), Mask
, DAG
,
10339 VL
= Op
->getOperand(2);
10341 std::tie(Mask
, VL
) = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
10343 SDValue SplatZero
= DAG
.getNode(
10344 RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
, DAG
.getUNDEF(ContainerVT
),
10345 DAG
.getConstant(0, DL
, Subtarget
.getXLenVT()), VL
);
10346 SDValue NegX
= DAG
.getNode(RISCVISD::SUB_VL
, DL
, ContainerVT
, SplatZero
, X
,
10347 DAG
.getUNDEF(ContainerVT
), Mask
, VL
);
10348 SDValue Max
= DAG
.getNode(RISCVISD::SMAX_VL
, DL
, ContainerVT
, X
, NegX
,
10349 DAG
.getUNDEF(ContainerVT
), Mask
, VL
);
10351 if (VT
.isFixedLengthVector())
10352 Max
= convertFromScalableVector(VT
, Max
, DAG
, Subtarget
);
10356 SDValue
RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10357 SDValue Op
, SelectionDAG
&DAG
) const {
10359 MVT VT
= Op
.getSimpleValueType();
10360 SDValue Mag
= Op
.getOperand(0);
10361 SDValue Sign
= Op
.getOperand(1);
10362 assert(Mag
.getValueType() == Sign
.getValueType() &&
10363 "Can only handle COPYSIGN with matching types.");
10365 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
10366 Mag
= convertToScalableVector(ContainerVT
, Mag
, DAG
, Subtarget
);
10367 Sign
= convertToScalableVector(ContainerVT
, Sign
, DAG
, Subtarget
);
10369 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
10371 SDValue CopySign
= DAG
.getNode(RISCVISD::FCOPYSIGN_VL
, DL
, ContainerVT
, Mag
,
10372 Sign
, DAG
.getUNDEF(ContainerVT
), Mask
, VL
);
10374 return convertFromScalableVector(VT
, CopySign
, DAG
, Subtarget
);
10377 SDValue
RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10378 SDValue Op
, SelectionDAG
&DAG
) const {
10379 MVT VT
= Op
.getSimpleValueType();
10380 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
10382 MVT I1ContainerVT
=
10383 MVT::getVectorVT(MVT::i1
, ContainerVT
.getVectorElementCount());
10386 convertToScalableVector(I1ContainerVT
, Op
.getOperand(0), DAG
, Subtarget
);
10388 convertToScalableVector(ContainerVT
, Op
.getOperand(1), DAG
, Subtarget
);
10390 convertToScalableVector(ContainerVT
, Op
.getOperand(2), DAG
, Subtarget
);
10393 SDValue VL
= getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
).second
;
10395 SDValue Select
= DAG
.getNode(RISCVISD::VMERGE_VL
, DL
, ContainerVT
, CC
, Op1
,
10396 Op2
, DAG
.getUNDEF(ContainerVT
), VL
);
10398 return convertFromScalableVector(VT
, Select
, DAG
, Subtarget
);
10401 SDValue
RISCVTargetLowering::lowerToScalableOp(SDValue Op
,
10402 SelectionDAG
&DAG
) const {
10403 unsigned NewOpc
= getRISCVVLOp(Op
);
10404 bool HasMergeOp
= hasMergeOp(NewOpc
);
10405 bool HasMask
= hasMaskOp(NewOpc
);
10407 MVT VT
= Op
.getSimpleValueType();
10408 MVT ContainerVT
= getContainerForFixedLengthVector(VT
);
10410 // Create list of operands by converting existing ones to scalable types.
10411 SmallVector
<SDValue
, 6> Ops
;
10412 for (const SDValue
&V
: Op
->op_values()) {
10413 assert(!isa
<VTSDNode
>(V
) && "Unexpected VTSDNode node!");
10415 // Pass through non-vector operands.
10416 if (!V
.getValueType().isVector()) {
10421 // "cast" fixed length vector to a scalable vector.
10422 assert(useRVVForFixedLengthVectorVT(V
.getSimpleValueType()) &&
10423 "Only fixed length vectors are supported!");
10424 Ops
.push_back(convertToScalableVector(ContainerVT
, V
, DAG
, Subtarget
));
10428 auto [Mask
, VL
] = getDefaultVLOps(VT
, ContainerVT
, DL
, DAG
, Subtarget
);
10430 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10432 Ops
.push_back(Mask
);
10435 // StrictFP operations have two result values. Their lowered result should
10436 // have same result count.
10437 if (Op
->isStrictFPOpcode()) {
10438 SDValue ScalableRes
=
10439 DAG
.getNode(NewOpc
, DL
, DAG
.getVTList(ContainerVT
, MVT::Other
), Ops
,
10441 SDValue SubVec
= convertFromScalableVector(VT
, ScalableRes
, DAG
, Subtarget
);
10442 return DAG
.getMergeValues({SubVec
, ScalableRes
.getValue(1)}, DL
);
10445 SDValue ScalableRes
=
10446 DAG
.getNode(NewOpc
, DL
, ContainerVT
, Ops
, Op
->getFlags());
10447 return convertFromScalableVector(VT
, ScalableRes
, DAG
, Subtarget
);
10450 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10451 // * Operands of each node are assumed to be in the same order.
10452 // * The EVL operand is promoted from i32 to i64 on RV64.
10453 // * Fixed-length vectors are converted to their scalable-vector container
10455 SDValue
RISCVTargetLowering::lowerVPOp(SDValue Op
, SelectionDAG
&DAG
) const {
10456 unsigned RISCVISDOpc
= getRISCVVLOp(Op
);
10457 bool HasMergeOp
= hasMergeOp(RISCVISDOpc
);
10460 MVT VT
= Op
.getSimpleValueType();
10461 SmallVector
<SDValue
, 4> Ops
;
10463 MVT ContainerVT
= VT
;
10464 if (VT
.isFixedLengthVector())
10465 ContainerVT
= getContainerForFixedLengthVector(VT
);
10467 for (const auto &OpIdx
: enumerate(Op
->ops())) {
10468 SDValue V
= OpIdx
.value();
10469 assert(!isa
<VTSDNode
>(V
) && "Unexpected VTSDNode node!");
10470 // Add dummy merge value before the mask. Or if there isn't a mask, before
10473 auto MaskIdx
= ISD::getVPMaskIdx(Op
.getOpcode());
10475 if (*MaskIdx
== OpIdx
.index())
10476 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10477 } else if (ISD::getVPExplicitVectorLengthIdx(Op
.getOpcode()) ==
10479 if (Op
.getOpcode() == ISD::VP_MERGE
) {
10480 // For VP_MERGE, copy the false operand instead of an undef value.
10481 Ops
.push_back(Ops
.back());
10483 assert(Op
.getOpcode() == ISD::VP_SELECT
);
10484 // For VP_SELECT, add an undef value.
10485 Ops
.push_back(DAG
.getUNDEF(ContainerVT
));
10489 // Pass through operands which aren't fixed-length vectors.
10490 if (!V
.getValueType().isFixedLengthVector()) {
10494 // "cast" fixed length vector to a scalable vector.
10495 MVT OpVT
= V
.getSimpleValueType();
10496 MVT ContainerVT
= getContainerForFixedLengthVector(OpVT
);
10497 assert(useRVVForFixedLengthVectorVT(OpVT
) &&
10498 "Only fixed length vectors are supported!");
10499 Ops
.push_back(convertToScalableVector(ContainerVT
, V
, DAG
, Subtarget
));
10502 if (!VT
.isFixedLengthVector())
10503 return DAG
.getNode(RISCVISDOpc
, DL
, VT
, Ops
, Op
->getFlags());
10505 SDValue VPOp
= DAG
.getNode(RISCVISDOpc
, DL
, ContainerVT
, Ops
, Op
->getFlags());
10507 return convertFromScalableVector(VT
, VPOp
, DAG
, Subtarget
);
10510 SDValue
RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op
,
10511 SelectionDAG
&DAG
) const {
10513 MVT VT
= Op
.getSimpleValueType();
10515 SDValue Src
= Op
.getOperand(0);
10516 // NOTE: Mask is dropped.
10517 SDValue VL
= Op
.getOperand(2);
10519 MVT ContainerVT
= VT
;
10520 if (VT
.isFixedLengthVector()) {
10521 ContainerVT
= getContainerForFixedLengthVector(VT
);
10522 MVT SrcVT
= MVT::getVectorVT(MVT::i1
, ContainerVT
.getVectorElementCount());
10523 Src
= convertToScalableVector(SrcVT
, Src
, DAG
, Subtarget
);
10526 MVT XLenVT
= Subtarget
.getXLenVT();
10527 SDValue Zero
= DAG
.getConstant(0, DL
, XLenVT
);
10528 SDValue ZeroSplat
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
10529 DAG
.getUNDEF(ContainerVT
), Zero
, VL
);
10531 SDValue SplatValue
= DAG
.getConstant(
10532 Op
.getOpcode() == ISD::VP_ZERO_EXTEND
? 1 : -1, DL
, XLenVT
);
10533 SDValue Splat
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, ContainerVT
,
10534 DAG
.getUNDEF(ContainerVT
), SplatValue
, VL
);
10536 SDValue Result
= DAG
.getNode(RISCVISD::VMERGE_VL
, DL
, ContainerVT
, Src
, Splat
,
10537 ZeroSplat
, DAG
.getUNDEF(ContainerVT
), VL
);
10538 if (!VT
.isFixedLengthVector())
10540 return convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
10543 SDValue
RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op
,
10544 SelectionDAG
&DAG
) const {
10546 MVT VT
= Op
.getSimpleValueType();
10548 SDValue Op1
= Op
.getOperand(0);
10549 SDValue Op2
= Op
.getOperand(1);
10550 ISD::CondCode Condition
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
10551 // NOTE: Mask is dropped.
10552 SDValue VL
= Op
.getOperand(4);
10554 MVT ContainerVT
= VT
;
10555 if (VT
.isFixedLengthVector()) {
10556 ContainerVT
= getContainerForFixedLengthVector(VT
);
10557 Op1
= convertToScalableVector(ContainerVT
, Op1
, DAG
, Subtarget
);
10558 Op2
= convertToScalableVector(ContainerVT
, Op2
, DAG
, Subtarget
);
10562 SDValue AllOneMask
= DAG
.getNode(RISCVISD::VMSET_VL
, DL
, ContainerVT
, VL
);
10564 switch (Condition
) {
10567 // X != Y --> (X^Y)
10569 Result
= DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Op1
, Op2
, VL
);
10571 // X == Y --> ~(X^Y)
10574 DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Op1
, Op2
, VL
);
10576 DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Temp
, AllOneMask
, VL
);
10579 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
10580 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
10582 case ISD::SETULT
: {
10584 DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Op1
, AllOneMask
, VL
);
10585 Result
= DAG
.getNode(RISCVISD::VMAND_VL
, DL
, ContainerVT
, Temp
, Op2
, VL
);
10588 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
10589 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
10591 case ISD::SETUGT
: {
10593 DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Op2
, AllOneMask
, VL
);
10594 Result
= DAG
.getNode(RISCVISD::VMAND_VL
, DL
, ContainerVT
, Op1
, Temp
, VL
);
10597 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
10598 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
10600 case ISD::SETULE
: {
10602 DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Op1
, AllOneMask
, VL
);
10603 Result
= DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Temp
, Op2
, VL
);
10606 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
10607 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
10609 case ISD::SETUGE
: {
10611 DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Op2
, AllOneMask
, VL
);
10612 Result
= DAG
.getNode(RISCVISD::VMXOR_VL
, DL
, ContainerVT
, Temp
, Op1
, VL
);
10617 if (!VT
.isFixedLengthVector())
10619 return convertFromScalableVector(VT
, Result
, DAG
, Subtarget
);
// Lower Floating-Point/Integer Type-Convert VP SDNodes
SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  SDValue Src = Op.getOperand(0);
  SDValue Mask = Op.getOperand(1);
  SDValue VL = Op.getOperand(2);
  unsigned RISCVISDOpc = getRISCVVLOp(Op);

  MVT DstVT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  if (DstVT.isFixedLengthVector()) {
    DstVT = getContainerForFixedLengthVector(DstVT);
    SrcVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(DstVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  unsigned DstEltSize = DstVT.getScalarSizeInBits();
  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();

  SDValue Result;
  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");

      unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
                                    ? RISCVISD::VSEXT_VL
                                    : RISCVISD::VZEXT_VL;

      // Do we need to do any pre-widening before converting?
      if (SrcEltSize == 1) {
        MVT IntVT = DstVT.changeVectorElementTypeToInteger();
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue Zero = DAG.getConstant(0, DL, XLenVT);
        SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                        DAG.getUNDEF(IntVT), Zero, VL);
        SDValue One = DAG.getConstant(
            RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
        SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                       DAG.getUNDEF(IntVT), One, VL);
        Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
                          ZeroSplat, DAG.getUNDEF(IntVT), VL);
      } else if (DstEltSize > (2 * SrcEltSize)) {
        // Widen before converting.
        MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
                                     DstVT.getVectorElementCount());
        Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");

      // Convert f16 to f32 then convert f32 to i64.
      if (DstEltSize > (2 * SrcEltSize)) {
        assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        MVT InterimFVT =
            MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
        Src =
            DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    }
  } else { // Narrowing + Conversion
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
      // First do a narrowing convert to an FP type half the size, then round
      // the FP type to a small FP type if needed.

      MVT InterimFVT = DstVT;
      if (SrcEltSize > (2 * DstEltSize)) {
        assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
        assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
      }

      Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);

      if (InterimFVT != DstVT) {
        Src = Result;
        Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
      }
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");
      // First do a narrowing conversion to an integer half the size, then
      // truncate if needed.

      if (DstEltSize == 1) {
        // First convert to the same size integer, then convert to mask using
        // a setcc.
        assert(SrcEltSize >= 16 && "Unexpected FP type!");
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
                                          DstVT.getVectorElementCount());
        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        // Compare the integer result to 0. The integer should be 0 or 1/-1,
        // otherwise the conversion was undefined.
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
        SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
                                DAG.getUNDEF(InterimIVT), SplatZero, VL);
        Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
                             {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
                              DAG.getUNDEF(DstVT), Mask, VL});
      } else {
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                          DstVT.getVectorElementCount());

        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        while (InterimIVT != DstVT) {
          SrcEltSize /= 2;
          Src = Result;
          InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                        DstVT.getVectorElementCount());
          Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
                               Src, Mask, VL);
        }
      }
    }
  }

  MVT VT = Op.getSimpleValueType();
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
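// Sketch of the i1-source path in lowerVPFPIntConvOp above (assumed codegen,
// shown only for orientation): a vp.sitofp from a mask to f32 first merges the
// mask into an integer vector of 0 / -1 values with VMV_V_X_VL + VMERGE_VL and
// only then performs the VL-predicated conversion, roughly
//   vmv.v.i     v8, 0
//   vmerge.vim  v8, v8, -1, v0
//   vfcvt.f.x.v v8, v8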
SDValue
RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  SDValue Offset = Op.getOperand(2);
  SDValue Mask = Op.getOperand(3);
  SDValue EVL1 = Op.getOperand(4);
  SDValue EVL2 = Op.getOperand(5);

  const MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(ContainerVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
  if (IsMaskVector) {
    ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);

    // Expand input operands
    SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                      DAG.getUNDEF(ContainerVT),
                                      DAG.getConstant(1, DL, XLenVT), EVL1);
    SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                       DAG.getUNDEF(ContainerVT),
                                       DAG.getConstant(0, DL, XLenVT), EVL1);
    Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
                      SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);

    SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                      DAG.getUNDEF(ContainerVT),
                                      DAG.getConstant(1, DL, XLenVT), EVL2);
    SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                       DAG.getUNDEF(ContainerVT),
                                       DAG.getConstant(0, DL, XLenVT), EVL2);
    Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
                      SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
  }

  int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
  }

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    Op1, DownOffset, Mask, UpOffset);
  SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
                               UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);

  if (IsMaskVector) {
    // Truncate Result back to a mask vector (Result has same EVL as Op2)
    Result = DAG.getNode(
        RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
        {Result, DAG.getConstant(0, DL, ContainerVT),
         DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
         EVL2});
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
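// Worked example for the splice lowering above (derived from the code, not
// from a test): with a positive immediate offset of 3 and EVL1 == 8, the first
// operand is slid down by DownOffset == 3 and the second operand is slid up at
// UpOffset == EVL1 - 3 == 5, so the result holds elements 3..7 of Op1 followed
// by the leading elements of Op2.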
SDValue
RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Op1 = Op.getOperand(0);
  SDValue Mask = Op.getOperand(1);
  SDValue EVL = Op.getOperand(2);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(ContainerVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  MVT GatherVT = ContainerVT;
  MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
  // Check if we are working with mask vectors
  bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
  if (IsMaskVector) {
    GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);

    // Expand input operand
    SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
                                   DAG.getUNDEF(IndicesVT),
                                   DAG.getConstant(1, DL, XLenVT), EVL);
    SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
                                    DAG.getUNDEF(IndicesVT),
                                    DAG.getConstant(0, DL, XLenVT), EVL);
    Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
                      SplatZero, DAG.getUNDEF(IndicesVT), EVL);
  }

  unsigned EltSize = GatherVT.getScalarSizeInBits();
  unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before using vrgatherei16.vv.
    // Split the vector in half and reverse each half using a full register
    // reverse.
    // Swap the halves and concatenate them.
    // Slide the concatenated result by (VLMax - VL).
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
      auto [Lo, Hi] = DAG.SplitVector(Op1, DL);

      SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);

      // Reassemble the low and high pieces reversed.
      // NOTE: this Result is unmasked (because we do not need masks for
      // shuffles). If in the future this has to change, we can use a SELECT_VL
      // between Result and UNDEF using the mask originally passed to
      // VP_REVERSE.
      SDValue Result =
          DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);

      // Slide off any elements from past EVL that were reversed into the low
      // elements.
      unsigned MinElts = GatherVT.getVectorMinNumElements();
      SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
                                  DAG.getConstant(MinElts, DL, XLenVT));
      SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);

      Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
                             DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);

      if (IsMaskVector) {
        // Truncate Result back to a mask vector
        Result =
            DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                        {Result, DAG.getConstant(0, DL, GatherVT),
                         DAG.getCondCode(ISD::SETNE),
                         DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
      }

      if (!VT.isFixedLengthVector())
        return Result;
      return convertFromScalableVector(VT, Result, DAG, Subtarget);
    }

    // Just promote the int type to i16 which will double the LMUL.
    IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
  SDValue VecLen =
      DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
  SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
                                    DAG.getUNDEF(IndicesVT), VecLen, EVL);
  SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
                              DAG.getUNDEF(IndicesVT), Mask, EVL);
  SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
                               DAG.getUNDEF(GatherVT), Mask, EVL);

  if (IsMaskVector) {
    // Truncate Result back to a mask vector
    Result = DAG.getNode(
        RISCVISD::SETCC_VL, DL, ContainerVT,
        {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
         DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
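// Sketch of the generic path in lowerVPReverseExperimental above: the gather
// indices are (EVL - 1) - vid, built with VID_VL, a splat of EVL - 1 and a
// vector subtract, which is expected to select roughly
//   vid.v       v12
//   vrsub.vx    v12, v12, t0        ; t0 = EVL - 1
//   vrgather.vv v8, v16, v12
// (the exact instruction selection here is an assumption, not a guarantee).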
SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
                                            SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  if (VT.getVectorElementType() != MVT::i1)
    return lowerVPOp(Op, DAG);

  // It is safe to drop mask parameter as masked-off elements are undef.
  SDValue Op1 = Op->getOperand(0);
  SDValue Op2 = Op->getOperand(1);
  SDValue VL = Op->getOperand(3);

  MVT ContainerVT = VT;
  const bool IsFixed = VT.isFixedLengthVector();
  if (IsFixed) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDLoc DL(Op);
  SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
  if (!IsFixed)
    return Val;
  return convertFromScalableVector(VT, Val, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VT);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  auto *VPNode = cast<VPStridedLoadSDNode>(Op);
  // Check if the mask is known to be all ones
  SDValue Mask = VPNode->getMask();
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
                                                   : Intrinsic::riscv_vlse_mask,
                                        DL, XLenVT);
  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
                              DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
                              VPNode->getStride()};
  if (!IsUnmasked) {
    if (VT.isFixedLengthVector()) {
      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
    Ops.push_back(Mask);
  }
  Ops.push_back(VPNode->getVectorLength());
  if (!IsUnmasked) {
    SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
    Ops.push_back(Policy);
  }

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              VPNode->getMemoryVT(), VPNode->getMemOperand());
  SDValue Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
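// Assumed example for the strided-load lowering above: a fixed-length
// vp.strided.load whose mask folds to all-ones takes the unmasked
// Intrinsic::riscv_vlse path and is expected to become something like
//   vsetvli  zero, a2, e32, m1, ta, ma
//   vlse32.v v8, (a0), a1
// (the exact vsetvli/LMUL choice depends on the container type).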
SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  auto *VPNode = cast<VPStridedStoreSDNode>(Op);
  SDValue StoreVal = VPNode->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
  }

  // Check if the mask is known to be all ones
  SDValue Mask = VPNode->getMask();
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
                                                   : Intrinsic::riscv_vsse_mask,
                                        DL, XLenVT);
  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
                              VPNode->getBasePtr(), VPNode->getStride()};
  if (!IsUnmasked) {
    if (VT.isFixedLengthVector()) {
      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
    Ops.push_back(Mask);
  }
  Ops.push_back(VPNode->getVectorLength());

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
                                 Ops, VPNode->getMemoryVT(),
                                 VPNode->getMemOperand());
}
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  ISD::LoadExtType LoadExtType;
  SDValue Index, Mask, PassThru, VL;

  if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
    Index = VPGN->getIndex();
    Mask = VPGN->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPGN->getVectorLength();
    // VP doesn't support extending loads.
    LoadExtType = ISD::NON_EXTLOAD;
  } else {
    // Else it must be a MGATHER.
    auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
    Index = MGN->getIndex();
    Mask = MGN->getMask();
    PassThru = MGN->getPassThru();
    LoadExtType = MGN->getExtensionType();
  }

  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(LoadExtType == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER/VP_GATHER");

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
  }

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
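// Note on the gather lowering above: because the RVV indexed loads treat the
// index vector as unsigned byte offsets, any element-size scaling must already
// have been applied to Index before this point; on RV32, i64 indices are
// additionally truncated to XLEN right above. An unmasked gather is expected
// to select a plain vluxei<eew>.v (an assumption about isel, not verified
// here).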
// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then
// be matched to a RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  bool IsTruncatingStore = false;
  SDValue Index, Mask, Val, VL;

  if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
    Index = VPSN->getIndex();
    Mask = VPSN->getMask();
    Val = VPSN->getValue();
    VL = VPSN->getVectorLength();
    // VP doesn't support truncating stores.
    IsTruncatingStore = false;
  } else {
    // Else it must be a MSCATTER.
    auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
    Index = MSN->getIndex();
    Mask = MSN->getMask();
    Val = MSN->getValue();
    IsTruncatingStore = MSN->isTruncatingStore();
  }

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
  (void)IsTruncatingStore;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
  }

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue SysRegNo = DAG.getTargetConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
  SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
  SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);

  // Encoding used for rounding mode in RISC-V differs from that used in
  // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
  // table, which consists of a sequence of 4-bit fields, each representing
  // corresponding FLT_ROUNDS mode.
  static const int Table =
      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);

  SDValue Shift =
      DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                               DAG.getConstant(7, DL, XLenVT));

  return DAG.getMergeValues({Masked, Chain}, DL);
}
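// Worked example for the table lookup above (derived from the encodings): if
// FRM reads back RDN (2), the shift amount is 2 * 4 = 8, and
// (Table >> 8) & 7 == int(RoundingMode::TowardNegative) == 3, which is the
// value FLT_ROUNDS uses for round-toward-negative-infinity.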
SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue RMValue = Op->getOperand(1);
  SDValue SysRegNo = DAG.getTargetConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);

  // Encoding used for rounding mode in RISC-V differs from that used in
  // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
  // a table, which consists of a sequence of 4-bit fields, each representing
  // corresponding RISC-V mode.
  static const unsigned Table =
      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));

  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);

  SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                              DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                        DAG.getConstant(0x7, DL, XLenVT));
  return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
                     RMValue);
}
SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isRISCV64 = Subtarget.is64Bit();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  }
}
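// Example of the *W legalization below (illustrative): an i32 SRA on RV64 is
// rebuilt as
//   NewOp0 = any_extend i32 -> i64
//   NewOp1 = any_extend i32 -> i64
//   W      = RISCVISD::SRAW i64 NewOp0, NewOp1
//   result = truncate i64 -> i32
// so that instruction selection can still pick sraw even though i32 is not a
// legal type on RV64.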
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLLW/DIVUW/.../*W later on because the fact the operation was originally of
// type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics to reduce the number of sign-extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsStrict = N->isStrictFPOpcode();
    bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
                    N->getOpcode() == ISD::STRICT_FP_TO_SINT;
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Op0.getValueType()))
        return;
      if (IsStrict) {
        SDValue Chain = N->getOperand(0);
        // In absence of Zfh, promote f16 to f32, then convert.
        if (Op0.getValueType() == MVT::f16 &&
            !Subtarget.hasStdExtZfhOrZhinx()) {
          Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
                            {Chain, Op0});
          Chain = Op0.getValue(1);
        }
        unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
                                : RISCVISD::STRICT_FCVT_WU_RV64;
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(
            Opc, DL, VTs, Chain, Op0,
            DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
        Results.push_back(Res.getValue(1));
        return;
      }
      // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
      // convert.
      if ((Op0.getValueType() == MVT::f16 &&
           !Subtarget.hasStdExtZfhOrZhinx()) ||
          Op0.getValueType() == MVT::bf16)
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    RTLIB::Libcall LC;
    if (IsSigned)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Op0VT))
        return;

      // In absence of Zfh, promote f16 to f32, then convert.
      if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      SDValue Res =
          DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call to lround. We'll
    // need to truncate the result. We assume any value that doesn't fit in i32
    // is allowed to return an unspecified value.
    RTLIB::Libcall LC =
        Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::LOAD: {
    if (!ISD::isNON_EXTLoad(N))
      return;

    // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
    // sext_inreg we emit for ADD/SUB/MUL/SLLI.
    LoadSDNode *Ld = cast<LoadSDNode>(N);

    SDLoc dl(N);
    SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
                                 Ld->getBasePtr(), Ld->getMemoryVT(),
                                 Ld->getMemOperand());
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
    Results.push_back(Res.getValue(1));
    return;
  }
  case ISD::MUL: {
    unsigned Size = N->getSimpleValueType(0).getSizeInBits();
    unsigned XLen = Subtarget.getXLen();
    // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
    if (Size > XLen) {
      assert(Size == (XLen * 2) && "Unexpected custom legalisation");
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      APInt HighMask = APInt::getHighBitsSet(Size, XLen);

      bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
      bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
      // We need exactly one side to be unsigned.
      if (LHSIsU == RHSIsU)
        return;

      auto MakeMULPair = [&](SDValue S, SDValue U) {
        MVT XLenVT = Subtarget.getXLenVT();
        S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
        U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
        SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
        SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
        return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
      };

      bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
      bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;

      // The other operand should be signed, but still prefer MULH when
      // possible.
      if (RHSIsU && LHSIsS && !RHSIsS)
        Results.push_back(MakeMULPair(LHS, RHS));
      else if (LHSIsU && RHSIsS && !LHSIsS)
        Results.push_back(MakeMULPair(RHS, LHS));

      return;
    }
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      // If we can use a BSET instruction, allow default promotion to apply.
      if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
          isOneConstant(N->getOperand(0)))
        break;
      Results.push_back(customLegalizeToWOp(N, DAG));
      break;
    }

    // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
    // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
    // shift amount.
    if (N->getOpcode() == ISD::SHL) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue NewOp1 =
          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
      SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                                   DAG.getValueType(MVT::i32));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
            Subtarget.hasVendorXTHeadBb()) &&
           "Unexpected custom legalization");
    if (!isa<ConstantSDNode>(N->getOperand(1)) &&
        !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    bool IsCTZ =
        N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
    unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
    SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    return;
  }
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    // Don't promote division/remainder by constant since we should expand those
    // to multiply by magic constant.
    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    if (N->getOperand(1).getOpcode() == ISD::Constant &&
        !isIntDivCheap(N->getValueType(0), Attr))
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
                                           : ISD::ZERO_EXTEND;

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::SADDO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
    // use the default legalization.
    if (!isa<ConstantSDNode>(N->getOperand(1)))
      return;

    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);

    // For an addition, the result should be less than one of the operands (LHS)
    // if and only if the other operand (RHS) is negative, otherwise there will
    // be overflow.
    // For a subtraction, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
    // otherwise there will be overflow.
    EVT OType = N->getValueType(1);
    SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
    SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);

    SDValue Overflow =
        DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsAdd = N->getOpcode() == ISD::UADDO;
    // Create an ADDW or SUBW.
    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res =
        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Overflow;
    if (IsAdd && isOneConstant(RHS)) {
      // Special case uaddo X, 1 overflowed if the addition result is 0.
      // The general case (X + C) < C is not necessarily beneficial. Although we
      // reduce the live range of X, we may introduce the materialization of
      // constant C, especially when the setcc result is used by branch. We have
      // no compare with constant and branch instructions.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
                              DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
    } else if (IsAdd && isAllOnesConstant(RHS)) {
      // Special case uaddo X, -1 overflowed if X != 0.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
                              DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
    } else {
      // Sign extend the LHS and perform an unsigned compare with the ADDW
      // result. Since the inputs are sign extended from i32, this is equivalent
      // to comparing the lower 32 bits.
      LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
                              IsAdd ? ISD::SETULT : ISD::SETUGT);
    }

    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDSAT:
  case ISD::USUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Subtarget.hasStdExtZbb()) {
      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
      // sign extend allows overflow of the lower 32 bits to be detected on
      // the promoted size.
      SDValue LHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue RHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }

    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
    // promotion for UADDO/USUBO.
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::ABS: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    if (Subtarget.hasStdExtZbb()) {
      // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
      // This allows us to remember that the result is sign extended. Expanding
      // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
      SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
                                N->getOperand(0));
      SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
      return;
    }

    // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
    SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));

    // Freeze the source so we can increase its use count.
    Src = DAG.getFreeze(Src);

    // Copy sign bit to all bits using the sraiw pattern.
    SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
                                   DAG.getValueType(MVT::i32));
    SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
                           DAG.getConstant(31, DL, MVT::i64));

    SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
    NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);

    // NOTE: The result is only required to be anyextended, but sext is
    // consistent with type legalization of sub.
    NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
                         DAG.getValueType(MVT::i32));
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    return;
  }
  case ISD::BITCAST: {
    EVT VT = N->getValueType(0);
    assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::i16 && Op0VT == MVT::f16 &&
        Subtarget.hasStdExtZfhminOrZhinxmin()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
               Subtarget.hasStdExtZfbfmin()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtFOrZfinx()) {
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
               Subtarget.hasStdExtZfa()) {
      SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), Op0);
      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                   NewReg.getValue(0), NewReg.getValue(1));
      Results.push_back(RetReg);
    } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
               isTypeLegal(Op0VT)) {
      // Custom-legalize bitcasts from fixed-length vector types to illegal
      // scalar types in order to improve codegen. Bitcast the vector to a
      // one-element vector type whose element type is the same as the result
      // type, and extract the first element.
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (isTypeLegal(BVT)) {
        SDValue BVec = DAG.getBitcast(BVT, Op0);
        Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                                      DAG.getConstant(0, DL, XLenVT)));
      }
    }
    break;
  }
  case RISCVISD::BREV8: {
    MVT VT = N->getSimpleValueType(0);
    MVT XLenVT = Subtarget.getXLenVT();
    assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalisation");
    assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
    SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
    SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
    // type is illegal (currently only vXi64 RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper- and lower- halves of the SEW-bit vector element, slid down to the
    // first element.
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getUNDEF(ContainerVT),
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 =
        DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
                    DAG.getUNDEF(ContainerVT), Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::experimental_get_vector_length: {
      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_orc_b:
    case Intrinsic::riscv_brev8:
    case Intrinsic::riscv_sha256sig0:
    case Intrinsic::riscv_sha256sig1:
    case Intrinsic::riscv_sha256sum0:
    case Intrinsic::riscv_sha256sum1:
    case Intrinsic::riscv_sm3p0:
    case Intrinsic::riscv_sm3p1: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;
      unsigned Opc;
      switch (IntNo) {
      case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
      case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
      case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
      case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
      case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
      case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
      case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
      case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
      }

      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_sm4ks:
    case Intrinsic::riscv_sm4ed: {
      unsigned Opc =
          IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmul: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
:
11952 case Intrinsic::riscv_clmulr
: {
11953 if (!Subtarget
.is64Bit() || N
->getValueType(0) != MVT::i32
)
11956 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
11957 // to the full 128-bit clmul result of multiplying two xlen values.
11958 // Perform clmulr or clmulh on the shifted values. Finally, extract the
11961 // The alternative is to mask the inputs to 32 bits and use clmul, but
11962 // that requires two shifts to mask each input without zext.w.
11963 // FIXME: If the inputs are known zero extended or could be freely
11964 // zero extended, the mask form would be better.
11966 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(1));
11968 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, N
->getOperand(2));
11969 NewOp0
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, NewOp0
,
11970 DAG
.getConstant(32, DL
, MVT::i64
));
11971 NewOp1
= DAG
.getNode(ISD::SHL
, DL
, MVT::i64
, NewOp1
,
11972 DAG
.getConstant(32, DL
, MVT::i64
));
11973 unsigned Opc
= IntNo
== Intrinsic::riscv_clmulh
? RISCVISD::CLMULH
11974 : RISCVISD::CLMULR
;
11975 SDValue Res
= DAG
.getNode(Opc
, DL
, MVT::i64
, NewOp0
, NewOp1
);
11976 Res
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, Res
,
11977 DAG
.getConstant(32, DL
, MVT::i64
));
11978 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Res
));
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      MVT XLenVT = Subtarget.getXLenVT();
      if (VT.bitsLT(XLenVT)) {
        // Simple case just extract using vmv.x.s and truncate.
        SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                      Subtarget.getXLenVT(), N->getOperand(1));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
        return;
      }

      assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected custom legalization");

      // We need to do the move in two steps.
      SDValue Vec = N->getOperand(1);
      MVT VecVT = Vec.getSimpleValueType();

      // First extract the lower XLEN bits of the element.
      SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

      // To extract the upper XLEN bits of the vector element, shift the first
      // element right by 32 bits and re-extract the lower XLEN bits.
      auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);

      SDValue ThirtyTwoV =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
                      DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
                                   DAG.getUNDEF(VecVT), Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      return;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMIN:
    if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::GET_ROUNDING: {
    SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
    SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
    Results.push_back(Res.getValue(0));
    Results.push_back(Res.getValue(1));
    break;
  }
  }
}
/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
/// which corresponds to it.
static unsigned getVecReduceOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled binary to transform reduction");
  case ISD::ADD:
    return ISD::VECREDUCE_ADD;
  case ISD::UMAX:
    return ISD::VECREDUCE_UMAX;
  case ISD::SMAX:
    return ISD::VECREDUCE_SMAX;
  case ISD::UMIN:
    return ISD::VECREDUCE_UMIN;
  case ISD::SMIN:
    return ISD::VECREDUCE_SMIN;
  case ISD::AND:
    return ISD::VECREDUCE_AND;
  case ISD::OR:
    return ISD::VECREDUCE_OR;
  case ISD::XOR:
    return ISD::VECREDUCE_XOR;
  case ISD::FADD:
    // Note: This is the associative form of the generic reduction opcode.
    return ISD::VECREDUCE_FADD;
  }
}
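// Illustrative example of the incremental pattern recognized below:
//   t0 = add (extract_vector_elt v, 0), (extract_vector_elt v, 1)
//        ==> vecreduce_add (extract_subvector v, elements [0,2))
//   t1 = add t0, (extract_vector_elt v, 2)
//        ==> vecreduce_add (extract_subvector v, elements [0,3))
// i.e. each step widens the reduced prefix of v by one element.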

/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes. In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
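///
/// For illustration: given t0 = add (extractelt v, 0), (extractelt v, 1)
/// followed by t1 = add t0, (extractelt v, 2), the first transform rewrites
/// t0 as a vecreduce_add of the first two elements of v, and the second
/// transform then grows that into a vecreduce_add over the first three
/// elements.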
static SDValue
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {

  // This transforms need to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  if (DAG.NewNodesMustHaveLegalTypes)
    return SDValue();

  // Without V, this transform isn't useful. We could form the (illegal)
  // operations and let them be scalarized again, but there's really no point.
  if (!Subtarget.hasVInstructions())
    return SDValue();

  SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  const unsigned Opc = N->getOpcode();

  // For FADD, we only handle the case with reassociation allowed. We
  // could handle strict reduction order, but at the moment, there's no
  // known reason to, and the complexity isn't worth it.
  // TODO: Handle fminnum and fmaxnum here
  if (!VT.isInteger() &&
      (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
    return SDValue();

  const unsigned ReduceOpc = getVecReduceOpcode(Opc);
  assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
         "Inconsistent mappings");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    std::swap(LHS, RHS);

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  //   reduce_op (extract_subvector [2 x VT] from V). This will form the
  //   root of our reduction tree. TODO: We could extend this to any two
  //   adjacent aligned constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t LHSIdx =
        cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
    if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
      EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
      SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                                DAG.getVectorIdxConstant(0, DL));
      return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
    }
  }

  // Match (binop (reduce (extract_subvector V, 0),
  //                      (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ReduceOpc)
    return SDValue();

  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
      isNullConstant(ReduceVec.getOperand(1)) &&
      ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
    // For illegal types (e.g. 3xi32), most will be combined again into a
    // wider (hopefully legal) type. If this is a terminal state, we are
    // relying on type legalization here to produce something reasonable
    // and this lowering quality could probably be improved. (TODO)
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
    SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                              DAG.getVectorIdxConstant(0, DL));
    auto Flags = ReduceVec->getFlags();
    Flags.intersectWith(N->getFlags());
    return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
  }

  return SDValue();
}

// Try to fold (<bop> x, (reduction.<bop> vec, start))
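//
// For illustration: (add X, (vecreduce.add V, /*start=*/0)) can seed the
// reduction with X instead, i.e. (vecreduce.add V, /*start=*/X), removing the
// scalar add. The checks below require the existing start value to be the
// neutral element and the AVL to be known non-zero.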
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip if FADD disallows reassociation but the combiner needs.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check the scalar of ScalarV is neutral element
  // TODO: Deal with value other than neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
                    NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));

  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(),
                     NewReduce, Extract.getOperand(1));
}

// Optimize (add (shl x, c0), (shl y, c1)) ->
//          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
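// For example, with Zba: (add (shl x, 5), (shl y, 7))
//   -> (shl (add (shl y, 2), x), 5), i.e. SH2ADD y, x followed by SLLI by 5,
// replacing two shifts and an add with two instructions.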
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba extension.
  if (!Subtarget.hasStdExtZba())
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();

  // The two operand nodes must be SHL and have no other use.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
      !N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // Check c0 and c1.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  if (C0 <= 0 || C1 <= 0)
    return SDValue();

  // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
  int64_t Bits = std::min(C0, C1);
  int64_t Diff = std::abs(C0 - C1);
  if (Diff != 1 && Diff != 2 && Diff != 3)
    return SDValue();

  // Build nodes.
  SDLoc DL(N);
  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NA0 =
      DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
  SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
  return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
}

// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
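//
// For example (AllOnes=1): (and (select cond, -1, 12), x)
//   -> (select cond, x, (and x, 12)),
// so the AND is only applied on the path where the select does not produce
// the all-ones identity value.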
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select cond, x, (and x, c)) has custom lowering with Zicond.
    if ((!Subtarget.hasStdExtZicond() &&
         !Subtarget.hasVendorXVentanaCondOps()) ||
        N->getOpcode() != ISD::AND)
      return SDValue();

    // Maybe harmful when condition code has multiple use.
    if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
      return SDValue();

    // Maybe harmful when VT is wider than XLen.
    if (VT.getSizeInBits() > Subtarget.getXLen())
      return SDValue();
  }

  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}

// Attempt combineSelectAndUse on each operand of a commutative operator N.
static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
                                              bool AllOnes,
                                              const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
    return Result;
  if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
    return Result;
  return SDValue();
}

// Transform (add (mul x, c0), c1) ->
//           (add (mul (add x, c1/c0), c0), c1%c0),
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (mul (add x, c1/c0), c0),
// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
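//
// Worked example for the first form: c0 = 100, c1 = 4099. 4099 is not a
// simm12, but c1/c0 = 40 and c1%c0 = 99 are, and c0*(c1/c0) = 4000 is not, so
// (add (mul x, 100), 4099) -> (add (mul (add x, 40), 100), 99).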
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();
  // The first operand node must be a MUL and has no other use.
  SDValue N0 = N->getOperand(0);
  if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
    return SDValue();
  // Check if c0 and c1 match above conditions.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  // If N0C has multiple uses it's possible one of the cases in
  // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
  // in an infinite loop.
  if (!N0C->hasOneUse())
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  int64_t CA, CB;
  if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
    return SDValue();
  // Search for proper CA (non-zero) and CB that both are simm12.
  if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
      !isInt<12>(C0 * (C1 / C0))) {
    CA = C1 / C0;
    CB = C1 % C0;
  } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
             isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
    CA = C1 / C0 + 1;
    CB = C1 % C0 - C0;
  } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
             isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
    CA = C1 / C0 - 1;
    CB = C1 % C0 + C0;
  } else
    return SDValue();
  // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
  SDLoc DL(N);
  SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
                             DAG.getConstant(CA, DL, VT));
  SDValue New1 =
      DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
  return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}

// Try to turn (add (xor bool, 1) -1) into (neg bool).
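// With bool known to be 0 or 1, (xor bool, 1) == 1 - bool, so adding -1 yields
// -bool; e.g. (add (xor (setcc ...), 1), -1) -> (neg (setcc ...)).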
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // RHS should be -1.
  if (!isAllOnesConstant(N1))
    return SDValue();

  // Look for (xor X, 1).
  if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
    return SDValue();

  // First xor input should be 0 or 1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
    return SDValue();

  // Emit a negate of the setcc.
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     N0.getOperand(0));
}

static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineAddOfBooleanXor(N, DAG))
    return V;
  if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  // fold (add (select lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (add x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

// Try to turn a sub boolean RHS and constant LHS into an addi.
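// For example: (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4), since
// the setcc yields 0 or 1 and 5 - b == (1 - b) + 4.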
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Require a constant LHS.
  auto *N0C = dyn_cast<ConstantSDNode>(N0);
  if (!N0C)
    return SDValue();

  // All our optimizations involve subtracting 1 from the immediate and forming
  // an ADDI. Make sure the new immediate is valid for an ADDI.
  APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
  if (!ImmValMinus1.isSignedIntN(12))
    return SDValue();

  SDValue NewLHS;
  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
    // (sub constant, (setcc x, y, eq/neq)) ->
    // (add (setcc x, y, neq/eq), constant - 1)
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    EVT SetCCOpVT = N1.getOperand(0).getValueType();
    if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
      return SDValue();
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    NewLHS =
        DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
  } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
             N1.getOperand(0).getOpcode() == ISD::SETCC) {
    // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
    // Since setcc returns a bool the xor is equivalent to 1-setcc.
    NewLHS = N1.getOperand(0);
  } else
    return SDValue();

  SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
}

static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineSubOfBoolean(N, DAG))
    return V;

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
  if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
      isNullConstant(N1.getOperand(1))) {
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    if (CCVal == ISD::SETLT) {
      EVT VT = N->getValueType(0);
      SDLoc DL(N);
      unsigned ShAmt = N0.getValueSizeInBits() - 1;
      return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
                         DAG.getConstant(ShAmt, DL, VT));
    }
  }

  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
  //      (select lhs, rhs, cc, x, (sub x, y))
  return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
}

// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
// Legalizing setcc can introduce xors like this. Doing this transform reduces
// the number of xors and may allow the xor to fold into a branch condition.
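// For example, with a and b known to be 0 or 1:
//   (and (xor a, 1), (xor b, 1)) -> (xor (or  a, b), 1)
//   (or  (xor a, 1), (xor b, 1)) -> (xor (and a, b), 1)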
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool IsAnd = N->getOpcode() == ISD::AND;

  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue N01 = N0.getOperand(1);
  SDValue N11 = N1.getOperand(1);

  // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
  // (xor X, -1) based on the upper bits of the other operand being 0. If the
  // operation is And, allow one of the Xors to use -1.
  if (isOneConstant(N01)) {
    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
      return SDValue();
  } else if (isOneConstant(N11)) {
    // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
    if (!(IsAnd && isAllOnesConstant(N01)))
      return SDValue();
  } else
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);

  // The LHS of the xors needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
    return SDValue();

  // Invert the opcode and insert a new xor.
  SDLoc DL(N);
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}

static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
  // extending X. This is safe since we only need the LSB after the shift and
  // shift amounts larger than 31 would produce poison. If we wait until
  // type legalization, we'll create RISCVISD::SRLW and we can't recover it
  // to use a BEXT instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      VT == MVT::i1 && N0.getValueType() == MVT::i32 &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
  }

  return SDValue();
}

// Combine two comparison operations and a logic operation into one selection
// operation (min, max) and one logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
static SDValue performANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  SDValue N0 = N->getOperand(0);
  // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
  // extending X. This is safe since we only need the LSB after the shift and
  // shift amounts larger than 31 would produce poison. If we wait until
  // type legalization, we'll create RISCVISD::SRLW and we can't recover it
  // to use a BEXT instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
                              DAG.getConstant(1, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // fold (and (select lhs, rhs, cc, -1, y), x) ->
  //      (select lhs, rhs, cc, x, (and x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}

// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
// FIXME: Generalize to other binary operators with same operand.
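// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//     -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// so the select picks between a and b directly and a single final xor flips
// the chosen value.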
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
                                SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::OR && "Unexpected opcode");

  if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
      !N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Should have the same condition.
  SDValue Cond = N0.getOperand(1);
  if (Cond != N1.getOperand(1))
    return SDValue();

  SDValue TrueV = N0.getOperand(0);
  SDValue FalseV = N1.getOperand(0);

  if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
      TrueV.getOperand(1) != FalseV.getOperand(1) ||
      !isOneConstant(TrueV.getOperand(1)) ||
      !TrueV.hasOneUse() || !FalseV.hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
                              Cond);
  SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
                              Cond);
  SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
  return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}

static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
  // We may be able to pull a common operation out of the true and false value.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
    return V;
  if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
    return V;

  // fold (or (select cond, 0, y), x) ->
  //      (select cond, x, (or x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
      N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
      !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
  // NOTE: Assumes ROL being legal means ROLW is legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (N0.getOpcode() == RISCVISD::SLLW &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
      TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
    SDLoc DL(N);
    return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
                       DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
  }

  // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
  if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
    auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    if (ConstN00 && CC == ISD::SETLT) {
      EVT VT = N0.getValueType();
      SDLoc DL(N0);
      const APInt &Imm = ConstN00->getAPIntValue();
      if ((Imm + 1).isSignedIntN(12))
        return DAG.getSetCC(DL, VT, N0.getOperand(1),
                            DAG.getConstant(Imm + 1, DL, VT), CC);
    }
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  // fold (xor (select cond, 0, y), x) ->
  //      (select cond, x, (xor x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  if (!VT.isVector())
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue MulOper;
  unsigned AddSubOpc;

  // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
  //        (mul x, add (y, 1)) -> (add x, (mul x, y))
  // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
  //         (mul x, (sub 1, y)) -> (sub x, (mul x, y))
  auto IsAddSubWith1 = [&](SDValue V) -> bool {
    AddSubOpc = V->getOpcode();
    if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
      SDValue Opnd = V->getOperand(1);
      MulOper = V->getOperand(0);
      if (AddSubOpc == ISD::SUB)
        std::swap(Opnd, MulOper);
      if (isOneOrOneSplat(Opnd))
        return true;
    }
    return false;
  };

  if (IsAddSubWith1(N0)) {
    SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
    return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
  }

  if (IsAddSubWith1(N1)) {
    SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
    return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
  }

  return SDValue();
}

/// According to the property that indexed load/store instructions zero-extend
/// their indices, try to narrow the type of index operand.
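/// For example, an i64 gather index computed as (shl (zext i8 X to i64), 2)
/// needs at most 10 significant bits, so it can be narrowed to
/// (shl (zext i8 X to i16), 2) and still zero-extend to the same value.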
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType,
                        SelectionDAG &DAG) {
  if (isIndexTypeSigned(IndexType))
    return false;

  if (!N->hasOneUse())
    return false;

  EVT VT = N.getValueType();
  SDLoc DL(N);

  // In general, what we're doing here is seeing if we can sink a truncate to
  // a smaller element type into the expression tree building our index.
  // TODO: We can generalize this and handle a bunch more cases if useful.

  // Narrow a buildvector to the narrowest element type. This requires less
  // work and less register pressure at high LMUL, and creates smaller constants
  // which may be cheaper to materialize.
  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
    KnownBits Known = DAG.computeKnownBits(N);
    unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
    LLVMContext &C = *DAG.getContext();
    EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
    if (ResultVT.bitsLT(VT.getVectorElementType())) {
      N = DAG.getNode(ISD::TRUNCATE, DL,
                      VT.changeVectorElementType(ResultVT), N);
      return true;
    }
  }

  // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
  if (N.getOpcode() != ISD::SHL)
    return false;

  SDValue N0 = N.getOperand(0);
  if (N0.getOpcode() != ISD::ZERO_EXTEND &&
      N0.getOpcode() != RISCVISD::VZEXT_VL)
    return false;
  if (!N0->hasOneUse())
    return false;

  APInt ShAmt;
  SDValue N1 = N.getOperand(1);
  if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
    return false;

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  unsigned SrcElen = SrcVT.getScalarSizeInBits();
  unsigned ShAmtV = ShAmt.getZExtValue();
  unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
  NewElen = std::max(NewElen, 8U);

  // Skip if NewElen is not narrower than the original extended type.
  if (NewElen >= N0.getValueType().getScalarSizeInBits())
    return false;

  EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
  EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);

  SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
  SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
  N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
  return true;
}

// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
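// For example, (seteq (and X, 0xffffffff), 0xfffff000) becomes
// (seteq (sext_inreg X, i32), 0xfffffffffffff000); the sign-extended constant
// can be materialized with a single LUI and the sext_inreg with a sext.w.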
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  if (OpVT != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  // RHS needs to be a constant.
  auto *N1C = dyn_cast<ConstantSDNode>(N1);
  if (!N1C)
    return SDValue();

  // LHS needs to be (and X, 0xffffffff).
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
      !isa<ConstantSDNode>(N0.getOperand(1)) ||
      N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
    return SDValue();

  // Looking for an equality compare.
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (!isIntEqualitySetCC(Cond))
    return SDValue();

  // Don't do this if the sign bit is provably zero, it will be turned back into
  // an AND.
  APInt SignMask = APInt::getOneBitSet(64, 31);
  if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
    return SDValue();

  const APInt &C1 = N1C->getAPIntValue();

  SDLoc dl(N);
  // If the constant is larger than 2^32 - 1 it is impossible for both sides
  // to be equal.
  if (C1.getActiveBits() > 32)
    return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

  SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
                               N0.getOperand(0), DAG.getValueType(MVT::i32));
  return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
                                                      dl, OpVT), Cond);
}

static SDValue
performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
  if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
    return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
                       Src.getOperand(0));

  return SDValue();
}

namespace {
// Forward declaration of the structure holding the necessary information to
// apply a combine.
struct CombineResult;

/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
/// add | add_vl -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
///
/// An object of this class represents an operand of the operation we want to
/// combine.
/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
/// NodeExtensionHelper for `a` and one for `b`.
///
/// This class abstracts away how the extension is materialized and
/// how its Mask, VL, number of users affect the combines.
///
/// In particular:
/// - VWADD_W is conceptually == add(op0, sext(op1))
/// - VWADDU_W == add(op0, zext(op1))
/// - VWSUB_W == sub(op0, sext(op1))
/// - VWSUBU_W == sub(op0, zext(op1))
///
/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
/// zext|sext(smaller_value).
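///
/// For illustration, (add_vl (vsext_vl a, m, vl), (vsext_vl b, m, vl), ...)
/// can be folded to (vwadd_vl a, b, ...) when the masks and VLs match, which
/// is the kind of rewrite the combines below perform.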
struct NodeExtensionHelper {
  /// Records if this operand is like being zero extended.
  bool SupportsZExt;
  /// Records if this operand is like being sign extended.
  /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
  /// instance, a splat constant (e.g., 3), would support being both sign and
  /// zero extended.
  bool SupportsSExt;
  /// This boolean captures whether we care if this operand would still be
  /// around after the folding happens.
  bool EnforceOneUse;
  /// Records if this operand's mask needs to match the mask of the operation
  /// that it will fold into.
  bool CheckMask;
  /// Value of the Mask for this operand.
  /// It may be SDValue().
  SDValue Mask;
  /// Value of the vector length operand.
  /// It may be SDValue().
  SDValue VL;
  /// Original value that this NodeExtensionHelper represents.
  SDValue OrigOperand;

  /// Get the value feeding the extension or the value itself.
  /// E.g., for zext(a), this would return a.
  SDValue getSource() const {
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return OrigOperand.getOperand(0);
    default:
      return OrigOperand;
    }
  }

  /// Check if this instance represents a splat.
  bool isSplat() const {
    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
  }

  /// Get or create a value that can feed \p Root with the given extension \p
  /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
  /// \see ::getSource().
  SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget,
                                std::optional<bool> SExt) const {
    if (!SExt.has_value())
      return OrigOperand;

    MVT NarrowVT = getNarrowType(Root);

    SDValue Source = getSource();
    if (Source.getValueType() == NarrowVT)
      return Source;

    unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;

    // If we need an extension, we should be changing the type.
    SDLoc DL(OrigOperand);
    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
    case RISCVISD::VMV_V_X_VL:
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
    default:
      // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
      // and that operand should already have the right NarrowVT so no
      // extension should be required at this point.
      llvm_unreachable("Unsupported opcode");
    }
  }

  /// Helper function to get the narrow type for \p Root.
  /// The narrow type is the type of \p Root where we divided the size of each
  /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
  /// \pre The size of the type of the elements of Root must be a multiple of 2
  /// and be greater than 16.
  static MVT getNarrowType(const SDNode *Root) {
    MVT VT = Root->getSimpleValueType(0);

    // Determine the narrow size.
    unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
    assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
    MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
                                    VT.getVectorElementCount());
    return NarrowVT;
  }

  /// Return the opcode required to materialize the folding of the sign
  /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
  /// both operands for \p Opcode.
  /// Put differently, get the opcode to materialize:
  /// - ISExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
  /// - ISExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
  /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
  static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
      return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
    case ISD::MUL:
    case RISCVISD::MUL_VL:
      return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getSUOpcode(unsigned Opcode) {
    assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
           "SU is only supported for MUL");
    return RISCVISD::VWMULSU_VL;
  }

  /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
      return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
      return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
      const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
      const RISCVSubtarget &)>;

  /// Check if this node needs to be fully folded or extended for all users.
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }

  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
  void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
    SupportsZExt = false;
    SupportsSExt = false;
    EnforceOneUse = true;
    CheckMask = true;
    unsigned Opc = OrigOperand.getOpcode();
    switch (Opc) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      MVT VT = OrigOperand.getSimpleValueType();
      if (!VT.isVector())
        break;

      SDValue NarrowElt = OrigOperand.getOperand(0);
      MVT NarrowVT = NarrowElt.getSimpleValueType();

      unsigned ScalarBits = VT.getScalarSizeInBits();
      unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();

      // Ensure the narrowing element type is legal
      if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType()))
        break;

      // Ensure the extension's semantic is equivalent to rvv vzext or vsext.
      if (ScalarBits != NarrowScalarBits * 2)
        break;

      SupportsZExt = Opc == ISD::ZERO_EXTEND;
      SupportsSExt = Opc == ISD::SIGN_EXTEND;

      SDLoc DL(Root);
      std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
      break;
    }
    case RISCVISD::VZEXT_VL:
      SupportsZExt = true;
      Mask = OrigOperand.getOperand(1);
      VL = OrigOperand.getOperand(2);
      break;
    case RISCVISD::VSEXT_VL:
      SupportsSExt = true;
      Mask = OrigOperand.getOperand(1);
      VL = OrigOperand.getOperand(2);
      break;
    case RISCVISD::VMV_V_X_VL: {
      // Historically, we didn't care about splat values not disappearing during
      // combines.
      EnforceOneUse = false;
      CheckMask = false;
      VL = OrigOperand.getOperand(2);

      // The operand is a splat of a scalar.

      // The pasthru must be undef for tail agnostic.
      if (!OrigOperand.getOperand(0).isUndef())
        break;

      // Get the scalar value.
      SDValue Op = OrigOperand.getOperand(1);

      // See if we have enough sign bits or zero bits in the scalar to use a
      // widening opcode by splatting to smaller element size.
      MVT VT = Root->getSimpleValueType(0);
      unsigned EltBits = VT.getScalarSizeInBits();
      unsigned ScalarBits = Op.getValueSizeInBits();
      // Make sure we're getting all element bits from the scalar register.
      // FIXME: Support implicit sign extension of vmv.v.x?
      if (ScalarBits < EltBits)
        break;

      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
      // If the narrow type cannot be expressed with a legal VMV,
      // this is not a valid candidate.
      if (NarrowSize < 8)
        break;

      if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
        SupportsSExt = true;
      if (DAG.MaskedValueIsZero(Op,
                                APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
        SupportsZExt = true;
      break;
    }
    default:
      break;
    }
  }

  /// Check if \p Root supports any extension folding combines.
  static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) {
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL: {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      if (!TLI.isTypeLegal(Root->getValueType(0)))
        return false;
      return Root->getValueType(0).isScalableVector();
    }
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return true;
    default:
      return false;
    }
  }

  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
  NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, DAG) && "Trying to build a helper with an "
                                         "unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
    OrigOperand = Root->getOperand(OperandIdx);

    unsigned Opc = Root->getOpcode();
    switch (Opc) {
    // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
    // <ADD|SUB>(LHS, S|ZEXT(RHS))
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      if (OperandIdx == 1) {
        SupportsZExt =
            Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
        SupportsSExt = !SupportsZExt;
        std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
        CheckMask = true;
        // There's no existing extension here, so we don't have to worry about
        // making sure it gets removed.
        EnforceOneUse = false;
        break;
      }
      [[fallthrough]];
    default:
      fillUpExtensionSupport(Root, DAG, Subtarget);
      break;
    }
  }

  /// Check if this operand is compatible with the given vector length \p VL.
  bool isVLCompatible(SDValue VL) const {
    return this->VL != SDValue() && this->VL == VL;
  }

  /// Check if this operand is compatible with the given \p Mask.
  bool isMaskCompatible(SDValue Mask) const {
    return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
  }

  /// Helper function to get the Mask and VL from \p Root.
  static std::pair<SDValue, SDValue>
  getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, DAG) && "Unexpected root");
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL: {
      SDLoc DL(Root);
      MVT VT = Root->getSimpleValueType(0);
      return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
    }
    default:
      return std::make_pair(Root->getOperand(3), Root->getOperand(4));
    }
  }

  /// Check if the Mask and VL of this operand are compatible with \p Root.
  bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) const {
    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
    return isMaskCompatible(Mask) && isVLCompatible(VL);
  }

  /// Helper function to check if \p N is commutative with respect to the
  /// foldings that are supported by this class.
  static bool isCommutative(const SDNode *N) {
    switch (N->getOpcode()) {
    case ISD::ADD:
    case ISD::MUL:
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
      return true;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return false;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get a list of combine to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that if not None,
  /// need to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.
  static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
};

/// Helper structure that holds all the necessary information to materialize a
/// combine that does some extension folding.
struct CombineResult {
  /// Opcode to be generated when materializing the combine.
  unsigned TargetOpcode;
  // No value means no extension is needed. If extension is needed, the value
  // indicates if it needs to be sign extended.
  std::optional<bool> SExtLHS;
  std::optional<bool> SExtRHS;
  /// Root of the combine.
  SDNode *Root;
  /// LHS of the TargetOpcode.
  NodeExtensionHelper LHS;
  /// RHS of the TargetOpcode.
  NodeExtensionHelper RHS;

  CombineResult(unsigned TargetOpcode, SDNode *Root,
                const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
                const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
      : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
        Root(Root), LHS(LHS), RHS(RHS) {}

  /// Return a value that uses TargetOpcode and that can be used to replace
  /// Root.
  /// The actual replacement is *not* done in that method.
  SDValue materialize(SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) const {
    SDValue Mask, VL, Merge;
    std::tie(Mask, VL) =
        NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
    switch (Root->getOpcode()) {
    default:
      Merge = Root->getOperand(2);
      break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
      Merge = DAG.getUNDEF(Root->getValueType(0));
      break;
    }
    return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
                       LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS),
                       RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS),
                       Merge, Mask, VL);
  }
};

/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
/// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
///
/// \note If the pattern can match with both zext and sext, the returned
/// CombineResult will feature the zext result.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
                                 const NodeExtensionHelper &RHS, bool AllowSExt,
                                 bool AllowZExt, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
  if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
      !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
    return std::nullopt;
  if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
    return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
                             Root->getOpcode(), /*IsSExt=*/false),
                         Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false);
  if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
    return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
                             Root->getOpcode(), /*IsSExt=*/true),
                         Root, LHS, /*SExtLHS=*/true, RHS,
                         /*SExtRHS=*/true);
  return std::nullopt;
}

/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
                             const NodeExtensionHelper &RHS, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
                                          /*AllowZExt=*/true, DAG, Subtarget);
}

/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
              const NodeExtensionHelper &RHS, SelectionDAG &DAG,
              const RISCVSubtarget &Subtarget) {
  if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
    return std::nullopt;

  // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
  // sext/zext?
  // Control this behavior behind an option (AllowSplatInVW_W) for testing
  // purposes.
  if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
    return CombineResult(
        NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
        Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
  if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
    return CombineResult(
        NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
        Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
  return std::nullopt;
}

/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
                    const NodeExtensionHelper &RHS, SelectionDAG &DAG,
                    const RISCVSubtarget &Subtarget) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
                                          /*AllowZExt=*/false, DAG, Subtarget);
}

/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
                    const NodeExtensionHelper &RHS, SelectionDAG &DAG,
                    const RISCVSubtarget &Subtarget) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
                                          /*AllowZExt=*/true, DAG, Subtarget);
}

/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
               const NodeExtensionHelper &RHS, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {

  if (!LHS.SupportsSExt || !RHS.SupportsZExt)
    return std::nullopt;
  if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
      !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
    return std::nullopt;
  return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
                       Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
}

SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
    // add|sub -> vwadd(u)|vwsub(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub -> vwadd(u)_w|vwsub(u)_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case ISD::MUL:
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWSUB_W_VL:
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUBU_W_VL:
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
} // End anonymous namespace.

/// Combine a binary operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
    return SDValue();

  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();
    if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
      return SDValue();

    NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
    auto AppendUsersIfNeeded = [&Worklist,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDNode *TheUse : Op.OrigOperand->uses()) {
          if (Inserted.insert(TheUse).second)
            Worklist.push_back(TheUse);
        }
      }
    };

    // Control the compile time by limiting the number of nodes we look at in
    // total.
    if (Inserted.size() > ExtensionMaxWebSize)
      return SDValue();

    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
        NodeExtensionHelper::getSupportedFoldings(N);

    assert(!FoldingStrategies.empty() && "Nothing to be folded");
    bool Matched = false;
    for (int Attempt = 0;
         (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
         ++Attempt) {

      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
           FoldingStrategies) {
        std::optional<CombineResult> Res =
            FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
        if (Res) {
          Matched = true;
          CombinesToApply.push_back(*Res);
          // All the inputs that are extended need to be folded, otherwise
          // we would be leaving the old input (since it may still be used),
          // and the new one.
          if (Res->SExtLHS.has_value())
            AppendUsersIfNeeded(LHS);
          if (Res->SExtRHS.has_value())
            AppendUsersIfNeeded(RHS);
          break;
        }
      }
      std::swap(LHS, RHS);
    }
    // Right now we do an all or nothing approach.
    if (!Matched)
      return SDValue();
  }
  // Store the value for the replacement of the input node separately.
  SDValue InputRootReplacement;
  // We do the RAUW after we materialize all the combines, because some replaced
  // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
  // some of these nodes may appear in the NodeExtensionHelpers of some of the
  // yet-to-be-visited CombinesToApply roots.
  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
  ValuesToReplace.reserve(CombinesToApply.size());
  for (CombineResult Res : CombinesToApply) {
    SDValue NewValue = Res.materialize(DAG, Subtarget);
    if (!InputRootReplacement) {
      assert(Res.Root == N &&
             "First element is expected to be the current node");
      InputRootReplacement = NewValue;
    } else {
      ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
    }
  }
  for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
    DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
    DCI.AddToWorklist(OldNewValues.second.getNode());
  }
  return InputRootReplacement;
}
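// The next two combines target the XTHeadMemPair vendor extension, which
// provides paired load/store operations (TH_LWD/TH_LWUD/TH_LDD and
// TH_SWD/TH_SDD below) that access two adjacent 32-bit or 64-bit values with
// a single instruction.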
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
                                 uint64_t Imm) {
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};

  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
    return SDValue();

  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // The new operation has twice the width.
  MVT XLenVT = Subtarget.getXLenVT();
  EVT MemVT = LSNode1->getMemoryVT();
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
  MachineMemOperand *MMO = LSNode1->getMemOperand();
  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);

  if (LSNode1->getOpcode() == ISD::LOAD) {
    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
    unsigned Opcode;
    if (MemVT == MVT::i32)
      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
    else
      Opcode = RISCVISD::TH_LDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
        {LSNode1->getChain(), BasePtr,
         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    SDValue Node1 =
        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
    SDValue Node2 =
        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));

    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
    return Node1;
  } else {
    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
    return Res;
  }
}

// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension.
static SDValue performMemPairCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())
    return SDValue();

  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
  EVT MemVT = LSNode1->getMemoryVT();
  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD)
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
        return {Ptr->getOperand(0), C1->getZExtValue()};
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  SDValue Chain = N->getOperand(0);
  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
       UI != UE; ++UI) {
    SDUse &Use = UI.getUse();
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {
      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
            cast<LoadSDNode>(LSNode1)->getExtensionType())
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instructions.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints.
      bool Valid = false;
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index.
        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
          Valid = true;
      } else if (MemVT == MVT::i64) {
        // Check for adjacent i64 values and a 2-bit index.
        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
          Valid = true;
      }

      if (!Valid)
        continue;

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}
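// The fp_to_int combines below rely on FCVT's static rounding-mode operand:
// e.g. on RV64 with D, (fp_to_sint (ffloor X:f64)) can be selected as a single
// fcvt.l.d using the rdn rounding mode, avoiding a separate rounding step.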
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X))     -> fcvt X, rtz
// (fp_to_int (ffloor X))     -> fcvt X, rdn
// (fp_to_int (fceil X))      -> fcvt X, rup
// (fp_to_int (fround X))     -> fcvt X, rmm
// (fp_to_int (frint X))      -> fcvt X
static SDValue performFP_TO_INTCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  // If the result is invalid, we didn't find a foldable instruction.
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  EVT VT = N->getValueType(0);

  if (VT.isVector() && TLI.isTypeLegal(VT)) {
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcContainerVT = SrcVT;
    MVT ContainerVT = VT.getSimpleVT();
    SDValue XVal = Src.getOperand(0);

    // For widening and narrowing conversions we just combine it into a
    // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
    // end up getting lowered to their appropriate pseudo instructions based on
    // their operand types
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
        VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
      return SDValue();

    // Make fixed-length vectors scalable first
    if (SrcVT.isFixedLengthVector()) {
      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
      ContainerVT =
          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
    }

    auto [Mask, VL] =
        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);

    SDValue FpToInt;
    if (FRM == RISCVFPRndMode::RTZ) {
      // Use the dedicated trunc static rounding mode if we're truncating so we
      // don't need to generate calls to fsrmi/fsrm
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else if (FRM == RISCVFPRndMode::DYN) {
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else {
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    }

    // If converted from fixed-length to scalable, convert back
    if (VT.isFixedLengthVector())
      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);

    return FpToInt;
  }

  // Only handle XLen or i32 types. Other types narrower than XLen will
  // eventually be legalized to XLenVT.
  if (VT != MVT::i32 && VT != XLenVT)
    return SDValue();

  unsigned Opc;
  if (VT == XLenVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;

  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                DAG.getTargetConstant(FRM, DL, XLenVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}

// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
// (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
// (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
// (fp_to_int_sat (frint X))      -> (select X == nan, 0, (fcvt X, dyn))
static SDValue performFP_TO_INT_SATCombine(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  // Only handle XLen types. Other types narrower than XLen will eventually be
  // legalized to XLenVT.
  EVT DstVT = N->getValueType(0);
  if (DstVT != XLenVT)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is also legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;

  unsigned Opc;
  if (SatVT == DstVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  Src = Src.getOperand(0);

  SDLoc DL(N);
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
                                DAG.getTargetConstant(FRM, DL, XLenVT));

  // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
  // extend.
  if (Opc == RISCVISD::FCVT_WU_RV64)
    FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

  // RISC-V FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan.
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
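// For the combine below, note that (bitreverse (bswap X)) reverses the bits
// within each byte while leaving the byte order intact, which is exactly what
// the Zbkb brev8 instruction (RISCVISD::BREV8) computes.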
// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
// smaller than XLenVT.
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != ISD::BSWAP)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
      !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
}

// Convert from one FMA opcode to another based on whether we are negating the
// multiply result and/or the accumulator.
// NOTE: Only supports RVV operations with VL.
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
  // Negating the multiply result changes ADD<->SUB and toggles 'N'.
  if (NegMul) {
    // clang-format off
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
    case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
    case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
    }
    // clang-format on
  }

  // Negating the accumulator changes ADD<->SUB.
  if (NegAcc) {
    // clang-format off
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
    case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
    case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
    }
    // clang-format on
  }

  return Opcode;
}
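// Example: starting from VFMADD_VL ((A * B) + C), negating the product gives
// -(A * B) + C, i.e. VFNMSUB_VL; additionally negating the accumulator gives
// -(A * B) - C, i.e. VFNMADD_VL.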
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
  // Fold FNEG_VL into FMA opcodes.
  // The first operand of strict-fp is chain.
  unsigned Offset = N->isTargetStrictFPOpcode();
  SDValue A = N->getOperand(0 + Offset);
  SDValue B = N->getOperand(1 + Offset);
  SDValue C = N->getOperand(2 + Offset);
  SDValue Mask = N->getOperand(3 + Offset);
  SDValue VL = N->getOperand(4 + Offset);

  auto invertIfNegative = [&Mask, &VL](SDValue &V) {
    if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
        V.getOperand(2) == VL) {
      // Return the negated input.
      V = V.getOperand(0);
      return true;
    }

    return false;
  };

  bool NegA = invertIfNegative(A);
  bool NegB = invertIfNegative(B);
  bool NegC = invertIfNegative(C);

  // If no operands are negated, we're done.
  if (!NegA && !NegB && !NegC)
    return SDValue();

  unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
  if (N->isTargetStrictFPOpcode())
    return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
                       {N->getOperand(0), A, B, C, Mask, VL});
  return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
                     VL);
}
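// The combine below additionally tries to form widening FMAs (e.g.
// VFWMADD_VL) when both multiplicands are FP_EXTEND_VL nodes with the same
// mask and VL as the FMA itself.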
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
    return V;

  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16())) {
    return SDValue();
  }

  // FIXME: Ignore strict opcodes for now.
  if (N->isTargetStrictFPOpcode())
    return SDValue();

  // Try to form widening FMA.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
      Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
    return SDValue();

  // TODO: Refactor to handle more complex cases similar to
  // combineBinOp_VLToVWBinOp_VL.
  if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
      (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
    return SDValue();

  // Check the mask and VL are the same.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
      Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
    return SDValue();

  unsigned NewOpc;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCVISD::VFMADD_VL:
    NewOpc = RISCVISD::VFWMADD_VL;
    break;
  case RISCVISD::VFNMSUB_VL:
    NewOpc = RISCVISD::VFWNMSUB_VL;
    break;
  case RISCVISD::VFNMADD_VL:
    NewOpc = RISCVISD::VFWNMADD_VL;
    break;
  case RISCVISD::VFMSUB_VL:
    NewOpc = RISCVISD::VFWMSUB_VL;
    break;
  }

  Op0 = Op0.getOperand(0);
  Op1 = Op1.getOperand(0);

  return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
                     N->getOperand(2), Mask, VL);
}

static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16())) {
    return SDValue();
  }

  // FIXME: Ignore strict opcodes for now.
  assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");

  // Try to form widening multiply.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Merge = N->getOperand(2);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
      Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
    return SDValue();

  // TODO: Refactor to handle more complex cases similar to
  // combineBinOp_VLToVWBinOp_VL.
  if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
      (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
    return SDValue();

  // Check the mask and VL are the same.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
      Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
    return SDValue();

  Op0 = Op0.getOperand(0);
  Op1 = Op1.getOperand(0);

  return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
                     Op1, Merge, Mask, VL);
}

static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  if (N->getValueType(0).isScalableVector() &&
      N->getValueType(0).getVectorElementType() == MVT::f32 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16())) {
    return SDValue();
  }

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Merge = N->getOperand(2);
  SDValue Mask = N->getOperand(3);
  SDValue VL = N->getOperand(4);

  bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;

  // Look for foldable FP_EXTENDS.
  bool Op0IsExtend =
      Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
      (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
  bool Op1IsExtend =
      (Op0 == Op1 && Op0IsExtend) ||
      (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());

  // Check the mask and VL.
  if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
    Op0IsExtend = false;
  if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
    Op1IsExtend = false;

  if (!Op1IsExtend) {
    // Sub requires at least operand 1 to be an extend.
    if (!IsAdd)
      return SDValue();

    // Add is commutable, if the other operand is foldable, swap them.
    if (!Op0IsExtend)
      return SDValue();

    std::swap(Op0, Op1);
    std::swap(Op0IsExtend, Op1IsExtend);
  }

  // Op1 is a foldable extend. Op0 might be foldable.
  Op1 = Op1.getOperand(0);
  if (Op0IsExtend)
    Op0 = Op0.getOperand(0);

  unsigned Opc;
  if (IsAdd)
    Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
  else
    Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;

  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
                     VL);
}
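// Example for the SRA combine below: with ShAmt = 17,
// (sra (shl X, 32), 17) becomes (shl (sext_inreg X, i32), 15), and a shift
// amount of exactly 32 reduces to just (sext_inreg X, i32).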
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");

  if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();
  uint64_t ShAmt = N->getConstantOperandVal(1);
  if (ShAmt > 32)
    return SDValue();

  SDValue N0 = N->getOperand(0);

  // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
  // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
  // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
  if (ShAmt < 32 &&
      N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
      cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
      N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
    if (LShAmt < 32) {
      SDLoc ShlDL(N0.getOperand(0));
      SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
                                N0.getOperand(0).getOperand(0),
                                DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
                         DAG.getConstant(ShAmt + 32, DL, MVT::i64));
    }
  }

  // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
  // FIXME: Should this be a generic combine? There's a similar combine on X86.
  //
  // Also try these folds where an add or sub is in the middle.
  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
  SDValue Shl;
  ConstantSDNode *AddC = nullptr;

  // We might have an ADD or SUB between the SRA and SHL.
  bool IsAdd = N0.getOpcode() == ISD::ADD;
  if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
    // Other operand needs to be a constant we can modify.
    AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
    if (!AddC)
      return SDValue();

    // AddC needs to have at least 32 trailing zeros.
    if (AddC->getAPIntValue().countr_zero() < 32)
      return SDValue();

    // All users should be a shift by constant less than or equal to 32. This
    // ensures we'll do this optimization for each of them to produce an
    // add/sub+sext_inreg they can all share.
    for (SDNode *U : N0->uses()) {
      if (U->getOpcode() != ISD::SRA ||
          !isa<ConstantSDNode>(U->getOperand(1)) ||
          U->getConstantOperandVal(1) > 32)
        return SDValue();
    }

    Shl = N0.getOperand(IsAdd ? 0 : 1);
  } else {
    // Not an ADD or SUB.
    Shl = N0;
  }

  // Look for a shift left by 32.
  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
      Shl.getConstantOperandVal(1) != 32)
    return SDValue();

  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  if (!AddC && !Shl.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  SDValue In = Shl.getOperand(0);

  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
  // constant.
  if (AddC) {
    SDValue ShiftedAddC =
        DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
    if (IsAdd)
      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
    else
      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
  }

  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                             DAG.getValueType(MVT::i32));
  if (ShAmt == 32)
    return SExt;

  return DAG.getNode(
      ISD::SHL, DL, MVT::i64, SExt,
      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}
// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
// br_cc/select_cc.
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // Invert (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}
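// tryDemorganOfBooleanCondition above relies on De Morgan's laws:
// not (and A, B) == or (not A, not B) and not (or A, B) == and (not A, not B).
// Only the setcc is inverted here; the caller compensates by inverting the
// br_cc/select_cc condition itself.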
// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // Since an arithmetic right shift always preserves the sign,
  // the shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  if (isNullConstant(RHS)) {
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}

// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
// (select C, (or Y, X), Y)  -> (or Y, (select C, X, 0)).
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
                                   SDValue TrueVal, SDValue FalseVal,
                                   bool Swapped) {
  bool Commutative = true;
  unsigned Opc = TrueVal.getOpcode();
  switch (Opc) {
  default:
    return SDValue();
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SUB:
    Commutative = false;
    break;
  case ISD::ADD:
  case ISD::OR:
  case ISD::XOR:
    break;
  }

  if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
    return SDValue();

  unsigned OpToFold;
  if (FalseVal == TrueVal.getOperand(0))
    OpToFold = 0;
  else if (Commutative && FalseVal == TrueVal.getOperand(1))
    OpToFold = 1;
  else
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
  EVT OtherOpVT = OtherOp->getValueType(0);
  SDValue IdentityOperand =
      DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
  if (!Commutative)
    IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
  assert(IdentityOperand && "No identity operand!");

  if (Swapped)
    std::swap(OtherOp, IdentityOperand);
  SDValue NewSel =
      DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
  return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
}
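// Example for the combine below, for an i32 value X:
//   (select (setcc X, 0, eq), 0, (cttz X)) --> (and (cttz X), 31)
// since cttz(0) is defined to be the bit width (32) and 32 & 31 == 0.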
// This tries to get rid of `select` and `icmp` that are being used to handle
// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);

  // This represents either CTTZ or CTLZ instruction.
  SDValue CountZeroes;

  SDValue ValOnZero;

  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();

  if (!isNullConstant(Cond->getOperand(1)))
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
  if (CCVal == ISD::CondCode::SETEQ) {
    CountZeroes = N->getOperand(2);
    ValOnZero = N->getOperand(1);
  } else if (CCVal == ISD::CondCode::SETNE) {
    CountZeroes = N->getOperand(1);
    ValOnZero = N->getOperand(2);
  } else {
    return SDValue();
  }

  if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
      CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
    CountZeroes = CountZeroes.getOperand(0);

  if (CountZeroes.getOpcode() != ISD::CTTZ &&
      CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
      CountZeroes.getOpcode() != ISD::CTLZ &&
      CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
    return SDValue();

  if (!isNullConstant(ValOnZero))
    return SDValue();

  SDValue CountZeroesArgument = CountZeroes->getOperand(0);
  if (Cond->getOperand(0) != CountZeroesArgument)
    return SDValue();

  if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  }

  unsigned BitWidth = CountZeroes.getValueSizeInBits();
  SDValue BitWidthMinusOne =
      DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());

  auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
                             CountZeroes, BitWidthMinusOne);
  return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}

static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return SDValue();

  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
  // BEXTI, where C is power of 2.
  if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
      (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
      const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
      if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
        return DAG.getSelect(DL, VT,
                             DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
                             False, True);
    }
  }
  return SDValue();
}
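// Inverting the compare above lets the single-bit test be selected as a Zbs
// bexti feeding the conditional-zero style operations (Zicond or the Ventana
// equivalents) without materializing the non-simm12 mask constant for a
// separate comparison.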
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
    return Folded;

  if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
    return V;

  if (Subtarget.hasConditionalMoveFusion())
    return SDValue();

  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
    return V;
  return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}

/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
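/// For example:
///   (build_vector (add X0, 1), (add X1, 2), (add X2, 3), (add X3, 4))
/// becomes
///   (add (build_vector X0, X1, X2, X3), (build_vector 1, 2, 3, 4)).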
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget,
                                          const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  assert(!VT.isScalableVector() && "unexpected build vector");

  if (VT.getVectorNumElements() == 1)
    return SDValue();

  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
  if (!TLI.isBinOp(Opcode))
    return SDValue();

  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
    return SDValue();

  SmallVector<SDValue> LHSOps;
  SmallVector<SDValue> RHSOps;
  for (SDValue Op : N->ops()) {
    if (Op.isUndef()) {
      // We can't form a divide or remainder from undef.
      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
        return SDValue();

      LHSOps.push_back(Op);
      RHSOps.push_back(Op);
      continue;
    }

    // TODO: We can handle operations which have a neutral rhs value
    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
    // of profit in a more explicit manner.
    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
      return SDValue();

    LHSOps.push_back(Op.getOperand(0));
    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
        !isa<ConstantFPSDNode>(Op.getOperand(1)))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
      return SDValue();
    RHSOps.push_back(Op.getOperand(1));
  }

  return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
                     DAG.getBuildVector(VT, DL, RHSOps));
}

static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                               const RISCVSubtarget &Subtarget,
                                               const RISCVTargetLowering &TLI) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  if (VT.isScalableVector())
    return SDValue();

  if (!InVec.hasOneUse())
    return SDValue();

  // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
  // move the insert_vector_elts into the arms of the binop. Note that
  // the new RHS must be a constant.
  const unsigned InVecOpcode = InVec->getOpcode();
  if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
      InVal.hasOneUse()) {
    SDValue InVecLHS = InVec->getOperand(0);
    SDValue InVecRHS = InVec->getOperand(1);
    SDValue InValLHS = InVal->getOperand(0);
    SDValue InValRHS = InVal->getOperand(1);

    if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
      return SDValue();
    if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
      return SDValue();
    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                              InVecLHS, InValLHS, EltNo);
    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                              InVecRHS, InValRHS, EltNo);
    return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
  }

  // Given insert_vector_elt (concat_vectors ...), InVal, Elt
  // move the insert_vector_elt to the source operand of the concat_vector.
  if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  EVT ConcatVT = InVec.getOperand(0).getValueType();
  if (ConcatVT.getVectorElementType() != InVal.getValueType())
    return SDValue();
  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
  SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
                                   EltNo.getValueType());

  unsigned ConcatOpIdx = Elt / ConcatNumElts;
  SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
  ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
                         ConcatOp, InVal, NewIdx);

  SmallVector<SDValue> ConcatOps;
  ConcatOps.append(InVec->op_begin(), InVec->op_end());
  ConcatOps[ConcatOpIdx] = ConcatOp;
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
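// In the combine below, the stride may also come from a pointer chain
// (add p, Stride); when the chain runs in the opposite direction, the stride
// is negated via DAG.getNegative before the strided load is built.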
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
// vlse32 p, stride=n
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget,
                                            const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only perform this combine on legal MVTs.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // TODO: Potentially extend this to scalable vectors
  if (VT.isScalableVector())
    return SDValue();

  auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
      !SDValue(BaseLd, 0).hasOneUse())
    return SDValue();

  EVT BaseLdVT = BaseLd->getValueType(0);

  // Go through the loads and check that they're strided
  SmallVector<LoadSDNode *> Lds;
  Lds.push_back(BaseLd);
  Align Align = BaseLd->getAlign();
  for (SDValue Op : N->ops().drop_front()) {
    auto *Ld = dyn_cast<LoadSDNode>(Op);
    if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
        Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
        Ld->getValueType(0) != BaseLdVT)
      return SDValue();

    Lds.push_back(Ld);

    // The common alignment is the most restrictive (smallest) of all the loads
    Align = std::min(Align, Ld->getAlign());
  }

  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
    // If the load ptrs can be decomposed into a common (Base + Index) with a
    // common constant stride, then return the constant stride.
    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
    if (BIO1.equalBaseIndex(BIO2, DAG))
      return {{BIO2.getOffset() - BIO1.getOffset(), false}};

    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
    SDValue P1 = Ld1->getBasePtr();
    SDValue P2 = Ld2->getBasePtr();
    if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
      return {{P2.getOperand(1), false}};
    if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
      return {{P1.getOperand(1), true}};

    return std::nullopt;
  };

  // Get the distance between the first and second loads
  auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
  if (!BaseDiff)
    return SDValue();

  // Check all the loads are the same distance apart
  for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
    if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
      return SDValue();

  // TODO: At this point, we've successfully matched a generalized gather
  // load. Maybe we should emit that, and then move the specialized
  // matchers above and below into a DAG combine?

  // Get the widened scalar type, e.g. v4i8 -> i64
  unsigned WideScalarBitWidth =
      BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
  MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);

  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
  MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
  if (!TLI.isTypeLegal(WideVecVT))
    return SDValue();

  // Check that the operation is legal
  if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
    return SDValue();

  auto [StrideVariant, MustNegateStride] = *BaseDiff;
  SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
                       ? std::get<SDValue>(StrideVariant)
                       : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
                                         Lds[0]->getOffset().getValueType());
  if (MustNegateStride)
    Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

  SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
  SDValue IntID =
      DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
                            Subtarget.getXLenVT());

  SDValue AllOneMask =
      DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                   DAG.getConstant(1, DL, MVT::i1));

  SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
                   BaseLd->getBasePtr(), Stride, AllOneMask};

  uint64_t MemSize;
  if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
      ConstStride && ConstStride->getSExtValue() >= 0)
    // total size = (elsize * n) + (stride - elsize) * (n-1)
    //            = elsize + stride * (n-1)
    MemSize = WideScalarVT.getSizeInBits() +
              ConstStride->getSExtValue() * (N->getNumOperands() - 1);
  else
    // If Stride isn't constant, then we can't know how much it will load
    MemSize = MemoryLocation::UnknownSize;

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
      Align);

  SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
                                                Ops, WideVecVT, MMO);

  for (SDValue Ld : N->ops())
    DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

  return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
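// Example for the combine below:
//   (add_vl Acc, (vwmul_vl A, B))  -->  (vwmacc_vl A, B, Acc)
// provided the merge operands are undef and the mask/VL of the add and the
// multiply match.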
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {

  assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);

  if (N->getValueType(0).isFixedLengthVector())
    return SDValue();

  SDValue Addend = N->getOperand(0);
  SDValue MulOp = N->getOperand(1);

  if (N->getOpcode() == RISCVISD::ADD_VL) {
    SDValue AddMergeOp = N->getOperand(2);
    if (!AddMergeOp.isUndef())
      return SDValue();
  }

  auto IsVWMulOpc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VWMUL_VL:
    case RISCVISD::VWMULU_VL:
    case RISCVISD::VWMULSU_VL:
      return true;
    default:
      return false;
    }
  };

  if (!IsVWMulOpc(MulOp.getOpcode()))
    std::swap(Addend, MulOp);

  if (!IsVWMulOpc(MulOp.getOpcode()))
    return SDValue();

  SDValue MulMergeOp = MulOp.getOperand(2);

  if (!MulMergeOp.isUndef())
    return SDValue();

  auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
    if (N->getOpcode() == ISD::ADD) {
      SDLoc DL(N);
      return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
                                     Subtarget);
    }
    return std::make_pair(N->getOperand(3), N->getOperand(4));
  }(N, DAG, Subtarget);

  SDValue MulMask = MulOp.getOperand(3);
  SDValue MulVL = MulOp.getOperand(4);

  if (AddMask != MulMask || AddVL != MulVL)
    return SDValue();

  unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
  static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
                "Unexpected opcode after VWMACC_VL");
  static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
                "Unexpected opcode after VWMACC_VL!");
  static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
                "Unexpected opcode after VWMUL_VL!");
  static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
                "Unexpected opcode after VWMUL_VL!");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
                   AddVL};
  return DAG.getNode(Opc, DL, VT, Ops);
}

static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
                                           ISD::MemIndexType &IndexType,
                                           RISCVTargetLowering::DAGCombinerInfo &DCI) {
  if (!DCI.isBeforeLegalize())
    return false;

  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();

  const EVT IndexVT = Index.getValueType();

  // RISC-V indexed loads only support the "unsigned unscaled" addressing
  // mode, so anything else must be manually legalized.
  if (!isIndexTypeSigned(IndexType))
    return false;

  if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
                        IndexVT.changeVectorElementType(XLenVT), Index);
  }

  IndexType = ISD::UNSIGNED_SCALED;
  return true;
}

/// Match the index vector of a scatter or gather node as the shuffle mask
/// which performs the rearrangement if possible. Will only match if
/// all lanes are touched, and thus replacing the scatter or gather with
/// a unit strided access and shuffle is legal.
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
                                SmallVector<int> &ShuffleMask) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned NumElems = VT.getVectorNumElements();

  // Create the shuffle mask and check all bits active
  assert(ShuffleMask.empty());
  BitVector ActiveLanes(NumElems);
  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    uint64_t C = Index->getConstantOperandVal(i);
    if (C % ElementSize != 0)
      return false;
    C = C / ElementSize;
    if (C >= NumElems)
      return false;
    ShuffleMask.push_back(C);
    ActiveLanes.set(C);
  }
  return ActiveLanes.all();
}

/// Match the index of a gather or scatter operation as an operation
/// with twice the element width and half the number of elements. This is
/// generally profitable (if legal) because these operations are linear
/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
/// come out ahead.
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
                                Align BaseAlign, const RISCVSubtarget &ST) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  // Attempt a doubling. If we can use an element type 4x or 8x in
  // size, this will happen via multiple iterations of the transform.
  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems % 2 != 0)
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned WiderElementSize = ElementSize * 2;
  if (WiderElementSize > ST.getELen()/8)
    return false;

  if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
    return false;

  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    // TODO: This offset check is too strict if we support fully
    // misaligned memory operations.
    uint64_t C = Index->getConstantOperandVal(i);
    if (i % 2 == 0) {
      if (C % WiderElementSize != 0)
        return false;
      continue;
    }
    uint64_t Last = Index->getConstantOperandVal(i-1);
    if (C != Last + ElementSize)
      return false;
  }
  return true;
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(N);

  // Helper to call SimplifyDemandedBits on an operand of N where only some low
  // bits are demanded. N will be added to the Worklist if it was not deleted.
  // Caller should return SDValue(N, 0) if this returns true.
  auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
    SDValue Op = N->getOperand(OpNo);
    APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
    if (!SimplifyDemandedBits(Op, Mask, DCI))
      return false;

    if (N->getOpcode() != ISD::DELETED_NODE)
      DCI.AddToWorklist(N);
    return true;
  };

  switch (N->getOpcode()) {
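  // Note: SimplifyDemandedLowBitsHelper above is what lets the *W cases below
  // (SLLW/SRAW/SRLW/RORW/ROLW, CLZW/CTZW) shrink their operands to just the
  // bits the 32-bit RV64 instructions actually read.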
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    if (Op0->isUndef()) {
      SDValue Lo = DAG.getUNDEF(MVT::i32);
      SDValue Hi = DAG.getUNDEF(MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::RORW:
  case RISCVISD::ROLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    if (SimplifyDemandedLowBitsHelper(0, 32) ||
        SimplifyDemandedLowBitsHelper(1, 5))
      return SDValue(N, 0);

    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read
    if (SimplifyDemandedLowBitsHelper(0, 32))
      return SDValue(N, 0);
    break;
  }
  case RISCVISD::FMV_W_X_RV64: {
    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
    // conversion is unnecessary and can be replaced with the
    // FMV_X_ANYEXTW_RV64 operand.
    SDValue Op0 = N->getOperand(0);
    if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
      return Op0.getOperand(0);
    break;
  }
  case RISCVISD::FMV_X_ANYEXTH:
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDValue Op0 = N->getOperand(0);
    MVT VT = N->getSimpleValueType(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
    // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
    if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
         Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
        (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
         Op0->getOpcode() == RISCVISD::FMV_H_X)) {
      assert(Op0.getOperand(0).getValueType() == VT &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
    unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
    APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
                         DAG.getConstant(SignBit, DL, VT));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT, NewFMV,
                       DAG.getConstant(~SignBit, DL, VT));
  }
  case ISD::ADD: {
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
      return V;
    return performADDCombine(N, DAG, Subtarget);
  }
  case ISD::SUB: {
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    return performSUBCombine(N, DAG, Subtarget);
  }
  case ISD::AND:
    return performANDCombine(N, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::XOR:
    return performXORCombine(N, DAG, Subtarget);
  case ISD::MUL:
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    return performMULCombine(N, DAG);
  case ISD::FADD:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::FMAXNUM:
  case ISD::FMINNUM: {
    if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
      return V;
    if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
      return V;
    return SDValue();
  }
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, Subtarget);
  case ISD::SIGN_EXTEND_INREG:
    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
  case ISD::ZERO_EXTEND:
    // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
    // type legalization. This is safe because fp_to_uint produces poison if
    // the result isn't representable.
    if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
      SDValue Src = N->getOperand(0);
      if (Src.getOpcode() == ISD::FP_TO_UINT &&
          isTypeLegal(Src.getOperand(0).getValueType()))
        return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
                           Src.getOperand(0));
      if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
          isTypeLegal(Src.getOperand(1).getValueType())) {
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
                                  Src.getOperand(0), Src.getOperand(1));
        DCI.CombineTo(N, Res);
        DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(Src.getNode());
        return SDValue(N, 0); // Return N so it doesn't get rechecked.
      }
    }
    return SDValue();
  case RISCVISD::TRUNCATE_VECTOR_VL: {
    // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
    // This would be beneficial for cases where X and Y are both the same value
    // type of low precision vectors. Since the truncate would be lowered into
    // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
    // restriction, such pattern would be expanded into a series of "vsetvli"
    // and "vnsrl" instructions later to reach this point.
    auto IsTruncNode = [](SDValue V) {
      if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
        return false;
      SDValue VL = V.getOperand(2);
      auto *C = dyn_cast<ConstantSDNode>(VL);
      // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
      bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
                             (isa<RegisterSDNode>(VL) &&
                              cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
      return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
             IsVLMAXForVMSET;
    };

    SDValue Op = N->getOperand(0);

    // We need to first find the inner level of TRUNCATE_VECTOR_VL node
    // to distinguish such pattern.
    while (IsTruncNode(Op)) {
      if (!Op.hasOneUse())
        return SDValue();
      Op = Op.getOperand(0);
    }

    if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
      SDValue N0 = Op.getOperand(0);
      SDValue N1 = Op.getOperand(1);
      if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
          N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
        SDValue N00 = N0.getOperand(0);
        SDValue N10 = N1.getOperand(0);
        if (N00.getValueType().isVector() &&
            N00.getValueType() == N10.getValueType() &&
            N->getValueType(0) == N10.getValueType()) {
          unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
          SDValue SMin = DAG.getNode(
              ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
              DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
          return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
        }
      }
    }
    break;
  }
  case ISD::TRUNCATE:
    return performTRUNCATECombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return performSELECTCombine(N, DAG, Subtarget);
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
    // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
    if (N->getOperand(1).getOpcode() == ISD::XOR &&
        isOneConstant(N->getOperand(1).getOperand(1))) {
      SDValue Cond = N->getOperand(1).getOperand(0);
      APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
      if (DAG.MaskedValueIsZero(Cond, Mask)) {
        unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
                              ? RISCVISD::CZERO_NEZ
15386 : RISCVISD::CZERO_EQZ
;
15387 return DAG
.getNode(NewOpc
, SDLoc(N
), N
->getValueType(0),
15388 N
->getOperand(0), Cond
);
15393 case RISCVISD::SELECT_CC
: {
15395 SDValue LHS
= N
->getOperand(0);
15396 SDValue RHS
= N
->getOperand(1);
15397 SDValue CC
= N
->getOperand(2);
15398 ISD::CondCode CCVal
= cast
<CondCodeSDNode
>(CC
)->get();
15399 SDValue TrueV
= N
->getOperand(3);
15400 SDValue FalseV
= N
->getOperand(4);
15402 EVT VT
= N
->getValueType(0);
15404 // If the True and False values are the same, we don't need a select_cc.
15405 if (TrueV
== FalseV
)
15408 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
15409 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
15410 if (!Subtarget
.hasShortForwardBranchOpt() && isa
<ConstantSDNode
>(TrueV
) &&
15411 isa
<ConstantSDNode
>(FalseV
) && isNullConstant(RHS
) &&
15412 (CCVal
== ISD::CondCode::SETLT
|| CCVal
== ISD::CondCode::SETGE
)) {
15413 if (CCVal
== ISD::CondCode::SETGE
)
15414 std::swap(TrueV
, FalseV
);
15416 int64_t TrueSImm
= cast
<ConstantSDNode
>(TrueV
)->getSExtValue();
15417 int64_t FalseSImm
= cast
<ConstantSDNode
>(FalseV
)->getSExtValue();
15418 // Only handle simm12, if it is not in this range, it can be considered as
15420 if (isInt
<12>(TrueSImm
) && isInt
<12>(FalseSImm
) &&
15421 isInt
<12>(TrueSImm
- FalseSImm
)) {
15423 DAG
.getNode(ISD::SRA
, DL
, VT
, LHS
,
15424 DAG
.getConstant(Subtarget
.getXLen() - 1, DL
, VT
));
15426 DAG
.getNode(ISD::AND
, DL
, VT
, SRA
,
15427 DAG
.getConstant(TrueSImm
- FalseSImm
, DL
, VT
));
15428 return DAG
.getNode(ISD::ADD
, DL
, VT
, AND
, FalseV
);
15431 if (CCVal
== ISD::CondCode::SETGE
)
15432 std::swap(TrueV
, FalseV
);
15435 if (combine_CC(LHS
, RHS
, CC
, DL
, DAG
, Subtarget
))
15436 return DAG
.getNode(RISCVISD::SELECT_CC
, DL
, N
->getValueType(0),
15437 {LHS
, RHS
, CC
, TrueV
, FalseV
});
15439 if (!Subtarget
.hasConditionalMoveFusion()) {
15440 // (select c, -1, y) -> -c | y
15441 if (isAllOnesConstant(TrueV
)) {
15442 SDValue C
= DAG
.getSetCC(DL
, VT
, LHS
, RHS
, CCVal
);
15443 SDValue Neg
= DAG
.getNegative(C
, DL
, VT
);
15444 return DAG
.getNode(ISD::OR
, DL
, VT
, Neg
, FalseV
);
15446 // (select c, y, -1) -> -!c | y
15447 if (isAllOnesConstant(FalseV
)) {
15449 DAG
.getSetCC(DL
, VT
, LHS
, RHS
, ISD::getSetCCInverse(CCVal
, VT
));
15450 SDValue Neg
= DAG
.getNegative(C
, DL
, VT
);
15451 return DAG
.getNode(ISD::OR
, DL
, VT
, Neg
, TrueV
);
15454 // (select c, 0, y) -> -!c & y
15455 if (isNullConstant(TrueV
)) {
15457 DAG
.getSetCC(DL
, VT
, LHS
, RHS
, ISD::getSetCCInverse(CCVal
, VT
));
15458 SDValue Neg
= DAG
.getNegative(C
, DL
, VT
);
15459 return DAG
.getNode(ISD::AND
, DL
, VT
, Neg
, FalseV
);
15461 // (select c, y, 0) -> -c & y
15462 if (isNullConstant(FalseV
)) {
15463 SDValue C
= DAG
.getSetCC(DL
, VT
, LHS
, RHS
, CCVal
);
15464 SDValue Neg
= DAG
.getNegative(C
, DL
, VT
);
15465 return DAG
.getNode(ISD::AND
, DL
, VT
, Neg
, TrueV
);
15467 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
15468 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
15469 if (((isOneConstant(FalseV
) && LHS
== TrueV
&&
15470 CCVal
== ISD::CondCode::SETNE
) ||
15471 (isOneConstant(TrueV
) && LHS
== FalseV
&&
15472 CCVal
== ISD::CondCode::SETEQ
)) &&
15473 isNullConstant(RHS
)) {
15474 // freeze it to be safe.
15475 LHS
= DAG
.getFreeze(LHS
);
15476 SDValue C
= DAG
.getSetCC(DL
, VT
, LHS
, RHS
, ISD::CondCode::SETEQ
);
15477 return DAG
.getNode(ISD::ADD
, DL
, VT
, LHS
, C
);
15481 // If both true/false are an xor with 1, pull through the select.
15482 // This can occur after op legalization if both operands are setccs that
15483 // require an xor to invert.
15484 // FIXME: Generalize to other binary ops with identical operand?
15485 if (TrueV
.getOpcode() == ISD::XOR
&& FalseV
.getOpcode() == ISD::XOR
&&
15486 TrueV
.getOperand(1) == FalseV
.getOperand(1) &&
15487 isOneConstant(TrueV
.getOperand(1)) &&
15488 TrueV
.hasOneUse() && FalseV
.hasOneUse()) {
15489 SDValue NewSel
= DAG
.getNode(RISCVISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, CC
,
15490 TrueV
.getOperand(0), FalseV
.getOperand(0));
15491 return DAG
.getNode(ISD::XOR
, DL
, VT
, NewSel
, TrueV
.getOperand(1));
15496 case RISCVISD::BR_CC
: {
15497 SDValue LHS
= N
->getOperand(1);
15498 SDValue RHS
= N
->getOperand(2);
15499 SDValue CC
= N
->getOperand(3);
15502 if (combine_CC(LHS
, RHS
, CC
, DL
, DAG
, Subtarget
))
15503 return DAG
.getNode(RISCVISD::BR_CC
, DL
, N
->getValueType(0),
15504 N
->getOperand(0), LHS
, RHS
, CC
, N
->getOperand(4));
15508 case ISD::BITREVERSE
:
15509 return performBITREVERSECombine(N
, DAG
, Subtarget
);
15510 case ISD::FP_TO_SINT
:
15511 case ISD::FP_TO_UINT
:
15512 return performFP_TO_INTCombine(N
, DCI
, Subtarget
);
15513 case ISD::FP_TO_SINT_SAT
:
15514 case ISD::FP_TO_UINT_SAT
:
15515 return performFP_TO_INT_SATCombine(N
, DCI
, Subtarget
);
15516 case ISD::FCOPYSIGN
: {
15517 EVT VT
= N
->getValueType(0);
15518 if (!VT
.isVector())
15520 // There is a form of VFSGNJ which injects the negated sign of its second
15521 // operand. Try and bubble any FNEG up after the extend/round to produce
15522 // this optimized pattern. Avoid modifying cases where FP_ROUND and
15524 SDValue In2
= N
->getOperand(1);
15525 // Avoid cases where the extend/round has multiple uses, as duplicating
15526 // those is typically more expensive than removing a fneg.
15527 if (!In2
.hasOneUse())
15529 if (In2
.getOpcode() != ISD::FP_EXTEND
&&
15530 (In2
.getOpcode() != ISD::FP_ROUND
|| In2
.getConstantOperandVal(1) != 0))
15532 In2
= In2
.getOperand(0);
15533 if (In2
.getOpcode() != ISD::FNEG
)
15536 SDValue NewFPExtRound
= DAG
.getFPExtendOrRound(In2
.getOperand(0), DL
, VT
);
15537 return DAG
.getNode(ISD::FCOPYSIGN
, DL
, VT
, N
->getOperand(0),
15538 DAG
.getNode(ISD::FNEG
, DL
, VT
, NewFPExtRound
));
15540 case ISD::MGATHER
: {
15541 const auto *MGN
= dyn_cast
<MaskedGatherSDNode
>(N
);
15542 const EVT VT
= N
->getValueType(0);
15543 SDValue Index
= MGN
->getIndex();
15544 SDValue ScaleOp
= MGN
->getScale();
15545 ISD::MemIndexType IndexType
= MGN
->getIndexType();
15546 assert(!MGN
->isIndexScaled() &&
15547 "Scaled gather/scatter should not be formed");
15550 if (legalizeScatterGatherIndexType(DL
, Index
, IndexType
, DCI
))
15551 return DAG
.getMaskedGather(
15552 N
->getVTList(), MGN
->getMemoryVT(), DL
,
15553 {MGN
->getChain(), MGN
->getPassThru(), MGN
->getMask(),
15554 MGN
->getBasePtr(), Index
, ScaleOp
},
15555 MGN
->getMemOperand(), IndexType
, MGN
->getExtensionType());
15557 if (narrowIndex(Index
, IndexType
, DAG
))
15558 return DAG
.getMaskedGather(
15559 N
->getVTList(), MGN
->getMemoryVT(), DL
,
15560 {MGN
->getChain(), MGN
->getPassThru(), MGN
->getMask(),
15561 MGN
->getBasePtr(), Index
, ScaleOp
},
15562 MGN
->getMemOperand(), IndexType
, MGN
->getExtensionType());
15564 if (Index
.getOpcode() == ISD::BUILD_VECTOR
&&
15565 MGN
->getExtensionType() == ISD::NON_EXTLOAD
&& isTypeLegal(VT
)) {
15566 // The sequence will be XLenVT, not the type of Index. Tell
15567 // isSimpleVIDSequence this so we avoid overflow.
15568 if (std::optional
<VIDSequence
> SimpleVID
=
15569 isSimpleVIDSequence(Index
, Subtarget
.getXLen());
15570 SimpleVID
&& SimpleVID
->StepDenominator
== 1) {
15571 const int64_t StepNumerator
= SimpleVID
->StepNumerator
;
15572 const int64_t Addend
= SimpleVID
->Addend
;
15574 // Note: We don't need to check alignment here since (by assumption
15575 // from the existance of the gather), our offsets must be sufficiently
15578 const EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
15579 assert(MGN
->getBasePtr()->getValueType(0) == PtrVT
);
15580 assert(IndexType
== ISD::UNSIGNED_SCALED
);
15581 SDValue BasePtr
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, MGN
->getBasePtr(),
15582 DAG
.getConstant(Addend
, DL
, PtrVT
));
15584 SDVTList VTs
= DAG
.getVTList({VT
, MVT::Other
});
15586 DAG
.getTargetConstant(Intrinsic::riscv_masked_strided_load
, DL
,
15589 {MGN
->getChain(), IntID
, MGN
->getPassThru(), BasePtr
,
15590 DAG
.getConstant(StepNumerator
, DL
, XLenVT
), MGN
->getMask()};
15591 return DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, DL
, VTs
,
15592 Ops
, VT
, MGN
->getMemOperand());
15596 SmallVector
<int> ShuffleMask
;
15597 if (MGN
->getExtensionType() == ISD::NON_EXTLOAD
&&
15598 matchIndexAsShuffle(VT
, Index
, MGN
->getMask(), ShuffleMask
)) {
15599 SDValue Load
= DAG
.getMaskedLoad(VT
, DL
, MGN
->getChain(),
15600 MGN
->getBasePtr(), DAG
.getUNDEF(XLenVT
),
15601 MGN
->getMask(), DAG
.getUNDEF(VT
),
15602 MGN
->getMemoryVT(), MGN
->getMemOperand(),
15603 ISD::UNINDEXED
, ISD::NON_EXTLOAD
);
15605 DAG
.getVectorShuffle(VT
, DL
, Load
, DAG
.getUNDEF(VT
), ShuffleMask
);
15606 return DAG
.getMergeValues({Shuffle
, Load
.getValue(1)}, DL
);
15609 if (MGN
->getExtensionType() == ISD::NON_EXTLOAD
&&
15610 matchIndexAsWiderOp(VT
, Index
, MGN
->getMask(),
15611 MGN
->getMemOperand()->getBaseAlign(), Subtarget
)) {
15612 SmallVector
<SDValue
> NewIndices
;
15613 for (unsigned i
= 0; i
< Index
->getNumOperands(); i
+= 2)
15614 NewIndices
.push_back(Index
.getOperand(i
));
15615 EVT IndexVT
= Index
.getValueType()
15616 .getHalfNumVectorElementsVT(*DAG
.getContext());
15617 Index
= DAG
.getBuildVector(IndexVT
, DL
, NewIndices
);
15619 unsigned ElementSize
= VT
.getScalarStoreSize();
15620 EVT WideScalarVT
= MVT::getIntegerVT(ElementSize
* 8 * 2);
15621 auto EltCnt
= VT
.getVectorElementCount();
15622 assert(EltCnt
.isKnownEven() && "Splitting vector, but not in half!");
15623 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), WideScalarVT
,
15624 EltCnt
.divideCoefficientBy(2));
15625 SDValue Passthru
= DAG
.getBitcast(WideVT
, MGN
->getPassThru());
15626 EVT MaskVT
= EVT::getVectorVT(*DAG
.getContext(), MVT::i1
,
15627 EltCnt
.divideCoefficientBy(2));
15628 SDValue Mask
= DAG
.getSplat(MaskVT
, DL
, DAG
.getConstant(1, DL
, MVT::i1
));
15631 DAG
.getMaskedGather(DAG
.getVTList(WideVT
, MVT::Other
), WideVT
, DL
,
15632 {MGN
->getChain(), Passthru
, Mask
, MGN
->getBasePtr(),
15634 MGN
->getMemOperand(), IndexType
, ISD::NON_EXTLOAD
);
15635 SDValue Result
= DAG
.getBitcast(VT
, Gather
.getValue(0));
15636 return DAG
.getMergeValues({Result
, Gather
.getValue(1)}, DL
);
15640 case ISD::MSCATTER
:{
15641 const auto *MSN
= dyn_cast
<MaskedScatterSDNode
>(N
);
15642 SDValue Index
= MSN
->getIndex();
15643 SDValue ScaleOp
= MSN
->getScale();
15644 ISD::MemIndexType IndexType
= MSN
->getIndexType();
15645 assert(!MSN
->isIndexScaled() &&
15646 "Scaled gather/scatter should not be formed");
15649 if (legalizeScatterGatherIndexType(DL
, Index
, IndexType
, DCI
))
15650 return DAG
.getMaskedScatter(
15651 N
->getVTList(), MSN
->getMemoryVT(), DL
,
15652 {MSN
->getChain(), MSN
->getValue(), MSN
->getMask(), MSN
->getBasePtr(),
15654 MSN
->getMemOperand(), IndexType
, MSN
->isTruncatingStore());
15656 if (narrowIndex(Index
, IndexType
, DAG
))
15657 return DAG
.getMaskedScatter(
15658 N
->getVTList(), MSN
->getMemoryVT(), DL
,
15659 {MSN
->getChain(), MSN
->getValue(), MSN
->getMask(), MSN
->getBasePtr(),
15661 MSN
->getMemOperand(), IndexType
, MSN
->isTruncatingStore());
15663 EVT VT
= MSN
->getValue()->getValueType(0);
15664 SmallVector
<int> ShuffleMask
;
15665 if (!MSN
->isTruncatingStore() &&
15666 matchIndexAsShuffle(VT
, Index
, MSN
->getMask(), ShuffleMask
)) {
15667 SDValue Shuffle
= DAG
.getVectorShuffle(VT
, DL
, MSN
->getValue(),
15668 DAG
.getUNDEF(VT
), ShuffleMask
);
15669 return DAG
.getMaskedStore(MSN
->getChain(), DL
, Shuffle
, MSN
->getBasePtr(),
15670 DAG
.getUNDEF(XLenVT
), MSN
->getMask(),
15671 MSN
->getMemoryVT(), MSN
->getMemOperand(),
15672 ISD::UNINDEXED
, false);
15676 case ISD::VP_GATHER
: {
15677 const auto *VPGN
= dyn_cast
<VPGatherSDNode
>(N
);
15678 SDValue Index
= VPGN
->getIndex();
15679 SDValue ScaleOp
= VPGN
->getScale();
15680 ISD::MemIndexType IndexType
= VPGN
->getIndexType();
15681 assert(!VPGN
->isIndexScaled() &&
15682 "Scaled gather/scatter should not be formed");
15685 if (legalizeScatterGatherIndexType(DL
, Index
, IndexType
, DCI
))
15686 return DAG
.getGatherVP(N
->getVTList(), VPGN
->getMemoryVT(), DL
,
15687 {VPGN
->getChain(), VPGN
->getBasePtr(), Index
,
15688 ScaleOp
, VPGN
->getMask(),
15689 VPGN
->getVectorLength()},
15690 VPGN
->getMemOperand(), IndexType
);
15692 if (narrowIndex(Index
, IndexType
, DAG
))
15693 return DAG
.getGatherVP(N
->getVTList(), VPGN
->getMemoryVT(), DL
,
15694 {VPGN
->getChain(), VPGN
->getBasePtr(), Index
,
15695 ScaleOp
, VPGN
->getMask(),
15696 VPGN
->getVectorLength()},
15697 VPGN
->getMemOperand(), IndexType
);
15701 case ISD::VP_SCATTER
: {
15702 const auto *VPSN
= dyn_cast
<VPScatterSDNode
>(N
);
15703 SDValue Index
= VPSN
->getIndex();
15704 SDValue ScaleOp
= VPSN
->getScale();
15705 ISD::MemIndexType IndexType
= VPSN
->getIndexType();
15706 assert(!VPSN
->isIndexScaled() &&
15707 "Scaled gather/scatter should not be formed");
15710 if (legalizeScatterGatherIndexType(DL
, Index
, IndexType
, DCI
))
15711 return DAG
.getScatterVP(N
->getVTList(), VPSN
->getMemoryVT(), DL
,
15712 {VPSN
->getChain(), VPSN
->getValue(),
15713 VPSN
->getBasePtr(), Index
, ScaleOp
,
15714 VPSN
->getMask(), VPSN
->getVectorLength()},
15715 VPSN
->getMemOperand(), IndexType
);
15717 if (narrowIndex(Index
, IndexType
, DAG
))
15718 return DAG
.getScatterVP(N
->getVTList(), VPSN
->getMemoryVT(), DL
,
15719 {VPSN
->getChain(), VPSN
->getValue(),
15720 VPSN
->getBasePtr(), Index
, ScaleOp
,
15721 VPSN
->getMask(), VPSN
->getVectorLength()},
15722 VPSN
->getMemOperand(), IndexType
);
15725 case RISCVISD::SRA_VL
:
15726 case RISCVISD::SRL_VL
:
15727 case RISCVISD::SHL_VL
: {
15728 SDValue ShAmt
= N
->getOperand(1);
15729 if (ShAmt
.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL
) {
15730 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
15732 SDValue VL
= N
->getOperand(4);
15733 EVT VT
= N
->getValueType(0);
15734 ShAmt
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, VT
, DAG
.getUNDEF(VT
),
15735 ShAmt
.getOperand(1), VL
);
15736 return DAG
.getNode(N
->getOpcode(), DL
, VT
, N
->getOperand(0), ShAmt
,
15737 N
->getOperand(2), N
->getOperand(3), N
->getOperand(4));
15742 if (SDValue V
= performSRACombine(N
, DAG
, Subtarget
))
15747 SDValue ShAmt
= N
->getOperand(1);
15748 if (ShAmt
.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL
) {
15749 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
15751 EVT VT
= N
->getValueType(0);
15752 ShAmt
= DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, VT
, DAG
.getUNDEF(VT
),
15753 ShAmt
.getOperand(1),
15754 DAG
.getRegister(RISCV::X0
, Subtarget
.getXLenVT()));
15755 return DAG
.getNode(N
->getOpcode(), DL
, VT
, N
->getOperand(0), ShAmt
);
15759 case RISCVISD::ADD_VL
:
15760 if (SDValue V
= combineBinOp_VLToVWBinOp_VL(N
, DCI
, Subtarget
))
15762 return combineToVWMACC(N
, DAG
, Subtarget
);
15763 case RISCVISD::SUB_VL
:
15764 case RISCVISD::VWADD_W_VL
:
15765 case RISCVISD::VWADDU_W_VL
:
15766 case RISCVISD::VWSUB_W_VL
:
15767 case RISCVISD::VWSUBU_W_VL
:
15768 case RISCVISD::MUL_VL
:
15769 return combineBinOp_VLToVWBinOp_VL(N
, DCI
, Subtarget
);
15770 case RISCVISD::VFMADD_VL
:
15771 case RISCVISD::VFNMADD_VL
:
15772 case RISCVISD::VFMSUB_VL
:
15773 case RISCVISD::VFNMSUB_VL
:
15774 case RISCVISD::STRICT_VFMADD_VL
:
15775 case RISCVISD::STRICT_VFNMADD_VL
:
15776 case RISCVISD::STRICT_VFMSUB_VL
:
15777 case RISCVISD::STRICT_VFNMSUB_VL
:
15778 return performVFMADD_VLCombine(N
, DAG
, Subtarget
);
15779 case RISCVISD::FMUL_VL
:
15780 return performVFMUL_VLCombine(N
, DAG
, Subtarget
);
15781 case RISCVISD::FADD_VL
:
15782 case RISCVISD::FSUB_VL
:
15783 return performFADDSUB_VLCombine(N
, DAG
, Subtarget
);
15786 if (DCI
.isAfterLegalizeDAG())
15787 if (SDValue V
= performMemPairCombine(N
, DCI
))
15790 if (N
->getOpcode() != ISD::STORE
)
15793 auto *Store
= cast
<StoreSDNode
>(N
);
15794 SDValue Chain
= Store
->getChain();
15795 EVT MemVT
= Store
->getMemoryVT();
15796 SDValue Val
= Store
->getValue();
15799 bool IsScalarizable
=
15800 MemVT
.isFixedLengthVector() && ISD::isNormalStore(Store
) &&
15801 Store
->isSimple() &&
15802 MemVT
.getVectorElementType().bitsLE(Subtarget
.getXLenVT()) &&
15803 isPowerOf2_64(MemVT
.getSizeInBits()) &&
15804 MemVT
.getSizeInBits() <= Subtarget
.getXLen();
15806 // If sufficiently aligned we can scalarize stores of constant vectors of
15807 // any power-of-two size up to XLen bits, provided that they aren't too
15808 // expensive to materialize.
15809 // vsetivli zero, 2, e8, m1, ta, ma
15811 // vse64.v v8, (a0)
15815 if (DCI
.isBeforeLegalize() && IsScalarizable
&&
15816 ISD::isBuildVectorOfConstantSDNodes(Val
.getNode())) {
15817 // Get the constant vector bits
15818 APInt
NewC(Val
.getValueSizeInBits(), 0);
15819 uint64_t EltSize
= Val
.getScalarValueSizeInBits();
15820 for (unsigned i
= 0; i
< Val
.getNumOperands(); i
++) {
15821 if (Val
.getOperand(i
).isUndef())
15823 NewC
.insertBits(Val
.getConstantOperandAPInt(i
).trunc(EltSize
),
15826 MVT NewVT
= MVT::getIntegerVT(MemVT
.getSizeInBits());
15828 if (RISCVMatInt::getIntMatCost(NewC
, Subtarget
.getXLen(), Subtarget
,
15830 allowsMemoryAccessForAlignment(*DAG
.getContext(), DAG
.getDataLayout(),
15831 NewVT
, *Store
->getMemOperand())) {
15832 SDValue NewV
= DAG
.getConstant(NewC
, DL
, NewVT
);
15833 return DAG
.getStore(Chain
, DL
, NewV
, Store
->getBasePtr(),
15834 Store
->getPointerInfo(), Store
->getOriginalAlign(),
15835 Store
->getMemOperand()->getFlags());
15839 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
15840 // vsetivli zero, 2, e16, m1, ta, ma
15841 // vle16.v v8, (a0)
15842 // vse16.v v8, (a1)
15843 if (auto *L
= dyn_cast
<LoadSDNode
>(Val
);
15844 L
&& DCI
.isBeforeLegalize() && IsScalarizable
&& L
->isSimple() &&
15845 L
->hasNUsesOfValue(1, 0) && L
->hasNUsesOfValue(1, 1) &&
15846 Store
->getChain() == SDValue(L
, 1) && ISD::isNormalLoad(L
) &&
15847 L
->getMemoryVT() == MemVT
) {
15848 MVT NewVT
= MVT::getIntegerVT(MemVT
.getSizeInBits());
15849 if (allowsMemoryAccessForAlignment(*DAG
.getContext(), DAG
.getDataLayout(),
15850 NewVT
, *Store
->getMemOperand()) &&
15851 allowsMemoryAccessForAlignment(*DAG
.getContext(), DAG
.getDataLayout(),
15852 NewVT
, *L
->getMemOperand())) {
15853 SDValue NewL
= DAG
.getLoad(NewVT
, DL
, L
->getChain(), L
->getBasePtr(),
15854 L
->getPointerInfo(), L
->getOriginalAlign(),
15855 L
->getMemOperand()->getFlags());
15856 return DAG
.getStore(Chain
, DL
, NewL
, Store
->getBasePtr(),
15857 Store
->getPointerInfo(), Store
->getOriginalAlign(),
15858 Store
->getMemOperand()->getFlags());
15862 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
15863 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
15864 // any illegal types.
15865 if (Val
.getOpcode() == RISCVISD::VMV_X_S
||
15866 (DCI
.isAfterLegalizeDAG() &&
15867 Val
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&&
15868 isNullConstant(Val
.getOperand(1)))) {
15869 SDValue Src
= Val
.getOperand(0);
15870 MVT VecVT
= Src
.getSimpleValueType();
15871 // VecVT should be scalable and memory VT should match the element type.
15872 if (!Store
->isIndexed() && VecVT
.isScalableVector() &&
15873 MemVT
== VecVT
.getVectorElementType()) {
15875 MVT MaskVT
= getMaskTypeFor(VecVT
);
15876 return DAG
.getStoreVP(
15877 Store
->getChain(), DL
, Src
, Store
->getBasePtr(), Store
->getOffset(),
15878 DAG
.getConstant(1, DL
, MaskVT
),
15879 DAG
.getConstant(1, DL
, Subtarget
.getXLenVT()), MemVT
,
15880 Store
->getMemOperand(), Store
->getAddressingMode(),
15881 Store
->isTruncatingStore(), /*IsCompress*/ false);
15887 case ISD::SPLAT_VECTOR
: {
15888 EVT VT
= N
->getValueType(0);
15889 // Only perform this combine on legal MVT types.
15890 if (!isTypeLegal(VT
))
15892 if (auto Gather
= matchSplatAsGather(N
->getOperand(0), VT
.getSimpleVT(), N
,
15897 case ISD::BUILD_VECTOR
:
15898 if (SDValue V
= performBUILD_VECTORCombine(N
, DAG
, Subtarget
, *this))
15901 case ISD::CONCAT_VECTORS
:
15902 if (SDValue V
= performCONCAT_VECTORSCombine(N
, DAG
, Subtarget
, *this))
15905 case ISD::INSERT_VECTOR_ELT
:
15906 if (SDValue V
= performINSERT_VECTOR_ELTCombine(N
, DAG
, Subtarget
, *this))
15909 case RISCVISD::VFMV_V_F_VL
: {
15910 const MVT VT
= N
->getSimpleValueType(0);
15911 SDValue Passthru
= N
->getOperand(0);
15912 SDValue Scalar
= N
->getOperand(1);
15913 SDValue VL
= N
->getOperand(2);
15915 // If VL is 1, we can use vfmv.s.f.
15916 if (isOneConstant(VL
))
15917 return DAG
.getNode(RISCVISD::VFMV_S_F_VL
, DL
, VT
, Passthru
, Scalar
, VL
);
15920 case RISCVISD::VMV_V_X_VL
: {
15921 const MVT VT
= N
->getSimpleValueType(0);
15922 SDValue Passthru
= N
->getOperand(0);
15923 SDValue Scalar
= N
->getOperand(1);
15924 SDValue VL
= N
->getOperand(2);
15926 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
15928 unsigned ScalarSize
= Scalar
.getValueSizeInBits();
15929 unsigned EltWidth
= VT
.getScalarSizeInBits();
15930 if (ScalarSize
> EltWidth
&& Passthru
.isUndef())
15931 if (SimplifyDemandedLowBitsHelper(1, EltWidth
))
15932 return SDValue(N
, 0);
15934 // If VL is 1 and the scalar value won't benefit from immediate, we can
15936 ConstantSDNode
*Const
= dyn_cast
<ConstantSDNode
>(Scalar
);
15937 if (isOneConstant(VL
) &&
15938 (!Const
|| Const
->isZero() ||
15939 !Const
->getAPIntValue().sextOrTrunc(EltWidth
).isSignedIntN(5)))
15940 return DAG
.getNode(RISCVISD::VMV_S_X_VL
, DL
, VT
, Passthru
, Scalar
, VL
);
15944 case RISCVISD::VFMV_S_F_VL
: {
15945 SDValue Src
= N
->getOperand(1);
15946 // Try to remove vector->scalar->vector if the scalar->vector is inserting
15947 // into an undef vector.
15948 // TODO: Could use a vslide or vmv.v.v for non-undef.
15949 if (N
->getOperand(0).isUndef() &&
15950 Src
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&&
15951 isNullConstant(Src
.getOperand(1)) &&
15952 Src
.getOperand(0).getValueType().isScalableVector()) {
15953 EVT VT
= N
->getValueType(0);
15954 EVT SrcVT
= Src
.getOperand(0).getValueType();
15955 assert(SrcVT
.getVectorElementType() == VT
.getVectorElementType());
15956 // Widths match, just return the original vector.
15958 return Src
.getOperand(0);
15959 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
15963 case RISCVISD::VMV_S_X_VL
: {
15964 const MVT VT
= N
->getSimpleValueType(0);
15965 SDValue Passthru
= N
->getOperand(0);
15966 SDValue Scalar
= N
->getOperand(1);
15967 SDValue VL
= N
->getOperand(2);
15969 // Use M1 or smaller to avoid over constraining register allocation
15970 const MVT M1VT
= getLMUL1VT(VT
);
15971 if (M1VT
.bitsLT(VT
)) {
15972 SDValue M1Passthru
=
15973 DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, M1VT
, Passthru
,
15974 DAG
.getVectorIdxConstant(0, DL
));
15976 DAG
.getNode(N
->getOpcode(), DL
, M1VT
, M1Passthru
, Scalar
, VL
);
15977 Result
= DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, VT
, Passthru
, Result
,
15978 DAG
.getConstant(0, DL
, XLenVT
));
15982 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
15983 // higher would involve overly constraining the register allocator for
15985 if (ConstantSDNode
*Const
= dyn_cast
<ConstantSDNode
>(Scalar
);
15986 Const
&& !Const
->isZero() && isInt
<5>(Const
->getSExtValue()) &&
15987 VT
.bitsLE(getLMUL1VT(VT
)) && Passthru
.isUndef())
15988 return DAG
.getNode(RISCVISD::VMV_V_X_VL
, DL
, VT
, Passthru
, Scalar
, VL
);
15992 case RISCVISD::VMV_X_S
: {
15993 SDValue Vec
= N
->getOperand(0);
15994 MVT VecVT
= N
->getOperand(0).getSimpleValueType();
15995 const MVT M1VT
= getLMUL1VT(VecVT
);
15996 if (M1VT
.bitsLT(VecVT
)) {
15997 Vec
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, M1VT
, Vec
,
15998 DAG
.getVectorIdxConstant(0, DL
));
15999 return DAG
.getNode(RISCVISD::VMV_X_S
, DL
, N
->getSimpleValueType(0), Vec
);
16003 case ISD::INTRINSIC_VOID
:
16004 case ISD::INTRINSIC_W_CHAIN
:
16005 case ISD::INTRINSIC_WO_CHAIN
: {
16006 unsigned IntOpNo
= N
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
? 0 : 1;
16007 unsigned IntNo
= N
->getConstantOperandVal(IntOpNo
);
16009 // By default we do not combine any intrinsic.
16012 case Intrinsic::riscv_masked_strided_load
: {
16013 MVT VT
= N
->getSimpleValueType(0);
16014 auto *Load
= cast
<MemIntrinsicSDNode
>(N
);
16015 SDValue PassThru
= N
->getOperand(2);
16016 SDValue Base
= N
->getOperand(3);
16017 SDValue Stride
= N
->getOperand(4);
16018 SDValue Mask
= N
->getOperand(5);
16020 // If the stride is equal to the element size in bytes, we can use
16022 const unsigned ElementSize
= VT
.getScalarStoreSize();
16023 if (auto *StrideC
= dyn_cast
<ConstantSDNode
>(Stride
);
16024 StrideC
&& StrideC
->getZExtValue() == ElementSize
)
16025 return DAG
.getMaskedLoad(VT
, DL
, Load
->getChain(), Base
,
16026 DAG
.getUNDEF(XLenVT
), Mask
, PassThru
,
16027 Load
->getMemoryVT(), Load
->getMemOperand(),
16028 ISD::UNINDEXED
, ISD::NON_EXTLOAD
);
16031 case Intrinsic::riscv_masked_strided_store
: {
16032 auto *Store
= cast
<MemIntrinsicSDNode
>(N
);
16033 SDValue Value
= N
->getOperand(2);
16034 SDValue Base
= N
->getOperand(3);
16035 SDValue Stride
= N
->getOperand(4);
16036 SDValue Mask
= N
->getOperand(5);
16038 // If the stride is equal to the element size in bytes, we can use
16040 const unsigned ElementSize
= Value
.getValueType().getScalarStoreSize();
16041 if (auto *StrideC
= dyn_cast
<ConstantSDNode
>(Stride
);
16042 StrideC
&& StrideC
->getZExtValue() == ElementSize
)
16043 return DAG
.getMaskedStore(Store
->getChain(), DL
, Value
, Base
,
16044 DAG
.getUNDEF(XLenVT
), Mask
,
16045 Store
->getMemoryVT(), Store
->getMemOperand(),
16046 ISD::UNINDEXED
, false);
16049 case Intrinsic::riscv_vcpop
:
16050 case Intrinsic::riscv_vcpop_mask
:
16051 case Intrinsic::riscv_vfirst
:
16052 case Intrinsic::riscv_vfirst_mask
: {
16053 SDValue VL
= N
->getOperand(2);
16054 if (IntNo
== Intrinsic::riscv_vcpop_mask
||
16055 IntNo
== Intrinsic::riscv_vfirst_mask
)
16056 VL
= N
->getOperand(3);
16057 if (!isNullConstant(VL
))
16059 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16061 EVT VT
= N
->getValueType(0);
16062 if (IntNo
== Intrinsic::riscv_vfirst
||
16063 IntNo
== Intrinsic::riscv_vfirst_mask
)
16064 return DAG
.getConstant(-1, DL
, VT
);
16065 return DAG
.getConstant(0, DL
, VT
);
16069 case ISD::BITCAST
: {
16070 assert(Subtarget
.useRVVForFixedLengthVectors());
16071 SDValue N0
= N
->getOperand(0);
16072 EVT VT
= N
->getValueType(0);
16073 EVT SrcVT
= N0
.getValueType();
16074 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16075 // type, widen both sides to avoid a trip through memory.
16076 if ((SrcVT
== MVT::v1i1
|| SrcVT
== MVT::v2i1
|| SrcVT
== MVT::v4i1
) &&
16077 VT
.isScalarInteger()) {
16078 unsigned NumConcats
= 8 / SrcVT
.getVectorNumElements();
16079 SmallVector
<SDValue
, 4> Ops(NumConcats
, DAG
.getUNDEF(SrcVT
));
16082 N0
= DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, MVT::v8i1
, Ops
);
16083 N0
= DAG
.getBitcast(MVT::i8
, N0
);
16084 return DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, N0
);
16094 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16095 EVT XVT
, unsigned KeptBits
) const {
16096 // For vectors, we don't have a preference..
16097 if (XVT
.isVector())
16100 if (XVT
!= MVT::i32
&& XVT
!= MVT::i64
)
16103 // We can use sext.w for RV64 or an srai 31 on RV32.
16104 if (KeptBits
== 32 || KeptBits
== 64)
16107 // With Zbb we can use sext.h/sext.b.
16108 return Subtarget
.hasStdExtZbb() &&
16109 ((KeptBits
== 8 && XVT
== MVT::i64
&& !Subtarget
.is64Bit()) ||
16113 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16114 const SDNode
*N
, CombineLevel Level
) const {
16115 assert((N
->getOpcode() == ISD::SHL
|| N
->getOpcode() == ISD::SRA
||
16116 N
->getOpcode() == ISD::SRL
) &&
16117 "Expected shift op");
16119 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16120 // materialised in fewer instructions than `(OP _, c1)`:
16122 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16123 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
16124 SDValue N0
= N
->getOperand(0);
16125 EVT Ty
= N0
.getValueType();
16126 if (Ty
.isScalarInteger() &&
16127 (N0
.getOpcode() == ISD::ADD
|| N0
.getOpcode() == ISD::OR
)) {
16128 auto *C1
= dyn_cast
<ConstantSDNode
>(N0
->getOperand(1));
16129 auto *C2
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
16131 const APInt
&C1Int
= C1
->getAPIntValue();
16132 APInt ShiftedC1Int
= C1Int
<< C2
->getAPIntValue();
16134 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16135 // and the combine should happen, to potentially allow further combines
16137 if (ShiftedC1Int
.getSignificantBits() <= 64 &&
16138 isLegalAddImmediate(ShiftedC1Int
.getSExtValue()))
16141 // We can materialise `c1` in an add immediate, so it's "free", and the
16142 // combine should be prevented.
16143 if (C1Int
.getSignificantBits() <= 64 &&
16144 isLegalAddImmediate(C1Int
.getSExtValue()))
16147 // Neither constant will fit into an immediate, so find materialisation
16150 RISCVMatInt::getIntMatCost(C1Int
, Ty
.getSizeInBits(), Subtarget
,
16151 /*CompressionCost*/ true);
16152 int ShiftedC1Cost
= RISCVMatInt::getIntMatCost(
16153 ShiftedC1Int
, Ty
.getSizeInBits(), Subtarget
,
16154 /*CompressionCost*/ true);
16156 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16157 // combine should be prevented.
16158 if (C1Cost
< ShiftedC1Cost
)
16165 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16166 SDValue Op
, const APInt
&DemandedBits
, const APInt
&DemandedElts
,
16167 TargetLoweringOpt
&TLO
) const {
16168 // Delay this optimization as late as possible.
16172 EVT VT
= Op
.getValueType();
16176 unsigned Opcode
= Op
.getOpcode();
16177 if (Opcode
!= ISD::AND
&& Opcode
!= ISD::OR
&& Opcode
!= ISD::XOR
)
16180 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
16184 const APInt
&Mask
= C
->getAPIntValue();
16186 // Clear all non-demanded bits initially.
16187 APInt ShrunkMask
= Mask
& DemandedBits
;
16189 // Try to make a smaller immediate by setting undemanded bits.
16191 APInt ExpandedMask
= Mask
| ~DemandedBits
;
16193 auto IsLegalMask
= [ShrunkMask
, ExpandedMask
](const APInt
&Mask
) -> bool {
16194 return ShrunkMask
.isSubsetOf(Mask
) && Mask
.isSubsetOf(ExpandedMask
);
16196 auto UseMask
= [Mask
, Op
, &TLO
](const APInt
&NewMask
) -> bool {
16197 if (NewMask
== Mask
)
16200 SDValue NewC
= TLO
.DAG
.getConstant(NewMask
, DL
, Op
.getValueType());
16201 SDValue NewOp
= TLO
.DAG
.getNode(Op
.getOpcode(), DL
, Op
.getValueType(),
16202 Op
.getOperand(0), NewC
);
16203 return TLO
.CombineTo(Op
, NewOp
);
16206 // If the shrunk mask fits in sign extended 12 bits, let the target
16207 // independent code apply it.
16208 if (ShrunkMask
.isSignedIntN(12))
16211 // And has a few special cases for zext.
16212 if (Opcode
== ISD::AND
) {
16213 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
16214 // otherwise use SLLI + SRLI.
16215 APInt NewMask
= APInt(Mask
.getBitWidth(), 0xffff);
16216 if (IsLegalMask(NewMask
))
16217 return UseMask(NewMask
);
16219 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
16220 if (VT
== MVT::i64
) {
16221 APInt NewMask
= APInt(64, 0xffffffff);
16222 if (IsLegalMask(NewMask
))
16223 return UseMask(NewMask
);
16227 // For the remaining optimizations, we need to be able to make a negative
16228 // number through a combination of mask and undemanded bits.
16229 if (!ExpandedMask
.isNegative())
16232 // What is the fewest number of bits we need to represent the negative number.
16233 unsigned MinSignedBits
= ExpandedMask
.getSignificantBits();
16235 // Try to make a 12 bit negative immediate. If that fails try to make a 32
16236 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
16237 // If we can't create a simm12, we shouldn't change opaque constants.
16238 APInt NewMask
= ShrunkMask
;
16239 if (MinSignedBits
<= 12)
16240 NewMask
.setBitsFrom(11);
16241 else if (!C
->isOpaque() && MinSignedBits
<= 32 && !ShrunkMask
.isSignedIntN(32))
16242 NewMask
.setBitsFrom(31);
16246 // Check that our new mask is a subset of the demanded mask.
16247 assert(IsLegalMask(NewMask
));
16248 return UseMask(NewMask
);
16251 static uint64_t computeGREVOrGORC(uint64_t x
, unsigned ShAmt
, bool IsGORC
) {
16252 static const uint64_t GREVMasks
[] = {
16253 0x5555555555555555ULL
, 0x3333333333333333ULL
, 0x0F0F0F0F0F0F0F0FULL
,
16254 0x00FF00FF00FF00FFULL
, 0x0000FFFF0000FFFFULL
, 0x00000000FFFFFFFFULL
};
16256 for (unsigned Stage
= 0; Stage
!= 6; ++Stage
) {
16257 unsigned Shift
= 1 << Stage
;
16258 if (ShAmt
& Shift
) {
16259 uint64_t Mask
= GREVMasks
[Stage
];
16260 uint64_t Res
= ((x
& Mask
) << Shift
) | ((x
>> Shift
) & Mask
);
16270 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op
,
16272 const APInt
&DemandedElts
,
16273 const SelectionDAG
&DAG
,
16274 unsigned Depth
) const {
16275 unsigned BitWidth
= Known
.getBitWidth();
16276 unsigned Opc
= Op
.getOpcode();
16277 assert((Opc
>= ISD::BUILTIN_OP_END
||
16278 Opc
== ISD::INTRINSIC_WO_CHAIN
||
16279 Opc
== ISD::INTRINSIC_W_CHAIN
||
16280 Opc
== ISD::INTRINSIC_VOID
) &&
16281 "Should use MaskedValueIsZero if you don't know whether Op"
16282 " is a target node!");
16287 case RISCVISD::SELECT_CC
: {
16288 Known
= DAG
.computeKnownBits(Op
.getOperand(4), Depth
+ 1);
16289 // If we don't know any bits, early out.
16290 if (Known
.isUnknown())
16292 KnownBits Known2
= DAG
.computeKnownBits(Op
.getOperand(3), Depth
+ 1);
16294 // Only known if known in both the LHS and RHS.
16295 Known
= Known
.intersectWith(Known2
);
16298 case RISCVISD::CZERO_EQZ
:
16299 case RISCVISD::CZERO_NEZ
:
16300 Known
= DAG
.computeKnownBits(Op
.getOperand(0), Depth
+ 1);
16301 // Result is either all zero or operand 0. We can propagate zeros, but not
16303 Known
.One
.clearAllBits();
16305 case RISCVISD::REMUW
: {
16307 Known
= DAG
.computeKnownBits(Op
.getOperand(0), DemandedElts
, Depth
+ 1);
16308 Known2
= DAG
.computeKnownBits(Op
.getOperand(1), DemandedElts
, Depth
+ 1);
16309 // We only care about the lower 32 bits.
16310 Known
= KnownBits::urem(Known
.trunc(32), Known2
.trunc(32));
16311 // Restore the original width by sign extending.
16312 Known
= Known
.sext(BitWidth
);
16315 case RISCVISD::DIVUW
: {
16317 Known
= DAG
.computeKnownBits(Op
.getOperand(0), DemandedElts
, Depth
+ 1);
16318 Known2
= DAG
.computeKnownBits(Op
.getOperand(1), DemandedElts
, Depth
+ 1);
16319 // We only care about the lower 32 bits.
16320 Known
= KnownBits::udiv(Known
.trunc(32), Known2
.trunc(32));
16321 // Restore the original width by sign extending.
16322 Known
= Known
.sext(BitWidth
);
16325 case RISCVISD::SLLW
: {
16327 Known
= DAG
.computeKnownBits(Op
.getOperand(0), DemandedElts
, Depth
+ 1);
16328 Known2
= DAG
.computeKnownBits(Op
.getOperand(1), DemandedElts
, Depth
+ 1);
16329 Known
= KnownBits::shl(Known
.trunc(32), Known2
.trunc(5).zext(32));
16330 // Restore the original width by sign extending.
16331 Known
= Known
.sext(BitWidth
);
16334 case RISCVISD::CTZW
: {
16335 KnownBits Known2
= DAG
.computeKnownBits(Op
.getOperand(0), Depth
+ 1);
16336 unsigned PossibleTZ
= Known2
.trunc(32).countMaxTrailingZeros();
16337 unsigned LowBits
= llvm::bit_width(PossibleTZ
);
16338 Known
.Zero
.setBitsFrom(LowBits
);
16341 case RISCVISD::CLZW
: {
16342 KnownBits Known2
= DAG
.computeKnownBits(Op
.getOperand(0), Depth
+ 1);
16343 unsigned PossibleLZ
= Known2
.trunc(32).countMaxLeadingZeros();
16344 unsigned LowBits
= llvm::bit_width(PossibleLZ
);
16345 Known
.Zero
.setBitsFrom(LowBits
);
16348 case RISCVISD::BREV8
:
16349 case RISCVISD::ORC_B
: {
16350 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
16351 // control value of 7 is equivalent to brev8 and orc.b.
16352 Known
= DAG
.computeKnownBits(Op
.getOperand(0), Depth
+ 1);
16353 bool IsGORC
= Op
.getOpcode() == RISCVISD::ORC_B
;
16354 // To compute zeros, we need to invert the value and invert it back after.
16356 ~computeGREVOrGORC(~Known
.Zero
.getZExtValue(), 7, IsGORC
);
16357 Known
.One
= computeGREVOrGORC(Known
.One
.getZExtValue(), 7, IsGORC
);
16360 case RISCVISD::READ_VLENB
: {
16361 // We can use the minimum and maximum VLEN values to bound VLENB. We
16362 // know VLEN must be a power of two.
16363 const unsigned MinVLenB
= Subtarget
.getRealMinVLen() / 8;
16364 const unsigned MaxVLenB
= Subtarget
.getRealMaxVLen() / 8;
16365 assert(MinVLenB
> 0 && "READ_VLENB without vector extension enabled?");
16366 Known
.Zero
.setLowBits(Log2_32(MinVLenB
));
16367 Known
.Zero
.setBitsFrom(Log2_32(MaxVLenB
)+1);
16368 if (MaxVLenB
== MinVLenB
)
16369 Known
.One
.setBit(Log2_32(MinVLenB
));
16372 case RISCVISD::FCLASS
: {
16373 // fclass will only set one of the low 10 bits.
16374 Known
.Zero
.setBitsFrom(10);
16377 case ISD::INTRINSIC_W_CHAIN
:
16378 case ISD::INTRINSIC_WO_CHAIN
: {
16380 Op
.getConstantOperandVal(Opc
== ISD::INTRINSIC_WO_CHAIN
? 0 : 1);
16383 // We can't do anything for most intrinsics.
16385 case Intrinsic::riscv_vsetvli
:
16386 case Intrinsic::riscv_vsetvlimax
: {
16387 bool HasAVL
= IntNo
== Intrinsic::riscv_vsetvli
;
16388 unsigned VSEW
= Op
.getConstantOperandVal(HasAVL
+ 1);
16389 RISCVII::VLMUL VLMUL
=
16390 static_cast<RISCVII::VLMUL
>(Op
.getConstantOperandVal(HasAVL
+ 2));
16391 unsigned SEW
= RISCVVType::decodeVSEW(VSEW
);
16392 auto [LMul
, Fractional
] = RISCVVType::decodeVLMUL(VLMUL
);
16393 uint64_t MaxVL
= Subtarget
.getRealMaxVLen() / SEW
;
16394 MaxVL
= (Fractional
) ? MaxVL
/ LMul
: MaxVL
* LMul
;
16396 // Result of vsetvli must be not larger than AVL.
16397 if (HasAVL
&& isa
<ConstantSDNode
>(Op
.getOperand(1)))
16398 MaxVL
= std::min(MaxVL
, Op
.getConstantOperandVal(1));
16400 unsigned KnownZeroFirstBit
= Log2_32(MaxVL
) + 1;
16401 if (BitWidth
> KnownZeroFirstBit
)
16402 Known
.Zero
.setBitsFrom(KnownZeroFirstBit
);
16411 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
16412 SDValue Op
, const APInt
&DemandedElts
, const SelectionDAG
&DAG
,
16413 unsigned Depth
) const {
16414 switch (Op
.getOpcode()) {
16417 case RISCVISD::SELECT_CC
: {
16419 DAG
.ComputeNumSignBits(Op
.getOperand(3), DemandedElts
, Depth
+ 1);
16420 if (Tmp
== 1) return 1; // Early out.
16422 DAG
.ComputeNumSignBits(Op
.getOperand(4), DemandedElts
, Depth
+ 1);
16423 return std::min(Tmp
, Tmp2
);
16425 case RISCVISD::CZERO_EQZ
:
16426 case RISCVISD::CZERO_NEZ
:
16427 // Output is either all zero or operand 0. We can propagate sign bit count
16429 return DAG
.ComputeNumSignBits(Op
.getOperand(0), DemandedElts
, Depth
+ 1);
16430 case RISCVISD::ABSW
: {
16431 // We expand this at isel to negw+max. The result will have 33 sign bits
16432 // if the input has at least 33 sign bits.
16434 DAG
.ComputeNumSignBits(Op
.getOperand(0), DemandedElts
, Depth
+ 1);
16435 if (Tmp
< 33) return 1;
16438 case RISCVISD::SLLW
:
16439 case RISCVISD::SRAW
:
16440 case RISCVISD::SRLW
:
16441 case RISCVISD::DIVW
:
16442 case RISCVISD::DIVUW
:
16443 case RISCVISD::REMUW
:
16444 case RISCVISD::ROLW
:
16445 case RISCVISD::RORW
:
16446 case RISCVISD::FCVT_W_RV64
:
16447 case RISCVISD::FCVT_WU_RV64
:
16448 case RISCVISD::STRICT_FCVT_W_RV64
:
16449 case RISCVISD::STRICT_FCVT_WU_RV64
:
16450 // TODO: As the result is sign-extended, this is conservatively correct. A
16451 // more precise answer could be calculated for SRAW depending on known
16452 // bits in the shift amount.
16454 case RISCVISD::VMV_X_S
: {
16455 // The number of sign bits of the scalar result is computed by obtaining the
16456 // element type of the input vector operand, subtracting its width from the
16457 // XLEN, and then adding one (sign bit within the element type). If the
16458 // element type is wider than XLen, the least-significant XLEN bits are
16460 unsigned XLen
= Subtarget
.getXLen();
16461 unsigned EltBits
= Op
.getOperand(0).getScalarValueSizeInBits();
16462 if (EltBits
<= XLen
)
16463 return XLen
- EltBits
+ 1;
16466 case ISD::INTRINSIC_W_CHAIN
: {
16467 unsigned IntNo
= Op
.getConstantOperandVal(1);
16471 case Intrinsic::riscv_masked_atomicrmw_xchg_i64
:
16472 case Intrinsic::riscv_masked_atomicrmw_add_i64
:
16473 case Intrinsic::riscv_masked_atomicrmw_sub_i64
:
16474 case Intrinsic::riscv_masked_atomicrmw_nand_i64
:
16475 case Intrinsic::riscv_masked_atomicrmw_max_i64
:
16476 case Intrinsic::riscv_masked_atomicrmw_min_i64
:
16477 case Intrinsic::riscv_masked_atomicrmw_umax_i64
:
16478 case Intrinsic::riscv_masked_atomicrmw_umin_i64
:
16479 case Intrinsic::riscv_masked_cmpxchg_i64
:
16480 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
16481 // narrow atomic operation. These are implemented using atomic
16482 // operations at the minimum supported atomicrmw/cmpxchg width whose
16483 // result is then sign extended to XLEN. With +A, the minimum width is
16484 // 32 for both 64 and 32.
16485 assert(Subtarget
.getXLen() == 64);
16486 assert(getMinCmpXchgSizeInBits() == 32);
16487 assert(Subtarget
.hasStdExtA());
16498 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode
*Ld
) const {
16499 assert(Ld
&& "Unexpected null LoadSDNode");
16500 if (!ISD::isNormalLoad(Ld
))
16503 SDValue Ptr
= Ld
->getBasePtr();
16505 // Only constant pools with no offset are supported.
16506 auto GetSupportedConstantPool
= [](SDValue Ptr
) -> ConstantPoolSDNode
* {
16507 auto *CNode
= dyn_cast
<ConstantPoolSDNode
>(Ptr
);
16508 if (!CNode
|| CNode
->isMachineConstantPoolEntry() ||
16509 CNode
->getOffset() != 0)
16515 // Simple case, LLA.
16516 if (Ptr
.getOpcode() == RISCVISD::LLA
) {
16517 auto *CNode
= GetSupportedConstantPool(Ptr
);
16518 if (!CNode
|| CNode
->getTargetFlags() != 0)
16521 return CNode
->getConstVal();
16524 // Look for a HI and ADD_LO pair.
16525 if (Ptr
.getOpcode() != RISCVISD::ADD_LO
||
16526 Ptr
.getOperand(0).getOpcode() != RISCVISD::HI
)
16529 auto *CNodeLo
= GetSupportedConstantPool(Ptr
.getOperand(1));
16530 auto *CNodeHi
= GetSupportedConstantPool(Ptr
.getOperand(0).getOperand(0));
16532 if (!CNodeLo
|| CNodeLo
->getTargetFlags() != RISCVII::MO_LO
||
16533 !CNodeHi
|| CNodeHi
->getTargetFlags() != RISCVII::MO_HI
)
16536 if (CNodeLo
->getConstVal() != CNodeHi
->getConstVal())
16539 return CNodeLo
->getConstVal();
16542 static MachineBasicBlock
*emitReadCycleWidePseudo(MachineInstr
&MI
,
16543 MachineBasicBlock
*BB
) {
16544 assert(MI
.getOpcode() == RISCV::ReadCycleWide
&& "Unexpected instruction");
16546 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
16547 // Should the count have wrapped while it was being read, we need to try
16551 // rdcycleh x3 # load high word of cycle
16552 // rdcycle x2 # load low word of cycle
16553 // rdcycleh x4 # load high word of cycle
16554 // bne x3, x4, read # check if high word reads match, otherwise try again
16557 MachineFunction
&MF
= *BB
->getParent();
16558 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
16559 MachineFunction::iterator It
= ++BB
->getIterator();
16561 MachineBasicBlock
*LoopMBB
= MF
.CreateMachineBasicBlock(LLVM_BB
);
16562 MF
.insert(It
, LoopMBB
);
16564 MachineBasicBlock
*DoneMBB
= MF
.CreateMachineBasicBlock(LLVM_BB
);
16565 MF
.insert(It
, DoneMBB
);
16567 // Transfer the remainder of BB and its successor edges to DoneMBB.
16568 DoneMBB
->splice(DoneMBB
->begin(), BB
,
16569 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
16570 DoneMBB
->transferSuccessorsAndUpdatePHIs(BB
);
16572 BB
->addSuccessor(LoopMBB
);
16574 MachineRegisterInfo
&RegInfo
= MF
.getRegInfo();
16575 Register ReadAgainReg
= RegInfo
.createVirtualRegister(&RISCV::GPRRegClass
);
16576 Register LoReg
= MI
.getOperand(0).getReg();
16577 Register HiReg
= MI
.getOperand(1).getReg();
16578 DebugLoc DL
= MI
.getDebugLoc();
16580 const TargetInstrInfo
*TII
= MF
.getSubtarget().getInstrInfo();
16581 BuildMI(LoopMBB
, DL
, TII
->get(RISCV::CSRRS
), HiReg
)
16582 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding
)
16583 .addReg(RISCV::X0
);
16584 BuildMI(LoopMBB
, DL
, TII
->get(RISCV::CSRRS
), LoReg
)
16585 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding
)
16586 .addReg(RISCV::X0
);
16587 BuildMI(LoopMBB
, DL
, TII
->get(RISCV::CSRRS
), ReadAgainReg
)
16588 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding
)
16589 .addReg(RISCV::X0
);
16591 BuildMI(LoopMBB
, DL
, TII
->get(RISCV::BNE
))
16593 .addReg(ReadAgainReg
)
16596 LoopMBB
->addSuccessor(LoopMBB
);
16597 LoopMBB
->addSuccessor(DoneMBB
);
16599 MI
.eraseFromParent();
16604 static MachineBasicBlock
*emitSplitF64Pseudo(MachineInstr
&MI
,
16605 MachineBasicBlock
*BB
,
16606 const RISCVSubtarget
&Subtarget
) {
16607 assert((MI
.getOpcode() == RISCV::SplitF64Pseudo
||
16608 MI
.getOpcode() == RISCV::SplitF64Pseudo_INX
) &&
16609 "Unexpected instruction");
16611 MachineFunction
&MF
= *BB
->getParent();
16612 DebugLoc DL
= MI
.getDebugLoc();
16613 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
16614 const TargetRegisterInfo
*RI
= MF
.getSubtarget().getRegisterInfo();
16615 Register LoReg
= MI
.getOperand(0).getReg();
16616 Register HiReg
= MI
.getOperand(1).getReg();
16617 Register SrcReg
= MI
.getOperand(2).getReg();
16619 const TargetRegisterClass
*SrcRC
= MI
.getOpcode() == RISCV::SplitF64Pseudo_INX
16620 ? &RISCV::GPRPairRegClass
16621 : &RISCV::FPR64RegClass
;
16622 int FI
= MF
.getInfo
<RISCVMachineFunctionInfo
>()->getMoveF64FrameIndex(MF
);
16624 TII
.storeRegToStackSlot(*BB
, MI
, SrcReg
, MI
.getOperand(2).isKill(), FI
, SrcRC
,
16626 MachinePointerInfo MPI
= MachinePointerInfo::getFixedStack(MF
, FI
);
16627 MachineMemOperand
*MMOLo
=
16628 MF
.getMachineMemOperand(MPI
, MachineMemOperand::MOLoad
, 4, Align(8));
16629 MachineMemOperand
*MMOHi
= MF
.getMachineMemOperand(
16630 MPI
.getWithOffset(4), MachineMemOperand::MOLoad
, 4, Align(8));
16631 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::LW
), LoReg
)
16634 .addMemOperand(MMOLo
);
16635 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::LW
), HiReg
)
16638 .addMemOperand(MMOHi
);
16639 MI
.eraseFromParent(); // The pseudo instruction is gone now.
16643 static MachineBasicBlock
*emitBuildPairF64Pseudo(MachineInstr
&MI
,
16644 MachineBasicBlock
*BB
,
16645 const RISCVSubtarget
&Subtarget
) {
16646 assert((MI
.getOpcode() == RISCV::BuildPairF64Pseudo
||
16647 MI
.getOpcode() == RISCV::BuildPairF64Pseudo_INX
) &&
16648 "Unexpected instruction");
16650 MachineFunction
&MF
= *BB
->getParent();
16651 DebugLoc DL
= MI
.getDebugLoc();
16652 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
16653 const TargetRegisterInfo
*RI
= MF
.getSubtarget().getRegisterInfo();
16654 Register DstReg
= MI
.getOperand(0).getReg();
16655 Register LoReg
= MI
.getOperand(1).getReg();
16656 Register HiReg
= MI
.getOperand(2).getReg();
16658 const TargetRegisterClass
*DstRC
=
16659 MI
.getOpcode() == RISCV::BuildPairF64Pseudo_INX
? &RISCV::GPRPairRegClass
16660 : &RISCV::FPR64RegClass
;
16661 int FI
= MF
.getInfo
<RISCVMachineFunctionInfo
>()->getMoveF64FrameIndex(MF
);
16663 MachinePointerInfo MPI
= MachinePointerInfo::getFixedStack(MF
, FI
);
16664 MachineMemOperand
*MMOLo
=
16665 MF
.getMachineMemOperand(MPI
, MachineMemOperand::MOStore
, 4, Align(8));
16666 MachineMemOperand
*MMOHi
= MF
.getMachineMemOperand(
16667 MPI
.getWithOffset(4), MachineMemOperand::MOStore
, 4, Align(8));
16668 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::SW
))
16669 .addReg(LoReg
, getKillRegState(MI
.getOperand(1).isKill()))
16672 .addMemOperand(MMOLo
);
16673 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::SW
))
16674 .addReg(HiReg
, getKillRegState(MI
.getOperand(2).isKill()))
16677 .addMemOperand(MMOHi
);
16678 TII
.loadRegFromStackSlot(*BB
, MI
, DstReg
, FI
, DstRC
, RI
, Register());
16679 MI
.eraseFromParent(); // The pseudo instruction is gone now.
16683 static bool isSelectPseudo(MachineInstr
&MI
) {
16684 switch (MI
.getOpcode()) {
16687 case RISCV::Select_GPR_Using_CC_GPR
:
16688 case RISCV::Select_FPR16_Using_CC_GPR
:
16689 case RISCV::Select_FPR16INX_Using_CC_GPR
:
16690 case RISCV::Select_FPR32_Using_CC_GPR
:
16691 case RISCV::Select_FPR32INX_Using_CC_GPR
:
16692 case RISCV::Select_FPR64_Using_CC_GPR
:
16693 case RISCV::Select_FPR64INX_Using_CC_GPR
:
16694 case RISCV::Select_FPR64IN32X_Using_CC_GPR
:
16699 static MachineBasicBlock
*emitQuietFCMP(MachineInstr
&MI
, MachineBasicBlock
*BB
,
16700 unsigned RelOpcode
, unsigned EqOpcode
,
16701 const RISCVSubtarget
&Subtarget
) {
16702 DebugLoc DL
= MI
.getDebugLoc();
16703 Register DstReg
= MI
.getOperand(0).getReg();
16704 Register Src1Reg
= MI
.getOperand(1).getReg();
16705 Register Src2Reg
= MI
.getOperand(2).getReg();
16706 MachineRegisterInfo
&MRI
= BB
->getParent()->getRegInfo();
16707 Register SavedFFlags
= MRI
.createVirtualRegister(&RISCV::GPRRegClass
);
16708 const TargetInstrInfo
&TII
= *BB
->getParent()->getSubtarget().getInstrInfo();
16710 // Save the current FFLAGS.
16711 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::ReadFFLAGS
), SavedFFlags
);
16713 auto MIB
= BuildMI(*BB
, MI
, DL
, TII
.get(RelOpcode
), DstReg
)
16716 if (MI
.getFlag(MachineInstr::MIFlag::NoFPExcept
))
16717 MIB
->setFlag(MachineInstr::MIFlag::NoFPExcept
);
16719 // Restore the FFLAGS.
16720 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::WriteFFLAGS
))
16721 .addReg(SavedFFlags
, RegState::Kill
);
16723 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
16724 auto MIB2
= BuildMI(*BB
, MI
, DL
, TII
.get(EqOpcode
), RISCV::X0
)
16725 .addReg(Src1Reg
, getKillRegState(MI
.getOperand(1).isKill()))
16726 .addReg(Src2Reg
, getKillRegState(MI
.getOperand(2).isKill()));
16727 if (MI
.getFlag(MachineInstr::MIFlag::NoFPExcept
))
16728 MIB2
->setFlag(MachineInstr::MIFlag::NoFPExcept
);
16730 // Erase the pseudoinstruction.
16731 MI
.eraseFromParent();
16735 static MachineBasicBlock
*
16736 EmitLoweredCascadedSelect(MachineInstr
&First
, MachineInstr
&Second
,
16737 MachineBasicBlock
*ThisMBB
,
16738 const RISCVSubtarget
&Subtarget
) {
16739 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
16740 // Without this, custom-inserter would have generated:
16752 // A: X = ...; Y = ...
16754 // C: Z = PHI [X, A], [Y, B]
16756 // E: PHI [X, C], [Z, D]
16758 // If we lower both Select_FPRX_ in a single step, we can instead generate:
16770 // A: X = ...; Y = ...
16772 // E: PHI [X, A], [X, C], [Y, D]
16774 const RISCVInstrInfo
&TII
= *Subtarget
.getInstrInfo();
16775 const DebugLoc
&DL
= First
.getDebugLoc();
16776 const BasicBlock
*LLVM_BB
= ThisMBB
->getBasicBlock();
16777 MachineFunction
*F
= ThisMBB
->getParent();
16778 MachineBasicBlock
*FirstMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
16779 MachineBasicBlock
*SecondMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
16780 MachineBasicBlock
*SinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
16781 MachineFunction::iterator It
= ++ThisMBB
->getIterator();
16782 F
->insert(It
, FirstMBB
);
16783 F
->insert(It
, SecondMBB
);
16784 F
->insert(It
, SinkMBB
);
16786 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
16787 SinkMBB
->splice(SinkMBB
->begin(), ThisMBB
,
16788 std::next(MachineBasicBlock::iterator(First
)),
16790 SinkMBB
->transferSuccessorsAndUpdatePHIs(ThisMBB
);
16792 // Fallthrough block for ThisMBB.
16793 ThisMBB
->addSuccessor(FirstMBB
);
16794 // Fallthrough block for FirstMBB.
16795 FirstMBB
->addSuccessor(SecondMBB
);
16796 ThisMBB
->addSuccessor(SinkMBB
);
16797 FirstMBB
->addSuccessor(SinkMBB
);
16798 // This is fallthrough.
16799 SecondMBB
->addSuccessor(SinkMBB
);
16801 auto FirstCC
= static_cast<RISCVCC::CondCode
>(First
.getOperand(3).getImm());
16802 Register FLHS
= First
.getOperand(1).getReg();
16803 Register FRHS
= First
.getOperand(2).getReg();
16804 // Insert appropriate branch.
16805 BuildMI(FirstMBB
, DL
, TII
.getBrCond(FirstCC
))
16810 Register SLHS
= Second
.getOperand(1).getReg();
16811 Register SRHS
= Second
.getOperand(2).getReg();
16812 Register Op1Reg4
= First
.getOperand(4).getReg();
16813 Register Op1Reg5
= First
.getOperand(5).getReg();
16815 auto SecondCC
= static_cast<RISCVCC::CondCode
>(Second
.getOperand(3).getImm());
16816 // Insert appropriate branch.
16817 BuildMI(ThisMBB
, DL
, TII
.getBrCond(SecondCC
))
16822 Register DestReg
= Second
.getOperand(0).getReg();
16823 Register Op2Reg4
= Second
.getOperand(4).getReg();
16824 BuildMI(*SinkMBB
, SinkMBB
->begin(), DL
, TII
.get(RISCV::PHI
), DestReg
)
16830 .addMBB(SecondMBB
);
16832 // Now remove the Select_FPRX_s.
16833 First
.eraseFromParent();
16834 Second
.eraseFromParent();
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;
  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
  if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
      Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill()) {
    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
  }

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
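
// By way of example (pseudo-MIR, virtual register names invented), a run of
// selects sharing one condition such as
//
//   %d0 = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t0, %f0
//   %d1 = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
//
// is lowered by emitSelectPseudo into a single conditional branch over an
// empty IfFalseMBB, with one PHI per select in TailMBB:
//
//   %d0 = PHI [ %t0, HeadMBB ], [ %f0, IfFalseMBB ]
//   %d1 = PHI [ %t1, HeadMBB ], [ %f1, IfFalseMBB ]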
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
                                                    MachineBasicBlock *BB,
                                                    unsigned CVTXOpc,
                                                    unsigned CVTFOpc) {
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the old value of FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);

  assert(MI.getNumOperands() == 7);

  // Emit a VFCVT_X_F
  const TargetRegisterInfo *TRI =
      BB->getParent()->getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
  Register Tmp = MRI.createVirtualRegister(RC);
  BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
      .add(MI.getOperand(1))
      .add(MI.getOperand(2))
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Emit a VFCVT_F_X
  BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .addReg(Tmp)
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFLAGS, RegState::Kill);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
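
// Sketch of what the masked PseudoVFROUND_NOEXCEPT_V_*_MASK pseudo expands to
// (vector operands elided for brevity; this is an illustration, not MIR):
//
//   %saved = ReadFFLAGS
//   %tmp   = PseudoVFCVT_X_F_V_*_MASK ..., frm=DYN   ; implicit-use $frm
//   ...    = PseudoVFCVT_F_X_V_*_MASK %tmp, ..., frm=DYN
//            WriteFFLAGS %saved
//
// i.e. the value is rounded via a float->int->float round trip while FFLAGS is
// saved and restored so the conversions cannot raise visible FP exceptions.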
static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
                                     const RISCVSubtarget &Subtarget) {
  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
  const TargetRegisterClass *RC;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::PseudoFROUND_H:
    CmpOpc = RISCV::FLT_H;
    F2IOpc = RISCV::FCVT_W_H;
    I2FOpc = RISCV::FCVT_H_W;
    FSGNJOpc = RISCV::FSGNJ_H;
    FSGNJXOpc = RISCV::FSGNJX_H;
    RC = &RISCV::FPR16RegClass;
    break;
  case RISCV::PseudoFROUND_H_INX:
    CmpOpc = RISCV::FLT_H_INX;
    F2IOpc = RISCV::FCVT_W_H_INX;
    I2FOpc = RISCV::FCVT_H_W_INX;
    FSGNJOpc = RISCV::FSGNJ_H_INX;
    FSGNJXOpc = RISCV::FSGNJX_H_INX;
    RC = &RISCV::GPRF16RegClass;
    break;
  case RISCV::PseudoFROUND_S:
    CmpOpc = RISCV::FLT_S;
    F2IOpc = RISCV::FCVT_W_S;
    I2FOpc = RISCV::FCVT_S_W;
    FSGNJOpc = RISCV::FSGNJ_S;
    FSGNJXOpc = RISCV::FSGNJX_S;
    RC = &RISCV::FPR32RegClass;
    break;
  case RISCV::PseudoFROUND_S_INX:
    CmpOpc = RISCV::FLT_S_INX;
    F2IOpc = RISCV::FCVT_W_S_INX;
    I2FOpc = RISCV::FCVT_S_W_INX;
    FSGNJOpc = RISCV::FSGNJ_S_INX;
    FSGNJXOpc = RISCV::FSGNJX_S_INX;
    RC = &RISCV::GPRF32RegClass;
    break;
  case RISCV::PseudoFROUND_D:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D;
    F2IOpc = RISCV::FCVT_L_D;
    I2FOpc = RISCV::FCVT_D_L;
    FSGNJOpc = RISCV::FSGNJ_D;
    FSGNJXOpc = RISCV::FSGNJX_D;
    RC = &RISCV::FPR64RegClass;
    break;
  case RISCV::PseudoFROUND_D_INX:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D_INX;
    F2IOpc = RISCV::FCVT_L_D_INX;
    I2FOpc = RISCV::FCVT_D_L_INX;
    FSGNJOpc = RISCV::FSGNJ_D_INX;
    FSGNJXOpc = RISCV::FSGNJX_D_INX;
    RC = &RISCV::GPRRegClass;
    break;
  }

  const BasicBlock *BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++MBB->getIterator();

  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);

  // Move all instructions after the sequence to DoneMBB.
  DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
                  MBB->end());

  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
  // Set the successors for MBB.
  MBB->addSuccessor(CvtMBB);
  MBB->addSuccessor(DoneMBB);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register MaxReg = MI.getOperand(2).getReg();
  int64_t FRM = MI.getOperand(3).getImm();

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  Register FabsReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto MIB =
      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Insert the branch to the conversion block.
  BuildMI(MBB, DL, TII.get(RISCV::BEQ))
      .addReg(CmpReg)
      .addReg(RISCV::X0)
      .addMBB(DoneMBB);

  CvtMBB->addSuccessor(DoneMBB);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Convert back to FP.
  Register I2FReg = MRI.createVirtualRegister(RC);
  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the sign bit.
  Register CvtReg = MRI.createVirtualRegister(RC);
  BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);

  // Merge the results.
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(MBB)
      .addReg(CvtReg)
      .addMBB(CvtMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
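
// As a rough illustration, PseudoFROUND_S expands to a sequence along these
// lines (scalar register names invented; fa1 holds the magnitude bound passed
// as operand 2, typically 1 << 23 for f32):
//
//   fsgnjx.s fa5, fa0, fa0       # fa5 = |src|
//   flt.s    a0, fa5, fa1        # |src| < max ?
//   beq      a0, zero, .Ldone    # no fractional part (or NaN): keep src
//   fcvt.w.s a0, fa0, <frm>
//   fcvt.s.w fa5, a0, <frm>
//   fsgnj.s  fa5, fa5, fa0       # restore the sign (handles +/-0.0)
// .Ldone:
//   # result = PHI(src from the entry block, fa5 from the convert block)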
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case RISCV::BuildPairF64Pseudo:
  case RISCV::BuildPairF64Pseudo_INX:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case RISCV::SplitF64Pseudo:
  case RISCV::SplitF64Pseudo_INX:
    return emitSplitF64Pseudo(MI, BB, Subtarget);
  case RISCV::PseudoQuietFLE_H:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoQuietFLT_D:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M1_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M4_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M8_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
    return emitFROUND(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    if (!Subtarget.is64Bit())
      report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
                         "supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
                                                        SDNode *Node) const {
  // Add FRM dependency to any instructions with dynamic rounding mode.
  int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
  if (Idx < 0) {
    // Vector pseudos have FRM index indicated by TSFlags.
    Idx = RISCVII::getFRMOpNum(MI.getDesc());
    if (Idx < 0)
      return;
  }
  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
    return;
  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM))
    return;
  MI.addOperand(
      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}
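
// For instance, an FP instruction selected with the dynamic rounding-mode
// immediate (frm == RISCVFPRndMode::DYN, encoded as 7) leaves this hook
// carrying an extra "implicit $frm" use, so later passes model its dependence
// on writes to the FRM CSR.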
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
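
// A worked example of the rules above, from the frontend's point of view
// (nothing here is enforced by this file): under the lp64d hard-float ABI a
// struct such as
//
//   struct S { double d; int i; };   // one FP and one integer field
//
// is flattened and passed as two separate arguments (one FPR, one GPR) while
// both register files have space, whereas a struct larger than 2*XLEN with no
// FP fields is passed byval via a pointer.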
static const MCPhysReg ArgFPR16s[] = {
    RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
    RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H};
static const MCPhysReg ArgFPR32s[] = {
    RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
    RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F};
static const MCPhysReg ArgFPR64s[] = {
    RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
    RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
  // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
  // the ILP32E/LP64E ABIs.
  static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
                                       RISCV::X13, RISCV::X14, RISCV::X15,
                                       RISCV::X16, RISCV::X17};
  // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
  static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
                                       RISCV::X13, RISCV::X14, RISCV::X15};

  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
    return ArrayRef(ArgEGPRs);

  return ArrayRef(ArgIGPRs);
}

static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
  // The GPRs used for passing arguments in the FastCC; X5 and X6 might be used
  // for the save-restore libcall, so we don't use them.
  static const MCPhysReg FastCCIGPRs[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
  static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
                                          RISCV::X13, RISCV::X14, RISCV::X15,
                                          RISCV::X7};

  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
    return ArrayRef(FastCCEGPRs);

  return ArrayRef(FastCCIGPRs);
}
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2, bool EABI) {
  unsigned XLenInBytes = XLen / 8;
  const RISCVSubtarget &STI =
      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
  ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    // TODO: To be compatible with GCC's behaviors, we force them to have
    // 4-byte alignment. This behavior may be changed when RV32E/ILP32E is
    // ratified.
    Align StackAlign(XLenInBytes);
    if (!EABI || XLen != 32)
      StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
                               std::optional<unsigned> FirstMaskArgument,
                               CCState &State, const RISCVTargetLowering &TLI) {
  const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
  if (RC == &RISCV::VRRegClass) {
    // Assign the first mask argument to V0.
    // This is an interim calling convention and it may be changed in the
    // future.
    if (FirstMaskArgument && ValNo == *FirstMaskArgument)
      return State.AllocateReg(RISCV::V0);
    return State.AllocateReg(ArgVRs);
  }
  if (RC == &RISCV::VRM2RegClass)
    return State.AllocateReg(ArgVRM2s);
  if (RC == &RISCV::VRM4RegClass)
    return State.AllocateReg(ArgVRM4s);
  if (RC == &RISCV::VRM8RegClass)
    return State.AllocateReg(ArgVRM8s);
  llvm_unreachable("Unhandled register class for ValueType");
}
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     std::optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Static chain parameter must not be passed in normal argument registers,
  // so we assign t2 for it as done in GCC's
  // __builtin_call_with_static_chain.
  if (ArgFlags.isNest()) {
    if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Any return value split into more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64E:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 &&
      (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  // TODO: To be compatible with GCC's behaviors, we don't align registers
  // currently if we are using ILP32E calling convention. This behavior may be
  // changed when RV32E/ILP32E is ratified.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
      ABI != RISCVABI::ABI_ILP32E) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(PendingLocs.empty() && "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    LocVT = MVT::i32;
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    Register HiReg = State.AllocateReg(ArgGPRs);
    if (HiReg) {
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
    } else {
      unsigned StackOffset = State.AllocateStack(4, Align(4));
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
    }
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(
        XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
        ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a scalar floating-point value is passed on the stack, no
  // bit-conversion is needed.
  if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
    assert(!ValVT.isVector());
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
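
// As an illustration of the variadic register-alignment rule handled above:
// for a call like printf("%f", 1.0) on an ilp32 (soft-float) target, the f64
// vararg has 8-byte alignment, so if a1 is the next free GPR it is skipped
// and the value travels in the aligned pair a2/a3 rather than a1/a2.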
template <typename ArgTy>
static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
      return ArgIdx.index();
  }
  return std::nullopt;
}
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Outs.size();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
    } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      if (RV64LegalI32) {
        Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
      } else {
        Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      }
    } else {
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    }
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const RISCVTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
        RVFI->addSExt32Register(VReg);
      }
    }
  }

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (LocVT.isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
    } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
      if (RV64LegalI32) {
        Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
      } else {
        Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
      }
    } else {
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    }
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, we save the pointer which points to
    // the scalable vector value in the stack. The ValVT will be the pointer
    // type, instead of the scalable vector type.
    ValVT = LocVT;
  }
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA,
                                       const CCValAssign &HiVA,
                                       const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
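
// A small sketch of the common case handled above: with the ilp32 soft-float
// ABI, an f64 argument that CC_RISCV assigned to the GPR pair a0/a1 is
// rebuilt roughly as
//
//   t0: i32 = CopyFromReg a0        ; low half
//   t1: i32 = CopyFromReg a1        ; high half
//   t2: f64 = RISCVISD::BuildPairF64 t0, t1
//
// and the register+stack split only differs in loading the high half from its
// fixed stack object.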
// FastCC has less than 1% performance improvement for some particular
// benchmarks. But theoretically, it may have benefits for some cases.
bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            std::optional<unsigned> FirstMaskArgument) {
  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget = TLI.getSubtarget();

  if (LocVT == MVT::f16 &&
      (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Check if there is an available GPR before hitting the stack.
  if ((LocVT == MVT::f16 &&
       (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
      (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.is64Bit() &&
       Subtarget.hasStdExtZdinx())) {
    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    unsigned Offset2 = State.AllocateStack(2, Align(2));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (ArgFlags.isNest()) {
    report_fatal_error(
        "Attribute 'nest' is not supported in GHC calling convention");
  }

  static const MCPhysReg GPRList[] = {
      RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
      RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget =
      State.getMachineFunction().getSubtarget<RISCVSubtarget>();

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F,  RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
       Subtarget.is64Bit())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::GRAAL:
    break;
  case CallingConv::GHC:
    if (Subtarget.isRVE())
      report_fatal_error("GHC calling convention is not supported on RVE!");
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
                         "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                   : RISCV::CC_RISCV);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Size of the vararg save area. For now, the varargs save area is either
    // zero or large enough to hold a0-a7.
    int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
    int FI;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (VarArgsSaveSize == 0) {
      int VaArgOffset = CCInfo.getStackSize();
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    } else {
      int VaArgOffset = -VarArgsSaveSize;
      FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);

      // If saving an odd number of registers then create an extra stack slot
      // to ensure that the frame pointer is 2*XLEN-aligned, which in turn
      // ensures offsets to even-numbered registers remain 2*XLEN-aligned.
      if (Idx % 2) {
        MFI.CreateFixedObject(
            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
        VarArgsSaveSize += XLenInBytes;
      }
    }

    SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      SDValue Store = DAG.getStore(
          Chain, DL, ArgValue, FIN,
          MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
      OutChains.push_back(Store);
      FIN =
          DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    RVFI->setVarArgsFrameIndex(FI);
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
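
// An illustration of the vararg save area set up above: on RV64 (lp64*), for
// a function such as
//
//   int sum(int n, ...);
//
// a0 holds 'n', so a1-a7 are unallocated and XLenInBytes * 7 = 56 bytes are
// reserved at negative offsets from the incoming stack pointer; a1-a7 are
// copied there so va_arg can walk registers and stack-passed varargs with a
// single pointer. Because an odd number of registers is saved, the extra
// padding slot keeps the frame pointer 2*XLEN-aligned.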
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getStackSize() != 0
  // check is not enough and we need to check if any CCValAssign ArgLocs are
  // passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}
getPrefTypeAlign(EVT VT
, SelectionDAG
&DAG
) {
18305 return DAG
.getDataLayout().getPrefTypeAlign(
18306 VT
.getTypeForEVT(*DAG
.getContext()));
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC) {
    if (Subtarget.isRVE())
      report_fatal_error("GHC calling convention is not supported on RVE!");
    ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
  } else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                    : RISCV::CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
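    // (RISCVISD::SplitF64 breaks the f64 into two i32 halves: the low half
    // always goes in the GPR assigned below, while the high half goes either
    // in the next GPR or, when no register is left, onto the stack.)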
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
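      // For example, an i128 argument on RV64 arrives here as two i64 parts
      // sharing one OrigArgIndex; both parts are stored into the single stack
      // temporary created below and only its address is passed on.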
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if
  // this is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();
    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
         "Unexpected CFI type for a direct call");

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
    if (CLI.CFIType)
      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  if (CLI.CFIType)
    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    } else {
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
    }

    InVals.push_back(RetValue);
  }

  return Chain;
}
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                        ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
                        nullptr, *this, FirstMaskArgument))
      return false;
  }
  return true;
}
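// Lower outgoing return values, copying each one into its assigned register
// and emitting a RET_GLUE (or SRET_GLUE/MRET_GLUE for interrupt handlers).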
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, RISCV::CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      Register RegHi = RVLocs[++i].getLocReg();

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  if (any_of(RVLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  unsigned RetOpc = RISCVISD::RET_GLUE;
  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_GLUE;
    else
      RetOpc = RISCVISD::MRET_GLUE;
  }

  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}
// Check if the result of the node is only used as a return value, as
// otherwise we can't perform a tail-call.
bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();

  if (Copy->getOpcode() == ISD::BITCAST) {
    return isUsedByReturnOnly(Copy, Chain);
  }

  // TODO: Handle additional opcodes in order to support tail-calling libcalls
  // with soft float ABIs.
  if (Copy->getOpcode() != ISD::CopyToReg) {
    return false;
  }

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
    return false;

  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != RISCVISD::RET_GLUE)
      return false;
    HasRet = true;
  }
  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_GLUE)
  NODE_NAME_CASE(SRET_GLUE)
  NODE_NAME_CASE(MRET_GLUE)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(ADD_LO)
  NODE_NAME_CASE(LLA)
  NODE_NAME_CASE(ADD_TPREL)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(ABSW)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_X_SIGNEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(FCVT_X)
  NODE_NAME_CASE(FCVT_XU)
  NODE_NAME_CASE(FCVT_W_RV64)
  NODE_NAME_CASE(FCVT_WU_RV64)
  NODE_NAME_CASE(STRICT_FCVT_W_RV64)
  NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
  NODE_NAME_CASE(FP_ROUND_BF16)
  NODE_NAME_CASE(FP_EXTEND_BF16)
  NODE_NAME_CASE(FROUND)
  NODE_NAME_CASE(FCLASS)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(BREV8)
  NODE_NAME_CASE(ORC_B)
  NODE_NAME_CASE(ZIP)
  NODE_NAME_CASE(UNZIP)
  NODE_NAME_CASE(CLMUL)
  NODE_NAME_CASE(CLMULH)
  NODE_NAME_CASE(CLMULR)
  NODE_NAME_CASE(SHA256SIG0)
  NODE_NAME_CASE(SHA256SIG1)
  NODE_NAME_CASE(SHA256SUM0)
  NODE_NAME_CASE(SHA256SUM1)
  NODE_NAME_CASE(SM4KS)
  NODE_NAME_CASE(SM4ED)
  NODE_NAME_CASE(SM3P0)
  NODE_NAME_CASE(SM3P1)
  NODE_NAME_CASE(TH_LWD)
  NODE_NAME_CASE(TH_LWUD)
  NODE_NAME_CASE(TH_LDD)
  NODE_NAME_CASE(TH_SWD)
  NODE_NAME_CASE(TH_SDD)
  NODE_NAME_CASE(VMV_V_V_VL)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VFSLIDE1UP_VL)
  NODE_NAME_CASE(VFSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(ROTL_VL)
  NODE_NAME_CASE(ROTR_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(AVGFLOORU_VL)
  NODE_NAME_CASE(AVGCEILU_VL)
  NODE_NAME_CASE(SADDSAT_VL)
  NODE_NAME_CASE(UADDSAT_VL)
  NODE_NAME_CASE(SSUBSAT_VL)
  NODE_NAME_CASE(USUBSAT_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FCLASS_VL)
  NODE_NAME_CASE(VFMADD_VL)
  NODE_NAME_CASE(VFNMADD_VL)
  NODE_NAME_CASE(VFMSUB_VL)
  NODE_NAME_CASE(VFNMSUB_VL)
  NODE_NAME_CASE(VFWMADD_VL)
  NODE_NAME_CASE(VFWNMADD_VL)
  NODE_NAME_CASE(VFWMSUB_VL)
  NODE_NAME_CASE(VFWNMSUB_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(BITREVERSE_VL)
  NODE_NAME_CASE(BSWAP_VL)
  NODE_NAME_CASE(CTLZ_VL)
  NODE_NAME_CASE(CTTZ_VL)
  NODE_NAME_CASE(CTPOP_VL)
  NODE_NAME_CASE(VFMIN_VL)
  NODE_NAME_CASE(VFMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
  NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
  NODE_NAME_CASE(VFCVT_RM_X_F_VL)
  NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
  NODE_NAME_CASE(VFCVT_X_F_VL)
  NODE_NAME_CASE(VFCVT_XU_F_VL)
  NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
  NODE_NAME_CASE(VFCVT_RM_F_X_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(STRICT_FADD_VL)
  NODE_NAME_CASE(STRICT_FSUB_VL)
  NODE_NAME_CASE(STRICT_FMUL_VL)
  NODE_NAME_CASE(STRICT_FDIV_VL)
  NODE_NAME_CASE(STRICT_FSQRT_VL)
  NODE_NAME_CASE(STRICT_VFMADD_VL)
  NODE_NAME_CASE(STRICT_VFNMADD_VL)
  NODE_NAME_CASE(STRICT_VFMSUB_VL)
  NODE_NAME_CASE(STRICT_VFNMSUB_VL)
  NODE_NAME_CASE(STRICT_FP_ROUND_VL)
  NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
  NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
  NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
  NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
  NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
  NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
  NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
  NODE_NAME_CASE(STRICT_FSETCC_VL)
  NODE_NAME_CASE(STRICT_FSETCCS_VL)
  NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
  NODE_NAME_CASE(VWMUL_VL)
  NODE_NAME_CASE(VWMULU_VL)
  NODE_NAME_CASE(VWMULSU_VL)
  NODE_NAME_CASE(VWADD_VL)
  NODE_NAME_CASE(VWADDU_VL)
  NODE_NAME_CASE(VWSUB_VL)
  NODE_NAME_CASE(VWSUBU_VL)
  NODE_NAME_CASE(VWADD_W_VL)
  NODE_NAME_CASE(VWADDU_W_VL)
  NODE_NAME_CASE(VWSUB_W_VL)
  NODE_NAME_CASE(VWSUBU_W_VL)
  NODE_NAME_CASE(VWSLL_VL)
  NODE_NAME_CASE(VFWMUL_VL)
  NODE_NAME_CASE(VFWADD_VL)
  NODE_NAME_CASE(VFWSUB_VL)
  NODE_NAME_CASE(VFWADD_W_VL)
  NODE_NAME_CASE(VFWSUB_W_VL)
  NODE_NAME_CASE(VWMACC_VL)
  NODE_NAME_CASE(VWMACCU_VL)
  NODE_NAME_CASE(VWMACCSU_VL)
  NODE_NAME_CASE(VNSRL_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VMERGE_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VCPOP_VL)
  NODE_NAME_CASE(VFIRST_VL)
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  NODE_NAME_CASE(CZERO_EQZ)
  NODE_NAME_CASE(CZERO_NEZ)
  }
  // clang-format on
#undef NODE_NAME_CASE
  return nullptr;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    case 'S': // A symbolic address
      return C_Other;
    }
  } else {
    if (Constraint == "vr" || Constraint == "vm")
      return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(Constraint);
}
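// Map a single-letter ('r', 'f') or RVV ("vr", "vm") register-class
// constraint, or an explicit register name such as {a0} or {ft0}, onto a
// concrete register / register class.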
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a RISC-V
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT.isVector())
        break;
      if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
        return std::make_pair(0U, &RISCV::GPRF16RegClass);
      if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
        return std::make_pair(0U, &RISCV::GPRF32RegClass);
      if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRPairRegClass);
      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
    case 'f':
      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  } else if (Constraint == "vr") {
    for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
                           &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vm") {
    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
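  // For example, "{a0}" resolves to RISCV::X10 below regardless of which
  // frontend produced the constraint string.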
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      if (VT == MVT::f32 || VT == MVT::Other)
        return std::make_pair(FReg, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
    }
  }

  if (Subtarget.hasVInstructions()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  std::pair<Register, const TargetRegisterClass *> Res =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // If we picked one of the Zfinx register classes, remap it to the GPR class.
  // FIXME: When Zfinx is supported in CodeGen this will need to take the
  // Subtarget into account.
  if (Res.second == &RISCV::GPRF16RegClass ||
      Res.second == &RISCV::GPRF32RegClass ||
      Res.second == &RISCV::GPRPairRegClass)
    return std::make_pair(Res.first, &RISCV::GPRRegClass);

  return Res;
}
InlineAsm::ConstraintCode
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::ConstraintCode::A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (isNullConstant(Op))
        Ops.push_back(
            DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'S':
      if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0)));
      } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
                                                BA->getValueType(0)));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
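// Fences inserted around atomic loads/stores. Under Ztso only a seq_cst load
// still needs a leading fence; otherwise seq_cst loads get a leading fence
// and release (or stronger) stores get a leading release fence.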
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (Subtarget.hasStdExtZtso()) {
    if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
      return Builder.CreateFence(Ord);
    return nullptr;
  }

  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (Subtarget.hasStdExtZtso()) {
    if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
      return Builder.CreateFence(Ord);
    return nullptr;
  }

  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
      Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  return nullptr;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LR/SC loop emitted by
  // int_riscv_masked_atomicrmw_xchg.
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
                                     Builder.CreateNot(Mask, "Inv_Mask"),
                                     AI->getAlign(), Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
                                     AI->getAlign(), Ord);
  }

  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
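  // For example, an i8 atomicrmw min on RV32 whose byte lives at bit 8 of the
  // aligned word has ShiftAmt == 8 and ValWidth == 8, so the extra shift
  // passed below is XLen - ValWidth - ShiftAmt == 16.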
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
                                                        EVT DataVT) const {
  // We have indexed loads for all legal index types. Indices are always
  // zero extended.
  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
         isTypeLegal(Extend.getValueType()) &&
         isTypeLegal(Extend.getOperand(0).getValueType());
}

bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
                                               EVT VT) const {
  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
    return false;

  switch (FPVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfhmin();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    return false;
  }
}
unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce size of jump table
  // entry to 4 bytes.
  if (Subtarget.is64Bit() && !isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Small) {
    return MachineJumpTableInfo::EK_Custom32;
  }
  return TargetLowering::getJumpTableEncoding();
}

const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned uid, MCContext &Ctx) const {
  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
         getTargetMachine().getCodeModel() == CodeModel::Small);
  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}

bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
  // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
  // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
  // a power of two as well.
  // FIXME: This doesn't work for zve32, but that's already broken
  // elsewhere for the same reason.
  assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
  static_assert(RISCV::RVVBitsPerBlock == 64,
                "RVVBitsPerBlock changed, audit needed");
  return true;
}
bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
                                                 SDValue &Offset,
                                                 ISD::MemIndexedMode &AM,
                                                 SelectionDAG &DAG) const {
  // Target does not support indexed loads.
  if (!Subtarget.hasVendorXTHeadMemIdx())
    return false;

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
    return false;

  Base = Op->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -(uint64_t)RHSC;

    // The constants that can be encoded in the THeadMemIdx instructions
    // are of the form (sign_extend(imm5) << imm2).
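    // That is, a 5-bit signed immediate scaled by 1, 2, 4 or 8, so offsets
    // such as -64, -8, 24 or 120 are legal while 17 or 1024 are not.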
    bool isLegalIndexedOffset = false;
    for (unsigned i = 0; i < 4; i++)
      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
        isLegalIndexedOffset = true;
        break;
      }

    if (!isLegalIndexedOffset)
      return false;

    Offset = Op->getOperand(1);
    return true;
  }

  return false;
}
*N
, SDValue
&Base
,
19606 ISD::MemIndexedMode
&AM
,
19607 SelectionDAG
&DAG
) const {
19610 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
19611 VT
= LD
->getMemoryVT();
19612 Ptr
= LD
->getBasePtr();
19613 } else if (StoreSDNode
*ST
= dyn_cast
<StoreSDNode
>(N
)) {
19614 VT
= ST
->getMemoryVT();
19615 Ptr
= ST
->getBasePtr();
19619 if (!getIndexedAddressParts(Ptr
.getNode(), Base
, Offset
, AM
, DAG
))
19626 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode
*N
, SDNode
*Op
,
19629 ISD::MemIndexedMode
&AM
,
19630 SelectionDAG
&DAG
) const {
19633 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
19634 VT
= LD
->getMemoryVT();
19635 Ptr
= LD
->getBasePtr();
19636 } else if (StoreSDNode
*ST
= dyn_cast
<StoreSDNode
>(N
)) {
19637 VT
= ST
->getMemoryVT();
19638 Ptr
= ST
->getBasePtr();
19642 if (!getIndexedAddressParts(Op
, Base
, Offset
, AM
, DAG
))
19644 // Post-indexing updates the base, so it's not a valid transform
19645 // if that's not the same as the load's pointer.
19649 AM
= ISD::POST_INC
;
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  EVT SVT = VT.getScalarType();

  if (!SVT.isSimple())
    return false;

  switch (SVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return VT.isVector() ? Subtarget.hasVInstructionsF16()
                         : Subtarget.hasStdExtZfhOrZhinx();
  case MVT::f32:
    return Subtarget.hasStdExtFOrZfinx();
  case MVT::f64:
    return Subtarget.hasStdExtDOrZdinx();
  default:
    break;
  }

  return false;
}

ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // Zacas will use amocas.w which does not require extension.
  return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than XLEN on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getXLen()))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  const bool HasExtMOrZmmul =
      Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the sub target has the M extension and the data
  // size exceeds XLen.
  if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    // Break the MUL to a SLLI and an ADD/SUB.
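    // For example, x * 9 becomes (x << 3) + x and x * 15 becomes (x << 4) - x.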
    const APInt &Imm = ConstNode->getAPIntValue();
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;

    // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
    if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2()))
      return true;

    // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
    // a pair of LUI/ADDI.
    if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
        ConstNode->hasOneUse()) {
      APInt ImmS = Imm.ashr(Imm.countr_zero());
      if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
          (1 - ImmS).isPowerOf2())
        return true;
    }
  }

  return false;
}
bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
                                                      SDValue ConstNode) const {
  // Let the DAGCombiner decide for vectors.
  EVT VT = AddNode.getValueType();
  if (VT.isVector())
    return true;

  // Let the DAGCombiner decide for larger types.
  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
    return true;

  // It is worse if c1 is simm12 while c1*c2 is not.
  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
  const APInt &C1 = C1Node->getAPIntValue();
  const APInt &C2 = C2Node->getAPIntValue();
  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (!VT.isVector()) {
    if (Fast)
      *Fast = Subtarget.hasFastUnalignedAccess();
    return Subtarget.hasFastUnalignedAccess();
  }

  // All vector implementations must support element alignment
  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed, and considered fast.
  if (Fast)
    *Fast = Subtarget.hasFastUnalignedAccess();
  return Subtarget.hasFastUnalignedAccess();
}
EVT RISCVTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!Subtarget.hasVInstructions())
    return MVT::Other;

  if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
    return MVT::Other;

  // We use LMUL1 memory operations here for a non-obvious reason. Our caller
  // has an expansion threshold, and we want the number of hardware memory
  // operations to correspond roughly to that threshold. LMUL>1 operations
  // are typically expanded linearly internally, and thus correspond to more
  // than one actual memory operation. Note that store merging and load
  // combining will typically form larger LMUL operations from the LMUL1
  // operations emitted here, and that's okay because combining isn't
  // introducing new memory operations; it's just merging existing ones.
  const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
  if (Op.size() < MinVLenInBytes)
    // TODO: Figure out short memops. For the moment, do the default thing
    // which ends up using scalar sequences.
    return MVT::Other;

  // Prefer i8 for non-zero memset as it allows us to avoid materializing
  // a large scalar constant and instead use vmv.v.x/i to do the
  // broadcast. For everything else, prefer ELenVT to minimize VL and thus
  // maximize the chance we can encode the size in the vsetvli.
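  // (For example, with a 128-bit minimum VLEN and ELEN=64, a sufficiently
  // aligned memcpy of at least MinVLenInBytes bytes ends up as v2i64 below.)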
  MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
  MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;

  // Do we have sufficient alignment for our preferred VT? If not, revert
  // to largest size allowed by our alignment criteria.
  if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
    Align RequiredAlign(PreferredVT.getStoreSize());
    if (Op.isFixedDstAlign())
      RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
    if (Op.isMemcpy())
      RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
    PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
  }
  return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
}
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
= DAG
.getNode(ISD::BITCAST
, DL
, MVT::i16
, Val
);
19849 Val
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i32
, Val
);
19850 Val
= DAG
.getNode(ISD::OR
, DL
, MVT::i32
, Val
,
19851 DAG
.getConstant(0xFFFF0000, DL
, MVT::i32
));
19852 Val
= DAG
.getNode(ISD::BITCAST
, DL
, MVT::f32
, Val
);
19857 if (ValueVT
.isScalableVector() && PartVT
.isScalableVector()) {
19858 LLVMContext
&Context
= *DAG
.getContext();
19859 EVT ValueEltVT
= ValueVT
.getVectorElementType();
19860 EVT PartEltVT
= PartVT
.getVectorElementType();
19861 unsigned ValueVTBitSize
= ValueVT
.getSizeInBits().getKnownMinValue();
19862 unsigned PartVTBitSize
= PartVT
.getSizeInBits().getKnownMinValue();
19863 if (PartVTBitSize
% ValueVTBitSize
== 0) {
19864 assert(PartVTBitSize
>= ValueVTBitSize
);
19865 // If the element types are different, bitcast to the same element type of
19867 // Give an example here, we want copy a <vscale x 1 x i8> value to
19868 // <vscale x 4 x i16>.
19869 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
19870 // subvector, then we can bitcast to <vscale x 4 x i16>.
19871 if (ValueEltVT
!= PartEltVT
) {
19872 if (PartVTBitSize
> ValueVTBitSize
) {
19873 unsigned Count
= PartVTBitSize
/ ValueEltVT
.getFixedSizeInBits();
19874 assert(Count
!= 0 && "The number of element should not be zero.");
19875 EVT SameEltTypeVT
=
19876 EVT::getVectorVT(Context
, ValueEltVT
, Count
, /*IsScalable=*/true);
19877 Val
= DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, SameEltTypeVT
,
19878 DAG
.getUNDEF(SameEltTypeVT
), Val
,
19879 DAG
.getVectorIdxConstant(0, DL
));
19881 Val
= DAG
.getNode(ISD::BITCAST
, DL
, PartVT
, Val
);
19884 DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, PartVT
, DAG
.getUNDEF(PartVT
),
19885 Val
, DAG
.getVectorIdxConstant(0, DL
));
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
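    // Illustrative: this undoes the NaN-boxing performed in
    // splitValueIntoRegisterParts above, e.g. f32 bits 0xFFFF3F80 are
    // truncated back to the original bf16 bits 0x3F80.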
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16>:
      // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
      // then we can extract <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}
bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing a zero_extend or sign_extend of a splat can prevent it from
  // matching a widening instruction in some cases.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}
static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}
Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}
bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;

    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  }

  // Need to make sure that EMUL * NFIELDS ≤ 8.
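  // Illustrative: an LMUL=4 container with Factor=3 would need 12 vector
  // registers per segment access and is rejected, while LMUL=2 with Factor=4
  // (8 registers) is still accepted.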
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}
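/// Return true if a strided load/store of the given data type and alignment
/// is legal. Illustrative: a strided access with i64 elements needs 8-byte
/// alignment unless fast unaligned vector accesses are available.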
bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.hasFastUnalignedAccess() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}
static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                        %ptr, i64 4)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  IRBuilder<> Builder(LI);

  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());

  Function *VlsegNFunc =
      Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
                                {VTy, LI->getPointerOperandType(), XLenTy});

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());

  CallInst *VlsegN =
      Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});

  for (unsigned i = 0; i < Shuffles.size(); i++) {
    Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }

  return true;
}
static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};
/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                              %ptr, i64 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate
/// one vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  IRBuilder<> Builder(SI);

  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  Function *VssegNFunc =
      Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
                                {VTy, SI->getPointerOperandType(), XLenTy});

  auto Mask = SVI->getShuffleMask();
  SmallVector<Value *, 10> Ops;

  for (unsigned i = 0; i < Factor; i++) {
    Value *Shuffle = Builder.CreateShuffleVector(
        SVI->getOperand(0), SVI->getOperand(1),
        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
    Ops.push_back(Shuffle);
  }

  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});

  Builder.CreateCall(VssegNFunc, Ops);

  return true;
}
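/// Lower a deinterleave2 intrinsic whose input is a load into a segmented
/// load. Illustrative sketch (assuming scalable types):
///   %l = load <vscale x 8 x i32>, ptr %p
///   %d = call {<vscale x 4 x i32>, <vscale x 4 x i32>}
///            @llvm.experimental.vector.deinterleave2(<vscale x 8 x i32> %l)
/// becomes a single @llvm.riscv.vlseg2 call with an all-ones (VLMAX) VL.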
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                           LoadInst *LI) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

  // Only deinterleave2 supported at present.
  if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  Function *VlsegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
  SmallVector<Value *, 10> Ops;

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
  }

  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);

  return true;
}
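/// Lower an interleave2 intrinsic feeding a store into a segmented store.
/// Illustrative sketch (assuming scalable types):
///   %i = call <vscale x 8 x i32>
///            @llvm.experimental.vector.interleave2(<vscale x 4 x i32> %a,
///                                                  <vscale x 4 x i32> %b)
///   store <vscale x 8 x i32> %i, ptr %p
/// becomes a single @llvm.riscv.vsseg2 call with an all-ones (VLMAX) VL.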
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                                          StoreInst *SI) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

  // Only interleave2 supported at present.
  if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);

  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
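// Resolve a register from its name, e.g. for the llvm.read_register and
// llvm.write_register intrinsics. Illustrative: both the ABI name ("sp") and
// the architectural name ("x2") are accepted, but only registers that are
// reserved (always, or via a -ffixed-<reg> style option) may be obtained
// this way.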
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 (the default value) behaves as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
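  // Illustrative: after the adjustment below (level - 2), level 3
  // (__RISCV_NTLH_ALL_PRIVATE) sets only MONontemporalBit0, while level 5
  // (__RISCV_NTLH_ALL) sets both nontemporal bits.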
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}
MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);

  return TargetFlags;
}
bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}
bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}
unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}
bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {

  // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR,
  // and G_XOR.
  unsigned Op = Inst.getOpcode();
  if (Op == Instruction::Add || Op == Instruction::Sub ||
      Op == Instruction::And || Op == Instruction::Or ||
      Op == Instruction::Xor)
    return false;

  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy() &&
        !isa<ReturnInst>(&Inst))
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV

  // Only perform this transform if short forward branch opt is supported.
  if (!Subtarget.hasShortForwardBranchOpt())
    return SDValue();
  EVT VT = N->getValueType(0);
  if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
    return SDValue();

  // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
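  // Illustrative: dividing by 8 rounds toward zero, so the expansion adds
  // 2**k-1 = 7 to negative dividends (under a short forward branch) before
  // the arithmetic shift; the bound above keeps that constant within a
  // single addi/addiw immediate.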
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();

  return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}
unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}
namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable