[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
llvm-project.git / llvm/lib/Target/RISCV/RISCVISelLowering.cpp
blob a0cec426002b6fffb1d0216796306ea1eb7a0b3e
1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
31 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/DiagnosticInfo.h"
34 #include "llvm/IR/DiagnosticPrinter.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/InstructionCost.h"
43 #include "llvm/Support/KnownBits.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include <optional>
48 using namespace llvm;
50 #define DEBUG_TYPE "riscv-lower"
52 STATISTIC(NumTailCalls, "Number of tail calls");
54 static cl::opt<unsigned> ExtensionMaxWebSize(
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
60 static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
66 static cl::opt<unsigned> NumRepeatedDivisors(
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
72 static cl::opt<int>
73 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
78 static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
86 RISCVABI::ABI ABI = Subtarget.getTargetABI();
87 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
90 !Subtarget.hasStdExtF()) {
91 errs() << "Hard-float 'f' ABI can't be used for a target that "
92 "doesn't support the F instruction set extension (ignoring "
93 "target-abi)\n";
94 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
95 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
96 !Subtarget.hasStdExtD()) {
97 errs() << "Hard-float 'd' ABI can't be used for a target that "
98 "doesn't support the D instruction set extension (ignoring "
99 "target-abi)\n";
100 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 switch (ABI) {
104 default:
105 report_fatal_error("Don't know how to lower this ABI");
106 case RISCVABI::ABI_ILP32:
107 case RISCVABI::ABI_ILP32E:
108 case RISCVABI::ABI_LP64E:
109 case RISCVABI::ABI_ILP32F:
110 case RISCVABI::ABI_ILP32D:
111 case RISCVABI::ABI_LP64:
112 case RISCVABI::ABI_LP64F:
113 case RISCVABI::ABI_LP64D:
114 break;
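// XLenVT is the GPR width: MVT::i32 on RV32 and MVT::i64 on RV64.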
117 MVT XLenVT = Subtarget.getXLenVT();
119 // Set up the register classes.
120 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
121 if (Subtarget.is64Bit() && RV64LegalI32)
122 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124 if (Subtarget.hasStdExtZfhmin())
125 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
126 if (Subtarget.hasStdExtZfbfmin())
127 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
128 if (Subtarget.hasStdExtF())
129 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
130 if (Subtarget.hasStdExtD())
131 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
132 if (Subtarget.hasStdExtZhinxmin())
133 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
134 if (Subtarget.hasStdExtZfinx())
135 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
136 if (Subtarget.hasStdExtZdinx()) {
137 if (Subtarget.is64Bit())
138 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
139 else
140 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
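// Scalable RVV vector types, grouped by element type; register classes for
// these are added below when the corresponding vector features are present.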
143 static const MVT::SimpleValueType BoolVecVTs[] = {
144 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
145 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
146 static const MVT::SimpleValueType IntVecVTs[] = {
147 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
148 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
149 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
150 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
151 MVT::nxv4i64, MVT::nxv8i64};
152 static const MVT::SimpleValueType F16VecVTs[] = {
153 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
154 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
155 static const MVT::SimpleValueType BF16VecVTs[] = {
156 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
157 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
158 static const MVT::SimpleValueType F32VecVTs[] = {
159 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
160 static const MVT::SimpleValueType F64VecVTs[] = {
161 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163 if (Subtarget.hasVInstructions()) {
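// addRegClassForRVV assigns each RVV type to the vector register class
// matching its LMUL (VR, VRM2, VRM4 or VRM8).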
164 auto addRegClassForRVV = [this](MVT VT) {
165 // Disable the smallest fractional LMUL types if ELEN is less than
166 // RVVBitsPerBlock.
167 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
168 if (VT.getVectorMinNumElements() < MinElts)
169 return;
171 unsigned Size = VT.getSizeInBits().getKnownMinValue();
172 const TargetRegisterClass *RC;
173 if (Size <= RISCV::RVVBitsPerBlock)
174 RC = &RISCV::VRRegClass;
175 else if (Size == 2 * RISCV::RVVBitsPerBlock)
176 RC = &RISCV::VRM2RegClass;
177 else if (Size == 4 * RISCV::RVVBitsPerBlock)
178 RC = &RISCV::VRM4RegClass;
179 else if (Size == 8 * RISCV::RVVBitsPerBlock)
180 RC = &RISCV::VRM8RegClass;
181 else
182 llvm_unreachable("Unexpected size");
184 addRegisterClass(VT, RC);
187 for (MVT VT : BoolVecVTs)
188 addRegClassForRVV(VT);
189 for (MVT VT : IntVecVTs) {
190 if (VT.getVectorElementType() == MVT::i64 &&
191 !Subtarget.hasVInstructionsI64())
192 continue;
193 addRegClassForRVV(VT);
196 if (Subtarget.hasVInstructionsF16Minimal())
197 for (MVT VT : F16VecVTs)
198 addRegClassForRVV(VT);
200 if (Subtarget.hasVInstructionsBF16())
201 for (MVT VT : BF16VecVTs)
202 addRegClassForRVV(VT);
204 if (Subtarget.hasVInstructionsF32())
205 for (MVT VT : F32VecVTs)
206 addRegClassForRVV(VT);
208 if (Subtarget.hasVInstructionsF64())
209 for (MVT VT : F64VecVTs)
210 addRegClassForRVV(VT);
212 if (Subtarget.useRVVForFixedLengthVectors()) {
213 auto addRegClassForFixedVectors = [this](MVT VT) {
214 MVT ContainerVT = getContainerForFixedLengthVector(VT);
215 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
216 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
217 addRegisterClass(VT, TRI.getRegClass(RCID));
219 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
220 if (useRVVForFixedLengthVectorVT(VT))
221 addRegClassForFixedVectors(VT);
223 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
224 if (useRVVForFixedLengthVectorVT(VT))
225 addRegClassForFixedVectors(VT);
229 // Compute derived properties from the register classes.
230 computeRegisterProperties(STI.getRegisterInfo());
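// X2 is the RISC-V stack pointer (sp).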
232 setStackPointerRegisterToSaveRestore(RISCV::X2);
234 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
235 MVT::i1, Promote);
236 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
237 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
238 MVT::i1, Promote);
240 // TODO: add all necessary setOperationAction calls.
241 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
243 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
244 setOperationAction(ISD::BR_CC, XLenVT, Expand);
245 if (RV64LegalI32 && Subtarget.is64Bit())
246 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
247 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
248 setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
249 if (RV64LegalI32 && Subtarget.is64Bit())
250 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
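// Base RISC-V only provides slt/sltu for integer comparisons; condition codes
// without a direct form are expanded or custom lowered in terms of them.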
252 setCondCodeAction(ISD::SETLE, XLenVT, Expand);
253 setCondCodeAction(ISD::SETGT, XLenVT, Custom);
254 setCondCodeAction(ISD::SETGE, XLenVT, Expand);
255 setCondCodeAction(ISD::SETULE, XLenVT, Expand);
256 setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
257 setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
259 if (RV64LegalI32 && Subtarget.is64Bit())
260 setOperationAction(ISD::SETCC, MVT::i32, Promote);
262 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
264 setOperationAction(ISD::VASTART, MVT::Other, Custom);
265 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
267 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
269 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
271 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
272 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
274 if (Subtarget.is64Bit()) {
275 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
277 if (!RV64LegalI32) {
278 setOperationAction(ISD::LOAD, MVT::i32, Custom);
279 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
280 MVT::i32, Custom);
281 setOperationAction(ISD::SADDO, MVT::i32, Custom);
282 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
283 MVT::i32, Custom);
285 } else {
286 setLibcallName(
287 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
288 nullptr);
289 setLibcallName(RTLIB::MULO_I64, nullptr);
292 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
293 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
294 if (RV64LegalI32 && Subtarget.is64Bit())
295 setOperationAction(ISD::MUL, MVT::i32, Promote);
296 } else if (Subtarget.is64Bit()) {
297 setOperationAction(ISD::MUL, MVT::i128, Custom);
298 if (!RV64LegalI32)
299 setOperationAction(ISD::MUL, MVT::i32, Custom);
300 } else {
301 setOperationAction(ISD::MUL, MVT::i64, Custom);
304 if (!Subtarget.hasStdExtM()) {
305 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
306 XLenVT, Expand);
307 if (RV64LegalI32 && Subtarget.is64Bit())
308 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
309 Promote);
310 } else if (Subtarget.is64Bit()) {
311 if (!RV64LegalI32)
312 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
313 {MVT::i8, MVT::i16, MVT::i32}, Custom);
316 if (RV64LegalI32 && Subtarget.is64Bit()) {
317 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
318 setOperationAction(
319 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
320 Expand);
323 setOperationAction(
324 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
325 Expand);
327 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
328 Custom);
330 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
331 if (!RV64LegalI32 && Subtarget.is64Bit())
332 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
333 } else if (Subtarget.hasVendorXTHeadBb()) {
334 if (Subtarget.is64Bit())
335 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
336 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
337 } else if (Subtarget.hasVendorXCVbitmanip()) {
338 setOperationAction(ISD::ROTL, XLenVT, Expand);
339 } else {
340 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
341 if (RV64LegalI32 && Subtarget.is64Bit())
342 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
345 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
346 // pattern match it directly in isel.
347 setOperationAction(ISD::BSWAP, XLenVT,
348 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
349 Subtarget.hasVendorXTHeadBb())
350 ? Legal
351 : Expand);
352 if (RV64LegalI32 && Subtarget.is64Bit())
353 setOperationAction(ISD::BSWAP, MVT::i32,
354 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
355 Subtarget.hasVendorXTHeadBb())
356 ? Promote
357 : Expand);
360 if (Subtarget.hasVendorXCVbitmanip()) {
361 setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
362 } else {
363 // Zbkb can use rev8+brev8 to implement bitreverse.
364 setOperationAction(ISD::BITREVERSE, XLenVT,
365 Subtarget.hasStdExtZbkb() ? Custom : Expand);
368 if (Subtarget.hasStdExtZbb()) {
369 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
370 Legal);
371 if (RV64LegalI32 && Subtarget.is64Bit())
372 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
373 Promote);
375 if (Subtarget.is64Bit()) {
376 if (RV64LegalI32)
377 setOperationAction(ISD::CTTZ, MVT::i32, Legal);
378 else
379 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
381 } else if (!Subtarget.hasVendorXCVbitmanip()) {
382 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
383 if (RV64LegalI32 && Subtarget.is64Bit())
384 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
387 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
388 Subtarget.hasVendorXCVbitmanip()) {
389 // We need the custom lowering to make sure that the resulting sequence
390 // for the 32bit case is efficient on 64bit targets.
391 if (Subtarget.is64Bit()) {
392 if (RV64LegalI32) {
393 setOperationAction(ISD::CTLZ, MVT::i32,
394 Subtarget.hasStdExtZbb() ? Legal : Promote);
395 if (!Subtarget.hasStdExtZbb())
396 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
397 } else
398 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
400 } else {
401 setOperationAction(ISD::CTLZ, XLenVT, Expand);
402 if (RV64LegalI32 && Subtarget.is64Bit())
403 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
406 if (!RV64LegalI32 && Subtarget.is64Bit() &&
407 !Subtarget.hasShortForwardBranchOpt())
408 setOperationAction(ISD::ABS, MVT::i32, Custom);
410 // We can use PseudoCCSUB to implement ABS.
411 if (Subtarget.hasShortForwardBranchOpt())
412 setOperationAction(ISD::ABS, XLenVT, Legal);
414 if (!Subtarget.hasVendorXTHeadCondMov())
415 setOperationAction(ISD::SELECT, XLenVT, Custom);
417 if (RV64LegalI32 && Subtarget.is64Bit())
418 setOperationAction(ISD::SELECT, MVT::i32, Promote);
420 static const unsigned FPLegalNodeTypes[] = {
421 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
422 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
423 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
424 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
425 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
426 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
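// FP condition codes with no direct RISC-V instruction (F/D/Zfh only provide
// feq, flt and fle); these are expanded in terms of the comparisons that exist.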
428 static const ISD::CondCode FPCCToExpand[] = {
429 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
430 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
431 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
433 static const unsigned FPOpToExpand[] = {
434 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
435 ISD::FREM};
437 static const unsigned FPRndMode[] = {
438 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
439 ISD::FROUNDEVEN};
441 if (Subtarget.hasStdExtZfhminOrZhinxmin())
442 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
444 static const unsigned ZfhminZfbfminPromoteOps[] = {
445 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
446 ISD::FSUB, ISD::FMUL, ISD::FMA,
447 ISD::FDIV, ISD::FSQRT, ISD::FABS,
448 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
449 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
450 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
451 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
452 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
453 ISD::FROUNDEVEN, ISD::SELECT};
455 if (Subtarget.hasStdExtZfbfmin()) {
456 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
457 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
458 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
459 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
460 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
461 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
462 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
463 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
464 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
465 setOperationAction(ISD::FREM, MVT::bf16, Promote);
466 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
467 // DAGCombiner::visitFP_ROUND probably needs improvements first.
468 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
471 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
472 if (Subtarget.hasStdExtZfhOrZhinx()) {
473 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
474 setOperationAction(FPRndMode, MVT::f16,
475 Subtarget.hasStdExtZfa() ? Legal : Custom);
476 setOperationAction(ISD::SELECT, MVT::f16, Custom);
477 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
478 } else {
479 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
480 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
481 ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
482 MVT::f16, Legal);
483 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
484 // DAGCombiner::visitFP_ROUND probably needs improvements first.
485 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
488 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
489 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
490 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
491 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
492 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
494 setOperationAction(ISD::FNEARBYINT, MVT::f16,
495 Subtarget.hasStdExtZfa() ? Legal : Promote);
496 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
497 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
498 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
499 ISD::FLOG10},
500 MVT::f16, Promote);
502 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
503 // complete support for all operations in LegalizeDAG.
504 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
505 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
506 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
507 ISD::STRICT_FTRUNC},
508 MVT::f16, Promote);
510 // We need to custom promote this.
511 if (Subtarget.is64Bit())
512 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
514 if (!Subtarget.hasStdExtZfa())
515 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
523 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
524 setOperationAction(ISD::SELECT, MVT::f32, Custom);
525 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
531 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
532 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
533 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
535 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
536 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
538 if (Subtarget.hasStdExtZfa())
539 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
540 else
541 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
544 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
545 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
547 if (Subtarget.hasStdExtDOrZdinx()) {
548 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
550 if (Subtarget.hasStdExtZfa()) {
551 setOperationAction(FPRndMode, MVT::f64, Legal);
552 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
553 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
554 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
555 } else {
556 if (Subtarget.is64Bit())
557 setOperationAction(FPRndMode, MVT::f64, Custom);
559 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
562 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
563 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
564 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
565 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
566 setOperationAction(ISD::SELECT, MVT::f64, Custom);
567 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
568 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
569 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
570 setOperationAction(FPOpToExpand, MVT::f64, Expand);
571 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
572 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
573 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
574 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
575 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
576 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
577 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
578 Subtarget.isSoftFPABI() ? LibCall : Custom);
579 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
580 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
583 if (Subtarget.is64Bit()) {
584 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
585 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
586 MVT::i32, Custom);
587 setOperationAction(ISD::LROUND, MVT::i32, Custom);
590 if (Subtarget.hasStdExtFOrZfinx()) {
591 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
592 Custom);
594 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
595 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
596 XLenVT, Legal);
598 if (RV64LegalI32 && Subtarget.is64Bit())
599 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
600 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
601 MVT::i32, Legal);
603 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
604 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
607 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
608 ISD::JumpTable},
609 XLenVT, Custom);
611 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
613 if (Subtarget.is64Bit())
614 setOperationAction(ISD::Constant, MVT::i64, Custom);
616 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
617 // Unfortunately this can't be determined just from the ISA naming string.
618 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
619 Subtarget.is64Bit() ? Legal : Custom);
621 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
622 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
623 if (Subtarget.is64Bit())
624 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
626 if (Subtarget.hasStdExtZicbop()) {
627 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
630 if (Subtarget.hasStdExtA()) {
631 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
632 setMinCmpXchgSizeInBits(32);
633 } else if (Subtarget.hasForcedAtomics()) {
634 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
635 } else {
636 setMaxAtomicSizeInBitsSupported(0);
639 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
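// Boolean (setcc) results are 0 or 1 in a GPR.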
641 setBooleanContents(ZeroOrOneBooleanContent);
643 if (Subtarget.hasVInstructions()) {
644 setBooleanVectorContents(ZeroOrOneBooleanContent);
646 setOperationAction(ISD::VSCALE, XLenVT, Custom);
647 if (RV64LegalI32 && Subtarget.is64Bit())
648 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
650 // RVV intrinsics may have illegal operands.
651 // We also need to custom legalize vmv.x.s.
652 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
653 ISD::INTRINSIC_VOID},
654 {MVT::i8, MVT::i16}, Custom);
655 if (Subtarget.is64Bit())
656 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
657 MVT::i32, Custom);
658 else
659 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
660 MVT::i64, Custom);
662 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
663 MVT::Other, Custom);
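// Vector-predicated (VP) integer opcodes custom lowered to their RVV VL-based
// forms.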
665 static const unsigned IntegerVPOps[] = {
666 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
667 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
668 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
669 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
670 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
671 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
672 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
673 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
674 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
675 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
676 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
677 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
679 static const unsigned FloatingPointVPOps[] = {
680 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
681 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
682 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
683 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
684 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
685 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
686 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
687 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
688 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
689 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
690 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE,
691 ISD::EXPERIMENTAL_VP_SPLICE};
693 static const unsigned IntegerVecReduceOps[] = {
694 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
695 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
696 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
698 static const unsigned FloatingPointVecReduceOps[] = {
699 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
700 ISD::VECREDUCE_FMAX};
702 if (!Subtarget.is64Bit()) {
703 // We must custom-lower certain vXi64 operations on RV32 due to the vector
704 // element type being illegal.
705 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
706 MVT::i64, Custom);
708 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
710 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
711 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
712 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
713 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
714 MVT::i64, Custom);
717 for (MVT VT : BoolVecVTs) {
718 if (!isTypeLegal(VT))
719 continue;
721 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
723 // Mask VTs are custom-expanded into a series of standard nodes
724 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
725 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
726 ISD::SCALAR_TO_VECTOR},
727 VT, Custom);
729 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
730 Custom);
732 setOperationAction(ISD::SELECT, VT, Custom);
733 setOperationAction(
734 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
735 Expand);
737 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
739 setOperationAction(
740 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
741 Custom);
743 setOperationAction(
744 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
745 Custom);
747 // RVV has native int->float & float->int conversions where the
748 // element type sizes are within one power-of-two of each other. Any
749 // wider distances between type sizes have to be lowered as sequences
750 // which progressively narrow the gap in stages.
751 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
752 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
753 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
754 ISD::STRICT_FP_TO_UINT},
755 VT, Custom);
756 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
757 Custom);
759 // Expand all extending loads to types larger than this, and truncating
760 // stores from types larger than this.
761 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
762 setTruncStoreAction(VT, OtherVT, Expand);
763 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
764 OtherVT, Expand);
767 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
768 ISD::VP_TRUNCATE, ISD::VP_SETCC},
769 VT, Custom);
771 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
772 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
774 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
776 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
777 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
779 setOperationPromotedToType(
780 ISD::VECTOR_SPLICE, VT,
781 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
784 for (MVT VT : IntVecVTs) {
785 if (!isTypeLegal(VT))
786 continue;
788 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
789 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
791 // Vectors implement MULHS/MULHU.
792 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
794 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
795 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
796 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
798 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
799 Legal);
801 // Custom-lower extensions and truncations from/to mask types.
802 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
803 VT, Custom);
805 // RVV has native int->float & float->int conversions where the
806 // element type sizes are within one power-of-two of each other. Any
807 // wider distances between type sizes have to be lowered as sequences
808 // which progressively narrow the gap in stages.
809 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
810 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
811 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
812 ISD::STRICT_FP_TO_UINT},
813 VT, Custom);
814 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
815 Custom);
816 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
817 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
818 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
819 VT, Legal);
821 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
822 // nodes which truncate by one power of two at a time.
823 setOperationAction(ISD::TRUNCATE, VT, Custom);
825 // Custom-lower insert/extract operations to simplify patterns.
826 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
827 Custom);
829 // Custom-lower reduction operations to set up the corresponding custom
830 // nodes' operands.
831 setOperationAction(IntegerVecReduceOps, VT, Custom);
833 setOperationAction(IntegerVPOps, VT, Custom);
835 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
837 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
838 VT, Custom);
840 setOperationAction(
841 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
842 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
843 VT, Custom);
845 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
846 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
847 VT, Custom);
849 setOperationAction(ISD::SELECT, VT, Custom);
850 setOperationAction(ISD::SELECT_CC, VT, Expand);
852 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
854 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
855 setTruncStoreAction(VT, OtherVT, Expand);
856 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
857 OtherVT, Expand);
860 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
861 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
863 // Splice
864 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
866 if (Subtarget.hasStdExtZvkb()) {
867 setOperationAction(ISD::BSWAP, VT, Legal);
868 setOperationAction(ISD::VP_BSWAP, VT, Custom);
869 } else {
870 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
871 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
874 if (Subtarget.hasStdExtZvbb()) {
875 setOperationAction(ISD::BITREVERSE, VT, Legal);
876 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
877 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
878 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
879 VT, Custom);
880 } else {
881 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
882 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
883 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
884 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
885 VT, Expand);
887 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
888 // in the range of f32.
889 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
890 if (isTypeLegal(FloatVT)) {
891 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
892 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
893 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
894 VT, Custom);
899 // Expand various CCs to best match the RVV ISA, which natively supports UNE
900 // but no other unordered comparisons, and supports all ordered comparisons
901 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
902 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
903 // and we pattern-match those back to the "original", swapping operands once
904 // more. This way we catch both operations and both "vf" and "fv" forms with
905 // fewer patterns.
906 static const ISD::CondCode VFPCCToExpand[] = {
907 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
908 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
909 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
912 // TODO: support more ops.
913 static const unsigned ZvfhminPromoteOps[] = {
914 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
915 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
916 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
917 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
918 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
919 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
920 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
922 // TODO: support more vp ops.
923 static const unsigned ZvfhminPromoteVPOps[] = {
924 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
925 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
926 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
927 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
928 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
929 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
930 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
931 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
932 ISD::VP_FMAXIMUM};
934 // Sets common operation actions on RVV floating-point vector types.
935 const auto SetCommonVFPActions = [&](MVT VT) {
936 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
937 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
938 // sizes are within one power-of-two of each other. Therefore conversions
939 // between vXf16 and vXf64 must be lowered as sequences which convert via
940 // vXf32.
941 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
942 // Custom-lower insert/extract operations to simplify patterns.
943 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
944 Custom);
945 // Expand various condition codes (explained above).
946 setCondCodeAction(VFPCCToExpand, VT, Expand);
948 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
949 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
951 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
952 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
953 ISD::IS_FPCLASS},
954 VT, Custom);
956 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
958 // Expand FP operations that need libcalls.
959 setOperationAction(ISD::FREM, VT, Expand);
960 setOperationAction(ISD::FPOW, VT, Expand);
961 setOperationAction(ISD::FCOS, VT, Expand);
962 setOperationAction(ISD::FSIN, VT, Expand);
963 setOperationAction(ISD::FSINCOS, VT, Expand);
964 setOperationAction(ISD::FEXP, VT, Expand);
965 setOperationAction(ISD::FEXP2, VT, Expand);
966 setOperationAction(ISD::FEXP10, VT, Expand);
967 setOperationAction(ISD::FLOG, VT, Expand);
968 setOperationAction(ISD::FLOG2, VT, Expand);
969 setOperationAction(ISD::FLOG10, VT, Expand);
971 setOperationAction(ISD::FCOPYSIGN, VT, Legal);
973 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
975 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
976 VT, Custom);
978 setOperationAction(
979 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
980 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
981 VT, Custom);
983 setOperationAction(ISD::SELECT, VT, Custom);
984 setOperationAction(ISD::SELECT_CC, VT, Expand);
986 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
987 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
988 VT, Custom);
990 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
991 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
993 setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
995 setOperationAction(FloatingPointVPOps, VT, Custom);
997 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
998 Custom);
999 setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1000 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
1001 VT, Legal);
1002 setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
1003 ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
1004 ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1005 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1006 VT, Custom);
1009 // Sets common extload/truncstore actions on RVV floating-point vector
1010 // types.
1011 const auto SetCommonVFPExtLoadTruncStoreActions =
1012 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1013 for (auto SmallVT : SmallerVTs) {
1014 setTruncStoreAction(VT, SmallVT, Expand);
1015 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1019 if (Subtarget.hasVInstructionsF16()) {
1020 for (MVT VT : F16VecVTs) {
1021 if (!isTypeLegal(VT))
1022 continue;
1023 SetCommonVFPActions(VT);
1025 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1026 for (MVT VT : F16VecVTs) {
1027 if (!isTypeLegal(VT))
1028 continue;
1029 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1030 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1031 Custom);
1032 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1033 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1034 Custom);
1035 setOperationAction(ISD::SELECT_CC, VT, Expand);
1036 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1037 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1038 VT, Custom);
1039 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1040 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1041 VT, Custom);
1042 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1043 // load/store
1044 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1046 // Custom split nxv32f16 since nxv32f32 is not legal.
1047 if (VT == MVT::nxv32f16) {
1048 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1049 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1050 continue;
1052 // Add more promote ops.
1053 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1054 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1055 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1059 if (Subtarget.hasVInstructionsF32()) {
1060 for (MVT VT : F32VecVTs) {
1061 if (!isTypeLegal(VT))
1062 continue;
1063 SetCommonVFPActions(VT);
1064 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1068 if (Subtarget.hasVInstructionsF64()) {
1069 for (MVT VT : F64VecVTs) {
1070 if (!isTypeLegal(VT))
1071 continue;
1072 SetCommonVFPActions(VT);
1073 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1074 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1078 if (Subtarget.useRVVForFixedLengthVectors()) {
1079 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1080 if (!useRVVForFixedLengthVectorVT(VT))
1081 continue;
1083 // By default everything must be expanded.
1084 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1085 setOperationAction(Op, VT, Expand);
1086 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1087 setTruncStoreAction(VT, OtherVT, Expand);
1088 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1089 OtherVT, Expand);
1092 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1093 // expansion to a build_vector of 0s.
1094 setOperationAction(ISD::UNDEF, VT, Custom);
1096 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1097 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1098 Custom);
1100 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
1101 Custom);
1103 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1104 VT, Custom);
1106 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1108 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1110 setOperationAction(ISD::SETCC, VT, Custom);
1112 setOperationAction(ISD::SELECT, VT, Custom);
1114 setOperationAction(ISD::TRUNCATE, VT, Custom);
1116 setOperationAction(ISD::BITCAST, VT, Custom);
1118 setOperationAction(
1119 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1120 Custom);
1122 setOperationAction(
1123 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1124 Custom);
1126 setOperationAction(
1128 ISD::SINT_TO_FP,
1129 ISD::UINT_TO_FP,
1130 ISD::FP_TO_SINT,
1131 ISD::FP_TO_UINT,
1132 ISD::STRICT_SINT_TO_FP,
1133 ISD::STRICT_UINT_TO_FP,
1134 ISD::STRICT_FP_TO_SINT,
1135 ISD::STRICT_FP_TO_UINT,
1137 VT, Custom);
1138 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1139 Custom);
1141 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1143 // Operations below are different between masks and other vectors.
1144 if (VT.getVectorElementType() == MVT::i1) {
1145 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1146 ISD::OR, ISD::XOR},
1147 VT, Custom);
1149 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1150 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1151 VT, Custom);
1153 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1154 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1155 continue;
1158 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1159 // it before type legalization for i64 vectors on RV32. It will then be
1160 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1161 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1162 // improvements first.
1163 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1164 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1165 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1168 setOperationAction(
1169 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1171 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1172 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1173 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1174 ISD::VP_SCATTER},
1175 VT, Custom);
1177 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1178 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1179 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1180 VT, Custom);
1182 setOperationAction(
1183 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1185 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1186 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1187 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1189 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
1190 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
1191 VT, Custom);
1193 setOperationAction(ISD::VSELECT, VT, Custom);
1194 setOperationAction(ISD::SELECT_CC, VT, Expand);
1196 setOperationAction(
1197 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1199 // Custom-lower reduction operations to set up the corresponding custom
1200 // nodes' operands.
1201 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1202 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1203 ISD::VECREDUCE_UMIN},
1204 VT, Custom);
1206 setOperationAction(IntegerVPOps, VT, Custom);
1208 if (Subtarget.hasStdExtZvkb())
1209 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1211 if (Subtarget.hasStdExtZvbb()) {
1212 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1213 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1214 VT, Custom);
1215 } else {
1216 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1217 // in the range of f32.
1218 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1219 if (isTypeLegal(FloatVT))
1220 setOperationAction(
1221 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1222 Custom);
1226 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1227 // There are no extending loads or truncating stores.
1228 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1229 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1230 setTruncStoreAction(VT, InnerVT, Expand);
1233 if (!useRVVForFixedLengthVectorVT(VT))
1234 continue;
1236 // By default everything must be expanded.
1237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1238 setOperationAction(Op, VT, Expand);
1240 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1241 // expansion to a build_vector of 0s.
1242 setOperationAction(ISD::UNDEF, VT, Custom);
1244 if (VT.getVectorElementType() == MVT::f16 &&
1245 !Subtarget.hasVInstructionsF16()) {
1246 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1247 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1248 Custom);
1249 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1250 setOperationAction(
1251 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1252 Custom);
1253 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1254 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1255 VT, Custom);
1256 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1257 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1258 VT, Custom);
1259 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1260 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1261 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1262 // Don't promote f16 vector operations to f32 if f32 vector type is
1263 // not legal.
1264 // TODO: could split the f16 vector into two vectors and do promotion.
1265 if (!isTypeLegal(F32VecVT))
1266 continue;
1267 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1268 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1269 continue;
1272 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1273 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1274 Custom);
1276 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1277 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1278 ISD::EXTRACT_VECTOR_ELT},
1279 VT, Custom);
1281 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1282 ISD::MGATHER, ISD::MSCATTER},
1283 VT, Custom);
1285 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1286 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1287 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1288 ISD::VP_SCATTER},
1289 VT, Custom);
1291 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1292 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1293 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1294 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1295 VT, Custom);
1297 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1299 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1300 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1301 VT, Custom);
1303 setCondCodeAction(VFPCCToExpand, VT, Expand);
1305 setOperationAction(ISD::SETCC, VT, Custom);
1306 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1307 setOperationAction(ISD::SELECT_CC, VT, Expand);
1309 setOperationAction(ISD::BITCAST, VT, Custom);
1311 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1313 setOperationAction(FloatingPointVPOps, VT, Custom);
1315 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1316 Custom);
1317 setOperationAction(
1318 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1319 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1320 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1321 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1322 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1323 VT, Custom);
1326 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1327 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1328 Custom);
1329 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1330 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1331 if (Subtarget.hasStdExtFOrZfinx())
1332 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1333 if (Subtarget.hasStdExtDOrZdinx())
1334 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
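// There is no amosub instruction, so expanding ATOMIC_LOAD_SUB turns it into
// an amoadd of the negated value.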
1338 if (Subtarget.hasStdExtA()) {
1339 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1340 if (RV64LegalI32 && Subtarget.is64Bit())
1341 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1344 if (Subtarget.hasForcedAtomics()) {
1345 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1346 setOperationAction(
1347 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1348 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1349 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1350 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1351 XLenVT, LibCall);
1354 if (Subtarget.hasVendorXTHeadMemIdx()) {
1355 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1356 setIndexedLoadAction(im, MVT::i8, Legal);
1357 setIndexedStoreAction(im, MVT::i8, Legal);
1358 setIndexedLoadAction(im, MVT::i16, Legal);
1359 setIndexedStoreAction(im, MVT::i16, Legal);
1360 setIndexedLoadAction(im, MVT::i32, Legal);
1361 setIndexedStoreAction(im, MVT::i32, Legal);
1363 if (Subtarget.is64Bit()) {
1364 setIndexedLoadAction(im, MVT::i64, Legal);
1365 setIndexedStoreAction(im, MVT::i64, Legal);
1370 // Function alignments.
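// With the compressed extension (C or Zca) instructions may start on 2-byte
// boundaries; otherwise 4-byte alignment is required.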
1371 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1372 setMinFunctionAlignment(FunctionAlignment);
1373 // Set preferred alignments.
1374 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1375 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1377 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1378 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
1379 ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1380 if (Subtarget.is64Bit())
1381 setTargetDAGCombine(ISD::SRA);
1383 if (Subtarget.hasStdExtFOrZfinx())
1384 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1386 if (Subtarget.hasStdExtZbb())
1387 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1389 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1390 setTargetDAGCombine(ISD::TRUNCATE);
1392 if (Subtarget.hasStdExtZbkb())
1393 setTargetDAGCombine(ISD::BITREVERSE);
1394 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1395 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1396 if (Subtarget.hasStdExtFOrZfinx())
1397 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1398 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1399 if (Subtarget.hasVInstructions())
1400 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1401 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1402 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1403 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1404 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1405 ISD::INSERT_VECTOR_ELT});
1406 if (Subtarget.hasVendorXTHeadMemPair())
1407 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1408 if (Subtarget.useRVVForFixedLengthVectors())
1409 setTargetDAGCombine(ISD::BITCAST);
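// Libcall names for f16 <-> f32 conversions.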
1411 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1412 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1414 // Disable strict node mutation.
1415 IsStrictFPEnabled = true;
1418 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1419 LLVMContext &Context,
1420 EVT VT) const {
1421 if (!VT.isVector())
1422 return getPointerTy(DL);
1423 if (Subtarget.hasVInstructions() &&
1424 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1425 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1426 return VT.changeVectorElementTypeToInteger();
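// The explicit vector length (EVL) operand of VP operations uses the XLen
// integer type.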
1429 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1430 return Subtarget.getXLenVT();
1433 // Return false if we can lower get_vector_length to a vsetvli intrinsic.
1434 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1435 unsigned VF,
1436 bool IsScalable) const {
1437 if (!Subtarget.hasVInstructions())
1438 return true;
1440 if (!IsScalable)
1441 return true;
1443 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1444 return true;
1446 // Don't allow VF=1 if those types aren't legal.
1447 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1448 return true;
1450 // VLEN=32 support is incomplete.
1451 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1452 return true;
1454 // The maximum VF is for the smallest element width with LMUL=8.
1455 // VF must be a power of 2.
1456 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1457 return VF > MaxVF || !isPowerOf2_32(VF);
1460 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1461 const CallInst &I,
1462 MachineFunction &MF,
1463 unsigned Intrinsic) const {
1464 auto &DL = I.getModule()->getDataLayout();
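// Helper that fills Info for RVV load/store intrinsics. For non-unit-strided
// accesses only the element type is known to be contiguous, so memVT is set
// to the scalar element type.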
1466 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1467 bool IsUnitStrided) {
1468 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1469 Info.ptrVal = I.getArgOperand(PtrOp);
1470 Type *MemTy;
1471 if (IsStore) {
1472 // Store value is the first operand.
1473 MemTy = I.getArgOperand(0)->getType();
1474 } else {
1475 // Use the return type. If it's a segment load, the return type is a struct.
1476 MemTy = I.getType();
1477 if (MemTy->isStructTy())
1478 MemTy = MemTy->getStructElementType(0);
1480 if (!IsUnitStrided)
1481 MemTy = MemTy->getScalarType();
1483 Info.memVT = getValueType(DL, MemTy);
1484 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1485 Info.size = MemoryLocation::UnknownSize;
1486 Info.flags |=
1487 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1488 return true;
1491 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1492 Info.flags |= MachineMemOperand::MONonTemporal;
1494 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1495 switch (Intrinsic) {
1496 default:
1497 return false;
1498 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1499 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1500 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1501 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1502 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1503 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1504 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1505 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1506 case Intrinsic::riscv_masked_cmpxchg_i32:
1507 Info.opc = ISD::INTRINSIC_W_CHAIN;
1508 Info.memVT = MVT::i32;
1509 Info.ptrVal = I.getArgOperand(0);
1510 Info.offset = 0;
1511 Info.align = Align(4);
1512 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1513 MachineMemOperand::MOVolatile;
1514 return true;
1515 case Intrinsic::riscv_masked_strided_load:
1516 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1517 /*IsUnitStrided*/ false);
1518 case Intrinsic::riscv_masked_strided_store:
1519 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1520 /*IsUnitStrided*/ false);
1521 case Intrinsic::riscv_seg2_load:
1522 case Intrinsic::riscv_seg3_load:
1523 case Intrinsic::riscv_seg4_load:
1524 case Intrinsic::riscv_seg5_load:
1525 case Intrinsic::riscv_seg6_load:
1526 case Intrinsic::riscv_seg7_load:
1527 case Intrinsic::riscv_seg8_load:
1528 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1529 /*IsUnitStrided*/ false);
1530 case Intrinsic::riscv_seg2_store:
1531 case Intrinsic::riscv_seg3_store:
1532 case Intrinsic::riscv_seg4_store:
1533 case Intrinsic::riscv_seg5_store:
1534 case Intrinsic::riscv_seg6_store:
1535 case Intrinsic::riscv_seg7_store:
1536 case Intrinsic::riscv_seg8_store:
1537 // Operands are (vec, ..., vec, ptr, vl)
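// For example (illustrative): riscv.seg2.store has operands (vec0, vec1, ptr,
// vl), so the pointer is operand I.arg_size() - 2 == 2.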
1538 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1539 /*IsStore*/ true,
1540 /*IsUnitStrided*/ false);
1541 case Intrinsic::riscv_vle:
1542 case Intrinsic::riscv_vle_mask:
1543 case Intrinsic::riscv_vleff:
1544 case Intrinsic::riscv_vleff_mask:
1545 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1546 /*IsStore*/ false,
1547 /*IsUnitStrided*/ true);
1548 case Intrinsic::riscv_vse:
1549 case Intrinsic::riscv_vse_mask:
1550 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1551 /*IsStore*/ true,
1552 /*IsUnitStrided*/ true);
1553 case Intrinsic::riscv_vlse:
1554 case Intrinsic::riscv_vlse_mask:
1555 case Intrinsic::riscv_vloxei:
1556 case Intrinsic::riscv_vloxei_mask:
1557 case Intrinsic::riscv_vluxei:
1558 case Intrinsic::riscv_vluxei_mask:
1559 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1560 /*IsStore*/ false,
1561 /*IsUnitStrided*/ false);
1562 case Intrinsic::riscv_vsse:
1563 case Intrinsic::riscv_vsse_mask:
1564 case Intrinsic::riscv_vsoxei:
1565 case Intrinsic::riscv_vsoxei_mask:
1566 case Intrinsic::riscv_vsuxei:
1567 case Intrinsic::riscv_vsuxei_mask:
1568 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1569 /*IsStore*/ true,
1570 /*IsUnitStrided*/ false);
1571 case Intrinsic::riscv_vlseg2:
1572 case Intrinsic::riscv_vlseg3:
1573 case Intrinsic::riscv_vlseg4:
1574 case Intrinsic::riscv_vlseg5:
1575 case Intrinsic::riscv_vlseg6:
1576 case Intrinsic::riscv_vlseg7:
1577 case Intrinsic::riscv_vlseg8:
1578 case Intrinsic::riscv_vlseg2ff:
1579 case Intrinsic::riscv_vlseg3ff:
1580 case Intrinsic::riscv_vlseg4ff:
1581 case Intrinsic::riscv_vlseg5ff:
1582 case Intrinsic::riscv_vlseg6ff:
1583 case Intrinsic::riscv_vlseg7ff:
1584 case Intrinsic::riscv_vlseg8ff:
1585 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1586 /*IsStore*/ false,
1587 /*IsUnitStrided*/ false);
1588 case Intrinsic::riscv_vlseg2_mask:
1589 case Intrinsic::riscv_vlseg3_mask:
1590 case Intrinsic::riscv_vlseg4_mask:
1591 case Intrinsic::riscv_vlseg5_mask:
1592 case Intrinsic::riscv_vlseg6_mask:
1593 case Intrinsic::riscv_vlseg7_mask:
1594 case Intrinsic::riscv_vlseg8_mask:
1595 case Intrinsic::riscv_vlseg2ff_mask:
1596 case Intrinsic::riscv_vlseg3ff_mask:
1597 case Intrinsic::riscv_vlseg4ff_mask:
1598 case Intrinsic::riscv_vlseg5ff_mask:
1599 case Intrinsic::riscv_vlseg6ff_mask:
1600 case Intrinsic::riscv_vlseg7ff_mask:
1601 case Intrinsic::riscv_vlseg8ff_mask:
1602 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1603 /*IsStore*/ false,
1604 /*IsUnitStrided*/ false);
1605 case Intrinsic::riscv_vlsseg2:
1606 case Intrinsic::riscv_vlsseg3:
1607 case Intrinsic::riscv_vlsseg4:
1608 case Intrinsic::riscv_vlsseg5:
1609 case Intrinsic::riscv_vlsseg6:
1610 case Intrinsic::riscv_vlsseg7:
1611 case Intrinsic::riscv_vlsseg8:
1612 case Intrinsic::riscv_vloxseg2:
1613 case Intrinsic::riscv_vloxseg3:
1614 case Intrinsic::riscv_vloxseg4:
1615 case Intrinsic::riscv_vloxseg5:
1616 case Intrinsic::riscv_vloxseg6:
1617 case Intrinsic::riscv_vloxseg7:
1618 case Intrinsic::riscv_vloxseg8:
1619 case Intrinsic::riscv_vluxseg2:
1620 case Intrinsic::riscv_vluxseg3:
1621 case Intrinsic::riscv_vluxseg4:
1622 case Intrinsic::riscv_vluxseg5:
1623 case Intrinsic::riscv_vluxseg6:
1624 case Intrinsic::riscv_vluxseg7:
1625 case Intrinsic::riscv_vluxseg8:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1627 /*IsStore*/ false,
1628 /*IsUnitStrided*/ false);
1629 case Intrinsic::riscv_vlsseg2_mask:
1630 case Intrinsic::riscv_vlsseg3_mask:
1631 case Intrinsic::riscv_vlsseg4_mask:
1632 case Intrinsic::riscv_vlsseg5_mask:
1633 case Intrinsic::riscv_vlsseg6_mask:
1634 case Intrinsic::riscv_vlsseg7_mask:
1635 case Intrinsic::riscv_vlsseg8_mask:
1636 case Intrinsic::riscv_vloxseg2_mask:
1637 case Intrinsic::riscv_vloxseg3_mask:
1638 case Intrinsic::riscv_vloxseg4_mask:
1639 case Intrinsic::riscv_vloxseg5_mask:
1640 case Intrinsic::riscv_vloxseg6_mask:
1641 case Intrinsic::riscv_vloxseg7_mask:
1642 case Intrinsic::riscv_vloxseg8_mask:
1643 case Intrinsic::riscv_vluxseg2_mask:
1644 case Intrinsic::riscv_vluxseg3_mask:
1645 case Intrinsic::riscv_vluxseg4_mask:
1646 case Intrinsic::riscv_vluxseg5_mask:
1647 case Intrinsic::riscv_vluxseg6_mask:
1648 case Intrinsic::riscv_vluxseg7_mask:
1649 case Intrinsic::riscv_vluxseg8_mask:
1650 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1651 /*IsStore*/ false,
1652 /*IsUnitStrided*/ false);
1653 case Intrinsic::riscv_vsseg2:
1654 case Intrinsic::riscv_vsseg3:
1655 case Intrinsic::riscv_vsseg4:
1656 case Intrinsic::riscv_vsseg5:
1657 case Intrinsic::riscv_vsseg6:
1658 case Intrinsic::riscv_vsseg7:
1659 case Intrinsic::riscv_vsseg8:
1660 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1661 /*IsStore*/ true,
1662 /*IsUnitStrided*/ false);
1663 case Intrinsic::riscv_vsseg2_mask:
1664 case Intrinsic::riscv_vsseg3_mask:
1665 case Intrinsic::riscv_vsseg4_mask:
1666 case Intrinsic::riscv_vsseg5_mask:
1667 case Intrinsic::riscv_vsseg6_mask:
1668 case Intrinsic::riscv_vsseg7_mask:
1669 case Intrinsic::riscv_vsseg8_mask:
1670 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1671 /*IsStore*/ true,
1672 /*IsUnitStrided*/ false);
1673 case Intrinsic::riscv_vssseg2:
1674 case Intrinsic::riscv_vssseg3:
1675 case Intrinsic::riscv_vssseg4:
1676 case Intrinsic::riscv_vssseg5:
1677 case Intrinsic::riscv_vssseg6:
1678 case Intrinsic::riscv_vssseg7:
1679 case Intrinsic::riscv_vssseg8:
1680 case Intrinsic::riscv_vsoxseg2:
1681 case Intrinsic::riscv_vsoxseg3:
1682 case Intrinsic::riscv_vsoxseg4:
1683 case Intrinsic::riscv_vsoxseg5:
1684 case Intrinsic::riscv_vsoxseg6:
1685 case Intrinsic::riscv_vsoxseg7:
1686 case Intrinsic::riscv_vsoxseg8:
1687 case Intrinsic::riscv_vsuxseg2:
1688 case Intrinsic::riscv_vsuxseg3:
1689 case Intrinsic::riscv_vsuxseg4:
1690 case Intrinsic::riscv_vsuxseg5:
1691 case Intrinsic::riscv_vsuxseg6:
1692 case Intrinsic::riscv_vsuxseg7:
1693 case Intrinsic::riscv_vsuxseg8:
1694 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1695 /*IsStore*/ true,
1696 /*IsUnitStrided*/ false);
1697 case Intrinsic::riscv_vssseg2_mask:
1698 case Intrinsic::riscv_vssseg3_mask:
1699 case Intrinsic::riscv_vssseg4_mask:
1700 case Intrinsic::riscv_vssseg5_mask:
1701 case Intrinsic::riscv_vssseg6_mask:
1702 case Intrinsic::riscv_vssseg7_mask:
1703 case Intrinsic::riscv_vssseg8_mask:
1704 case Intrinsic::riscv_vsoxseg2_mask:
1705 case Intrinsic::riscv_vsoxseg3_mask:
1706 case Intrinsic::riscv_vsoxseg4_mask:
1707 case Intrinsic::riscv_vsoxseg5_mask:
1708 case Intrinsic::riscv_vsoxseg6_mask:
1709 case Intrinsic::riscv_vsoxseg7_mask:
1710 case Intrinsic::riscv_vsoxseg8_mask:
1711 case Intrinsic::riscv_vsuxseg2_mask:
1712 case Intrinsic::riscv_vsuxseg3_mask:
1713 case Intrinsic::riscv_vsuxseg4_mask:
1714 case Intrinsic::riscv_vsuxseg5_mask:
1715 case Intrinsic::riscv_vsuxseg6_mask:
1716 case Intrinsic::riscv_vsuxseg7_mask:
1717 case Intrinsic::riscv_vsuxseg8_mask:
1718 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1719 /*IsStore*/ true,
1720 /*IsUnitStrided*/ false);
1724 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1725 const AddrMode &AM, Type *Ty,
1726 unsigned AS,
1727 Instruction *I) const {
1728 // No global is ever allowed as a base.
1729 if (AM.BaseGV)
1730 return false;
1732 // RVV instructions only support register addressing.
1733 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1734 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1736 // Require a 12-bit signed offset.
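// Illustrative examples: a base register with offset 2047 ("lw a0, 2047(a1)")
// is legal, while an offset of 2048 falls outside the signed 12-bit range.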
1737 if (!isInt<12>(AM.BaseOffs))
1738 return false;
1740 switch (AM.Scale) {
1741 case 0: // "r+i" or just "i", depending on HasBaseReg.
1742 break;
1743 case 1:
1744 if (!AM.HasBaseReg) // allow "r+i".
1745 break;
1746 return false; // disallow "r+r" or "r+r+i".
1747 default:
1748 return false;
1751 return true;
1754 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1755 return isInt<12>(Imm);
1758 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1759 return isInt<12>(Imm);
1762 // On RV32, 64-bit integers are split into their high and low parts and held
1763 // in two different registers, so the trunc is free since the low register can
1764 // just be used.
1765 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1766 // isTruncateFree?
1767 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1768 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1769 return false;
1770 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1771 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1772 return (SrcBits == 64 && DestBits == 32);
1775 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1776 // We consider i64->i32 free on RV64 since we have good selection of W
1777 // instructions that make promoting operations back to i64 free in many cases.
1778 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1779 !DstVT.isInteger())
1780 return false;
1781 unsigned SrcBits = SrcVT.getSizeInBits();
1782 unsigned DestBits = DstVT.getSizeInBits();
1783 return (SrcBits == 64 && DestBits == 32);
1786 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1787 // Zexts are free if they can be combined with a load.
1788 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1789 // poorly with type legalization of compares preferring sext.
1790 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1791 EVT MemVT = LD->getMemoryVT();
1792 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1793 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1794 LD->getExtensionType() == ISD::ZEXTLOAD))
1795 return true;
1798 return TargetLowering::isZExtFree(Val, VT2);
1801 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1802 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1805 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1806 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1809 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1810 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1813 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1814 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1815 Subtarget.hasVendorXCVbitmanip();
1818 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1819 const Instruction &AndI) const {
1820 // We expect to be able to match a bit extraction instruction if the Zbs
1821 // extension is supported and the mask is a power of two. However, we
1822 // conservatively return false if the mask would fit in an ANDI instruction,
1823 // on the basis that it's possible the sinking+duplication of the AND in
1824 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1825 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1826 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1827 return false;
1828 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1829 if (!Mask)
1830 return false;
1831 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1834 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1835 EVT VT = Y.getValueType();
1837 // FIXME: Support vectors once we have tests.
1838 if (VT.isVector())
1839 return false;
1841 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1842 !isa<ConstantSDNode>(Y);
1845 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1846 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1847 if (Subtarget.hasStdExtZbs())
1848 return X.getValueType().isScalarInteger();
1849 auto *C = dyn_cast<ConstantSDNode>(Y);
1850 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1851 if (Subtarget.hasVendorXTHeadBs())
1852 return C != nullptr;
1853 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
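// For example (illustrative): testing bit 10 needs ANDI with immediate 1024,
// which still fits in the signed 12-bit immediate; bit 11 would need 2048,
// which does not.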
1854 return C && C->getAPIntValue().ule(10);
1857 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1858 EVT VT) const {
1859 // Only enable for rvv.
1860 if (!VT.isVector() || !Subtarget.hasVInstructions())
1861 return false;
1863 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1864 return false;
1866 return true;
1869 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1870 Type *Ty) const {
1871 assert(Ty->isIntegerTy());
1873 unsigned BitSize = Ty->getIntegerBitWidth();
1874 if (BitSize > Subtarget.getXLen())
1875 return false;
1877 // Fast path, assume 32-bit immediates are cheap.
1878 int64_t Val = Imm.getSExtValue();
1879 if (isInt<32>(Val))
1880 return true;
1882 // A constant pool entry may be more aligned than the load we're trying to
1883 // replace. If we don't support unaligned scalar mem, prefer the constant
1884 // pool.
1885 // TODO: Can the caller pass down the alignment?
1886 if (!Subtarget.hasFastUnalignedAccess())
1887 return true;
1889 // Prefer to keep the load if it would require many instructions.
1890 // This uses the same threshold we use for constant pools but doesn't
1891 // check useConstantPoolForLargeInts.
1892 // TODO: Should we keep the load only when we're definitely going to emit a
1893 // constant pool?
1895 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
1896 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1899 bool RISCVTargetLowering::
1900 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1901 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1902 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1903 SelectionDAG &DAG) const {
1904 // One interesting pattern that we'd want to form is 'bit extract':
1905 // ((1 >> Y) & 1) ==/!= 0
1906 // But we also need to be careful not to try to reverse that fold.
1908 // Is this '((1 >> Y) & 1)'?
1909 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1910 return false; // Keep the 'bit extract' pattern.
1912 // Will this be '((1 >> Y) & 1)' after the transform?
1913 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1914 return true; // Do form the 'bit extract' pattern.
1916 // If 'X' is a constant, and we transform, then we will immediately
1917 // try to undo the fold, thus causing endless combine loop.
1918 // So only do the transform if X is not a constant. This matches the default
1919 // implementation of this function.
1920 return !XC;
1923 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1924 switch (Opcode) {
1925 case Instruction::Add:
1926 case Instruction::Sub:
1927 case Instruction::Mul:
1928 case Instruction::And:
1929 case Instruction::Or:
1930 case Instruction::Xor:
1931 case Instruction::FAdd:
1932 case Instruction::FSub:
1933 case Instruction::FMul:
1934 case Instruction::FDiv:
1935 case Instruction::ICmp:
1936 case Instruction::FCmp:
1937 return true;
1938 case Instruction::Shl:
1939 case Instruction::LShr:
1940 case Instruction::AShr:
1941 case Instruction::UDiv:
1942 case Instruction::SDiv:
1943 case Instruction::URem:
1944 case Instruction::SRem:
1945 return Operand == 1;
1946 default:
1947 return false;
1952 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1953 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1954 return false;
1956 if (canSplatOperand(I->getOpcode(), Operand))
1957 return true;
1959 auto *II = dyn_cast<IntrinsicInst>(I);
1960 if (!II)
1961 return false;
1963 switch (II->getIntrinsicID()) {
1964 case Intrinsic::fma:
1965 case Intrinsic::vp_fma:
1966 return Operand == 0 || Operand == 1;
1967 case Intrinsic::vp_shl:
1968 case Intrinsic::vp_lshr:
1969 case Intrinsic::vp_ashr:
1970 case Intrinsic::vp_udiv:
1971 case Intrinsic::vp_sdiv:
1972 case Intrinsic::vp_urem:
1973 case Intrinsic::vp_srem:
1974 return Operand == 1;
1975 // These intrinsics are commutative.
1976 case Intrinsic::vp_add:
1977 case Intrinsic::vp_mul:
1978 case Intrinsic::vp_and:
1979 case Intrinsic::vp_or:
1980 case Intrinsic::vp_xor:
1981 case Intrinsic::vp_fadd:
1982 case Intrinsic::vp_fmul:
1983 case Intrinsic::vp_icmp:
1984 case Intrinsic::vp_fcmp:
1985 // These intrinsics have 'vr' versions.
1986 case Intrinsic::vp_sub:
1987 case Intrinsic::vp_fsub:
1988 case Intrinsic::vp_fdiv:
1989 return Operand == 0 || Operand == 1;
1990 default:
1991 return false;
1995 /// Check if sinking \p I's operands to I's basic block is profitable, because
1996 /// the operands can be folded into a target instruction, e.g.
1997 /// splats of scalars can fold into vector instructions.
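/// For example (illustrative IR), sinking the splat of %s next to the add lets
/// it later fold into a .vx form:
///   %ins   = insertelement <vscale x 4 x i32> poison, i32 %s, i32 0
///   %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison,
///                          <vscale x 4 x i32> zeroinitializer
///   %r     = add <vscale x 4 x i32> %v, %splat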
1998 bool RISCVTargetLowering::shouldSinkOperands(
1999 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2000 using namespace llvm::PatternMatch;
2002 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2003 return false;
2005 for (auto OpIdx : enumerate(I->operands())) {
2006 if (!canSplatOperand(I, OpIdx.index()))
2007 continue;
2009 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2010 // Make sure we are not already sinking this operand
2011 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2012 continue;
2014 // We are looking for a splat that can be sunk.
2015 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
2016 m_Undef(), m_ZeroMask())))
2017 continue;
2019 // Don't sink i1 splats.
2020 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2021 continue;
2023 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2024 // and vector registers
2025 for (Use &U : Op->uses()) {
2026 Instruction *Insn = cast<Instruction>(U.getUser());
2027 if (!canSplatOperand(Insn, U.getOperandNo()))
2028 return false;
2031 Ops.push_back(&Op->getOperandUse(0));
2032 Ops.push_back(&OpIdx.value());
2034 return true;
2037 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2038 unsigned Opc = VecOp.getOpcode();
2040 // Assume target opcodes can't be scalarized.
2041 // TODO - do we have any exceptions?
2042 if (Opc >= ISD::BUILTIN_OP_END)
2043 return false;
2045 // If the vector op is not supported, try to convert to scalar.
2046 EVT VecVT = VecOp.getValueType();
2047 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2048 return true;
2050 // If the vector op is supported, but the scalar op is not, the transform may
2051 // not be worthwhile.
2052 // Permit a vector binary operation to be converted to a scalar binary
2053 // operation which is custom lowered with an illegal type.
2054 EVT ScalarVT = VecVT.getScalarType();
2055 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2056 isOperationCustom(Opc, ScalarVT);
2059 bool RISCVTargetLowering::isOffsetFoldingLegal(
2060 const GlobalAddressSDNode *GA) const {
2061 // In order to maximise the opportunity for common subexpression elimination,
2062 // keep a separate ADD node for the global address offset instead of folding
2063 // it in the global address node. Later peephole optimisations may choose to
2064 // fold it back in when profitable.
2065 return false;
2068 // Return one of the following:
2069 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2070 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2071 // positive counterpart, which will be materialized from the first returned
2072 // element. The second returned element indicates that an FNEG should
2073 // follow.
2074 // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
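// For example (illustrative): -0.5 has no direct FLI encoding, but +0.5 does,
// so we return +0.5's index together with 'true' to request a trailing FNEG.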
2075 std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2076 EVT VT) const {
2077 if (!Subtarget.hasStdExtZfa())
2078 return std::make_pair(-1, false);
2080 bool IsSupportedVT = false;
2081 if (VT == MVT::f16) {
2082 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2083 } else if (VT == MVT::f32) {
2084 IsSupportedVT = true;
2085 } else if (VT == MVT::f64) {
2086 assert(Subtarget.hasStdExtD() && "Expect D extension");
2087 IsSupportedVT = true;
2090 if (!IsSupportedVT)
2091 return std::make_pair(-1, false);
2093 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2094 if (Index < 0 && Imm.isNegative())
2095 // Try the combination of its positive counterpart + FNEG.
2096 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2097 else
2098 return std::make_pair(Index, false);
2101 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2102 bool ForCodeSize) const {
2103 bool IsLegalVT = false;
2104 if (VT == MVT::f16)
2105 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2106 else if (VT == MVT::f32)
2107 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2108 else if (VT == MVT::f64)
2109 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2110 else if (VT == MVT::bf16)
2111 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2113 if (!IsLegalVT)
2114 return false;
2116 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2117 return true;
2119 // Cannot create a 64 bit floating-point immediate value for rv32.
2120 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2121 // td can handle +0.0 or -0.0 already.
2122 // -0.0 can be created by fmv + fneg.
2123 return Imm.isZero();
2126 // Special case: fmv + fneg
2127 if (Imm.isNegZero())
2128 return true;
2130 // Building an integer and then converting requires a fmv at the end of
2131 // the integer sequence.
2132 const int Cost =
2133 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2134 Subtarget);
2135 return Cost <= FPImmCost;
2138 // TODO: This is very conservative.
2139 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2140 unsigned Index) const {
2141 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2142 return false;
2144 // Only support extracting a fixed from a fixed vector for now.
2145 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2146 return false;
2148 unsigned ResElts = ResVT.getVectorNumElements();
2149 unsigned SrcElts = SrcVT.getVectorNumElements();
2151 // Conservatively only handle extracting half of a vector.
2152 // TODO: Relax this.
2153 if ((ResElts * 2) != SrcElts)
2154 return false;
2156 // The smallest type we can slide is i8.
2157 // TODO: We can extract index 0 from a mask vector without a slide.
2158 if (ResVT.getVectorElementType() == MVT::i1)
2159 return false;
2161 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2162 // cheap.
2163 if (Index >= 32)
2164 return false;
2166 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2167 // the upper half of a vector until we have more test coverage.
2168 return Index == 0 || Index == ResElts;
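// For example (illustrative): extracting v4i32 from v8i32 is reported cheap for
// Index == 0 (the low half, no slide needed) and Index == 4 (the high half, a
// single vslidedown.vi).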
2171 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2172 CallingConv::ID CC,
2173 EVT VT) const {
2174 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2175 // We might still end up using a GPR but that will be decided based on ABI.
2176 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2177 !Subtarget.hasStdExtZfhminOrZhinxmin())
2178 return MVT::f32;
2180 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2182 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2183 return MVT::i64;
2185 return PartVT;
2188 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2189 CallingConv::ID CC,
2190 EVT VT) const {
2191 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2192 // We might still end up using a GPR but that will be decided based on ABI.
2193 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2194 !Subtarget.hasStdExtZfhminOrZhinxmin())
2195 return 1;
2197 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2200 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2201 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2202 unsigned &NumIntermediates, MVT &RegisterVT) const {
2203 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2204 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2206 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2207 IntermediateVT = MVT::i64;
2209 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2210 RegisterVT = MVT::i64;
2212 return NumRegs;
2215 // Changes the condition code and swaps operands if necessary, so the SetCC
2216 // operation matches one of the comparisons supported directly by branches
2217 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2218 // with 1/-1.
2219 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2220 ISD::CondCode &CC, SelectionDAG &DAG) {
2221 // If this is a single bit test that can't be handled by ANDI, shift the
2222 // bit to be tested to the MSB and perform a signed compare with 0.
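// For example (illustrative, RV64): (X & 0x1000) == 0 cannot use ANDI since
// 0x1000 does not fit in a signed 12-bit immediate, so it becomes
// (X << 51) >= 0, moving bit 12 into the sign bit.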
2223 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2224 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2225 isa<ConstantSDNode>(LHS.getOperand(1))) {
2226 uint64_t Mask = LHS.getConstantOperandVal(1);
2227 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2228 unsigned ShAmt = 0;
2229 if (isPowerOf2_64(Mask)) {
2230 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2231 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2232 } else {
2233 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2236 LHS = LHS.getOperand(0);
2237 if (ShAmt != 0)
2238 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2239 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2240 return;
2244 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2245 int64_t C = RHSC->getSExtValue();
2246 switch (CC) {
2247 default: break;
2248 case ISD::SETGT:
2249 // Convert X > -1 to X >= 0.
2250 if (C == -1) {
2251 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2252 CC = ISD::SETGE;
2253 return;
2255 break;
2256 case ISD::SETLT:
2257 // Convert X < 1 to 0 >= X.
2258 if (C == 1) {
2259 RHS = LHS;
2260 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2261 CC = ISD::SETGE;
2262 return;
2264 break;
2268 switch (CC) {
2269 default:
2270 break;
2271 case ISD::SETGT:
2272 case ISD::SETLE:
2273 case ISD::SETUGT:
2274 case ISD::SETULE:
2275 CC = ISD::getSetCCSwappedOperands(CC);
2276 std::swap(LHS, RHS);
2277 break;
2281 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2282 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2283 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2284 if (VT.getVectorElementType() == MVT::i1)
2285 KnownSize *= 8;
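// Illustrative examples: nxv4i32 has a known minimum size of 128 bits and maps
// to LMUL_2; nxv8i1 is scaled from 8 to 64 bits above and maps to LMUL_1.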
2287 switch (KnownSize) {
2288 default:
2289 llvm_unreachable("Invalid LMUL.");
2290 case 8:
2291 return RISCVII::VLMUL::LMUL_F8;
2292 case 16:
2293 return RISCVII::VLMUL::LMUL_F4;
2294 case 32:
2295 return RISCVII::VLMUL::LMUL_F2;
2296 case 64:
2297 return RISCVII::VLMUL::LMUL_1;
2298 case 128:
2299 return RISCVII::VLMUL::LMUL_2;
2300 case 256:
2301 return RISCVII::VLMUL::LMUL_4;
2302 case 512:
2303 return RISCVII::VLMUL::LMUL_8;
2307 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2308 switch (LMul) {
2309 default:
2310 llvm_unreachable("Invalid LMUL.");
2311 case RISCVII::VLMUL::LMUL_F8:
2312 case RISCVII::VLMUL::LMUL_F4:
2313 case RISCVII::VLMUL::LMUL_F2:
2314 case RISCVII::VLMUL::LMUL_1:
2315 return RISCV::VRRegClassID;
2316 case RISCVII::VLMUL::LMUL_2:
2317 return RISCV::VRM2RegClassID;
2318 case RISCVII::VLMUL::LMUL_4:
2319 return RISCV::VRM4RegClassID;
2320 case RISCVII::VLMUL::LMUL_8:
2321 return RISCV::VRM8RegClassID;
2325 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2326 RISCVII::VLMUL LMUL = getLMUL(VT);
2327 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2328 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2329 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2330 LMUL == RISCVII::VLMUL::LMUL_1) {
2331 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2332 "Unexpected subreg numbering");
2333 return RISCV::sub_vrm1_0 + Index;
2335 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2336 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2337 "Unexpected subreg numbering");
2338 return RISCV::sub_vrm2_0 + Index;
2340 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2341 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2342 "Unexpected subreg numbering");
2343 return RISCV::sub_vrm4_0 + Index;
2345 llvm_unreachable("Invalid vector type.");
2348 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2349 if (VT.getVectorElementType() == MVT::i1)
2350 return RISCV::VRRegClassID;
2351 return getRegClassIDForLMUL(getLMUL(VT));
2354 // Attempt to decompose a subvector insert/extract between VecVT and
2355 // SubVecVT via subregister indices. Returns the subregister index that
2356 // can perform the subvector insert/extract with the given element index, as
2357 // well as the index corresponding to any leftover subvectors that must be
2358 // further inserted/extracted within the register class for SubVecVT.
2359 std::pair<unsigned, unsigned>
2360 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2361 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2362 const RISCVRegisterInfo *TRI) {
2363 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2364 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2365 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2366 "Register classes not ordered");
2367 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2368 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2369 // Try to compose a subregister index that takes us from the incoming
2370 // LMUL>1 register class down to the outgoing one. At each step we half
2371 // the LMUL:
2372 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2373 // Note that this is not guaranteed to find a subregister index, such as
2374 // when we are extracting from one VR type to another.
2375 unsigned SubRegIdx = RISCV::NoSubRegister;
2376 for (const unsigned RCID :
2377 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2378 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2379 VecVT = VecVT.getHalfNumVectorElementsVT();
2380 bool IsHi =
2381 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2382 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2383 getSubregIndexByMVT(VecVT, IsHi));
2384 if (IsHi)
2385 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2387 return {SubRegIdx, InsertExtractIdx};
2390 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2391 // stores for those types.
2392 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2393 return !Subtarget.useRVVForFixedLengthVectors() ||
2394 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2397 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2398 if (!ScalarTy.isSimple())
2399 return false;
2400 switch (ScalarTy.getSimpleVT().SimpleTy) {
2401 case MVT::iPTR:
2402 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2403 case MVT::i8:
2404 case MVT::i16:
2405 case MVT::i32:
2406 return true;
2407 case MVT::i64:
2408 return Subtarget.hasVInstructionsI64();
2409 case MVT::f16:
2410 return Subtarget.hasVInstructionsF16();
2411 case MVT::f32:
2412 return Subtarget.hasVInstructionsF32();
2413 case MVT::f64:
2414 return Subtarget.hasVInstructionsF64();
2415 default:
2416 return false;
2421 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2422 return NumRepeatedDivisors;
2425 static SDValue getVLOperand(SDValue Op) {
2426 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2427 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2428 "Unexpected opcode");
2429 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2430 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2431 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2432 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2433 if (!II)
2434 return SDValue();
2435 return Op.getOperand(II->VLOperand + 1 + HasChain);
2438 static bool useRVVForFixedLengthVectorVT(MVT VT,
2439 const RISCVSubtarget &Subtarget) {
2440 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2441 if (!Subtarget.useRVVForFixedLengthVectors())
2442 return false;
2444 // We only support a set of vector types with a consistent maximum fixed size
2445 // across all supported vector element types to avoid legalization issues.
2446 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2447 // fixed-length vector type we support is 1024 bytes.
2448 if (VT.getFixedSizeInBits() > 1024 * 8)
2449 return false;
2451 unsigned MinVLen = Subtarget.getRealMinVLen();
2453 MVT EltVT = VT.getVectorElementType();
2455 // Don't use RVV for vectors we cannot scalarize if required.
2456 switch (EltVT.SimpleTy) {
2457 // i1 is supported but has different rules.
2458 default:
2459 return false;
2460 case MVT::i1:
2461 // Masks can only use a single register.
2462 if (VT.getVectorNumElements() > MinVLen)
2463 return false;
2464 MinVLen /= 8;
2465 break;
2466 case MVT::i8:
2467 case MVT::i16:
2468 case MVT::i32:
2469 break;
2470 case MVT::i64:
2471 if (!Subtarget.hasVInstructionsI64())
2472 return false;
2473 break;
2474 case MVT::f16:
2475 if (!Subtarget.hasVInstructionsF16Minimal())
2476 return false;
2477 break;
2478 case MVT::f32:
2479 if (!Subtarget.hasVInstructionsF32())
2480 return false;
2481 break;
2482 case MVT::f64:
2483 if (!Subtarget.hasVInstructionsF64())
2484 return false;
2485 break;
2488 // Reject elements larger than ELEN.
2489 if (EltVT.getSizeInBits() > Subtarget.getELen())
2490 return false;
2492 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2493 // Don't use RVV for types that don't fit.
2494 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2495 return false;
2497 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2498 // the base fixed length RVV support in place.
2499 if (!VT.isPow2VectorType())
2500 return false;
2502 return true;
2505 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2506 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2509 // Return the largest legal scalable vector type that matches VT's element type.
2510 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2511 const RISCVSubtarget &Subtarget) {
2512 // This may be called before legal types are setup.
2513 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2514 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2515 "Expected legal fixed length vector!");
2517 unsigned MinVLen = Subtarget.getRealMinVLen();
2518 unsigned MaxELen = Subtarget.getELen();
2520 MVT EltVT = VT.getVectorElementType();
2521 switch (EltVT.SimpleTy) {
2522 default:
2523 llvm_unreachable("unexpected element type for RVV container");
2524 case MVT::i1:
2525 case MVT::i8:
2526 case MVT::i16:
2527 case MVT::i32:
2528 case MVT::i64:
2529 case MVT::f16:
2530 case MVT::f32:
2531 case MVT::f64: {
2532 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2533 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2534 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2535 unsigned NumElts =
2536 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2537 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2538 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2539 return MVT::getScalableVectorVT(EltVT, NumElts);
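// Illustrative example (assuming RVVBitsPerBlock == 64): with MinVLen == 128,
// v4i32 gives NumElts = (4 * 64) / 128 = 2, i.e. an nxv2i32 (LMUL=1) container,
// while v2i32 gives NumElts = 1, i.e. a fractional-LMUL nxv1i32 container.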
2544 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2545 const RISCVSubtarget &Subtarget) {
2546 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2547 Subtarget);
2550 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2551 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2554 // Grow V to consume an entire RVV register.
2555 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2556 const RISCVSubtarget &Subtarget) {
2557 assert(VT.isScalableVector() &&
2558 "Expected to convert into a scalable vector!");
2559 assert(V.getValueType().isFixedLengthVector() &&
2560 "Expected a fixed length vector operand!");
2561 SDLoc DL(V);
2562 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2563 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2566 // Shrink V so it's just big enough to maintain a VT's worth of data.
2567 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2568 const RISCVSubtarget &Subtarget) {
2569 assert(VT.isFixedLengthVector() &&
2570 "Expected to convert into a fixed length vector!");
2571 assert(V.getValueType().isScalableVector() &&
2572 "Expected a scalable vector operand!");
2573 SDLoc DL(V);
2574 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2575 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2578 /// Return the mask type suitable for masking the provided vector type.
2579 /// This is simply an i1 element type vector of the same (possibly scalable)
2580 /// length.
2581 static MVT getMaskTypeFor(MVT VecVT) {
2582 assert(VecVT.isVector());
2583 ElementCount EC = VecVT.getVectorElementCount();
2584 return MVT::getVectorVT(MVT::i1, EC);
2587 /// Creates an all ones mask suitable for masking a vector of type VecTy with
2588 /// vector length VL.
2589 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2590 SelectionDAG &DAG) {
2591 MVT MaskVT = getMaskTypeFor(VecVT);
2592 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2595 static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2596 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2597 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2598 // canonicalize the representation. InsertVSETVLI will pick the immediate
2599 // encoding later if profitable.
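// Illustrative example: if VLEN is known to be exactly 128, a v4i32 value lives
// in an nxv2i32 container whose VLMAX is 4, so a request for VL == 4 is VLMAX
// and the VL operand becomes X0 (which the vsetvli insertion treats as VLMAX).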
2600 const auto [MinVLMAX, MaxVLMAX] =
2601 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2602 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2603 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2605 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2608 static std::pair<SDValue, SDValue>
2609 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2610 const RISCVSubtarget &Subtarget) {
2611 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2612 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2613 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2614 return {Mask, VL};
2617 static std::pair<SDValue, SDValue>
2618 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2619 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2620 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2621 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2622 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2623 return {Mask, VL};
2626 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2627 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2628 // the vector type that the fixed-length vector is contained in. Otherwise if
2629 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2630 static std::pair<SDValue, SDValue>
2631 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2632 const RISCVSubtarget &Subtarget) {
2633 if (VecVT.isFixedLengthVector())
2634 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2635 Subtarget);
2636 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2637 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2640 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2641 SelectionDAG &DAG) const {
2642 assert(VecVT.isScalableVector() && "Expected scalable vector");
2643 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2644 VecVT.getVectorElementCount());
2647 std::pair<unsigned, unsigned>
2648 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2649 const RISCVSubtarget &Subtarget) {
2650 assert(VecVT.isScalableVector() && "Expected scalable vector");
2652 unsigned EltSize = VecVT.getScalarSizeInBits();
2653 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2655 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2656 unsigned MaxVLMAX =
2657 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2659 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2660 unsigned MinVLMAX =
2661 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2663 return std::make_pair(MinVLMAX, MaxVLMAX);
2666 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2667 // of either is (currently) supported. This can get us into an infinite loop
2668 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2669 // as a ..., etc.
2670 // Until either (or both) of these can reliably lower any node, reporting that
2671 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2672 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2673 // which is not desirable.
2674 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2675 EVT VT, unsigned DefinedValues) const {
2676 return false;
2679 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2680 // TODO: Here we assume the reciprocal throughput of an LMUL_1 operation is 1;
2681 // it is implementation-defined.
2682 if (!VT.isVector())
2683 return InstructionCost::getInvalid();
2684 unsigned DLenFactor = Subtarget.getDLenFactor();
2685 unsigned Cost;
2686 if (VT.isScalableVector()) {
2687 unsigned LMul;
2688 bool Fractional;
2689 std::tie(LMul, Fractional) =
2690 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2691 if (Fractional)
2692 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2693 else
2694 Cost = (LMul * DLenFactor);
2695 } else {
2696 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2698 return Cost;
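// Illustrative costs with DLEN == VLEN/2 (DLenFactor == 2): nxv8i32 (LMUL_4)
// costs 4 * 2 = 8, while the fractional nxv1i32 (LMUL_F2) costs 2 / 2 = 1.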
2702 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2703 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2704 /// the operands (index and possibly mask) are handled separately.
2705 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2706 return getLMULCost(VT) * getLMULCost(VT);
2709 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2710 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2711 /// or may track the vrgather.vv cost. It is implementation-dependent.
2712 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2713 return getLMULCost(VT);
2716 /// Return the cost of a vslidedown.vx or vslideup.vx instruction
2717 /// for the type VT. (This does not cover the vslide1up or vslide1down
2718 /// variants.) Slides may be linear in the number of vregs implied by LMUL,
2719 /// or may track the vrgather.vv cost. It is implementation-dependent.
2720 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2721 return getLMULCost(VT);
2724 /// Return the cost of a vslidedown.vi or vslideup.vi instruction
2725 /// for the type VT. (This does not cover the vslide1up or vslide1down
2726 /// variants.) Slides may be linear in the number of vregs implied by LMUL,
2727 /// or may track the vrgather.vv cost. It is implementation-dependent.
2728 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2729 return getLMULCost(VT);
2732 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2733 const RISCVSubtarget &Subtarget) {
2734 // RISC-V FP-to-int conversions saturate to the destination register size, but
2735 // don't produce 0 for nan. We can use a conversion instruction and fix the
2736 // nan case with a compare and a select.
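// Roughly (illustrative sketch for a scalar f32 -> i32 fp_to_sint_sat):
//   %i = FCVT_X %x, rtz                      ; saturating convert
//   %r = select (setcc %x, %x, setuo), 0, %i ; NaN input yields 0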
2737 SDValue Src = Op.getOperand(0);
2739 MVT DstVT = Op.getSimpleValueType();
2740 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2742 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2744 if (!DstVT.isVector()) {
2745 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2746 // the result.
2747 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2748 Src.getValueType() == MVT::bf16) {
2749 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2752 unsigned Opc;
2753 if (SatVT == DstVT)
2754 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2755 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2756 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2757 else
2758 return SDValue();
2759 // FIXME: Support other SatVTs by clamping before or after the conversion.
2761 SDLoc DL(Op);
2762 SDValue FpToInt = DAG.getNode(
2763 Opc, DL, DstVT, Src,
2764 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2766 if (Opc == RISCVISD::FCVT_WU_RV64)
2767 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2769 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2770 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2771 ISD::CondCode::SETUO);
2774 // Vectors.
2776 MVT DstEltVT = DstVT.getVectorElementType();
2777 MVT SrcVT = Src.getSimpleValueType();
2778 MVT SrcEltVT = SrcVT.getVectorElementType();
2779 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2780 unsigned DstEltSize = DstEltVT.getSizeInBits();
2782 // Only handle saturating to the destination type.
2783 if (SatVT != DstEltVT)
2784 return SDValue();
2786 // FIXME: Don't support narrowing by more than 1 step for now.
2787 if (SrcEltSize > (2 * DstEltSize))
2788 return SDValue();
2790 MVT DstContainerVT = DstVT;
2791 MVT SrcContainerVT = SrcVT;
2792 if (DstVT.isFixedLengthVector()) {
2793 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2794 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2795 assert(DstContainerVT.getVectorElementCount() ==
2796 SrcContainerVT.getVectorElementCount() &&
2797 "Expected same element count");
2798 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2801 SDLoc DL(Op);
2803 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2805 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2806 {Src, Src, DAG.getCondCode(ISD::SETNE),
2807 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2809 // Need to widen by more than 1 step, promote the FP type, then do a widening
2810 // convert.
2811 if (DstEltSize > (2 * SrcEltSize)) {
2812 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2813 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2814 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2817 unsigned RVVOpc =
2818 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2819 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2821 SDValue SplatZero = DAG.getNode(
2822 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2823 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2824 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2825 Res, DAG.getUNDEF(DstContainerVT), VL);
2827 if (DstVT.isFixedLengthVector())
2828 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2830 return Res;
2833 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2834 switch (Opc) {
2835 case ISD::FROUNDEVEN:
2836 case ISD::STRICT_FROUNDEVEN:
2837 case ISD::VP_FROUNDEVEN:
2838 return RISCVFPRndMode::RNE;
2839 case ISD::FTRUNC:
2840 case ISD::STRICT_FTRUNC:
2841 case ISD::VP_FROUNDTOZERO:
2842 return RISCVFPRndMode::RTZ;
2843 case ISD::FFLOOR:
2844 case ISD::STRICT_FFLOOR:
2845 case ISD::VP_FFLOOR:
2846 return RISCVFPRndMode::RDN;
2847 case ISD::FCEIL:
2848 case ISD::STRICT_FCEIL:
2849 case ISD::VP_FCEIL:
2850 return RISCVFPRndMode::RUP;
2851 case ISD::FROUND:
2852 case ISD::STRICT_FROUND:
2853 case ISD::VP_FROUND:
2854 return RISCVFPRndMode::RMM;
2855 case ISD::FRINT:
2856 return RISCVFPRndMode::DYN;
2859 return RISCVFPRndMode::Invalid;
2862 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2863 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2864 // the integer domain and back, taking care to avoid converting values that are
2865 // NaN or already correct.
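// Roughly (illustrative sketch of the lowering below):
//   %abs = FABS_VL %src
//   %m   = SETCC_VL %abs, 2^(precision - 1), olt  ; skip NaN/large values
//   %int = VFCVT[_RM]_X_F_VL %src, %m
//   %fp  = SINT_TO_FP_VL %int, %m
//   %res = FCOPYSIGN_VL %fp, %src                 ; preserve -0.0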
2866 static SDValue
2867 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2868 const RISCVSubtarget &Subtarget) {
2869 MVT VT = Op.getSimpleValueType();
2870 assert(VT.isVector() && "Unexpected type");
2872 SDLoc DL(Op);
2874 SDValue Src = Op.getOperand(0);
2876 MVT ContainerVT = VT;
2877 if (VT.isFixedLengthVector()) {
2878 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2879 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2882 SDValue Mask, VL;
2883 if (Op->isVPOpcode()) {
2884 Mask = Op.getOperand(1);
2885 if (VT.isFixedLengthVector())
2886 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2887 Subtarget);
2888 VL = Op.getOperand(2);
2889 } else {
2890 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2893 // Freeze the source since we are increasing the number of uses.
2894 Src = DAG.getFreeze(Src);
2896 // We do the conversion on the absolute value and fix the sign at the end.
2897 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2899 // Determine the largest integer that can be represented exactly. This and
2900 // values larger than it don't have any fractional bits so don't need to
2901 // be converted.
2902 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2903 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2904 APFloat MaxVal = APFloat(FltSem);
2905 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2906 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2907 SDValue MaxValNode =
2908 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2909 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2910 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2912 // If abs(Src) was larger than MaxVal or nan, keep it.
2913 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2914 Mask =
2915 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2916 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2917 Mask, Mask, VL});
2919 // Truncate to integer and convert back to FP.
2920 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2921 MVT XLenVT = Subtarget.getXLenVT();
2922 SDValue Truncated;
2924 switch (Op.getOpcode()) {
2925 default:
2926 llvm_unreachable("Unexpected opcode");
2927 case ISD::FCEIL:
2928 case ISD::VP_FCEIL:
2929 case ISD::FFLOOR:
2930 case ISD::VP_FFLOOR:
2931 case ISD::FROUND:
2932 case ISD::FROUNDEVEN:
2933 case ISD::VP_FROUND:
2934 case ISD::VP_FROUNDEVEN:
2935 case ISD::VP_FROUNDTOZERO: {
2936 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2937 assert(FRM != RISCVFPRndMode::Invalid);
2938 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2939 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2940 break;
2942 case ISD::FTRUNC:
2943 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2944 Mask, VL);
2945 break;
2946 case ISD::FRINT:
2947 case ISD::VP_FRINT:
2948 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2949 break;
2950 case ISD::FNEARBYINT:
2951 case ISD::VP_FNEARBYINT:
2952 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2953 Mask, VL);
2954 break;
2957 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2958 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2959 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2960 Mask, VL);
2962 // Restore the original sign so that -0.0 is preserved.
2963 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2964 Src, Src, Mask, VL);
2966 if (!VT.isFixedLengthVector())
2967 return Truncated;
2969 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2972 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2973 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
2974 // qNaN and converting the new source to integer and back to FP.
2975 static SDValue
2976 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2977 const RISCVSubtarget &Subtarget) {
2978 SDLoc DL(Op);
2979 MVT VT = Op.getSimpleValueType();
2980 SDValue Chain = Op.getOperand(0);
2981 SDValue Src = Op.getOperand(1);
2983 MVT ContainerVT = VT;
2984 if (VT.isFixedLengthVector()) {
2985 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2986 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2989 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2991 // Freeze the source since we are increasing the number of uses.
2992 Src = DAG.getFreeze(Src);
2994 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
2995 MVT MaskVT = Mask.getSimpleValueType();
2996 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2997 DAG.getVTList(MaskVT, MVT::Other),
2998 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2999 DAG.getUNDEF(MaskVT), Mask, VL});
3000 Chain = Unorder.getValue(1);
3001 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3002 DAG.getVTList(ContainerVT, MVT::Other),
3003 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3004 Chain = Src.getValue(1);
3006 // We do the conversion on the absolute value and fix the sign at the end.
3007 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3009 // Determine the largest integer that can be represented exactly. This and
3010 // values larger than it don't have any fractional bits so don't need to
3011 // be converted.
3012 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3013 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3014 APFloat MaxVal = APFloat(FltSem);
3015 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3016 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3017 SDValue MaxValNode =
3018 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3019 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3020 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3022 // If abs(Src) was larger than MaxVal or nan, keep it.
3023 Mask = DAG.getNode(
3024 RISCVISD::SETCC_VL, DL, MaskVT,
3025 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3027 // Truncate to integer and convert back to FP.
3028 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3029 MVT XLenVT = Subtarget.getXLenVT();
3030 SDValue Truncated;
3032 switch (Op.getOpcode()) {
3033 default:
3034 llvm_unreachable("Unexpected opcode");
3035 case ISD::STRICT_FCEIL:
3036 case ISD::STRICT_FFLOOR:
3037 case ISD::STRICT_FROUND:
3038 case ISD::STRICT_FROUNDEVEN: {
3039 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3040 assert(FRM != RISCVFPRndMode::Invalid);
3041 Truncated = DAG.getNode(
3042 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3043 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3044 break;
3046 case ISD::STRICT_FTRUNC:
3047 Truncated =
3048 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3049 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3050 break;
3051 case ISD::STRICT_FNEARBYINT:
3052 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3053 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3054 Mask, VL);
3055 break;
3057 Chain = Truncated.getValue(1);
3059 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3060 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3061 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3062 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3063 Truncated, Mask, VL);
3064 Chain = Truncated.getValue(1);
3067 // Restore the original sign so that -0.0 is preserved.
3068 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3069 Src, Src, Mask, VL);
3071 if (VT.isFixedLengthVector())
3072 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3073 return DAG.getMergeValues({Truncated, Chain}, DL);
3076 static SDValue
3077 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3078 const RISCVSubtarget &Subtarget) {
3079 MVT VT = Op.getSimpleValueType();
3080 if (VT.isVector())
3081 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3083 if (DAG.shouldOptForSize())
3084 return SDValue();
3086 SDLoc DL(Op);
3087 SDValue Src = Op.getOperand(0);
3089 // Create an integer the size of the mantissa with the MSB set. This and all
3090 // values larger than it don't have any fractional bits so don't need to be
3091 // converted.
3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3093 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3094 APFloat MaxVal = APFloat(FltSem);
3095 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3096 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3097 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3099 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3100 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3101 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3104 // Expand vector LRINT and LLRINT by converting to the integer domain.
3105 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3106 const RISCVSubtarget &Subtarget) {
3107 MVT VT = Op.getSimpleValueType();
3108 assert(VT.isVector() && "Unexpected type");
3110 SDLoc DL(Op);
3111 SDValue Src = Op.getOperand(0);
3112 MVT ContainerVT = VT;
3114 if (VT.isFixedLengthVector()) {
3115 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3116 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3119 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3120 SDValue Truncated =
3121 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3123 if (!VT.isFixedLengthVector())
3124 return Truncated;
3126 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3129 static SDValue
3130 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3131 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3132 SDValue Offset, SDValue Mask, SDValue VL,
3133 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3134 if (Merge.isUndef())
3135 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3136 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3137 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3138 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3141 static SDValue
3142 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3143 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3144 SDValue VL,
3145 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3146 if (Merge.isUndef())
3147 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3148 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3149 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3150 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
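// getLMUL1VT returns the scalable type that fills exactly one vector register
// (LMUL=1). With RVVBitsPerBlock = 64, an i32 element type gives nxv2i32 and
// an i64 element type gives nxv1i64.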
3153 static MVT getLMUL1VT(MVT VT) {
3154 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3155 "Unexpected vector MVT");
3156 return MVT::getScalableVectorVT(
3157 VT.getVectorElementType(),
3158 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3161 struct VIDSequence {
3162 int64_t StepNumerator;
3163 unsigned StepDenominator;
3164 int64_t Addend;
3167 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3168 uint32_t BitWidth) {
3169 APSInt ValInt(BitWidth, !APF.isNegative());
3170 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3171 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3172 // the rounding mode changes the output value, then it is not an exact
3173 // integer.
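// Illustrative values: getExactInteger(3.0, 8) yields 3 and, since the result
// is returned zero-extended, getExactInteger(-1.0, 8) yields 0xff, while 2.5,
// NaN, or out-of-range values yield std::nullopt.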
3174 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3175 bool IsExact;
3176 // If it is out of signed integer range, it will return an invalid operation.
3177 // If it is not an exact integer, IsExact is false.
3178 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3179 APFloatBase::opInvalidOp) ||
3180 !IsExact)
3181 return std::nullopt;
3182 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3185 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3186 // to the (non-zero) step S and start value X. This can be then lowered as the
3187 // RVV sequence (VID * S) + X, for example.
3188 // The step S is represented as an integer numerator divided by a positive
3189 // denominator. Note that the implementation currently only identifies
3190 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3191 // cannot detect 2/3, for example.
3192 // Note that this method will also match potentially unappealing index
3193 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3194 // determine whether this is worth generating code for.
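// For example, <i32 1, i32 4, i32 7, i32 10> is matched with StepNumerator = 3,
// StepDenominator = 1 and Addend = 1, while <i32 0, i32 0, i32 1, i32 1> is
// matched with StepNumerator = 1, StepDenominator = 2 and Addend = 0.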
3195 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3196 unsigned EltSizeInBits) {
3197 unsigned NumElts = Op.getNumOperands();
3198 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3199 bool IsInteger = Op.getValueType().isInteger();
3201 std::optional<unsigned> SeqStepDenom;
3202 std::optional<int64_t> SeqStepNum, SeqAddend;
3203 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3204 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3205 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3206 // Assume undef elements match the sequence; we just have to be careful
3207 // when interpolating across them.
3208 if (Op.getOperand(Idx).isUndef())
3209 continue;
3211 uint64_t Val;
3212 if (IsInteger) {
3213 // The BUILD_VECTOR must be all constants.
3214 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
3215 return std::nullopt;
3216 Val = Op.getConstantOperandVal(Idx) &
3217 maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
3218 } else {
3219 // The BUILD_VECTOR must be all constants.
3220 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
3221 return std::nullopt;
3222 if (auto ExactInteger = getExactInteger(
3223 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3224 Op.getScalarValueSizeInBits()))
3225 Val = *ExactInteger;
3226 else
3227 return std::nullopt;
3230 if (PrevElt) {
3231 // Calculate the step since the last non-undef element, and ensure
3232 // it's consistent across the entire sequence.
3233 unsigned IdxDiff = Idx - PrevElt->second;
3234 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
3236 // A zero value difference means that we're somewhere in the middle
3237 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3238 // step change before evaluating the sequence.
3239 if (ValDiff == 0)
3240 continue;
3242 int64_t Remainder = ValDiff % IdxDiff;
3243 // Normalize the step if it's greater than 1.
3244 if (Remainder != ValDiff) {
3245 // The difference must cleanly divide the element span.
3246 if (Remainder != 0)
3247 return std::nullopt;
3248 ValDiff /= IdxDiff;
3249 IdxDiff = 1;
3252 if (!SeqStepNum)
3253 SeqStepNum = ValDiff;
3254 else if (ValDiff != SeqStepNum)
3255 return std::nullopt;
3257 if (!SeqStepDenom)
3258 SeqStepDenom = IdxDiff;
3259 else if (IdxDiff != *SeqStepDenom)
3260 return std::nullopt;
3263 // Record this non-undef element for later.
3264 if (!PrevElt || PrevElt->first != Val)
3265 PrevElt = std::make_pair(Val, Idx);
3268 // We need to have logged a step for this to count as a legal index sequence.
3269 if (!SeqStepNum || !SeqStepDenom)
3270 return std::nullopt;
3272 // Loop back through the sequence and validate elements we might have skipped
3273 // while waiting for a valid step. While doing this, log any sequence addend.
3274 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3275 if (Op.getOperand(Idx).isUndef())
3276 continue;
3277 uint64_t Val;
3278 if (IsInteger) {
3279 Val = Op.getConstantOperandVal(Idx) &
3280 maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
3281 } else {
3282 Val = *getExactInteger(
3283 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3284 Op.getScalarValueSizeInBits());
3286 uint64_t ExpectedVal =
3287 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3288 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
3289 if (!SeqAddend)
3290 SeqAddend = Addend;
3291 else if (Addend != SeqAddend)
3292 return std::nullopt;
3295 assert(SeqAddend && "Must have an addend if we have a step");
3297 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3300 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3301 // and lower it as a VRGATHER_VX_VL from the source vector.
3302 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3303 SelectionDAG &DAG,
3304 const RISCVSubtarget &Subtarget) {
3305 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3306 return SDValue();
3307 SDValue Vec = SplatVal.getOperand(0);
3308 // Only perform this optimization on vectors of the same size for simplicity.
3309 // Don't perform this optimization for i1 vectors.
3310 // FIXME: Support i1 vectors, maybe by promoting to i8?
3311 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3312 return SDValue();
3313 SDValue Idx = SplatVal.getOperand(1);
3314 // The index must be a legal type.
3315 if (Idx.getValueType() != Subtarget.getXLenVT())
3316 return SDValue();
3318 MVT ContainerVT = VT;
3319 if (VT.isFixedLengthVector()) {
3320 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3321 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3324 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3326 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3327 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3329 if (!VT.isFixedLengthVector())
3330 return Gather;
3332 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3336 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3337 /// which constitute a large proportion of the elements. In such cases we can
3338 /// splat a vector with the dominant element and make up the shortfall with
3339 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3340 /// Note that this includes vectors of 2 elements by association. The
3341 /// upper-most element is the "dominant" one, allowing us to use a splat to
3342 /// "insert" the upper element, and an insert of the lower element at position
3343 /// 0, which improves codegen.
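/// For example, a v4i32 build_vector <x, x, x, y> (x dominant) can be lowered
/// as a splat of x followed by inserting y into the last lane - here via
/// vslide1down, since the odd value sits in the final element.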
3344 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3345 const RISCVSubtarget &Subtarget) {
3346 MVT VT = Op.getSimpleValueType();
3347 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3349 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3351 SDLoc DL(Op);
3352 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3354 MVT XLenVT = Subtarget.getXLenVT();
3355 unsigned NumElts = Op.getNumOperands();
3357 SDValue DominantValue;
3358 unsigned MostCommonCount = 0;
3359 DenseMap<SDValue, unsigned> ValueCounts;
3360 unsigned NumUndefElts =
3361 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3363 // Track the number of scalar loads we know we'd be inserting, estimated as
3364 // any non-zero floating-point constant. Other kinds of element are either
3365 // already in registers or are materialized on demand. The threshold at which
3366 // a vector load is more desirable than several scalar materialization and
3367 // vector-insertion instructions is not known.
3368 unsigned NumScalarLoads = 0;
3370 for (SDValue V : Op->op_values()) {
3371 if (V.isUndef())
3372 continue;
3374 ValueCounts.insert(std::make_pair(V, 0));
3375 unsigned &Count = ValueCounts[V];
3376 if (0 == Count)
3377 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3378 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3380 // Is this value dominant? In case of a tie, prefer the highest element as
3381 // it's cheaper to insert near the beginning of a vector than it is at the
3382 // end.
3383 if (++Count >= MostCommonCount) {
3384 DominantValue = V;
3385 MostCommonCount = Count;
3389 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3390 unsigned NumDefElts = NumElts - NumUndefElts;
3391 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3393 // Don't perform this optimization when optimizing for size, since
3394 // materializing elements and inserting them tends to cause code bloat.
3395 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3396 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3397 ((MostCommonCount > DominantValueCountThreshold) ||
3398 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3399 // Start by splatting the most common element.
3400 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3402 DenseSet<SDValue> Processed{DominantValue};
3404 // We can handle an insert into the last element (of a splat) via
3405 // v(f)slide1down. This is slightly better than the vslideup insert
3406 // lowering as it avoids the need for a vector group temporary. It
3407 // is also better than using vmerge.vx as it avoids the need to
3408 // materialize the mask in a vector register.
3409 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3410 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3411 LastOp != DominantValue) {
3412 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3413 auto OpCode =
3414 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3415 if (!VT.isFloatingPoint())
3416 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3417 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3418 LastOp, Mask, VL);
3419 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3420 Processed.insert(LastOp);
3423 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3424 for (const auto &OpIdx : enumerate(Op->ops())) {
3425 const SDValue &V = OpIdx.value();
3426 if (V.isUndef() || !Processed.insert(V).second)
3427 continue;
3428 if (ValueCounts[V] == 1) {
3429 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3430 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3431 } else {
3432 // Blend in all instances of this value using a VSELECT, using a
3433 // mask where each bit signals whether that element is the one
3434 // we're after.
3435 SmallVector<SDValue> Ops;
3436 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3437 return DAG.getConstant(V == V1, DL, XLenVT);
3439 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3440 DAG.getBuildVector(SelMaskTy, DL, Ops),
3441 DAG.getSplatBuildVector(VT, DL, V), Vec);
3445 return Vec;
3448 return SDValue();
3451 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3452 const RISCVSubtarget &Subtarget) {
3453 MVT VT = Op.getSimpleValueType();
3454 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3456 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3458 SDLoc DL(Op);
3459 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3461 MVT XLenVT = Subtarget.getXLenVT();
3462 unsigned NumElts = Op.getNumOperands();
3464 if (VT.getVectorElementType() == MVT::i1) {
3465 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3466 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3467 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3470 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3471 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3472 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3475 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3476 // scalar integer chunks whose bit-width depends on the number of mask
3477 // bits and XLEN.
3478 // First, determine the most appropriate scalar integer type to use. This
3479 // is at most XLenVT, but may be shrunk to a smaller vector element type
3480 // according to the size of the final vector - use i8 chunks rather than
3481 // XLenVT if we're producing a v8i1. This results in more consistent
3482 // codegen across RV32 and RV64.
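// For example, a constant v16i1 mask is packed into a single i16 element
// (v1i16) and bitcast back, while a constant v4i1 mask is packed into one i8
// (v1i8), bitcast to v8i1, and the low four lanes are then extracted.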
3483 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3484 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3485 // If we have to use more than one INSERT_VECTOR_ELT then this
3486 // optimization is likely to increase code size; avoid performing it in
3487 // such a case. We can use a load from a constant pool instead.
3488 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3489 return SDValue();
3490 // Now we can create our integer vector type. Note that it may be larger
3491 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3492 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3493 MVT IntegerViaVecVT =
3494 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3495 IntegerViaVecElts);
3497 uint64_t Bits = 0;
3498 unsigned BitPos = 0, IntegerEltIdx = 0;
3499 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3501 for (unsigned I = 0; I < NumElts;) {
3502 SDValue V = Op.getOperand(I);
3503 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3504 Bits |= ((uint64_t)BitValue << BitPos);
3505 ++BitPos;
3506 ++I;
3508 // Once we accumulate enough bits to fill our scalar type or process the
3509 // last element, insert into our vector and clear our accumulated data.
3510 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3511 if (NumViaIntegerBits <= 32)
3512 Bits = SignExtend64<32>(Bits);
3513 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3514 Elts[IntegerEltIdx] = Elt;
3515 Bits = 0;
3516 BitPos = 0;
3517 IntegerEltIdx++;
3521 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3523 if (NumElts < NumViaIntegerBits) {
3524 // If we're producing a smaller vector than our minimum legal integer
3525 // type, bitcast to the equivalent (known-legal) mask type, and extract
3526 // our final mask.
3527 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3528 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3529 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3530 DAG.getConstant(0, DL, XLenVT));
3531 } else {
3532 // Else we must have produced an integer type with the same size as the
3533 // mask type; bitcast for the final result.
3534 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3535 Vec = DAG.getBitcast(VT, Vec);
3538 return Vec;
3541 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3542 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3543 : RISCVISD::VMV_V_X_VL;
3544 if (!VT.isFloatingPoint())
3545 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3546 Splat =
3547 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3548 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3551 // Try and match index sequences, which we can lower to the vid instruction
3552 // with optional modifications. An all-undef vector is matched by
3553 // getSplatValue, above.
3554 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3555 int64_t StepNumerator = SimpleVID->StepNumerator;
3556 unsigned StepDenominator = SimpleVID->StepDenominator;
3557 int64_t Addend = SimpleVID->Addend;
3559 assert(StepNumerator != 0 && "Invalid step");
3560 bool Negate = false;
3561 int64_t SplatStepVal = StepNumerator;
3562 unsigned StepOpcode = ISD::MUL;
3563 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3564 // anyway as the shift of 63 won't fit in uimm5.
3565 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3566 isPowerOf2_64(std::abs(StepNumerator))) {
3567 Negate = StepNumerator < 0;
3568 StepOpcode = ISD::SHL;
3569 SplatStepVal = Log2_64(std::abs(StepNumerator));
3572 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3573 // threshold since it's the immediate value many RVV instructions accept.
3574 // There is no vmul.vi instruction so ensure the multiply constant can fit in
3575 // a single addi instruction.
3576 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3577 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3578 isPowerOf2_32(StepDenominator) &&
3579 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3580 MVT VIDVT =
3581 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3582 MVT VIDContainerVT =
3583 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3584 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3585 // Convert right out of the scalable type so we can use standard ISD
3586 // nodes for the rest of the computation. If we used scalable types with
3587 // these, we'd lose the fixed-length vector info and generate worse
3588 // vsetvli code.
3589 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3590 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3591 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3592 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3593 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3595 if (StepDenominator != 1) {
3596 SDValue SplatStep =
3597 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3598 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3600 if (Addend != 0 || Negate) {
3601 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3602 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3603 VID);
3605 if (VT.isFloatingPoint()) {
3606 // TODO: Use vfwcvt to reduce register pressure.
3607 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3609 return VID;
3613 // For very small build_vectors, use a single scalar insert of a constant.
3614 // TODO: Base this on constant rematerialization cost, not size.
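// For example, v4i8 <1, 2, 3, 4> is amalgamated into the single i32 constant
// 0x04030201, inserted into element 0 of an i32 vector, and bitcast back to
// v4i8.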
3615 const unsigned EltBitSize = VT.getScalarSizeInBits();
3616 if (VT.getSizeInBits() <= 32 &&
3617 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3618 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3619 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3620 "Unexpected sequence type");
3621 // If we can use the original VL with the modified element type, this
3622 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3623 // be moved into InsertVSETVLI?
3624 unsigned ViaVecLen =
3625 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3626 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3628 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3629 uint64_t SplatValue = 0;
3630 // Construct the amalgamated value at this larger vector type.
3631 for (const auto &OpIdx : enumerate(Op->op_values())) {
3632 const auto &SeqV = OpIdx.value();
3633 if (!SeqV.isUndef())
3634 SplatValue |=
3635 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3638 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3639 // achieve better constant materialization.
3640 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3641 SplatValue = SignExtend64<32>(SplatValue);
3643 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3644 DAG.getUNDEF(ViaVecVT),
3645 DAG.getConstant(SplatValue, DL, XLenVT),
3646 DAG.getConstant(0, DL, XLenVT));
3647 if (ViaVecLen != 1)
3648 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3649 MVT::getVectorVT(ViaIntVT, 1), Vec,
3650 DAG.getConstant(0, DL, XLenVT));
3651 return DAG.getBitcast(VT, Vec);
3655 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3656 // when re-interpreted as a vector with a larger element type. For example,
3657 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3658 // could be instead splat as
3659 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3660 // TODO: This optimization could also work on non-constant splats, but it
3661 // would require bit-manipulation instructions to construct the splat value.
3662 SmallVector<SDValue> Sequence;
3663 const auto *BV = cast<BuildVectorSDNode>(Op);
3664 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3665 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3666 BV->getRepeatedSequence(Sequence) &&
3667 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3668 unsigned SeqLen = Sequence.size();
3669 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3670 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3671 ViaIntVT == MVT::i64) &&
3672 "Unexpected sequence type");
3674 // If we can use the original VL with the modified element type, this
3675 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3676 // be moved into InsertVSETVLI?
3677 const unsigned RequiredVL = NumElts / SeqLen;
3678 const unsigned ViaVecLen =
3679 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3680 NumElts : RequiredVL;
3681 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3683 unsigned EltIdx = 0;
3684 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3685 uint64_t SplatValue = 0;
3686 // Construct the amalgamated value which can be splatted as this larger
3687 // vector type.
3688 for (const auto &SeqV : Sequence) {
3689 if (!SeqV.isUndef())
3690 SplatValue |=
3691 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3692 EltIdx++;
3695 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3696 // achieve better constant materialization.
3697 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3698 SplatValue = SignExtend64<32>(SplatValue);
3700 // Since we can't introduce illegal i64 types at this stage, we can only
3701 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3702 // way we can use RVV instructions to splat.
3703 assert((ViaIntVT.bitsLE(XLenVT) ||
3704 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3705 "Unexpected bitcast sequence");
3706 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3707 SDValue ViaVL =
3708 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3709 MVT ViaContainerVT =
3710 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3711 SDValue Splat =
3712 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3713 DAG.getUNDEF(ViaContainerVT),
3714 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3715 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3716 if (ViaVecLen != RequiredVL)
3717 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3718 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3719 DAG.getConstant(0, DL, XLenVT));
3720 return DAG.getBitcast(VT, Splat);
3724 // If the number of signbits allows, see if we can lower as a <N x i8>.
3725 // Our main goal here is to reduce LMUL (and thus work) required to
3726 // build the constant, but we will also narrow if the resulting
3727 // narrow vector is known to materialize cheaply.
3728 // TODO: We really should be costing the smaller vector. There are
3729 // profitable cases this misses.
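// For instance, every element of v4i32 <1, -1, 2, -2> is its own sign-extension
// from i8, so the vector can be built as v4i8 and widened back with a vector
// sign-extend (vsext.vf4).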
3730 if (EltBitSize > 8 && VT.isInteger() &&
3731 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3732 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3733 if (EltBitSize - SignBits < 8) {
3734 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3735 DL, Op->ops());
3736 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3737 Source, DAG, Subtarget);
3738 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3739 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3743 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3744 return Res;
3746 // For constant vectors, use generic constant pool lowering. Otherwise,
3747 // we'd have to materialize constants in GPRs just to move them into the
3748 // vector.
3749 return SDValue();
3752 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3753 const RISCVSubtarget &Subtarget) {
3754 MVT VT = Op.getSimpleValueType();
3755 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3757 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3758 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3759 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3761 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3763 SDLoc DL(Op);
3764 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3766 MVT XLenVT = Subtarget.getXLenVT();
3768 if (VT.getVectorElementType() == MVT::i1) {
3769 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3770 // vector type, we have a legal equivalently-sized i8 type, so we can use
3771 // that.
3772 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3773 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3775 SDValue WideVec;
3776 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3777 // For a splat, perform a scalar truncate before creating the wider
3778 // vector.
3779 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3780 DAG.getConstant(1, DL, Splat.getValueType()));
3781 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3782 } else {
3783 SmallVector<SDValue, 8> Ops(Op->op_values());
3784 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3785 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3786 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3789 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3792 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3793 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3794 return Gather;
3795 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3796 : RISCVISD::VMV_V_X_VL;
3797 if (!VT.isFloatingPoint())
3798 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3799 Splat =
3800 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3801 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3804 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3805 return Res;
3807 // If we're compiling for an exact VLEN value, we can split our work per
3808 // register in the register group.
3809 const unsigned MinVLen = Subtarget.getRealMinVLen();
3810 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
3811 if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
3812 MVT ElemVT = VT.getVectorElementType();
3813 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
3814 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3815 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3816 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3817 assert(M1VT == getLMUL1VT(M1VT));
3819 // The following semantically builds up a fixed length concat_vector
3820 // of the component build_vectors. We eagerly lower to scalable and
3821 // insert_subvector here to avoid DAG combining it back to a large
3822 // build_vector.
3823 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3824 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3825 SDValue Vec = DAG.getUNDEF(ContainerVT);
3826 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3827 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3828 SDValue SubBV =
3829 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3830 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3831 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3832 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3833 DAG.getVectorIdxConstant(InsertIdx, DL));
3835 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3838 // Cap the cost at a value linear to the number of elements in the vector.
3839 // The default lowering is to use the stack. The vector store + scalar loads
3840 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
3841 // being (at least) linear in LMUL. As a result, using the vslidedown
3842 // lowering for every element ends up being VL*LMUL.
3843 // TODO: Should we be directly costing the stack alternative? Doing so might
3844 // give us a more accurate upper bound.
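// As a rough sketch of the budget: a 16-element build_vector gets
// LinearBudget = 32. At LMUL_1 each slide costs 1, so even a fully-defined
// vector (16 slides) stays within budget, whereas at LMUL_4 the same vector
// costs 64 and falls back to the default (stack) lowering.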
3845 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3847 // TODO: unify with TTI getSlideCost.
3848 InstructionCost PerSlideCost = 1;
3849 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3850 default: break;
3851 case RISCVII::VLMUL::LMUL_2:
3852 PerSlideCost = 2;
3853 break;
3854 case RISCVII::VLMUL::LMUL_4:
3855 PerSlideCost = 4;
3856 break;
3857 case RISCVII::VLMUL::LMUL_8:
3858 PerSlideCost = 8;
3859 break;
3862 // TODO: Should we be using the build instseq then cost + evaluate scheme
3863 // we use for integer constants here?
3864 unsigned UndefCount = 0;
3865 for (const SDValue &V : Op->ops()) {
3866 if (V.isUndef()) {
3867 UndefCount++;
3868 continue;
3870 if (UndefCount) {
3871 LinearBudget -= PerSlideCost;
3872 UndefCount = 0;
3874 LinearBudget -= PerSlideCost;
3876 if (UndefCount) {
3877 LinearBudget -= PerSlideCost;
3880 if (LinearBudget < 0)
3881 return SDValue();
3883 assert((!VT.isFloatingPoint() ||
3884 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3885 "Illegal type which will result in reserved encoding");
3887 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3889 SDValue Vec;
3890 UndefCount = 0;
3891 for (SDValue V : Op->ops()) {
3892 if (V.isUndef()) {
3893 UndefCount++;
3894 continue;
3897 // Start our sequence with a TA splat in the hopes that hardware is able to
3898 // recognize there's no dependency on the prior value of our temporary
3899 // register.
3900 if (!Vec) {
3901 Vec = DAG.getSplatVector(VT, DL, V);
3902 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3903 UndefCount = 0;
3904 continue;
3907 if (UndefCount) {
3908 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3909 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3910 Vec, Offset, Mask, VL, Policy);
3911 UndefCount = 0;
3913 auto OpCode =
3914 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3915 if (!VT.isFloatingPoint())
3916 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
3917 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3918 V, Mask, VL);
3920 if (UndefCount) {
3921 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3922 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3923 Vec, Offset, Mask, VL, Policy);
3925 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3928 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3929 SDValue Lo, SDValue Hi, SDValue VL,
3930 SelectionDAG &DAG) {
3931 if (!Passthru)
3932 Passthru = DAG.getUNDEF(VT);
3933 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3934 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3935 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3936 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3937 // node in order to try and match RVV vector/scalar instructions.
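// For example, the i64 constant -10 splits into Lo = 0xfffffff6 and
// Hi = 0xffffffff; (LoC >> 31) == -1 == HiC, so a single 32-bit vmv.v.x of Lo
// produces the correct sign-extended splat.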
3938 if ((LoC >> 31) == HiC)
3939 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3941 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
3942 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
3943 // vlmax vsetvli or vsetivli to change the VL.
3944 // FIXME: Support larger constants?
3945 // FIXME: Support non-constant VLs by saturating?
3946 if (LoC == HiC) {
3947 SDValue NewVL;
3948 if (isAllOnesConstant(VL) ||
3949 (isa<RegisterSDNode>(VL) &&
3950 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
3951 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
3952 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
3953 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
3955 if (NewVL) {
3956 MVT InterVT =
3957 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3958 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
3959 DAG.getUNDEF(InterVT), Lo,
3960 DAG.getRegister(RISCV::X0, MVT::i32));
3961 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3966 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3967 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3968 isa<ConstantSDNode>(Hi.getOperand(1)) &&
3969 Hi.getConstantOperandVal(1) == 31)
3970 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3972 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
3973 // even if it might be sign extended.
3974 if (Hi.isUndef())
3975 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3977 // Fall back to a stack store and stride x0 vector load.
3978 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3979 Hi, VL);
3982 // Called by type legalization to handle splat of i64 on RV32.
3983 // FIXME: We can optimize this when the type has sign or zero bits in one
3984 // of the halves.
3985 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3986 SDValue Scalar, SDValue VL,
3987 SelectionDAG &DAG) {
3988 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3989 SDValue Lo, Hi;
3990 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3991 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3994 // This function lowers a splat of a scalar operand Splat with the vector
3995 // length VL. It ensures the final sequence is type legal, which is useful when
3996 // lowering a splat after type legalization.
3997 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3998 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3999 const RISCVSubtarget &Subtarget) {
4000 bool HasPassthru = Passthru && !Passthru.isUndef();
4001 if (!HasPassthru && !Passthru)
4002 Passthru = DAG.getUNDEF(VT);
4003 if (VT.isFloatingPoint())
4004 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4006 MVT XLenVT = Subtarget.getXLenVT();
4008 // Simplest case is that the operand needs to be promoted to XLenVT.
4009 if (Scalar.getValueType().bitsLE(XLenVT)) {
4010 // If the operand is a constant, sign extend to increase our chances
4011 // of being able to use a .vi instruction. ANY_EXTEND would become a
4012 // zero extend and the simm5 check in isel would fail.
4013 // FIXME: Should we ignore the upper bits in isel instead?
4014 unsigned ExtOpc =
4015 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4016 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4017 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4020 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4021 "Unexpected scalar for splat lowering!");
4023 if (isOneConstant(VL) && isNullConstant(Scalar))
4024 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4025 DAG.getConstant(0, DL, XLenVT), VL);
4027 // Otherwise use the more complicated splatting algorithm.
4028 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4031 // This function lowers an insert of a scalar operand Scalar into lane
4032 // 0 of the vector regardless of the value of VL. The contents of the
4033 // remaining lanes of the result vector are unspecified. VL is assumed
4034 // to be non-zero.
4035 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4036 const SDLoc &DL, SelectionDAG &DAG,
4037 const RISCVSubtarget &Subtarget) {
4038 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4040 const MVT XLenVT = Subtarget.getXLenVT();
4041 SDValue Passthru = DAG.getUNDEF(VT);
4043 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4044 isNullConstant(Scalar.getOperand(1))) {
4045 SDValue ExtractedVal = Scalar.getOperand(0);
4046 // The element types must be the same.
4047 if (ExtractedVal.getValueType().getVectorElementType() ==
4048 VT.getVectorElementType()) {
4049 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4050 MVT ExtractedContainerVT = ExtractedVT;
4051 if (ExtractedContainerVT.isFixedLengthVector()) {
4052 ExtractedContainerVT = getContainerForFixedLengthVector(
4053 DAG, ExtractedContainerVT, Subtarget);
4054 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4055 ExtractedVal, DAG, Subtarget);
4057 if (ExtractedContainerVT.bitsLE(VT))
4058 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4059 ExtractedVal, DAG.getConstant(0, DL, XLenVT));
4060 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4061 DAG.getConstant(0, DL, XLenVT));
4066 if (VT.isFloatingPoint())
4067 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4068 DAG.getUNDEF(VT), Scalar, VL);
4070 // Avoid the tricky legalization cases by falling back to using the
4071 // splat code which already handles it gracefully.
4072 if (!Scalar.getValueType().bitsLE(XLenVT))
4073 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4074 DAG.getConstant(1, DL, XLenVT),
4075 VT, DL, DAG, Subtarget);
4077 // If the operand is a constant, sign extend to increase our chances
4078 // of being able to use a .vi instruction. ANY_EXTEND would become a
4079 // zero extend and the simm5 check in isel would fail.
4080 // FIXME: Should we ignore the upper bits in isel instead?
4081 unsigned ExtOpc =
4082 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4083 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4084 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4085 DAG.getUNDEF(VT), Scalar, VL);
4088 // Is this a shuffle that extracts either the even or odd elements of a vector?
4089 // That is, specifically, either (a) or (b) below.
4090 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
4091 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
4092 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4093 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4094 // Returns {Src Vector, Even Elements} on success.
4095 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4096 SDValue V2, ArrayRef<int> Mask,
4097 const RISCVSubtarget &Subtarget) {
4098 // Need to be able to widen the vector.
4099 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4100 return false;
4102 // Both inputs must be extracts.
4103 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4104 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4105 return false;
4107 // Extracting from the same source.
4108 SDValue Src = V1.getOperand(0);
4109 if (Src != V2.getOperand(0))
4110 return false;
4112 // Src needs to have twice the number of elements.
4113 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4114 return false;
4116 // The extracts must extract the two halves of the source.
4117 if (V1.getConstantOperandVal(1) != 0 ||
4118 V2.getConstantOperandVal(1) != Mask.size())
4119 return false;
4121 // First index must be the first even or odd element from V1.
4122 if (Mask[0] != 0 && Mask[0] != 1)
4123 return false;
4125 // The others must increase by 2 each time.
4126 // TODO: Support undef elements?
4127 for (unsigned i = 1; i != Mask.size(); ++i)
4128 if (Mask[i] != Mask[i - 1] + 2)
4129 return false;
4131 return true;
4134 /// Is this shuffle interleaving contiguous elements from one vector into the
4135 /// even elements and contiguous elements from another vector into the odd
4136 /// elements. \p EvenSrc will contain the element that should be in the first
4137 /// even element. \p OddSrc will contain the element that should be in the first
4138 /// odd element. These can be the first element in a source or the element half
4139 /// way through the source.
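/// For example, for two v8i8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the low halves of V1 and V2 and is matched with EvenSrc = 0 and
/// OddSrc = 8.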
4140 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4141 int &OddSrc, const RISCVSubtarget &Subtarget) {
4142 // We need to be able to widen elements to the next larger integer type.
4143 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4144 return false;
4146 int Size = Mask.size();
4147 int NumElts = VT.getVectorNumElements();
4148 assert(Size == (int)NumElts && "Unexpected mask size");
4150 SmallVector<unsigned, 2> StartIndexes;
4151 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4152 return false;
4154 EvenSrc = StartIndexes[0];
4155 OddSrc = StartIndexes[1];
4157 // One source should be low half of first vector.
4158 if (EvenSrc != 0 && OddSrc != 0)
4159 return false;
4161 // Subvectors will be extracted from either the start of the two input
4162 // vectors, or from the start and middle of the first vector if it's a unary
4163 // interleave.
4164 // In both cases, HalfNumElts will be extracted.
4165 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4166 // we'll create an illegal extract_subvector.
4167 // FIXME: We could support other values using a slidedown first.
4168 int HalfNumElts = NumElts / 2;
4169 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4172 /// Match shuffles that concatenate two vectors, rotate the concatenation,
4173 /// and then extract the original number of elements from the rotated result.
4174 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4175 /// returned rotation amount is for a rotate right, where elements move from
4176 /// higher elements to lower elements. \p LoSrc indicates the first source
4177 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4178 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4179 /// 0 or 1 if a rotation is found.
4181 /// NOTE: We talk about rotate to the right which matches how bit shift and
4182 /// rotate instructions are described where LSBs are on the right, but LLVM IR
4183 /// and the table below write vectors with the lowest elements on the left.
4184 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4185 int Size = Mask.size();
4187 // We need to detect various ways of spelling a rotation:
4188 // [11, 12, 13, 14, 15, 0, 1, 2]
4189 // [-1, 12, 13, 14, -1, -1, 1, -1]
4190 // [-1, -1, -1, -1, -1, -1, 1, 2]
4191 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4192 // [-1, 4, 5, 6, -1, -1, 9, -1]
4193 // [-1, 4, 5, 6, -1, -1, -1, -1]
4194 int Rotation = 0;
4195 LoSrc = -1;
4196 HiSrc = -1;
4197 for (int i = 0; i != Size; ++i) {
4198 int M = Mask[i];
4199 if (M < 0)
4200 continue;
4202 // Determine where a rotate vector would have started.
4203 int StartIdx = i - (M % Size);
4204 // The identity rotation isn't interesting, stop.
4205 if (StartIdx == 0)
4206 return -1;
4208 // If we found the tail of a vector the rotation must be the missing
4209 // front. If we found the head of a vector, it must be how much of the
4210 // head.
4211 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4213 if (Rotation == 0)
4214 Rotation = CandidateRotation;
4215 else if (Rotation != CandidateRotation)
4216 // The rotations don't match, so we can't match this mask.
4217 return -1;
4219 // Compute which value this mask is pointing at.
4220 int MaskSrc = M < Size ? 0 : 1;
4222 // Compute which of the two target values this index should be assigned to.
4223 // This reflects whether the high elements are remaining or the low elements
4224 // are remaining.
4225 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4227 // Either set up this value if we've not encountered it before, or check
4228 // that it remains consistent.
4229 if (TargetSrc < 0)
4230 TargetSrc = MaskSrc;
4231 else if (TargetSrc != MaskSrc)
4232 // This may be a rotation, but it pulls from the inputs in some
4233 // unsupported interleaving.
4234 return -1;
4237 // Check that we successfully analyzed the mask, and normalize the results.
4238 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4239 assert((LoSrc >= 0 || HiSrc >= 0) &&
4240 "Failed to find a rotated input vector!");
4242 return Rotation;
4245 // Lower a deinterleave shuffle to vnsrl.
4246 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4247 // -> [p, q, r, s] (EvenElts == false)
4248 // VT is the type of the vector to return, <[vscale x ]n x ty>
4249 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4250 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4251 bool EvenElts,
4252 const RISCVSubtarget &Subtarget,
4253 SelectionDAG &DAG) {
4254 // The result is a vector of type <m x n x ty>
4255 MVT ContainerVT = VT;
4256 // Convert fixed vectors to scalable if needed
4257 if (ContainerVT.isFixedLengthVector()) {
4258 assert(Src.getSimpleValueType().isFixedLengthVector());
4259 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4261 // The source is a vector of type <m x n*2 x ty>
4262 MVT SrcContainerVT =
4263 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4264 ContainerVT.getVectorElementCount() * 2);
4265 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4268 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4270 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4271 // This also converts FP to int.
4272 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4273 MVT WideSrcContainerVT = MVT::getVectorVT(
4274 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4275 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4277 // The integer version of the container type.
4278 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4280 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4281 // the original element size.
4282 unsigned Shift = EvenElts ? 0 : EltBits;
4283 SDValue SplatShift = DAG.getNode(
4284 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4285 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4286 SDValue Res =
4287 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4288 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4289 // Cast back to FP if needed.
4290 Res = DAG.getBitcast(ContainerVT, Res);
4292 if (VT.isFixedLengthVector())
4293 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4294 return Res;
4297 // Lower the following shuffle to vslidedown.
4298 // a)
4299 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
4300 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
4301 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4302 // b)
4303 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
4304 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
4305 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
4306 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
4307 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
4308 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4309 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4310 SDValue V1, SDValue V2,
4311 ArrayRef<int> Mask,
4312 const RISCVSubtarget &Subtarget,
4313 SelectionDAG &DAG) {
4314 auto findNonEXTRACT_SUBVECTORParent =
4315 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4316 uint64_t Offset = 0;
4317 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4318 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4319 // a scalable vector. But we don't want to match the case.
4320 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4321 Offset += Parent.getConstantOperandVal(1);
4322 Parent = Parent.getOperand(0);
4324 return std::make_pair(Parent, Offset);
4327 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4328 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4330 // Extracting from the same source.
4331 SDValue Src = V1Src;
4332 if (Src != V2Src)
4333 return SDValue();
4335 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4336 SmallVector<int, 16> NewMask(Mask);
4337 for (size_t i = 0; i != NewMask.size(); ++i) {
4338 if (NewMask[i] == -1)
4339 continue;
4341 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4342 NewMask[i] = NewMask[i] + V1IndexOffset;
4343 } else {
4344 // Minus NewMask.size() is needed. Otherwise, the b case would be
4345 // <5,6,7,12> instead of <5,6,7,8>.
4346 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4350 // First index must be known and non-zero. It will be used as the slidedown
4351 // amount.
4352 if (NewMask[0] <= 0)
4353 return SDValue();
4355 // NewMask must also be contiguous.
4356 for (unsigned i = 1; i != NewMask.size(); ++i)
4357 if (NewMask[i - 1] + 1 != NewMask[i])
4358 return SDValue();
4360 MVT XLenVT = Subtarget.getXLenVT();
4361 MVT SrcVT = Src.getSimpleValueType();
4362 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4363 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4364 SDValue Slidedown =
4365 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4366 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4367 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4368 return DAG.getNode(
4369 ISD::EXTRACT_SUBVECTOR, DL, VT,
4370 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4371 DAG.getConstant(0, DL, XLenVT));
4374 // Because vslideup leaves the destination elements at the start intact, we can
4375 // use it to perform shuffles that insert subvectors:
4377 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4378 // ->
4379 // vsetvli zero, 8, e8, mf2, ta, ma
4380 // vslideup.vi v8, v9, 4
4382 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4383 // ->
4384 // vsetvli zero, 5, e8, mf2, tu, ma
4385 // vslideup.vi v8, v9, 2
4386 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4387 SDValue V1, SDValue V2,
4388 ArrayRef<int> Mask,
4389 const RISCVSubtarget &Subtarget,
4390 SelectionDAG &DAG) {
4391 unsigned NumElts = VT.getVectorNumElements();
4392 int NumSubElts, Index;
4393 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4394 Index))
4395 return SDValue();
4397 bool OpsSwapped = Mask[Index] < (int)NumElts;
4398 SDValue InPlace = OpsSwapped ? V2 : V1;
4399 SDValue ToInsert = OpsSwapped ? V1 : V2;
4401 MVT XLenVT = Subtarget.getXLenVT();
4402 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4403 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4404 // We slide up by the index that the subvector is being inserted at, and set
4405 // VL to the index + the number of elements being inserted.
4406 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
4407 // If we're adding a suffix to the in-place vector, i.e. inserting right
4408 // up to the very end of it, then we don't actually care about the tail.
4409 if (NumSubElts + Index >= (int)NumElts)
4410 Policy |= RISCVII::TAIL_AGNOSTIC;
4412 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4413 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4414 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4416 SDValue Res;
4417 // If we're inserting into the lowest elements, use a tail undisturbed
4418 // vmv.v.v.
4419 if (Index == 0)
4420 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4421 VL);
4422 else
4423 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4424 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4425 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4428 /// Match v(f)slide1up/down idioms. These operations involve sliding
4429 /// N-1 elements to make room for an inserted scalar at one end.
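/// For example (illustrative, not exhaustive): a v8i8 shuffle with mask
/// <8, 0, 1, 2, 3, 4, 5, 6>, where operand 1 is a splat BUILD_VECTOR of a
/// scalar s, yields <s, Op0[0], ..., Op0[6]> (Op0 denoting the first shuffle
/// operand) and so matches vslide1up.vx.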
4430 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4431 SDValue V1, SDValue V2,
4432 ArrayRef<int> Mask,
4433 const RISCVSubtarget &Subtarget,
4434 SelectionDAG &DAG) {
4435 bool OpsSwapped = false;
4436 if (!isa<BuildVectorSDNode>(V1)) {
4437 if (!isa<BuildVectorSDNode>(V2))
4438 return SDValue();
4439 std::swap(V1, V2);
4440 OpsSwapped = true;
4442 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4443 if (!Splat)
4444 return SDValue();
4446 // Return true if the mask could describe a slide of Mask.size() - 1
4447 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4448 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4449 const unsigned S = (Offset > 0) ? 0 : -Offset;
4450 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4451 for (unsigned i = S; i != E; ++i)
4452 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4453 return false;
4454 return true;
4457 const unsigned NumElts = VT.getVectorNumElements();
4458 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4459 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4460 return SDValue();
4462 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4463 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4464 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4465 return SDValue();
4467 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4468 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4469 auto OpCode = IsVSlidedown ?
4470 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4471 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4472 if (!VT.isFloatingPoint())
4473 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4474 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4475 DAG.getUNDEF(ContainerVT),
4476 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4477 Splat, TrueMask, VL);
4478 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4481 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4482 // to create an interleaved vector of <[vscale x] n*2 x ty>.
4483 // This requires that the size of ty is less than the subtarget's maximum ELEN.
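// For instance (illustrative), two v4i16 inputs produce a single v8i16 result
// whose even elements come from EvenV and odd elements from OddV; the
// arithmetic below is done in i32, which is why SEW must be below ELEN.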
4484 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4485 const SDLoc &DL, SelectionDAG &DAG,
4486 const RISCVSubtarget &Subtarget) {
4487 MVT VecVT = EvenV.getSimpleValueType();
4488 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4489 // Convert fixed vectors to scalable if needed
4490 if (VecContainerVT.isFixedLengthVector()) {
4491 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4492 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4493 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4496 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4498 // We're working with a vector of the same size as the resulting
4499 // interleaved vector, but with half the number of elements and
4500 // twice the SEW (Hence the restriction on not using the maximum
4501 // ELEN)
4502 MVT WideVT =
4503 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4504 VecVT.getVectorElementCount());
4505 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4506 if (WideContainerVT.isFixedLengthVector())
4507 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4509 // Bitcast the input vectors to integers in case they are FP
4510 VecContainerVT = VecContainerVT.changeTypeToInteger();
4511 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4512 OddV = DAG.getBitcast(VecContainerVT, OddV);
4514 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4515 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4517 SDValue Interleaved;
4518 if (Subtarget.hasStdExtZvbb()) {
4519 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4520 SDValue OffsetVec =
4521 DAG.getSplatVector(VecContainerVT, DL,
4522 DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
4523 Subtarget.getXLenVT()));
4524 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4525 OffsetVec, Passthru, Mask, VL);
4526 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4527 Interleaved, EvenV, Passthru, Mask, VL);
4528 } else {
4529 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4530 // vwaddu.vv
4531 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4532 OddV, Passthru, Mask, VL);
4534 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. an all-ones value.
4535 SDValue AllOnesVec = DAG.getSplatVector(
4536 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4537 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4538 OddV, AllOnesVec, Passthru, Mask, VL);
4540 // Add the two together so we get
4541 // (OddV * 0xff...ff) + (OddV + EvenV)
4542 // = (OddV * 0x100...00) + EvenV
4543 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
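// A worked example with illustrative SEW=8 values: EvenV[i] = 0x12 and
// OddV[i] = 0x34 give vwaddu 0x0046 and vwmulu 0x33CC, whose sum is 0x3412,
// i.e. (0x34 << 8) | 0x12 as desired.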
4544 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4545 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4546 Interleaved, OddsMul, Passthru, Mask, VL);
4549 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4550 MVT ResultContainerVT = MVT::getVectorVT(
4551 VecVT.getVectorElementType(), // Make sure to use original type
4552 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4553 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4555 // Convert back to a fixed vector if needed
4556 MVT ResultVT =
4557 MVT::getVectorVT(VecVT.getVectorElementType(),
4558 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4559 if (ResultVT.isFixedLengthVector())
4560 Interleaved =
4561 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4563 return Interleaved;
4566 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
4567 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4568 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4569 SelectionDAG &DAG,
4570 const RISCVSubtarget &Subtarget) {
4571 SDLoc DL(SVN);
4572 MVT VT = SVN->getSimpleValueType(0);
4573 SDValue V = SVN->getOperand(0);
4574 unsigned NumElts = VT.getVectorNumElements();
4576 assert(VT.getVectorElementType() == MVT::i1);
4578 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4579 SVN->getMask().size()) ||
4580 !SVN->getOperand(1).isUndef())
4581 return SDValue();
4583 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4584 EVT ViaVT = EVT::getVectorVT(
4585 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4586 EVT ViaBitVT =
4587 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4589 // If we don't have zvbb or the larger element type > ELEN, the operation will
4590 // be illegal.
4591 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4592 ViaVT) ||
4593 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4594 return SDValue();
4596 // If the bit vector doesn't fit exactly into the larger element type, we need
4597 // to insert it into the larger vector and then shift up the reversed bits
4598 // afterwards to get rid of the gap introduced.
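// As an illustration (hypothetical type): reversing a v4i1 goes via v8i1 and a
// v1i8 BITREVERSE, which leaves the four reversed bits in the high nibble, so a
// shift right by ViaEltSize - NumElts = 4 moves them back down.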
4599 if (ViaEltSize > NumElts)
4600 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4601 V, DAG.getVectorIdxConstant(0, DL));
4603 SDValue Res =
4604 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4606 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4607 // element type.
4608 if (ViaEltSize > NumElts)
4609 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4610 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4612 Res = DAG.getBitcast(ViaBitVT, Res);
4614 if (ViaEltSize > NumElts)
4615 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4616 DAG.getVectorIdxConstant(0, DL));
4617 return Res;
4620 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4621 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4622 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4623 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4624 SelectionDAG &DAG,
4625 const RISCVSubtarget &Subtarget) {
4626 SDLoc DL(SVN);
4628 EVT VT = SVN->getValueType(0);
4629 unsigned NumElts = VT.getVectorNumElements();
4630 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4631 unsigned NumSubElts, RotateAmt;
4632 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4633 NumElts, NumSubElts, RotateAmt))
4634 return SDValue();
4635 MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4636 NumElts / NumSubElts);
4638 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4639 if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
4640 return SDValue();
4642 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4644 SDValue Rotate;
4645 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4646 // so canonicalize to vrev8.
4647 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4648 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4649 else
4650 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4651 DAG.getConstant(RotateAmt, DL, RotateVT));
4653 return DAG.getBitcast(VT, Rotate);
4656 // If compiling with an exactly known VLEN, see if we can split a
4657 // shuffle on m2 or larger into a small number of m1 sized shuffles
4658 // which write each destination register exactly once.
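// For example (assuming an exactly known VLEN of 128): a v8i32 shuffle (m2)
// whose low destination register reads from only one source register, and
// whose high destination register reads from only one other, can be emitted as
// two independent v4i32 (m1) shuffles plus subvector inserts.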
4659 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4660 SelectionDAG &DAG,
4661 const RISCVSubtarget &Subtarget) {
4662 SDLoc DL(SVN);
4663 MVT VT = SVN->getSimpleValueType(0);
4664 SDValue V1 = SVN->getOperand(0);
4665 SDValue V2 = SVN->getOperand(1);
4666 ArrayRef<int> Mask = SVN->getMask();
4667 unsigned NumElts = VT.getVectorNumElements();
4669 // If we don't know exact data layout, not much we can do. If this
4670 // is already m1 or smaller, no point in splitting further.
4671 const unsigned MinVLen = Subtarget.getRealMinVLen();
4672 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
4673 if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen)
4674 return SDValue();
4676 MVT ElemVT = VT.getVectorElementType();
4677 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
4678 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4680 SmallVector<std::pair<int, SmallVector<int>>>
4681 OutMasks(VRegsPerSrc, {-1, {}});
4683 // Check if our mask can be done as a 1-to-1 mapping from source
4684 // to destination registers in the group without needing to
4685 // write each destination more than once.
4686 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4687 int DstVecIdx = DstIdx / ElemsPerVReg;
4688 int DstSubIdx = DstIdx % ElemsPerVReg;
4689 int SrcIdx = Mask[DstIdx];
4690 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4691 continue;
4692 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4693 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4694 if (OutMasks[DstVecIdx].first == -1)
4695 OutMasks[DstVecIdx].first = SrcVecIdx;
4696 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4697 // Note: This case could easily be handled by keeping track of a chain
4698 // of source values and generating two element shuffles below. This is
4699 // less an implementation question, and more a profitability one.
4700 return SDValue();
4702 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4703 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4706 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4707 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4708 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4709 assert(M1VT == getLMUL1VT(M1VT));
4710 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4711 SDValue Vec = DAG.getUNDEF(ContainerVT);
4712 // The following semantically builds up a fixed length concat_vector
4713 // of the component shuffle_vectors. We eagerly lower to scalable here
4714 // to avoid DAG combining it back to a large shuffle_vector again.
4715 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4716 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4717 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4718 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4719 if (SrcVecIdx == -1)
4720 continue;
4721 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4722 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4723 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4724 DAG.getVectorIdxConstant(ExtractIdx, DL));
4725 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4726 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4727 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4728 unsigned InsertIdx = DstVecIdx * NumOpElts;
4729 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4730 DAG.getVectorIdxConstant(InsertIdx, DL));
4732 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4735 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4736 const RISCVSubtarget &Subtarget) {
4737 SDValue V1 = Op.getOperand(0);
4738 SDValue V2 = Op.getOperand(1);
4739 SDLoc DL(Op);
4740 MVT XLenVT = Subtarget.getXLenVT();
4741 MVT VT = Op.getSimpleValueType();
4742 unsigned NumElts = VT.getVectorNumElements();
4743 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4745 if (VT.getVectorElementType() == MVT::i1) {
4746 // Lower to a vror.vi of a larger element type if possible before we promote
4747 // i1s to i8s.
4748 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4749 return V;
4750 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4751 return V;
4753 // Promote i1 shuffle to i8 shuffle.
4754 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4755 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4756 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4757 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4758 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4759 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4760 ISD::SETNE);
4763 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4765 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4767 if (SVN->isSplat()) {
4768 const int Lane = SVN->getSplatIndex();
4769 if (Lane >= 0) {
4770 MVT SVT = VT.getVectorElementType();
4772 // Turn splatted vector load into a strided load with an X0 stride.
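// (For instance, a splat of lane 2 of a plain, non-atomic, non-volatile v4i32
// load can be re-expressed as a zero-stride vlse32 from base + 8, or as a
// scalar load that is then splatted, as done below.)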
4773 SDValue V = V1;
4774 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4775 // with undef.
4776 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4777 int Offset = Lane;
4778 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4779 int OpElements =
4780 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4781 V = V.getOperand(Offset / OpElements);
4782 Offset %= OpElements;
4785 // We need to ensure the load isn't atomic or volatile.
4786 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4787 auto *Ld = cast<LoadSDNode>(V);
4788 Offset *= SVT.getStoreSize();
4789 SDValue NewAddr = DAG.getMemBasePlusOffset(
4790 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4792 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4793 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4794 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4795 SDValue IntID =
4796 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4797 SDValue Ops[] = {Ld->getChain(),
4798 IntID,
4799 DAG.getUNDEF(ContainerVT),
4800 NewAddr,
4801 DAG.getRegister(RISCV::X0, XLenVT),
4802 VL};
4803 SDValue NewLoad = DAG.getMemIntrinsicNode(
4804 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4805 DAG.getMachineFunction().getMachineMemOperand(
4806 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4807 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4808 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4811 // Otherwise use a scalar load and splat. This will give the best
4812 // opportunity to fold a splat into the operation. ISel can turn it into
4813 // the x0 strided load if we aren't able to fold away the select.
4814 if (SVT.isFloatingPoint())
4815 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4816 Ld->getPointerInfo().getWithOffset(Offset),
4817 Ld->getOriginalAlign(),
4818 Ld->getMemOperand()->getFlags());
4819 else
4820 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4821 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4822 Ld->getOriginalAlign(),
4823 Ld->getMemOperand()->getFlags());
4824 DAG.makeEquivalentMemoryOrdering(Ld, V);
4826 unsigned Opc =
4827 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4828 SDValue Splat =
4829 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4830 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4833 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4834 assert(Lane < (int)NumElts && "Unexpected lane!");
4835 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4836 V1, DAG.getConstant(Lane, DL, XLenVT),
4837 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4838 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4842 // For exact VLEN m2 or greater, try to split to m1 operations if we
4843 // can split cleanly.
4844 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4845 return V;
4847 ArrayRef<int> Mask = SVN->getMask();
4849 if (SDValue V =
4850 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4851 return V;
4853 if (SDValue V =
4854 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4855 return V;
4857 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
4858 // available.
4859 if (Subtarget.hasStdExtZvkb())
4860 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4861 return V;
4863 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4864 // be undef which can be handled with a single SLIDEDOWN/UP.
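// Illustrative case: the v8i8 mask <2, 3, 4, 5, 6, 7, 8, 9> is a rotation by 2,
// lowered as a vslidedown.vi of V1 by 2 merged with a vslideup.vi of V2 by 6.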
4865 int LoSrc, HiSrc;
4866 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4867 if (Rotation > 0) {
4868 SDValue LoV, HiV;
4869 if (LoSrc >= 0) {
4870 LoV = LoSrc == 0 ? V1 : V2;
4871 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4873 if (HiSrc >= 0) {
4874 HiV = HiSrc == 0 ? V1 : V2;
4875 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4878 // We found a rotation. We need to slide HiV down by Rotation. Then we need
4879 // to slide LoV up by (NumElts - Rotation).
4880 unsigned InvRotate = NumElts - Rotation;
4882 SDValue Res = DAG.getUNDEF(ContainerVT);
4883 if (HiV) {
4884 // Even though we could use a smaller VL, we don't, so as to avoid a vsetivli
4885 // toggle.
4886 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4887 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4889 if (LoV)
4890 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4891 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4892 RISCVII::TAIL_AGNOSTIC);
4894 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4897 // If this is a deinterleave and we can widen the vector, then we can use
4898 // vnsrl to deinterleave.
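// Sketch of the idea (illustrative types): viewing a v8i16 source as v4i32,
// a vnsrl.wi with shift 0 keeps the even i16 elements and a shift of 16 keeps
// the odd ones, so each half of the deinterleave is a single narrowing shift.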
4899 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4900 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4901 Subtarget, DAG);
4904 if (SDValue V =
4905 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4906 return V;
4908 // Detect an interleave shuffle and lower to
4909 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
4910 int EvenSrc, OddSrc;
4911 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4912 // Extract the halves of the vectors.
4913 MVT HalfVT = VT.getHalfNumVectorElementsVT();
4915 int Size = Mask.size();
4916 SDValue EvenV, OddV;
4917 assert(EvenSrc >= 0 && "Undef source?");
4918 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4919 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4920 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4922 assert(OddSrc >= 0 && "Undef source?");
4923 OddV = (OddSrc / Size) == 0 ? V1 : V2;
4924 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4925 DAG.getConstant(OddSrc % Size, DL, XLenVT));
4927 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4930 // Detect shuffles which can be re-expressed as vector selects; these are
4931 // shuffles in which each element in the destination is taken from an element
4932 // at the corresponding index in either source vector.
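// E.g. (illustrative mask) <0, 9, 2, 11> over two v4i32 sources keeps lanes 0
// and 2 from the first source and lanes 1 and 3 from the second, so it becomes
// a VSELECT with mask <1, 0, 1, 0>.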
4933 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4934 int MaskIndex = MaskIdx.value();
4935 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4938 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4940 // By default we preserve the original operand order, and use a mask to
4941 // select LHS as true and RHS as false. However, since RVV vector selects may
4942 // feature splats but only on the LHS, we may choose to invert our mask and
4943 // instead select between RHS and LHS.
4944 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4946 if (IsSelect) {
4947 // Now construct the mask that will be used by the vselect operation.
4948 SmallVector<SDValue> MaskVals;
4949 for (int MaskIndex : Mask) {
4950 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
4951 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4954 if (SwapOps)
4955 std::swap(V1, V2);
4957 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4958 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4959 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4960 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4963 // We might be able to express the shuffle as a bitrotate. But even if we
4964 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
4965 // shifts and a vor will have a higher throughput than a vrgather.
4966 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4967 return V;
4969 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4970 // On such a large vector we're unable to use i8 as the index type.
4971 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4972 // may involve vector splitting if we're already at LMUL=8, or our
4973 // user-supplied maximum fixed-length LMUL.
4974 return SDValue();
4977 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4978 // merged with a second vrgather.
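// E.g. (illustrative) a single-source v4i8 mask <3, 0, 3, 1> matches none of
// the cheaper patterns above and is emitted as a vrgather.vv with the index
// vector <3, 0, 3, 1>; a two-source mask additionally blends in a masked
// vrgather of the second source.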
4979 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4981 // Keep track of which non-undef indices are used by each LHS/RHS shuffle
4982 // half.
4983 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4985 SmallVector<SDValue> MaskVals;
4987 // Now construct the mask that will be used by the blended vrgather operation.
4988 // Construct the appropriate indices into each vector.
4989 for (int MaskIndex : Mask) {
4990 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
4991 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4992 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4993 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4994 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4995 : DAG.getUNDEF(XLenVT));
4996 GatherIndicesRHS.push_back(
4997 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4998 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4999 if (IsLHSOrUndefIndex && MaskIndex >= 0)
5000 ++LHSIndexCounts[MaskIndex];
5001 if (!IsLHSOrUndefIndex)
5002 ++RHSIndexCounts[MaskIndex - NumElts];
5005 if (SwapOps) {
5006 std::swap(V1, V2);
5007 std::swap(GatherIndicesLHS, GatherIndicesRHS);
5010 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5011 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5012 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5014 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
5015 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5016 MVT IndexVT = VT.changeTypeToInteger();
5017 // Since we can't introduce illegal index types at this stage, use i16 and
5018 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5019 // than XLenVT.
5020 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5021 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5022 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5025 // If the mask allows, we can do all the index computation in 16 bits. This
5026 // requires less work and less register pressure at high LMUL, and creates
5027 // smaller constants which may be cheaper to materialize.
5028 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5029 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5030 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5031 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5034 MVT IndexContainerVT =
5035 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5037 SDValue Gather;
5038 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
5039 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
5040 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
5041 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
5042 Subtarget);
5043 } else {
5044 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5045 // If only one index is used, we can use a "splat" vrgather.
5046 // TODO: We can splat the most-common index and fix-up any stragglers, if
5047 // that's beneficial.
5048 if (LHSIndexCounts.size() == 1) {
5049 int SplatIndex = LHSIndexCounts.begin()->getFirst();
5050 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
5051 DAG.getConstant(SplatIndex, DL, XLenVT),
5052 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5053 } else {
5054 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5055 LHSIndices =
5056 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
5058 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5059 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5063 // If a second vector operand is used by this shuffle, blend it in with an
5064 // additional vrgather.
5065 if (!V2.isUndef()) {
5066 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5068 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
5069 SelectMask =
5070 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
5072 // If only one index is used, we can use a "splat" vrgather.
5073 // TODO: We can splat the most-common index and fix-up any stragglers, if
5074 // that's beneficial.
5075 if (RHSIndexCounts.size() == 1) {
5076 int SplatIndex = RHSIndexCounts.begin()->getFirst();
5077 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
5078 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
5079 SelectMask, VL);
5080 } else {
5081 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
5082 RHSIndices =
5083 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
5084 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
5085 SelectMask, VL);
5089 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5092 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5093 // Support splats for any type. These should type legalize well.
5094 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5095 return true;
5097 // Only support legal VTs for other shuffles for now.
5098 if (!isTypeLegal(VT))
5099 return false;
5101 MVT SVT = VT.getSimpleVT();
5103 // Not for i1 vectors.
5104 if (SVT.getScalarType() == MVT::i1)
5105 return false;
5107 int Dummy1, Dummy2;
5108 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5109 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5112 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5113 // the exponent.
5114 SDValue
5115 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5116 SelectionDAG &DAG) const {
5117 MVT VT = Op.getSimpleValueType();
5118 unsigned EltSize = VT.getScalarSizeInBits();
5119 SDValue Src = Op.getOperand(0);
5120 SDLoc DL(Op);
5121 MVT ContainerVT = VT;
5123 SDValue Mask, VL;
5124 if (Op->isVPOpcode()) {
5125 Mask = Op.getOperand(1);
5126 if (VT.isFixedLengthVector())
5127 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5128 Subtarget);
5129 VL = Op.getOperand(2);
5132 // We choose an FP type that can exactly represent the value if possible.
5133 // Otherwise, we use a round-to-zero conversion so the exponent of the result is correct.
5134 // TODO: Use f16 for i8 when possible?
5135 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5136 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5137 FloatEltVT = MVT::f32;
5138 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5140 // Legal types should have been checked in the RISCVTargetLowering
5141 // constructor.
5142 // TODO: Splitting may make sense in some cases.
5143 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5144 "Expected legal float type!");
5146 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5147 // The trailing zero count is equal to log2 of this single bit value.
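// Worked example (hypothetical input): for i32 x = 0x50, x & -x = 0x10;
// converting 0x10 to f32 gives a biased exponent of 127 + 4 = 131, and
// 131 - 127 = 4 is indeed cttz(0x50).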
5148 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5149 SDValue Neg = DAG.getNegative(Src, DL, VT);
5150 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5151 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5152 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5153 Src, Mask, VL);
5154 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5157 // We have a legal FP type, convert to it.
5158 SDValue FloatVal;
5159 if (FloatVT.bitsGT(VT)) {
5160 if (Op->isVPOpcode())
5161 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5162 else
5163 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5164 } else {
5165 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5166 if (VT.isFixedLengthVector()) {
5167 ContainerVT = getContainerForFixedLengthVector(VT);
5168 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5170 if (!Op->isVPOpcode())
5171 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5172 SDValue RTZRM =
5173 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5174 MVT ContainerFloatVT =
5175 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5176 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5177 Src, Mask, RTZRM, VL);
5178 if (VT.isFixedLengthVector())
5179 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5181 // Bitcast to integer and shift the exponent to the LSB.
5182 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5183 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5184 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5186 SDValue Exp;
5187 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5188 if (Op->isVPOpcode()) {
5189 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5190 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5191 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5192 } else {
5193 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5194 DAG.getConstant(ShiftAmt, DL, IntVT));
5195 if (IntVT.bitsLT(VT))
5196 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5197 else if (IntVT.bitsGT(VT))
5198 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5201 // The exponent contains log2 of the value in biased form.
5202 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5203 // For trailing zeros, we just need to subtract the bias.
5204 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5205 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5206 DAG.getConstant(ExponentBias, DL, VT));
5207 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5208 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5209 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5211 // For leading zeros, we need to remove the bias and convert from log2 to
5212 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
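// For an illustrative i32 input x = 0x10: the biased exponent is 131,
// Adjust = 127 + 31 = 158, and 158 - 131 = 27 = ctlz(0x10).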
5213 unsigned Adjust = ExponentBias + (EltSize - 1);
5214 SDValue Res;
5215 if (Op->isVPOpcode())
5216 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5217 Mask, VL);
5218 else
5219 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5221 // With a zero input, the above result equals Adjust, which is greater than
5222 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5223 if (Op.getOpcode() == ISD::CTLZ)
5224 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5225 else if (Op.getOpcode() == ISD::VP_CTLZ)
5226 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5227 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5228 return Res;
5231 // While RVV has alignment restrictions, we should always be able to load as a
5232 // legal equivalently-sized byte-typed vector instead. This method is
5233 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5234 // the load is already correctly-aligned, it returns SDValue().
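// E.g. (illustrative) an underaligned load of nxv2i64 is re-issued as a load
// of nxv16i8 with the original pointer, alignment and memory-operand flags,
// and the result is bitcast back to nxv2i64.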
5235 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5236 SelectionDAG &DAG) const {
5237 auto *Load = cast<LoadSDNode>(Op);
5238 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5240 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5241 Load->getMemoryVT(),
5242 *Load->getMemOperand()))
5243 return SDValue();
5245 SDLoc DL(Op);
5246 MVT VT = Op.getSimpleValueType();
5247 unsigned EltSizeBits = VT.getScalarSizeInBits();
5248 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5249 "Unexpected unaligned RVV load type");
5250 MVT NewVT =
5251 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5252 assert(NewVT.isValid() &&
5253 "Expecting equally-sized RVV vector types to be legal");
5254 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5255 Load->getPointerInfo(), Load->getOriginalAlign(),
5256 Load->getMemOperand()->getFlags());
5257 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5260 // While RVV has alignment restrictions, we should always be able to store as a
5261 // legal equivalently-sized byte-typed vector instead. This method is
5262 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5263 // returns SDValue() if the store is already correctly aligned.
5264 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5265 SelectionDAG &DAG) const {
5266 auto *Store = cast<StoreSDNode>(Op);
5267 assert(Store && Store->getValue().getValueType().isVector() &&
5268 "Expected vector store");
5270 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5271 Store->getMemoryVT(),
5272 *Store->getMemOperand()))
5273 return SDValue();
5275 SDLoc DL(Op);
5276 SDValue StoredVal = Store->getValue();
5277 MVT VT = StoredVal.getSimpleValueType();
5278 unsigned EltSizeBits = VT.getScalarSizeInBits();
5279 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5280 "Unexpected unaligned RVV store type");
5281 MVT NewVT =
5282 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5283 assert(NewVT.isValid() &&
5284 "Expecting equally-sized RVV vector types to be legal");
5285 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5286 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5287 Store->getPointerInfo(), Store->getOriginalAlign(),
5288 Store->getMemOperand()->getFlags());
5291 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5292 const RISCVSubtarget &Subtarget) {
5293 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5295 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5297 // All simm32 constants should be handled by isel.
5298 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5299 // this check redundant, but small immediates are common so this check
5300 // should have better compile time.
5301 if (isInt<32>(Imm))
5302 return Op;
5304 // We only need to cost the immediate, if constant pool lowering is enabled.
5305 if (!Subtarget.useConstantPoolForLargeInts())
5306 return Op;
5308 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5309 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5310 return Op;
5312 // Optimizations below are disabled for opt size. If we're optimizing for
5313 // size, use a constant pool.
5314 if (DAG.shouldOptForSize())
5315 return SDValue();
5317 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5318 // do that if it will avoid a constant pool.
5319 // It will require an extra temporary register though.
5320 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5321 // low and high 32 bits are the same and bit 31 and 63 are set.
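// An illustrative constant: 0x0F0F0F0F0F0F0F0F can be built by materializing
// 0x0F0F0F0F once and folding in (ADD (SLLI X, 32), X), avoiding a constant
// pool access at the cost of one temporary register.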
5322 unsigned ShiftAmt, AddOpc;
5323 RISCVMatInt::InstSeq SeqLo =
5324 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5325 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5326 return Op;
5328 return SDValue();
5331 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5332 const RISCVSubtarget &Subtarget) {
5333 SDLoc dl(Op);
5334 AtomicOrdering FenceOrdering =
5335 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5336 SyncScope::ID FenceSSID =
5337 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5339 if (Subtarget.hasStdExtZtso()) {
5340 // The only fence that needs an instruction is a sequentially-consistent
5341 // cross-thread fence.
5342 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5343 FenceSSID == SyncScope::System)
5344 return Op;
5346 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5347 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5350 // singlethread fences only synchronize with signal handlers on the same
5351 // thread and thus only need to preserve instruction order, not actually
5352 // enforce memory ordering.
5353 if (FenceSSID == SyncScope::SingleThread)
5354 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5355 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5357 return Op;
5360 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5361 SelectionDAG &DAG) const {
5362 SDLoc DL(Op);
5363 MVT VT = Op.getSimpleValueType();
5364 MVT XLenVT = Subtarget.getXLenVT();
5365 unsigned Check = Op.getConstantOperandVal(1);
5366 unsigned TDCMask = 0;
5367 if (Check & fcSNan)
5368 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5369 if (Check & fcQNan)
5370 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5371 if (Check & fcPosInf)
5372 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5373 if (Check & fcNegInf)
5374 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5375 if (Check & fcPosNormal)
5376 TDCMask |= RISCV::FPMASK_Positive_Normal;
5377 if (Check & fcNegNormal)
5378 TDCMask |= RISCV::FPMASK_Negative_Normal;
5379 if (Check & fcPosSubnormal)
5380 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5381 if (Check & fcNegSubnormal)
5382 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5383 if (Check & fcPosZero)
5384 TDCMask |= RISCV::FPMASK_Positive_Zero;
5385 if (Check & fcNegZero)
5386 TDCMask |= RISCV::FPMASK_Negative_Zero;
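// For instance (illustrative): a query for any NaN (fcSNan | fcQNan) sets only
// the signaling and quiet NaN bits here, so TDCMask has two bits set and the
// one-bit fast path below is not taken.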
5388 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5390 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5392 if (VT.isVector()) {
5393 SDValue Op0 = Op.getOperand(0);
5394 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5396 if (VT.isScalableVector()) {
5397 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5398 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5399 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5400 Mask = Op.getOperand(2);
5401 VL = Op.getOperand(3);
5403 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5404 VL, Op->getFlags());
5405 if (IsOneBitMask)
5406 return DAG.getSetCC(DL, VT, FPCLASS,
5407 DAG.getConstant(TDCMask, DL, DstVT),
5408 ISD::CondCode::SETEQ);
5409 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5410 DAG.getConstant(TDCMask, DL, DstVT));
5411 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5412 ISD::SETNE);
5415 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5416 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5417 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5418 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5419 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5420 Mask = Op.getOperand(2);
5421 MVT MaskContainerVT =
5422 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5423 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5424 VL = Op.getOperand(3);
5426 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5428 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5429 Mask, VL, Op->getFlags());
5431 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5432 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5433 if (IsOneBitMask) {
5434 SDValue VMSEQ =
5435 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5436 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5437 DAG.getUNDEF(ContainerVT), Mask, VL});
5438 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5440 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5441 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5443 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5444 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5445 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5447 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5448 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5449 DAG.getUNDEF(ContainerVT), Mask, VL});
5450 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5453 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5454 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5455 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5456 ISD::CondCode::SETNE);
5457 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5460 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5461 // operations propagate nans.
5462 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5463 const RISCVSubtarget &Subtarget) {
5464 SDLoc DL(Op);
5465 MVT VT = Op.getSimpleValueType();
5467 SDValue X = Op.getOperand(0);
5468 SDValue Y = Op.getOperand(1);
5470 if (!VT.isVector()) {
5471 MVT XLenVT = Subtarget.getXLenVT();
5473 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5474 // ensures that when one input is a nan, the other will also be a nan
5475 // allowing the nan to propagate. If both inputs are nan, this will swap the
5476 // inputs which is harmless.
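// E.g. (illustrative values) X = NaN, Y = 1.0: X == X is false, so NewY
// becomes X (NaN) while NewX stays X (NaN), and fmax/fmin of two NaNs
// propagates the NaN as fmaximum/fminimum require.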
5478 SDValue NewY = Y;
5479 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5480 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5481 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5484 SDValue NewX = X;
5485 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5486 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5487 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5490 unsigned Opc =
5491 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5492 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5495 // Check for NaNs before converting the fixed vector to a scalable one.
5496 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5497 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5499 MVT ContainerVT = VT;
5500 if (VT.isFixedLengthVector()) {
5501 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5502 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5503 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5506 SDValue Mask, VL;
5507 if (Op->isVPOpcode()) {
5508 Mask = Op.getOperand(2);
5509 if (VT.isFixedLengthVector())
5510 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5511 Subtarget);
5512 VL = Op.getOperand(3);
5513 } else {
5514 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5517 SDValue NewY = Y;
5518 if (!XIsNeverNan) {
5519 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5520 {X, X, DAG.getCondCode(ISD::SETOEQ),
5521 DAG.getUNDEF(ContainerVT), Mask, VL});
5522 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5523 DAG.getUNDEF(ContainerVT), VL);
5526 SDValue NewX = X;
5527 if (!YIsNeverNan) {
5528 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5529 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5530 DAG.getUNDEF(ContainerVT), Mask, VL});
5531 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5532 DAG.getUNDEF(ContainerVT), VL);
5535 unsigned Opc =
5536 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5537 ? RISCVISD::VFMAX_VL
5538 : RISCVISD::VFMIN_VL;
5539 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5540 DAG.getUNDEF(ContainerVT), Mask, VL);
5541 if (VT.isFixedLengthVector())
5542 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5543 return Res;
5546 /// Get a RISC-V target specified VL op for a given SDNode.
5547 static unsigned getRISCVVLOp(SDValue Op) {
5548 #define OP_CASE(NODE) \
5549 case ISD::NODE: \
5550 return RISCVISD::NODE##_VL;
5551 #define VP_CASE(NODE) \
5552 case ISD::VP_##NODE: \
5553 return RISCVISD::NODE##_VL;
5554 // clang-format off
5555 switch (Op.getOpcode()) {
5556 default:
5557 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5558 OP_CASE(ADD)
5559 OP_CASE(SUB)
5560 OP_CASE(MUL)
5561 OP_CASE(MULHS)
5562 OP_CASE(MULHU)
5563 OP_CASE(SDIV)
5564 OP_CASE(SREM)
5565 OP_CASE(UDIV)
5566 OP_CASE(UREM)
5567 OP_CASE(SHL)
5568 OP_CASE(SRA)
5569 OP_CASE(SRL)
5570 OP_CASE(ROTL)
5571 OP_CASE(ROTR)
5572 OP_CASE(BSWAP)
5573 OP_CASE(CTTZ)
5574 OP_CASE(CTLZ)
5575 OP_CASE(CTPOP)
5576 OP_CASE(BITREVERSE)
5577 OP_CASE(SADDSAT)
5578 OP_CASE(UADDSAT)
5579 OP_CASE(SSUBSAT)
5580 OP_CASE(USUBSAT)
5581 OP_CASE(AVGFLOORU)
5582 OP_CASE(AVGCEILU)
5583 OP_CASE(FADD)
5584 OP_CASE(FSUB)
5585 OP_CASE(FMUL)
5586 OP_CASE(FDIV)
5587 OP_CASE(FNEG)
5588 OP_CASE(FABS)
5589 OP_CASE(FSQRT)
5590 OP_CASE(SMIN)
5591 OP_CASE(SMAX)
5592 OP_CASE(UMIN)
5593 OP_CASE(UMAX)
5594 OP_CASE(STRICT_FADD)
5595 OP_CASE(STRICT_FSUB)
5596 OP_CASE(STRICT_FMUL)
5597 OP_CASE(STRICT_FDIV)
5598 OP_CASE(STRICT_FSQRT)
5599 VP_CASE(ADD) // VP_ADD
5600 VP_CASE(SUB) // VP_SUB
5601 VP_CASE(MUL) // VP_MUL
5602 VP_CASE(SDIV) // VP_SDIV
5603 VP_CASE(SREM) // VP_SREM
5604 VP_CASE(UDIV) // VP_UDIV
5605 VP_CASE(UREM) // VP_UREM
5606 VP_CASE(SHL) // VP_SHL
5607 VP_CASE(FADD) // VP_FADD
5608 VP_CASE(FSUB) // VP_FSUB
5609 VP_CASE(FMUL) // VP_FMUL
5610 VP_CASE(FDIV) // VP_FDIV
5611 VP_CASE(FNEG) // VP_FNEG
5612 VP_CASE(FABS) // VP_FABS
5613 VP_CASE(SMIN) // VP_SMIN
5614 VP_CASE(SMAX) // VP_SMAX
5615 VP_CASE(UMIN) // VP_UMIN
5616 VP_CASE(UMAX) // VP_UMAX
5617 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5618 VP_CASE(SETCC) // VP_SETCC
5619 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5620 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5621 VP_CASE(BITREVERSE) // VP_BITREVERSE
5622 VP_CASE(BSWAP) // VP_BSWAP
5623 VP_CASE(CTLZ) // VP_CTLZ
5624 VP_CASE(CTTZ) // VP_CTTZ
5625 VP_CASE(CTPOP) // VP_CTPOP
5626 case ISD::CTLZ_ZERO_UNDEF:
5627 case ISD::VP_CTLZ_ZERO_UNDEF:
5628 return RISCVISD::CTLZ_VL;
5629 case ISD::CTTZ_ZERO_UNDEF:
5630 case ISD::VP_CTTZ_ZERO_UNDEF:
5631 return RISCVISD::CTTZ_VL;
5632 case ISD::FMA:
5633 case ISD::VP_FMA:
5634 return RISCVISD::VFMADD_VL;
5635 case ISD::STRICT_FMA:
5636 return RISCVISD::STRICT_VFMADD_VL;
5637 case ISD::AND:
5638 case ISD::VP_AND:
5639 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5640 return RISCVISD::VMAND_VL;
5641 return RISCVISD::AND_VL;
5642 case ISD::OR:
5643 case ISD::VP_OR:
5644 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5645 return RISCVISD::VMOR_VL;
5646 return RISCVISD::OR_VL;
5647 case ISD::XOR:
5648 case ISD::VP_XOR:
5649 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5650 return RISCVISD::VMXOR_VL;
5651 return RISCVISD::XOR_VL;
5652 case ISD::VP_SELECT:
5653 case ISD::VP_MERGE:
5654 return RISCVISD::VMERGE_VL;
5655 case ISD::VP_ASHR:
5656 return RISCVISD::SRA_VL;
5657 case ISD::VP_LSHR:
5658 return RISCVISD::SRL_VL;
5659 case ISD::VP_SQRT:
5660 return RISCVISD::FSQRT_VL;
5661 case ISD::VP_SIGN_EXTEND:
5662 return RISCVISD::VSEXT_VL;
5663 case ISD::VP_ZERO_EXTEND:
5664 return RISCVISD::VZEXT_VL;
5665 case ISD::VP_FP_TO_SINT:
5666 return RISCVISD::VFCVT_RTZ_X_F_VL;
5667 case ISD::VP_FP_TO_UINT:
5668 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5669 case ISD::FMINNUM:
5670 case ISD::VP_FMINNUM:
5671 return RISCVISD::VFMIN_VL;
5672 case ISD::FMAXNUM:
5673 case ISD::VP_FMAXNUM:
5674 return RISCVISD::VFMAX_VL;
5676 // clang-format on
5677 #undef OP_CASE
5678 #undef VP_CASE
5681 /// Return true if a RISC-V target specified op has a merge operand.
5682 static bool hasMergeOp(unsigned Opcode) {
5683 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5684 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5685 "not a RISC-V target specific op");
5686 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5687 126 &&
5688 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5689 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5690 21 &&
5691 "adding target specific op should update this function");
5692 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5693 return true;
5694 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5695 return true;
5696 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5697 return true;
5698 if (Opcode == RISCVISD::SETCC_VL)
5699 return true;
5700 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5701 return true;
5702 if (Opcode == RISCVISD::VMERGE_VL)
5703 return true;
5704 return false;
5707 /// Return true if a RISC-V target specified op has a mask operand.
5708 static bool hasMaskOp(unsigned Opcode) {
5709 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5710 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5711 "not a RISC-V target specific op");
5712 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5713 126 &&
5714 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5715 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5716 21 &&
5717 "adding target specific op should update this function");
5718 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5719 return true;
5720 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5721 return true;
5722 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5723 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5724 return true;
5725 return false;
5728 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5729 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5730 SDLoc DL(Op);
5732 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5733 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5735 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5736 if (!Op.getOperand(j).getValueType().isVector()) {
5737 LoOperands[j] = Op.getOperand(j);
5738 HiOperands[j] = Op.getOperand(j);
5739 continue;
5741 std::tie(LoOperands[j], HiOperands[j]) =
5742 DAG.SplitVector(Op.getOperand(j), DL);
5745 SDValue LoRes =
5746 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5747 SDValue HiRes =
5748 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5750 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5753 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5754 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5755 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5756 SDLoc DL(Op);
5758 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5759 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5761 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5762 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5763 std::tie(LoOperands[j], HiOperands[j]) =
5764 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5765 continue;
5767 if (!Op.getOperand(j).getValueType().isVector()) {
5768 LoOperands[j] = Op.getOperand(j);
5769 HiOperands[j] = Op.getOperand(j);
5770 continue;
5772 std::tie(LoOperands[j], HiOperands[j]) =
5773 DAG.SplitVector(Op.getOperand(j), DL);
5776 SDValue LoRes =
5777 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5778 SDValue HiRes =
5779 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5781 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5784 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5785 SDLoc DL(Op);
5787 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5788 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5789 auto [EVLLo, EVLHi] =
5790 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5792 SDValue ResLo =
5793 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5794 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5795 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5796 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5799 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
5801 assert(Op->isStrictFPOpcode());
5803 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5805 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
5806 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
5808 SDLoc DL(Op);
5810 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5811 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5813 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5814 if (!Op.getOperand(j).getValueType().isVector()) {
5815 LoOperands[j] = Op.getOperand(j);
5816 HiOperands[j] = Op.getOperand(j);
5817 continue;
5819 std::tie(LoOperands[j], HiOperands[j]) =
5820 DAG.SplitVector(Op.getOperand(j), DL);
5823 SDValue LoRes =
5824 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
5825 HiOperands[0] = LoRes.getValue(1);
5826 SDValue HiRes =
5827 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
5829 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
5830 LoRes.getValue(0), HiRes.getValue(0));
5831 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
5834 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
5835 SelectionDAG &DAG) const {
5836 switch (Op.getOpcode()) {
5837 default:
5838 report_fatal_error("unimplemented operand");
5839 case ISD::ATOMIC_FENCE:
5840 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
5841 case ISD::GlobalAddress:
5842 return lowerGlobalAddress(Op, DAG);
5843 case ISD::BlockAddress:
5844 return lowerBlockAddress(Op, DAG);
5845 case ISD::ConstantPool:
5846 return lowerConstantPool(Op, DAG);
5847 case ISD::JumpTable:
5848 return lowerJumpTable(Op, DAG);
5849 case ISD::GlobalTLSAddress:
5850 return lowerGlobalTLSAddress(Op, DAG);
5851 case ISD::Constant:
5852 return lowerConstant(Op, DAG, Subtarget);
5853 case ISD::SELECT:
5854 return lowerSELECT(Op, DAG);
5855 case ISD::BRCOND:
5856 return lowerBRCOND(Op, DAG);
5857 case ISD::VASTART:
5858 return lowerVASTART(Op, DAG);
5859 case ISD::FRAMEADDR:
5860 return lowerFRAMEADDR(Op, DAG);
5861 case ISD::RETURNADDR:
5862 return lowerRETURNADDR(Op, DAG);
5863 case ISD::SHL_PARTS:
5864 return lowerShiftLeftParts(Op, DAG);
5865 case ISD::SRA_PARTS:
5866 return lowerShiftRightParts(Op, DAG, true);
5867 case ISD::SRL_PARTS:
5868 return lowerShiftRightParts(Op, DAG, false);
5869 case ISD::ROTL:
5870 case ISD::ROTR:
5871 if (Op.getValueType().isFixedLengthVector()) {
5872 assert(Subtarget.hasStdExtZvkb());
5873 return lowerToScalableOp(Op, DAG);
5875 assert(Subtarget.hasVendorXTHeadBb() &&
5876 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
5877 "Unexpected custom legalization");
5878 // XTHeadBb only supports rotate by constant.
5879 if (!isa<ConstantSDNode>(Op.getOperand(1)))
5880 return SDValue();
5881 return Op;
5882 case ISD::BITCAST: {
5883 SDLoc DL(Op);
5884 EVT VT = Op.getValueType();
5885 SDValue Op0 = Op.getOperand(0);
5886 EVT Op0VT = Op0.getValueType();
5887 MVT XLenVT = Subtarget.getXLenVT();
5888 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
5889 Subtarget.hasStdExtZfhminOrZhinxmin()) {
5890 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5891 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
5892 return FPConv;
5894 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
5895 Subtarget.hasStdExtZfbfmin()) {
5896 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5897 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
5898 return FPConv;
5900 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
5901 Subtarget.hasStdExtFOrZfinx()) {
5902 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5903 SDValue FPConv =
5904 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
5905 return FPConv;
5907 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
5908 Subtarget.hasStdExtZfa()) {
5909 SDValue Lo, Hi;
5910 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
5911 SDValue RetReg =
5912 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5913 return RetReg;
5916 // Consider other scalar<->scalar casts as legal if the types are legal.
5917 // Otherwise expand them.
5918 if (!VT.isVector() && !Op0VT.isVector()) {
5919 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
5920 return Op;
5921 return SDValue();
5924 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
5925 "Unexpected types");
5927 if (VT.isFixedLengthVector()) {
5928 // We can handle fixed length vector bitcasts with a simple replacement
5929 // in isel.
5930 if (Op0VT.isFixedLengthVector())
5931 return Op;
5932 // When bitcasting from scalar to fixed-length vector, insert the scalar
5933 // into a one-element vector of the result type, and perform a vector
5934 // bitcast.
5935 if (!Op0VT.isVector()) {
5936 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
5937 if (!isTypeLegal(BVT))
5938 return SDValue();
5939 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
5940 DAG.getUNDEF(BVT), Op0,
5941 DAG.getConstant(0, DL, XLenVT)));
5943 return SDValue();
5945 // Custom-legalize bitcasts from fixed-length vector types to scalar types
5946 // as follows: bitcast the vector to a one-element vector type whose element
5947 // type is the same as the result type, and extract the first element.
5948 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
5949 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
5950 if (!isTypeLegal(BVT))
5951 return SDValue();
5952 SDValue BVec = DAG.getBitcast(BVT, Op0);
5953 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
5954 DAG.getConstant(0, DL, XLenVT));
5956 return SDValue();
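// A few concrete examples of the BITCAST lowerings above (assuming the
// relevant extensions are enabled):
//   (bitcast i16 to f16)  -> (fmv_h_x (any_extend i16 to XLen))
//   (bitcast i32 to f32)  -> (fmv_w_x_rv64 (any_extend i32 to i64))   [RV64]
//   (bitcast i32 to v4i8) -> (bitcast (insert_vector_elt undef:v1i32, x, 0))
//   (bitcast v4i8 to i32) -> (extract_vector_elt (bitcast v4i8 to v1i32), 0)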
5958 case ISD::INTRINSIC_WO_CHAIN:
5959 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5960 case ISD::INTRINSIC_W_CHAIN:
5961 return LowerINTRINSIC_W_CHAIN(Op, DAG);
5962 case ISD::INTRINSIC_VOID:
5963 return LowerINTRINSIC_VOID(Op, DAG);
5964 case ISD::IS_FPCLASS:
5965 return LowerIS_FPCLASS(Op, DAG);
5966 case ISD::BITREVERSE: {
5967 MVT VT = Op.getSimpleValueType();
5968 if (VT.isFixedLengthVector()) {
5969 assert(Subtarget.hasStdExtZvbb());
5970 return lowerToScalableOp(Op, DAG);
5972 SDLoc DL(Op);
5973 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
5974 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
5975 // Expand bitreverse to a bswap(rev8) followed by brev8.
5976 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
5977 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
5979 case ISD::TRUNCATE:
5980 // Only custom-lower vector truncates
5981 if (!Op.getSimpleValueType().isVector())
5982 return Op;
5983 return lowerVectorTruncLike(Op, DAG);
5984 case ISD::ANY_EXTEND:
5985 case ISD::ZERO_EXTEND:
5986 if (Op.getOperand(0).getValueType().isVector() &&
5987 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5988 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
5989 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
5990 case ISD::SIGN_EXTEND:
5991 if (Op.getOperand(0).getValueType().isVector() &&
5992 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5993 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
5994 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
5995 case ISD::SPLAT_VECTOR_PARTS:
5996 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
5997 case ISD::INSERT_VECTOR_ELT:
5998 return lowerINSERT_VECTOR_ELT(Op, DAG);
5999 case ISD::EXTRACT_VECTOR_ELT:
6000 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6001 case ISD::SCALAR_TO_VECTOR: {
6002 MVT VT = Op.getSimpleValueType();
6003 SDLoc DL(Op);
6004 SDValue Scalar = Op.getOperand(0);
6005 if (VT.getVectorElementType() == MVT::i1) {
6006 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6007 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6008 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6010 MVT ContainerVT = VT;
6011 if (VT.isFixedLengthVector())
6012 ContainerVT = getContainerForFixedLengthVector(VT);
6013 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6014 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6015 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6016 DAG.getUNDEF(ContainerVT), Scalar, VL);
6017 if (VT.isFixedLengthVector())
6018 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6019 return V;
6021 case ISD::VSCALE: {
6022 MVT XLenVT = Subtarget.getXLenVT();
6023 MVT VT = Op.getSimpleValueType();
6024 SDLoc DL(Op);
6025 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6026 // We define our scalable vector types for lmul=1 to use a 64-bit known
6027 // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6028 // vscale as VLENB / 8.
6029 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6030 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6031 report_fatal_error("Support for VLEN==32 is incomplete.");
6032 // We assume VLENB is a multiple of 8. We manually choose the best shift
6033 // here because SimplifyDemandedBits isn't always able to simplify it.
6034 uint64_t Val = Op.getConstantOperandVal(0);
6035 if (isPowerOf2_64(Val)) {
6036 uint64_t Log2 = Log2_64(Val);
6037 if (Log2 < 3)
6038 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6039 DAG.getConstant(3 - Log2, DL, VT));
6040 else if (Log2 > 3)
6041 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6042 DAG.getConstant(Log2 - 3, DL, XLenVT));
6043 } else if ((Val % 8) == 0) {
6044 // If the multiplier is a multiple of 8, scale it down to avoid needing
6045 // to shift the VLENB value.
6046 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6047 DAG.getConstant(Val / 8, DL, XLenVT));
6048 } else {
6049 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6050 DAG.getConstant(3, DL, XLenVT));
6051 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6052 DAG.getConstant(Val, DL, XLenVT));
6054 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
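// Worked examples for the shift/multiply selection above (VLENB is the vector
// register length in bytes, so vscale == VLENB / 8):
//   vscale * 4  -> VLENB >> 1        (power of two, Log2 < 3)
//   vscale * 16 -> VLENB << 1        (power of two, Log2 > 3)
//   vscale * 24 -> VLENB * 3         (multiple of 8)
//   vscale * 6  -> (VLENB >> 3) * 6  (generic case)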
6056 case ISD::FPOWI: {
6057 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6058 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6059 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6060 Op.getOperand(1).getValueType() == MVT::i32) {
6061 SDLoc DL(Op);
6062 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6063 SDValue Powi =
6064 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6065 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6066 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6068 return SDValue();
6070 case ISD::FMAXIMUM:
6071 case ISD::FMINIMUM:
6072 if (Op.getValueType() == MVT::nxv32f16 &&
6073 (Subtarget.hasVInstructionsF16Minimal() &&
6074 !Subtarget.hasVInstructionsF16()))
6075 return SplitVectorOp(Op, DAG);
6076 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6077 case ISD::FP_EXTEND: {
6078 SDLoc DL(Op);
6079 EVT VT = Op.getValueType();
6080 SDValue Op0 = Op.getOperand(0);
6081 EVT Op0VT = Op0.getValueType();
6082 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6083 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6084 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6085 SDValue FloatVal =
6086 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6087 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6090 if (!Op.getValueType().isVector())
6091 return Op;
6092 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6094 case ISD::FP_ROUND: {
6095 SDLoc DL(Op);
6096 EVT VT = Op.getValueType();
6097 SDValue Op0 = Op.getOperand(0);
6098 EVT Op0VT = Op0.getValueType();
6099 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6100 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6101 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6102 Subtarget.hasStdExtDOrZdinx()) {
6103 SDValue FloatVal =
6104 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6105 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6106 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6109 if (!Op.getValueType().isVector())
6110 return Op;
6111 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6113 case ISD::STRICT_FP_ROUND:
6114 case ISD::STRICT_FP_EXTEND:
6115 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6116 case ISD::SINT_TO_FP:
6117 case ISD::UINT_TO_FP:
6118 if (Op.getValueType().isVector() &&
6119 Op.getValueType().getScalarType() == MVT::f16 &&
6120 (Subtarget.hasVInstructionsF16Minimal() &&
6121 !Subtarget.hasVInstructionsF16())) {
6122 if (Op.getValueType() == MVT::nxv32f16)
6123 return SplitVectorOp(Op, DAG);
6124 // int -> f32
6125 SDLoc DL(Op);
6126 MVT NVT =
6127 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6128 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6129 // f32 -> f16
6130 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6131 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6133 [[fallthrough]];
6134 case ISD::FP_TO_SINT:
6135 case ISD::FP_TO_UINT:
6136 if (SDValue Op1 = Op.getOperand(0);
6137 Op1.getValueType().isVector() &&
6138 Op1.getValueType().getScalarType() == MVT::f16 &&
6139 (Subtarget.hasVInstructionsF16Minimal() &&
6140 !Subtarget.hasVInstructionsF16())) {
6141 if (Op1.getValueType() == MVT::nxv32f16)
6142 return SplitVectorOp(Op, DAG);
6143 // f16 -> f32
6144 SDLoc DL(Op);
6145 MVT NVT = MVT::getVectorVT(MVT::f32,
6146 Op1.getValueType().getVectorElementCount());
6147 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6148 // f32 -> int
6149 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6151 [[fallthrough]];
6152 case ISD::STRICT_FP_TO_SINT:
6153 case ISD::STRICT_FP_TO_UINT:
6154 case ISD::STRICT_SINT_TO_FP:
6155 case ISD::STRICT_UINT_TO_FP: {
6156 // RVV can only do fp<->int conversions to types that are half or double the
6157 // size of the source. We custom-lower any conversion that needs two hops
6158 // into a sequence of operations.
6159 MVT VT = Op.getSimpleValueType();
6160 if (!VT.isVector())
6161 return Op;
6162 SDLoc DL(Op);
6163 bool IsStrict = Op->isStrictFPOpcode();
6164 SDValue Src = Op.getOperand(0 + IsStrict);
6165 MVT EltVT = VT.getVectorElementType();
6166 MVT SrcVT = Src.getSimpleValueType();
6167 MVT SrcEltVT = SrcVT.getVectorElementType();
6168 unsigned EltSize = EltVT.getSizeInBits();
6169 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6170 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6171 "Unexpected vector element types");
6173 bool IsInt2FP = SrcEltVT.isInteger();
6174 // Widening conversions
6175 if (EltSize > (2 * SrcEltSize)) {
6176 if (IsInt2FP) {
6177 // Do a regular integer sign/zero extension then convert to float.
6178 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6179 VT.getVectorElementCount());
6180 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6181 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6182 ? ISD::ZERO_EXTEND
6183 : ISD::SIGN_EXTEND;
6184 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6185 if (IsStrict)
6186 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6187 Op.getOperand(0), Ext);
6188 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6190 // FP2Int
6191 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6192 // Do one doubling fp_extend then complete the operation by converting
6193 // to int.
6194 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6195 if (IsStrict) {
6196 auto [FExt, Chain] =
6197 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6198 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6200 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6201 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6204 // Narrowing conversions
6205 if (SrcEltSize > (2 * EltSize)) {
6206 if (IsInt2FP) {
6207 // One narrowing int_to_fp, then an fp_round.
6208 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6209 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6210 if (IsStrict) {
6211 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6212 DAG.getVTList(InterimFVT, MVT::Other),
6213 Op.getOperand(0), Src);
6214 SDValue Chain = Int2FP.getValue(1);
6215 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6217 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6218 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6220 // FP2Int
6221 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6222 // representable by the integer, the result is poison.
6223 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6224 VT.getVectorElementCount());
6225 if (IsStrict) {
6226 SDValue FP2Int =
6227 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6228 Op.getOperand(0), Src);
6229 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6230 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6232 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6233 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6236 // Scalable vectors can exit here. Patterns will handle conversions between
6237 // equally-sized types as well as those that halve or double the element size.
6238 if (!VT.isFixedLengthVector())
6239 return Op;
6241 // For fixed-length vectors we lower to a custom "VL" node.
6242 unsigned RVVOpc = 0;
6243 switch (Op.getOpcode()) {
6244 default:
6245 llvm_unreachable("Impossible opcode");
6246 case ISD::FP_TO_SINT:
6247 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6248 break;
6249 case ISD::FP_TO_UINT:
6250 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6251 break;
6252 case ISD::SINT_TO_FP:
6253 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6254 break;
6255 case ISD::UINT_TO_FP:
6256 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6257 break;
6258 case ISD::STRICT_FP_TO_SINT:
6259 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6260 break;
6261 case ISD::STRICT_FP_TO_UINT:
6262 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6263 break;
6264 case ISD::STRICT_SINT_TO_FP:
6265 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6266 break;
6267 case ISD::STRICT_UINT_TO_FP:
6268 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6269 break;
6272 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6273 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6274 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6275 "Expected same element count");
6277 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6279 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6280 if (IsStrict) {
6281 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6282 Op.getOperand(0), Src, Mask, VL);
6283 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6284 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6286 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6287 return convertFromScalableVector(VT, Src, DAG, Subtarget);
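// Rough examples of the two-hop conversions handled above:
//   nxv2i8  -> nxv2f64: sign/zero-extend to nxv2i32, then convert to f64
//   nxv2f16 -> nxv2i64: fp_extend to nxv2f32, then convert to i64
//   nxv2i64 -> nxv2f16: convert to nxv2f32, then fp_round to f16
//   nxv2f64 -> nxv2i8 : convert to nxv2i32, then truncate to i8
// Fixed-length vectors additionally go through the *_VL nodes selected by the
// switch above after being converted to their scalable containers.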
6289 case ISD::FP_TO_SINT_SAT:
6290 case ISD::FP_TO_UINT_SAT:
6291 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6292 case ISD::FP_TO_BF16: {
6293 // Custom lower to ensure the libcall return is passed in an FPR on hard
6294 // float ABIs.
6295 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6296 SDLoc DL(Op);
6297 MakeLibCallOptions CallOptions;
6298 RTLIB::Libcall LC =
6299 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6300 SDValue Res =
6301 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6302 if (Subtarget.is64Bit() && !RV64LegalI32)
6303 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6304 return DAG.getBitcast(MVT::i32, Res);
6306 case ISD::BF16_TO_FP: {
6307 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6308 MVT VT = Op.getSimpleValueType();
6309 SDLoc DL(Op);
6310 Op = DAG.getNode(
6311 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6312 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6313 SDValue Res = Subtarget.is64Bit()
6314 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6315 : DAG.getBitcast(MVT::f32, Op);
6316 // fp_extend if the target VT is bigger than f32.
6317 if (VT != MVT::f32)
6318 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6319 return Res;
6321 case ISD::FP_TO_FP16: {
6322 // Custom lower to ensure the libcall return is passed in an FPR on hard
6323 // float ABIs.
6324 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6325 SDLoc DL(Op);
6326 MakeLibCallOptions CallOptions;
6327 RTLIB::Libcall LC =
6328 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6329 SDValue Res =
6330 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6331 if (Subtarget.is64Bit() && !RV64LegalI32)
6332 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6333 return DAG.getBitcast(MVT::i32, Res);
6335 case ISD::FP16_TO_FP: {
6336 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6337 // float ABIs.
6338 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6339 SDLoc DL(Op);
6340 MakeLibCallOptions CallOptions;
6341 SDValue Arg = Subtarget.is64Bit()
6342 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6343 Op.getOperand(0))
6344 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6345 SDValue Res =
6346 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6347 .first;
6348 return Res;
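// For example, on RV64 with F, FP_TO_FP16 typically becomes a call to the
// __truncsfhf2 libcall (for an f32 source) returning f32 in an FPR, whose bits
// are then moved to a GPR with FMV_X_ANYEXTW_RV64; FP16_TO_FP does the
// reverse, moving the i16 bits into an FPR with FMV_W_X_RV64 before calling
// __extendhfsf2.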
6350 case ISD::FTRUNC:
6351 case ISD::FCEIL:
6352 case ISD::FFLOOR:
6353 case ISD::FNEARBYINT:
6354 case ISD::FRINT:
6355 case ISD::FROUND:
6356 case ISD::FROUNDEVEN:
6357 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6358 case ISD::LRINT:
6359 case ISD::LLRINT:
6360 return lowerVectorXRINT(Op, DAG, Subtarget);
6361 case ISD::VECREDUCE_ADD:
6362 case ISD::VECREDUCE_UMAX:
6363 case ISD::VECREDUCE_SMAX:
6364 case ISD::VECREDUCE_UMIN:
6365 case ISD::VECREDUCE_SMIN:
6366 return lowerVECREDUCE(Op, DAG);
6367 case ISD::VECREDUCE_AND:
6368 case ISD::VECREDUCE_OR:
6369 case ISD::VECREDUCE_XOR:
6370 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6371 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6372 return lowerVECREDUCE(Op, DAG);
6373 case ISD::VECREDUCE_FADD:
6374 case ISD::VECREDUCE_SEQ_FADD:
6375 case ISD::VECREDUCE_FMIN:
6376 case ISD::VECREDUCE_FMAX:
6377 return lowerFPVECREDUCE(Op, DAG);
6378 case ISD::VP_REDUCE_ADD:
6379 case ISD::VP_REDUCE_UMAX:
6380 case ISD::VP_REDUCE_SMAX:
6381 case ISD::VP_REDUCE_UMIN:
6382 case ISD::VP_REDUCE_SMIN:
6383 case ISD::VP_REDUCE_FADD:
6384 case ISD::VP_REDUCE_SEQ_FADD:
6385 case ISD::VP_REDUCE_FMIN:
6386 case ISD::VP_REDUCE_FMAX:
6387 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6388 (Subtarget.hasVInstructionsF16Minimal() &&
6389 !Subtarget.hasVInstructionsF16()))
6390 return SplitVectorReductionOp(Op, DAG);
6391 return lowerVPREDUCE(Op, DAG);
6392 case ISD::VP_REDUCE_AND:
6393 case ISD::VP_REDUCE_OR:
6394 case ISD::VP_REDUCE_XOR:
6395 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6396 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6397 return lowerVPREDUCE(Op, DAG);
6398 case ISD::UNDEF: {
6399 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6400 return convertFromScalableVector(Op.getSimpleValueType(),
6401 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6403 case ISD::INSERT_SUBVECTOR:
6404 return lowerINSERT_SUBVECTOR(Op, DAG);
6405 case ISD::EXTRACT_SUBVECTOR:
6406 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6407 case ISD::VECTOR_DEINTERLEAVE:
6408 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6409 case ISD::VECTOR_INTERLEAVE:
6410 return lowerVECTOR_INTERLEAVE(Op, DAG);
6411 case ISD::STEP_VECTOR:
6412 return lowerSTEP_VECTOR(Op, DAG);
6413 case ISD::VECTOR_REVERSE:
6414 return lowerVECTOR_REVERSE(Op, DAG);
6415 case ISD::VECTOR_SPLICE:
6416 return lowerVECTOR_SPLICE(Op, DAG);
6417 case ISD::BUILD_VECTOR:
6418 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6419 case ISD::SPLAT_VECTOR:
6420 if (Op.getValueType().getScalarType() == MVT::f16 &&
6421 (Subtarget.hasVInstructionsF16Minimal() &&
6422 !Subtarget.hasVInstructionsF16())) {
6423 if (Op.getValueType() == MVT::nxv32f16)
6424 return SplitVectorOp(Op, DAG);
6425 SDLoc DL(Op);
6426 SDValue NewScalar =
6427 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6428 SDValue NewSplat = DAG.getNode(
6429 ISD::SPLAT_VECTOR, DL,
6430 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6431 NewScalar);
6432 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6433 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6435 if (Op.getValueType().getVectorElementType() == MVT::i1)
6436 return lowerVectorMaskSplat(Op, DAG);
6437 return SDValue();
6438 case ISD::VECTOR_SHUFFLE:
6439 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6440 case ISD::CONCAT_VECTORS: {
6441 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6442 // better than going through the stack, as the default expansion does.
6443 SDLoc DL(Op);
6444 MVT VT = Op.getSimpleValueType();
6445 unsigned NumOpElts =
6446 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6447 SDValue Vec = DAG.getUNDEF(VT);
6448 for (const auto &OpIdx : enumerate(Op->ops())) {
6449 SDValue SubVec = OpIdx.value();
6450 // Don't insert undef subvectors.
6451 if (SubVec.isUndef())
6452 continue;
6453 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6454 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
6456 return Vec;
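// E.g. (concat_vectors v4i32:a, v4i32:b) becomes
//   (insert_subvector (insert_subvector undef:v8i32, a, 0), b, 4)
// with undef operands simply skipped.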
6458 case ISD::LOAD:
6459 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6460 return V;
6461 if (Op.getValueType().isFixedLengthVector())
6462 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6463 return Op;
6464 case ISD::STORE:
6465 if (auto V = expandUnalignedRVVStore(Op, DAG))
6466 return V;
6467 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6468 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6469 return Op;
6470 case ISD::MLOAD:
6471 case ISD::VP_LOAD:
6472 return lowerMaskedLoad(Op, DAG);
6473 case ISD::MSTORE:
6474 case ISD::VP_STORE:
6475 return lowerMaskedStore(Op, DAG);
6476 case ISD::SELECT_CC: {
6477 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6478 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6479 // into separate SETCC+SELECT just like LegalizeDAG.
6480 SDValue Tmp1 = Op.getOperand(0);
6481 SDValue Tmp2 = Op.getOperand(1);
6482 SDValue True = Op.getOperand(2);
6483 SDValue False = Op.getOperand(3);
6484 EVT VT = Op.getValueType();
6485 SDValue CC = Op.getOperand(4);
6486 EVT CmpVT = Tmp1.getValueType();
6487 EVT CCVT =
6488 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6489 SDLoc DL(Op);
6490 SDValue Cond =
6491 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6492 return DAG.getSelect(DL, VT, Cond, True, False);
6494 case ISD::SETCC: {
6495 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6496 if (OpVT.isScalarInteger()) {
6497 MVT VT = Op.getSimpleValueType();
6498 SDValue LHS = Op.getOperand(0);
6499 SDValue RHS = Op.getOperand(1);
6500 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6501 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6502 "Unexpected CondCode");
6504 SDLoc DL(Op);
6506 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6507 // convert this to the equivalent of (set(u)ge X, C+1) by using
6508 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6509 // in a register.
6510 if (isa<ConstantSDNode>(RHS)) {
6511 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6512 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6513 // If this is an unsigned compare and the constant is -1, incrementing
6514 // the constant would change behavior. The result should be false.
6515 if (CCVal == ISD::SETUGT && Imm == -1)
6516 return DAG.getConstant(0, DL, VT);
6517 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6518 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6519 SDValue SetCC = DAG.getSetCC(
6520 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6521 return DAG.getLogicalNOT(DL, SetCC, VT);
6525 // Not a constant we could handle, swap the operands and condition code to
6526 // SETLT/SETULT.
6527 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6528 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6531 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6532 (Subtarget.hasVInstructionsF16Minimal() &&
6533 !Subtarget.hasVInstructionsF16()))
6534 return SplitVectorOp(Op, DAG);
6536 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
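// Example of the scalar SETCC constant rewrite above: (setgt X, 5) becomes
// (xor (setlt X, 6), 1), i.e. (xori (slti X, 6), 1) after isel, so the
// constant folds into the compare instead of needing its own register.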
6538 case ISD::ADD:
6539 case ISD::SUB:
6540 case ISD::MUL:
6541 case ISD::MULHS:
6542 case ISD::MULHU:
6543 case ISD::AND:
6544 case ISD::OR:
6545 case ISD::XOR:
6546 case ISD::SDIV:
6547 case ISD::SREM:
6548 case ISD::UDIV:
6549 case ISD::UREM:
6550 case ISD::BSWAP:
6551 case ISD::CTPOP:
6552 return lowerToScalableOp(Op, DAG);
6553 case ISD::SHL:
6554 case ISD::SRA:
6555 case ISD::SRL:
6556 if (Op.getSimpleValueType().isFixedLengthVector())
6557 return lowerToScalableOp(Op, DAG);
6558 // This can be called for an i32 shift amount that needs to be promoted.
6559 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6560 "Unexpected custom legalisation");
6561 return SDValue();
6562 case ISD::FADD:
6563 case ISD::FSUB:
6564 case ISD::FMUL:
6565 case ISD::FDIV:
6566 case ISD::FNEG:
6567 case ISD::FABS:
6568 case ISD::FSQRT:
6569 case ISD::FMA:
6570 case ISD::FMINNUM:
6571 case ISD::FMAXNUM:
6572 if (Op.getValueType() == MVT::nxv32f16 &&
6573 (Subtarget.hasVInstructionsF16Minimal() &&
6574 !Subtarget.hasVInstructionsF16()))
6575 return SplitVectorOp(Op, DAG);
6576 [[fallthrough]];
6577 case ISD::AVGFLOORU:
6578 case ISD::AVGCEILU:
6579 case ISD::SADDSAT:
6580 case ISD::UADDSAT:
6581 case ISD::SSUBSAT:
6582 case ISD::USUBSAT:
6583 case ISD::SMIN:
6584 case ISD::SMAX:
6585 case ISD::UMIN:
6586 case ISD::UMAX:
6587 return lowerToScalableOp(Op, DAG);
6588 case ISD::ABS:
6589 case ISD::VP_ABS:
6590 return lowerABS(Op, DAG);
6591 case ISD::CTLZ:
6592 case ISD::CTLZ_ZERO_UNDEF:
6593 case ISD::CTTZ:
6594 case ISD::CTTZ_ZERO_UNDEF:
6595 if (Subtarget.hasStdExtZvbb())
6596 return lowerToScalableOp(Op, DAG);
6597 assert(Op.getOpcode() != ISD::CTTZ);
6598 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6599 case ISD::VSELECT:
6600 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6601 case ISD::FCOPYSIGN:
6602 if (Op.getValueType() == MVT::nxv32f16 &&
6603 (Subtarget.hasVInstructionsF16Minimal() &&
6604 !Subtarget.hasVInstructionsF16()))
6605 return SplitVectorOp(Op, DAG);
6606 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6607 case ISD::STRICT_FADD:
6608 case ISD::STRICT_FSUB:
6609 case ISD::STRICT_FMUL:
6610 case ISD::STRICT_FDIV:
6611 case ISD::STRICT_FSQRT:
6612 case ISD::STRICT_FMA:
6613 if (Op.getValueType() == MVT::nxv32f16 &&
6614 (Subtarget.hasVInstructionsF16Minimal() &&
6615 !Subtarget.hasVInstructionsF16()))
6616 return SplitStrictFPVectorOp(Op, DAG);
6617 return lowerToScalableOp(Op, DAG);
6618 case ISD::STRICT_FSETCC:
6619 case ISD::STRICT_FSETCCS:
6620 return lowerVectorStrictFSetcc(Op, DAG);
6621 case ISD::STRICT_FCEIL:
6622 case ISD::STRICT_FRINT:
6623 case ISD::STRICT_FFLOOR:
6624 case ISD::STRICT_FTRUNC:
6625 case ISD::STRICT_FNEARBYINT:
6626 case ISD::STRICT_FROUND:
6627 case ISD::STRICT_FROUNDEVEN:
6628 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6629 case ISD::MGATHER:
6630 case ISD::VP_GATHER:
6631 return lowerMaskedGather(Op, DAG);
6632 case ISD::MSCATTER:
6633 case ISD::VP_SCATTER:
6634 return lowerMaskedScatter(Op, DAG);
6635 case ISD::GET_ROUNDING:
6636 return lowerGET_ROUNDING(Op, DAG);
6637 case ISD::SET_ROUNDING:
6638 return lowerSET_ROUNDING(Op, DAG);
6639 case ISD::EH_DWARF_CFA:
6640 return lowerEH_DWARF_CFA(Op, DAG);
6641 case ISD::VP_SELECT:
6642 case ISD::VP_MERGE:
6643 case ISD::VP_ADD:
6644 case ISD::VP_SUB:
6645 case ISD::VP_MUL:
6646 case ISD::VP_SDIV:
6647 case ISD::VP_UDIV:
6648 case ISD::VP_SREM:
6649 case ISD::VP_UREM:
6650 return lowerVPOp(Op, DAG);
6651 case ISD::VP_AND:
6652 case ISD::VP_OR:
6653 case ISD::VP_XOR:
6654 return lowerLogicVPOp(Op, DAG);
6655 case ISD::VP_FADD:
6656 case ISD::VP_FSUB:
6657 case ISD::VP_FMUL:
6658 case ISD::VP_FDIV:
6659 case ISD::VP_FNEG:
6660 case ISD::VP_FABS:
6661 case ISD::VP_SQRT:
6662 case ISD::VP_FMA:
6663 case ISD::VP_FMINNUM:
6664 case ISD::VP_FMAXNUM:
6665 case ISD::VP_FCOPYSIGN:
6666 if (Op.getValueType() == MVT::nxv32f16 &&
6667 (Subtarget.hasVInstructionsF16Minimal() &&
6668 !Subtarget.hasVInstructionsF16()))
6669 return SplitVPOp(Op, DAG);
6670 [[fallthrough]];
6671 case ISD::VP_ASHR:
6672 case ISD::VP_LSHR:
6673 case ISD::VP_SHL:
6674 return lowerVPOp(Op, DAG);
6675 case ISD::VP_IS_FPCLASS:
6676 return LowerIS_FPCLASS(Op, DAG);
6677 case ISD::VP_SIGN_EXTEND:
6678 case ISD::VP_ZERO_EXTEND:
6679 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6680 return lowerVPExtMaskOp(Op, DAG);
6681 return lowerVPOp(Op, DAG);
6682 case ISD::VP_TRUNCATE:
6683 return lowerVectorTruncLike(Op, DAG);
6684 case ISD::VP_FP_EXTEND:
6685 case ISD::VP_FP_ROUND:
6686 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6687 case ISD::VP_SINT_TO_FP:
6688 case ISD::VP_UINT_TO_FP:
6689 if (Op.getValueType().isVector() &&
6690 Op.getValueType().getScalarType() == MVT::f16 &&
6691 (Subtarget.hasVInstructionsF16Minimal() &&
6692 !Subtarget.hasVInstructionsF16())) {
6693 if (Op.getValueType() == MVT::nxv32f16)
6694 return SplitVPOp(Op, DAG);
6695 // int -> f32
6696 SDLoc DL(Op);
6697 MVT NVT =
6698 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6699 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6700 // f32 -> f16
6701 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6702 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6704 [[fallthrough]];
6705 case ISD::VP_FP_TO_SINT:
6706 case ISD::VP_FP_TO_UINT:
6707 if (SDValue Op1 = Op.getOperand(0);
6708 Op1.getValueType().isVector() &&
6709 Op1.getValueType().getScalarType() == MVT::f16 &&
6710 (Subtarget.hasVInstructionsF16Minimal() &&
6711 !Subtarget.hasVInstructionsF16())) {
6712 if (Op1.getValueType() == MVT::nxv32f16)
6713 return SplitVPOp(Op, DAG);
6714 // f16 -> f32
6715 SDLoc DL(Op);
6716 MVT NVT = MVT::getVectorVT(MVT::f32,
6717 Op1.getValueType().getVectorElementCount());
6718 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6719 // f32 -> int
6720 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6721 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6723 return lowerVPFPIntConvOp(Op, DAG);
6724 case ISD::VP_SETCC:
6725 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6726 (Subtarget.hasVInstructionsF16Minimal() &&
6727 !Subtarget.hasVInstructionsF16()))
6728 return SplitVPOp(Op, DAG);
6729 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6730 return lowerVPSetCCMaskOp(Op, DAG);
6731 [[fallthrough]];
6732 case ISD::VP_SMIN:
6733 case ISD::VP_SMAX:
6734 case ISD::VP_UMIN:
6735 case ISD::VP_UMAX:
6736 case ISD::VP_BITREVERSE:
6737 case ISD::VP_BSWAP:
6738 return lowerVPOp(Op, DAG);
6739 case ISD::VP_CTLZ:
6740 case ISD::VP_CTLZ_ZERO_UNDEF:
6741 if (Subtarget.hasStdExtZvbb())
6742 return lowerVPOp(Op, DAG);
6743 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6744 case ISD::VP_CTTZ:
6745 case ISD::VP_CTTZ_ZERO_UNDEF:
6746 if (Subtarget.hasStdExtZvbb())
6747 return lowerVPOp(Op, DAG);
6748 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6749 case ISD::VP_CTPOP:
6750 return lowerVPOp(Op, DAG);
6751 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
6752 return lowerVPStridedLoad(Op, DAG);
6753 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
6754 return lowerVPStridedStore(Op, DAG);
6755 case ISD::VP_FCEIL:
6756 case ISD::VP_FFLOOR:
6757 case ISD::VP_FRINT:
6758 case ISD::VP_FNEARBYINT:
6759 case ISD::VP_FROUND:
6760 case ISD::VP_FROUNDEVEN:
6761 case ISD::VP_FROUNDTOZERO:
6762 if (Op.getValueType() == MVT::nxv32f16 &&
6763 (Subtarget.hasVInstructionsF16Minimal() &&
6764 !Subtarget.hasVInstructionsF16()))
6765 return SplitVPOp(Op, DAG);
6766 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6767 case ISD::VP_FMAXIMUM:
6768 case ISD::VP_FMINIMUM:
6769 if (Op.getValueType() == MVT::nxv32f16 &&
6770 (Subtarget.hasVInstructionsF16Minimal() &&
6771 !Subtarget.hasVInstructionsF16()))
6772 return SplitVPOp(Op, DAG);
6773 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6774 case ISD::EXPERIMENTAL_VP_SPLICE:
6775 return lowerVPSpliceExperimental(Op, DAG);
6776 case ISD::EXPERIMENTAL_VP_REVERSE:
6777 return lowerVPReverseExperimental(Op, DAG);
6781 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
6782 SelectionDAG &DAG, unsigned Flags) {
6783 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
6786 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
6787 SelectionDAG &DAG, unsigned Flags) {
6788 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
6789 Flags);
6792 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
6793 SelectionDAG &DAG, unsigned Flags) {
6794 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6795 N->getOffset(), Flags);
6798 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
6799 SelectionDAG &DAG, unsigned Flags) {
6800 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
6803 template <class NodeTy>
6804 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6805 bool IsLocal, bool IsExternWeak) const {
6806 SDLoc DL(N);
6807 EVT Ty = getPointerTy(DAG.getDataLayout());
6809 // When HWASAN is used and tagging of global variables is enabled, globals
6810 // should be accessed via the GOT, since the tagged address of a global is
6811 // incompatible with existing code models. This also applies to non-PIC
6812 // mode.
6813 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
6814 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6815 if (IsLocal && !Subtarget.allowTaggedGlobals())
6816 // Use PC-relative addressing to access the symbol. This generates the
6817 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
6818 // %pcrel_lo(auipc)).
6819 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6821 // Use PC-relative addressing to access the GOT for this symbol, then load
6822 // the address from the GOT. This generates the pattern (PseudoLGA sym),
6823 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6824 SDValue Load =
6825 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6826 MachineFunction &MF = DAG.getMachineFunction();
6827 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6828 MachinePointerInfo::getGOT(MF),
6829 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6830 MachineMemOperand::MOInvariant,
6831 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6832 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6833 return Load;
6836 switch (getTargetMachine().getCodeModel()) {
6837 default:
6838 report_fatal_error("Unsupported code model for lowering");
6839 case CodeModel::Small: {
6840 // Generate a sequence for accessing addresses within the first 2 GiB of
6841 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
6842 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
6843 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
6844 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6845 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
6847 case CodeModel::Medium: {
6848 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6849 if (IsExternWeak) {
6850 // An extern weak symbol may be undefined, i.e. have value 0, which may
6851 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
6852 // symbol. This generates the pattern (PseudoLGA sym), which expands to
6853 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6854 SDValue Load =
6855 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6856 MachineFunction &MF = DAG.getMachineFunction();
6857 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6858 MachinePointerInfo::getGOT(MF),
6859 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6860 MachineMemOperand::MOInvariant,
6861 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6862 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6863 return Load;
6866 // Generate a sequence for accessing addresses within any 2GiB range within
6867 // the address space. This generates the pattern (PseudoLLA sym), which
6868 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
6869 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
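// As a rough guide to the sequences chosen above (rd stands in for the
// destination register):
//   PIC, local:   auipc rd, %pcrel_hi(sym); addi rd, rd, %pcrel_lo(auipc)
//   small model:  lui rd, %hi(sym);         addi rd, rd, %lo(sym)
//   medium model: the same auipc/addi PC-relative sequence via PseudoLLA
//   GOT cases:    PseudoLGA, i.e. auipc with %got_pcrel_hi plus a load
// (used for non-local or tagged symbols in PIC/HWASAN mode and for
// extern-weak symbols in the medium code model).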
6874 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
6875 SelectionDAG &DAG) const {
6876 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6877 assert(N->getOffset() == 0 && "unexpected offset in global node");
6878 const GlobalValue *GV = N->getGlobal();
6879 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
6882 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
6883 SelectionDAG &DAG) const {
6884 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
6886 return getAddr(N, DAG);
6889 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
6890 SelectionDAG &DAG) const {
6891 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
6893 return getAddr(N, DAG);
6896 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
6897 SelectionDAG &DAG) const {
6898 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
6900 return getAddr(N, DAG);
6903 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
6904 SelectionDAG &DAG,
6905 bool UseGOT) const {
6906 SDLoc DL(N);
6907 EVT Ty = getPointerTy(DAG.getDataLayout());
6908 const GlobalValue *GV = N->getGlobal();
6909 MVT XLenVT = Subtarget.getXLenVT();
6911 if (UseGOT) {
6912 // Use PC-relative addressing to access the GOT for this TLS symbol, then
6913 // load the address from the GOT and add the thread pointer. This generates
6914 // the pattern (PseudoLA_TLS_IE sym), which expands to
6915 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
6916 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6917 SDValue Load =
6918 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
6919 MachineFunction &MF = DAG.getMachineFunction();
6920 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6921 MachinePointerInfo::getGOT(MF),
6922 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6923 MachineMemOperand::MOInvariant,
6924 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6925 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6927 // Add the thread pointer.
6928 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6929 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
6932 // Generate a sequence for accessing the address relative to the thread
6933 // pointer, with the appropriate adjustment for the thread pointer offset.
6934 // This generates the pattern
6935 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
6936 SDValue AddrHi =
6937 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
6938 SDValue AddrAdd =
6939 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
6940 SDValue AddrLo =
6941 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
6943 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6944 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6945 SDValue MNAdd =
6946 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
6947 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
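// The local-exec path above corresponds to a sequence along the lines of:
//   lui  rd, %tprel_hi(sym)
//   add  rd, rd, tp, %tprel_add(sym)
//   addi rd, rd, %tprel_lo(sym)
// whereas the initial-exec (UseGOT) path loads the offset from the GOT and
// then adds the thread pointer explicitly.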
6950 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
6951 SelectionDAG &DAG) const {
6952 SDLoc DL(N);
6953 EVT Ty = getPointerTy(DAG.getDataLayout());
6954 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
6955 const GlobalValue *GV = N->getGlobal();
6957 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6958 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
6959 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
6960 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6961 SDValue Load =
6962 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
6964 // Prepare argument list to generate call.
6965 ArgListTy Args;
6966 ArgListEntry Entry;
6967 Entry.Node = Load;
6968 Entry.Ty = CallTy;
6969 Args.push_back(Entry);
6971 // Setup call to __tls_get_addr.
6972 TargetLowering::CallLoweringInfo CLI(DAG);
6973 CLI.setDebugLoc(DL)
6974 .setChain(DAG.getEntryNode())
6975 .setLibCallee(CallingConv::C, CallTy,
6976 DAG.getExternalSymbol("__tls_get_addr", Ty),
6977 std::move(Args));
6979 return LowerCallTo(CLI).first;
6982 SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
6983 SelectionDAG &DAG) const {
6984 SDLoc DL(N);
6985 EVT Ty = getPointerTy(DAG.getDataLayout());
6986 const GlobalValue *GV = N->getGlobal();
6988 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6989 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
6991 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
6992 // lw tY, tX, %tlsdesc_lo_load(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label)
6993 // addi a0, tX, %tlsdesc_lo_add(label) // R_RISCV_TLSDESC_ADD_LO12_I(label)
6994 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
6995 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6996 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
6999 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7000 SelectionDAG &DAG) const {
7001 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7002 assert(N->getOffset() == 0 && "unexpected offset in global node");
7004 if (DAG.getTarget().useEmulatedTLS())
7005 return LowerToTLSEmulatedModel(N, DAG);
7007 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7009 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7010 CallingConv::GHC)
7011 report_fatal_error("In GHC calling convention TLS is not supported");
7013 SDValue Addr;
7014 switch (Model) {
7015 case TLSModel::LocalExec:
7016 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7017 break;
7018 case TLSModel::InitialExec:
7019 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7020 break;
7021 case TLSModel::LocalDynamic:
7022 case TLSModel::GeneralDynamic:
7023 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7024 : getDynamicTLSAddr(N, DAG);
7025 break;
7028 return Addr;
7031 // Return true if Val is equal to (setcc LHS, RHS, CC).
7032 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
7033 // Otherwise, return std::nullopt.
7034 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7035 ISD::CondCode CC, SDValue Val) {
7036 assert(Val->getOpcode() == ISD::SETCC);
7037 SDValue LHS2 = Val.getOperand(0);
7038 SDValue RHS2 = Val.getOperand(1);
7039 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7041 if (LHS == LHS2 && RHS == RHS2) {
7042 if (CC == CC2)
7043 return true;
7044 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7045 return false;
7046 } else if (LHS == RHS2 && RHS == LHS2) {
7047 CC2 = ISD::getSetCCSwappedOperands(CC2);
7048 if (CC == CC2)
7049 return true;
7050 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7051 return false;
7054 return std::nullopt;
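// For example, with integer operands a and b and CC = SETLT:
//   matchSetCC(a, b, SETLT, (setcc a, b, SETLT)) == true
//   matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) == false   (inverse)
//   matchSetCC(a, b, SETLT, (setcc b, a, SETGT)) == true    (swapped operands)
//   matchSetCC(a, b, SETLT, (setcc a, c, SETLT)) == std::nullopt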
7057 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7058 const RISCVSubtarget &Subtarget) {
7059 SDValue CondV = N->getOperand(0);
7060 SDValue TrueV = N->getOperand(1);
7061 SDValue FalseV = N->getOperand(2);
7062 MVT VT = N->getSimpleValueType(0);
7063 SDLoc DL(N);
7065 if (!Subtarget.hasConditionalMoveFusion()) {
7066 // (select c, -1, y) -> -c | y
7067 if (isAllOnesConstant(TrueV)) {
7068 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7069 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
7071 // (select c, y, -1) -> (c-1) | y
7072 if (isAllOnesConstant(FalseV)) {
7073 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7074 DAG.getAllOnesConstant(DL, VT));
7075 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
7078 // (select c, 0, y) -> (c-1) & y
7079 if (isNullConstant(TrueV)) {
7080 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7081 DAG.getAllOnesConstant(DL, VT));
7082 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
7084 // (select c, y, 0) -> -c & y
7085 if (isNullConstant(FalseV)) {
7086 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7087 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
7091 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7092 // when both truev and falsev are also setcc.
7093 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7094 FalseV.getOpcode() == ISD::SETCC) {
7095 SDValue LHS = CondV.getOperand(0);
7096 SDValue RHS = CondV.getOperand(1);
7097 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7099 // (select x, x, y) -> x | y
7100 // (select !x, x, y) -> x & y
7101 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7102 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7103 FalseV);
7105 // (select x, y, x) -> x & y
7106 // (select !x, y, x) -> x | y
7107 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7108 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
7109 FalseV);
7113 return SDValue();
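// These folds rely on the select condition being a 0/1 value: e.g. for
// (select c, -1, y), the negation -c is either 0 or all-ones, so
// (or (neg c), y) yields y when c is 0 and -1 when c is 1. Likewise (c - 1)
// is all-ones when c is 0 and 0 when c is 1, which drives the
// (select c, y, -1) and (select c, 0, y) variants.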
7116 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7117 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7118 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7119 // being `0` or `-1`. In such cases we can replace `select` with `and`.
7120 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7121 // than `c0`?
7122 static SDValue
7123 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7124 const RISCVSubtarget &Subtarget) {
7125 if (Subtarget.hasShortForwardBranchOpt())
7126 return SDValue();
7128 unsigned SelOpNo = 0;
7129 SDValue Sel = BO->getOperand(0);
7130 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7131 SelOpNo = 1;
7132 Sel = BO->getOperand(1);
7135 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7136 return SDValue();
7138 unsigned ConstSelOpNo = 1;
7139 unsigned OtherSelOpNo = 2;
7140 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7141 ConstSelOpNo = 2;
7142 OtherSelOpNo = 1;
7144 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7145 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7146 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7147 return SDValue();
7149 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7150 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7151 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7152 return SDValue();
7154 SDLoc DL(Sel);
7155 EVT VT = BO->getValueType(0);
7157 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7158 if (SelOpNo == 1)
7159 std::swap(NewConstOps[0], NewConstOps[1]);
7161 SDValue NewConstOp =
7162 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7163 if (!NewConstOp)
7164 return SDValue();
7166 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7167 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7168 return SDValue();
7170 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7171 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7172 if (SelOpNo == 1)
7173 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7174 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7176 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7177 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7178 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
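// Sketch of a profitable case: for (add (select cond, x, -1), 1) the constant
// side folds to add(-1, 1) == 0, so the whole expression becomes
// (select cond, (add x, 1), 0), which later lowers to a simple mask or
// czero-style sequence rather than materializing -1 and the select
// separately.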
7181 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7182 SDValue CondV = Op.getOperand(0);
7183 SDValue TrueV = Op.getOperand(1);
7184 SDValue FalseV = Op.getOperand(2);
7185 SDLoc DL(Op);
7186 MVT VT = Op.getSimpleValueType();
7187 MVT XLenVT = Subtarget.getXLenVT();
7189 // Lower vector SELECTs to VSELECTs by splatting the condition.
7190 if (VT.isVector()) {
7191 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7192 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7193 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7196 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7197 // nodes to implement the SELECT. Performing the lowering here allows for
7198 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7199 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7200 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7201 VT.isScalarInteger()) {
7202 // (select c, t, 0) -> (czero_eqz t, c)
7203 if (isNullConstant(FalseV))
7204 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7205 // (select c, 0, f) -> (czero_nez f, c)
7206 if (isNullConstant(TrueV))
7207 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7209 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7210 if (TrueV.getOpcode() == ISD::AND &&
7211 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7212 return DAG.getNode(
7213 ISD::OR, DL, VT, TrueV,
7214 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7215 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7216 if (FalseV.getOpcode() == ISD::AND &&
7217 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7218 return DAG.getNode(
7219 ISD::OR, DL, VT, FalseV,
7220 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7222 // Try some other optimizations before falling back to generic lowering.
7223 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7224 return V;
7226 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7227 // Unless we have the short forward branch optimization.
7228 if (!Subtarget.hasConditionalMoveFusion())
7229 return DAG.getNode(
7230 ISD::OR, DL, VT,
7231 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7232 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7235 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7236 return V;
7238 if (Op.hasOneUse()) {
7239 unsigned UseOpc = Op->use_begin()->getOpcode();
7240 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7241 SDNode *BinOp = *Op->use_begin();
7242 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7243 DAG, Subtarget)) {
7244 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7245 return lowerSELECT(NewSel, DAG);
7250 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7251 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7252 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7253 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7254 if (FPTV && FPFV) {
7255 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7256 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7257 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7258 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7259 DAG.getConstant(1, DL, XLenVT));
7260 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7264 // If the condition is not an integer SETCC which operates on XLenVT, we need
7265 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7266 // (select condv, truev, falsev)
7267 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7268 if (CondV.getOpcode() != ISD::SETCC ||
7269 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7270 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7271 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7273 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7275 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7278 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7279 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7280 // advantage of the integer compare+branch instructions. i.e.:
7281 // (select (setcc lhs, rhs, cc), truev, falsev)
7282 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7283 SDValue LHS = CondV.getOperand(0);
7284 SDValue RHS = CondV.getOperand(1);
7285 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7287 // Special case for a select of 2 constants that have a difference of 1.
7288 // Normally this is done by DAGCombine, but if the select is introduced by
7289 // type legalization or op legalization, we miss it. Restricting to SETLT
7290 // case for now because that is what signed saturating add/sub need.
7291 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7292 // but we would probably want to swap the true/false values if the condition
7293 // is SETGE/SETLE to avoid an XORI.
7294 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7295 CCVal == ISD::SETLT) {
7296 const APInt &TrueVal = TrueV->getAsAPIntVal();
7297 const APInt &FalseVal = FalseV->getAsAPIntVal();
7298 if (TrueVal - 1 == FalseVal)
7299 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7300 if (TrueVal + 1 == FalseVal)
7301 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7304 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7305 // 1 < x ? x : 1 -> 0 < x ? x : 1
7306 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7307 RHS == TrueV && LHS == FalseV) {
7308 LHS = DAG.getConstant(0, DL, VT);
7309 // 0 <u x is the same as x != 0.
7310 if (CCVal == ISD::SETULT) {
7311 std::swap(LHS, RHS);
7312 CCVal = ISD::SETNE;
7316 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7317 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7318 RHS == FalseV) {
7319 RHS = DAG.getConstant(0, DL, VT);
7322 SDValue TargetCC = DAG.getCondCode(CCVal);
7324 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7325 // (select (setcc lhs, rhs, CC), constant, falsev)
7326 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7327 std::swap(TrueV, FalseV);
7328 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7331 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7332 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7335 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7336 SDValue CondV = Op.getOperand(1);
7337 SDLoc DL(Op);
7338 MVT XLenVT = Subtarget.getXLenVT();
7340 if (CondV.getOpcode() == ISD::SETCC &&
7341 CondV.getOperand(0).getValueType() == XLenVT) {
7342 SDValue LHS = CondV.getOperand(0);
7343 SDValue RHS = CondV.getOperand(1);
7344 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7346 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7348 SDValue TargetCC = DAG.getCondCode(CCVal);
7349 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7350 LHS, RHS, TargetCC, Op.getOperand(2));
7353 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7354 CondV, DAG.getConstant(0, DL, XLenVT),
7355 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7358 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7359 MachineFunction &MF = DAG.getMachineFunction();
7360 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7362 SDLoc DL(Op);
7363 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7364 getPointerTy(MF.getDataLayout()));
7366 // vastart just stores the address of the VarArgsFrameIndex slot into the
7367 // memory location argument.
7368 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7369 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7370 MachinePointerInfo(SV));
7373 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7374 SelectionDAG &DAG) const {
7375 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7376 MachineFunction &MF = DAG.getMachineFunction();
7377 MachineFrameInfo &MFI = MF.getFrameInfo();
7378 MFI.setFrameAddressIsTaken(true);
7379 Register FrameReg = RI.getFrameRegister(MF);
7380 int XLenInBytes = Subtarget.getXLen() / 8;
7382 EVT VT = Op.getValueType();
7383 SDLoc DL(Op);
7384 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7385 unsigned Depth = Op.getConstantOperandVal(0);
7386 while (Depth--) {
7387 int Offset = -(XLenInBytes * 2);
7388 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7389 DAG.getIntPtrConstant(Offset, DL));
7390 FrameAddr =
7391 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7393 return FrameAddr;
7396 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7397 SelectionDAG &DAG) const {
7398 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7399 MachineFunction &MF = DAG.getMachineFunction();
7400 MachineFrameInfo &MFI = MF.getFrameInfo();
7401 MFI.setReturnAddressIsTaken(true);
7402 MVT XLenVT = Subtarget.getXLenVT();
7403 int XLenInBytes = Subtarget.getXLen() / 8;
7405 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7406 return SDValue();
7408 EVT VT = Op.getValueType();
7409 SDLoc DL(Op);
7410 unsigned Depth = Op.getConstantOperandVal(0);
7411 if (Depth) {
7412 int Off = -XLenInBytes;
7413 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7414 SDValue Offset = DAG.getConstant(Off, DL, VT);
7415 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7416 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7417 MachinePointerInfo());
7420 // Return the value of the return address register, marking it an implicit
7421 // live-in.
7422 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7423 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7426 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7427 SelectionDAG &DAG) const {
7428 SDLoc DL(Op);
7429 SDValue Lo = Op.getOperand(0);
7430 SDValue Hi = Op.getOperand(1);
7431 SDValue Shamt = Op.getOperand(2);
7432 EVT VT = Lo.getValueType();
7434 // if Shamt-XLEN < 0: // Shamt < XLEN
7435 // Lo = Lo << Shamt
7436 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7437 // else:
7438 // Lo = 0
7439 // Hi = Lo << (Shamt-XLEN)
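// Worked example with XLEN=32 (illustrative):
//   Shamt = 8 : Lo = Lo << 8, Hi = (Hi << 8) | (Lo >>u 24)
//   Shamt = 40: Lo = 0, Hi = Lo << 8
// Shifting (Lo >>u 1) by (XLEN-1 - Shamt) keeps every shift amount in
// [0, XLEN-1], so Shamt == 0 never produces a shift by XLEN.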
7441 SDValue Zero = DAG.getConstant(0, DL, VT);
7442 SDValue One = DAG.getConstant(1, DL, VT);
7443 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7444 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7445 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7446 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7448 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7449 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7450 SDValue ShiftRightLo =
7451 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7452 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7453 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7454 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7456 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7458 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7459 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7461 SDValue Parts[2] = {Lo, Hi};
7462 return DAG.getMergeValues(Parts, DL);
7465 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7466 bool IsSRA) const {
7467 SDLoc DL(Op);
7468 SDValue Lo = Op.getOperand(0);
7469 SDValue Hi = Op.getOperand(1);
7470 SDValue Shamt = Op.getOperand(2);
7471 EVT VT = Lo.getValueType();
7473 // SRA expansion:
7474 // if Shamt-XLEN < 0: // Shamt < XLEN
7475 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
7476 // Hi = Hi >>s Shamt
7477 // else:
7478 // Lo = Hi >>s (Shamt-XLEN);
7479 // Hi = Hi >>s (XLEN-1)
7481 // SRL expansion:
7482 // if Shamt-XLEN < 0: // Shamt < XLEN
7483 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
7484 // Hi = Hi >>u Shamt
7485 // else:
7486 // Lo = Hi >>u (Shamt-XLEN);
7487 // Hi = 0;
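// Worked example with XLEN=32 (illustrative), SRL and Shamt = 8:
//   Lo = (Lo >>u 8) | (Hi << 24), Hi = Hi >>u 8
// and with Shamt = 40: Lo = Hi >>u 8, Hi = 0 (or Hi >>s 31 for SRA).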
7489 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7491 SDValue Zero = DAG.getConstant(0, DL, VT);
7492 SDValue One = DAG.getConstant(1, DL, VT);
7493 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7494 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7495 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7496 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7498 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7499 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7500 SDValue ShiftLeftHi =
7501 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7502 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7503 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7504 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7505 SDValue HiFalse =
7506 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7508 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7510 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7511 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7513 SDValue Parts[2] = {Lo, Hi};
7514 return DAG.getMergeValues(Parts, DL);
7517 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
7518 // legal equivalently-sized i8 type, so we can use that as a go-between.
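// Roughly, (splat_vector vXi1, %b) becomes:
//   %t = and %b, 1 ; %v = splat_vector vXi8, %t ; %m = setcc %v, 0, ne
// which typically selects to a vmv.v.x followed by a vmsne.vi.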
7519 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7520 SelectionDAG &DAG) const {
7521 SDLoc DL(Op);
7522 MVT VT = Op.getSimpleValueType();
7523 SDValue SplatVal = Op.getOperand(0);
7524 // All-zeros or all-ones splats are handled specially.
7525 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7526 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7527 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7529 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7530 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7531 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7533 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7534 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7535 DAG.getConstant(1, DL, SplatVal.getValueType()));
7536 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7537 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7538 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7541 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7542 // illegal (currently only vXi64 RV32).
7543 // FIXME: We could also catch non-constant sign-extended i32 values and lower
7544 // them to VMV_V_X_VL.
7545 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7546 SelectionDAG &DAG) const {
7547 SDLoc DL(Op);
7548 MVT VecVT = Op.getSimpleValueType();
7549 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7550 "Unexpected SPLAT_VECTOR_PARTS lowering");
7552 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7553 SDValue Lo = Op.getOperand(0);
7554 SDValue Hi = Op.getOperand(1);
7556 MVT ContainerVT = VecVT;
7557 if (VecVT.isFixedLengthVector())
7558 ContainerVT = getContainerForFixedLengthVector(VecVT);
7560 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7562 SDValue Res =
7563 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7565 if (VecVT.isFixedLengthVector())
7566 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7568 return Res;
7571 // Custom-lower extensions from mask vectors by using a vselect either with 1
7572 // for zero/any-extension or -1 for sign-extension:
7573 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7574 // Note that any-extension is lowered identically to zero-extension.
7575 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7576 int64_t ExtTrueVal) const {
7577 SDLoc DL(Op);
7578 MVT VecVT = Op.getSimpleValueType();
7579 SDValue Src = Op.getOperand(0);
7580 // Only custom-lower extensions from mask types
7581 assert(Src.getValueType().isVector() &&
7582 Src.getValueType().getVectorElementType() == MVT::i1);
7584 if (VecVT.isScalableVector()) {
7585 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7586 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7587 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7590 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7591 MVT I1ContainerVT =
7592 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7594 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7596 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7598 MVT XLenVT = Subtarget.getXLenVT();
7599 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7600 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7602 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7603 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7604 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7605 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7606 SDValue Select =
7607 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7608 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7610 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7613 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7614 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7615 MVT ExtVT = Op.getSimpleValueType();
7616 // Only custom-lower extensions from fixed-length vector types.
7617 if (!ExtVT.isFixedLengthVector())
7618 return Op;
7619 MVT VT = Op.getOperand(0).getSimpleValueType();
7620 // Grab the canonical container type for the extended type. Infer the smaller
7621 // type from that to ensure the same number of vector elements, as we know
7622 // the LMUL will be sufficient to hold the smaller type.
7623 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7624 // Get the extended container type manually to ensure the same number of
7625 // vector elements between source and dest.
7626 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7627 ContainerExtVT.getVectorElementCount());
7629 SDValue Op1 =
7630 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7632 SDLoc DL(Op);
7633 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7635 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7637 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7640 // Custom-lower truncations from vectors to mask vectors by using a mask and a
7641 // setcc operation:
7642 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
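// In RVV terms this typically selects to a vand.vi with 1 followed by a
// vmsne.vi against 0.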
7643 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7644 SelectionDAG &DAG) const {
7645 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7646 SDLoc DL(Op);
7647 EVT MaskVT = Op.getValueType();
7648 // Only expect to custom-lower truncations to mask types
7649 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7650 "Unexpected type for vector mask lowering");
7651 SDValue Src = Op.getOperand(0);
7652 MVT VecVT = Src.getSimpleValueType();
7653 SDValue Mask, VL;
7654 if (IsVPTrunc) {
7655 Mask = Op.getOperand(1);
7656 VL = Op.getOperand(2);
7658 // If this is a fixed vector, we need to convert it to a scalable vector.
7659 MVT ContainerVT = VecVT;
7661 if (VecVT.isFixedLengthVector()) {
7662 ContainerVT = getContainerForFixedLengthVector(VecVT);
7663 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7664 if (IsVPTrunc) {
7665 MVT MaskContainerVT =
7666 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7667 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7671 if (!IsVPTrunc) {
7672 std::tie(Mask, VL) =
7673 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7676 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7677 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7679 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7680 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7681 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7682 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7684 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7685 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7686 DAG.getUNDEF(ContainerVT), Mask, VL);
7687 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7688 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7689 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7690 if (MaskVT.isFixedLengthVector())
7691 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7692 return Trunc;
7695 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7696 SelectionDAG &DAG) const {
7697 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7698 SDLoc DL(Op);
7700 MVT VT = Op.getSimpleValueType();
7701 // Only custom-lower vector truncates
7702 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7704 // Truncates to mask types are handled differently
7705 if (VT.getVectorElementType() == MVT::i1)
7706 return lowerVectorMaskTruncLike(Op, DAG);
7708 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7709 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7710 // truncate by one power of two at a time.
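// E.g. truncating vXi64 to vXi8 is emitted as i64->i32->i16->i8, i.e. three
// TRUNCATE_VECTOR_VL nodes, each of which typically selects to a vnsrl with a
// zero shift amount.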
7711 MVT DstEltVT = VT.getVectorElementType();
7713 SDValue Src = Op.getOperand(0);
7714 MVT SrcVT = Src.getSimpleValueType();
7715 MVT SrcEltVT = SrcVT.getVectorElementType();
7717 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7718 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7719 "Unexpected vector truncate lowering");
7721 MVT ContainerVT = SrcVT;
7722 SDValue Mask, VL;
7723 if (IsVPTrunc) {
7724 Mask = Op.getOperand(1);
7725 VL = Op.getOperand(2);
7727 if (SrcVT.isFixedLengthVector()) {
7728 ContainerVT = getContainerForFixedLengthVector(SrcVT);
7729 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7730 if (IsVPTrunc) {
7731 MVT MaskVT = getMaskTypeFor(ContainerVT);
7732 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7736 SDValue Result = Src;
7737 if (!IsVPTrunc) {
7738 std::tie(Mask, VL) =
7739 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7742 LLVMContext &Context = *DAG.getContext();
7743 const ElementCount Count = ContainerVT.getVectorElementCount();
7744 do {
7745 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
7746 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
7747 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
7748 Mask, VL);
7749 } while (SrcEltVT != DstEltVT);
7751 if (SrcVT.isFixedLengthVector())
7752 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
7754 return Result;
7757 SDValue
7758 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
7759 SelectionDAG &DAG) const {
7760 SDLoc DL(Op);
7761 SDValue Chain = Op.getOperand(0);
7762 SDValue Src = Op.getOperand(1);
7763 MVT VT = Op.getSimpleValueType();
7764 MVT SrcVT = Src.getSimpleValueType();
7765 MVT ContainerVT = VT;
7766 if (VT.isFixedLengthVector()) {
7767 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7768 ContainerVT =
7769 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7770 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7773 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7775 // RVV can only widen/truncate fp to types double/half the size of the source.
7776 if ((VT.getVectorElementType() == MVT::f64 &&
7777 SrcVT.getVectorElementType() == MVT::f16) ||
7778 (VT.getVectorElementType() == MVT::f16 &&
7779 SrcVT.getVectorElementType() == MVT::f64)) {
7780 // For double rounding, the intermediate rounding should be round-to-odd.
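// Round-to-odd keeps the information about the discarded bits in the least
// significant bit of the f32 intermediate, so the second (f32->f16) rounding
// produces the same result as a single correctly-rounded f64->f16 conversion
// would, avoiding double-rounding errors.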
7781 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7782 ? RISCVISD::STRICT_FP_EXTEND_VL
7783 : RISCVISD::STRICT_VFNCVT_ROD_VL;
7784 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7785 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
7786 Chain, Src, Mask, VL);
7787 Chain = Src.getValue(1);
7790 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7791 ? RISCVISD::STRICT_FP_EXTEND_VL
7792 : RISCVISD::STRICT_FP_ROUND_VL;
7793 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7794 Chain, Src, Mask, VL);
7795 if (VT.isFixedLengthVector()) {
7796 // StrictFP operations have two result values. Their lowered result should
7797 // have the same result count.
7798 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
7799 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
7801 return Res;
7804 SDValue
7805 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
7806 SelectionDAG &DAG) const {
7807 bool IsVP =
7808 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
7809 bool IsExtend =
7810 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
7811 // RVV can only truncate fp to types half the size of the source. We
7812 // custom-lower f64->f16 rounds via RVV's round-to-odd float
7813 // conversion instruction.
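// E.g. a vXf64 -> vXf16 round is emitted roughly as a vfncvt.rod.f.f.w
// (f64->f32, round-to-odd) followed by a vfncvt.f.f.w (f32->f16).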
7814 SDLoc DL(Op);
7815 MVT VT = Op.getSimpleValueType();
7817 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7819 SDValue Src = Op.getOperand(0);
7820 MVT SrcVT = Src.getSimpleValueType();
7822 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
7823 SrcVT.getVectorElementType() != MVT::f16);
7824 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
7825 SrcVT.getVectorElementType() != MVT::f64);
7827 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
7829 // Prepare any fixed-length vector operands.
7830 MVT ContainerVT = VT;
7831 SDValue Mask, VL;
7832 if (IsVP) {
7833 Mask = Op.getOperand(1);
7834 VL = Op.getOperand(2);
7836 if (VT.isFixedLengthVector()) {
7837 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7838 ContainerVT =
7839 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7840 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7841 if (IsVP) {
7842 MVT MaskVT = getMaskTypeFor(ContainerVT);
7843 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7847 if (!IsVP)
7848 std::tie(Mask, VL) =
7849 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7851 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
7853 if (IsDirectConv) {
7854 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
7855 if (VT.isFixedLengthVector())
7856 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
7857 return Src;
7860 unsigned InterConvOpc =
7861 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
7863 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7864 SDValue IntermediateConv =
7865 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
7866 SDValue Result =
7867 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
7868 if (VT.isFixedLengthVector())
7869 return convertFromScalableVector(VT, Result, DAG, Subtarget);
7870 return Result;
7873 // Given a scalable vector type and an index into it, returns the type for the
7874 // smallest subvector that the index fits in. This can be used to reduce LMUL
7875 // for operations like vslidedown.
7877 // E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
7878 static std::optional<MVT>
7879 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
7880 const RISCVSubtarget &Subtarget) {
7881 assert(VecVT.isScalableVector());
7882 const unsigned EltSize = VecVT.getScalarSizeInBits();
7883 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
7884 const unsigned MinVLMAX = VectorBitsMin / EltSize;
7885 MVT SmallerVT;
7886 if (MaxIdx < MinVLMAX)
7887 SmallerVT = getLMUL1VT(VecVT);
7888 else if (MaxIdx < MinVLMAX * 2)
7889 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
7890 else if (MaxIdx < MinVLMAX * 4)
7891 SmallerVT = getLMUL1VT(VecVT)
7892 .getDoubleNumVectorElementsVT()
7893 .getDoubleNumVectorElementsVT();
7894 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
7895 return std::nullopt;
7896 return SmallerVT;
7899 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
7900 // first position of a vector, and that vector is slid up to the insert index.
7901 // By limiting the active vector length to index+1 and merging with the
7902 // original vector (with an undisturbed tail policy for elements >= VL), we
7903 // achieve the desired result of leaving all elements untouched except the one
7904 // at VL-1, which is replaced with the desired value.
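// Roughly, for a GPR value inserted at constant index i this becomes:
//   vmv.s.x   vtmp, a0        ; value into element 0
//   vsetivli  zero, i+1, ...  ; VL = i+1, tail undisturbed
//   vslideup  vdst, vtmp, i
// (illustrative; the exact VL, policy and register choices are made below).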
7905 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
7906 SelectionDAG &DAG) const {
7907 SDLoc DL(Op);
7908 MVT VecVT = Op.getSimpleValueType();
7909 SDValue Vec = Op.getOperand(0);
7910 SDValue Val = Op.getOperand(1);
7911 SDValue Idx = Op.getOperand(2);
7913 if (VecVT.getVectorElementType() == MVT::i1) {
7914 // FIXME: For now we just promote to an i8 vector and insert into that,
7915 // but this is probably not optimal.
7916 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7917 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7918 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
7919 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
7922 MVT ContainerVT = VecVT;
7923 // If the operand is a fixed-length vector, convert to a scalable one.
7924 if (VecVT.isFixedLengthVector()) {
7925 ContainerVT = getContainerForFixedLengthVector(VecVT);
7926 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7929 // If we know the index we're going to insert at, we can shrink Vec so that
7930 // we're performing the scalar inserts and slideup on a smaller LMUL.
7931 MVT OrigContainerVT = ContainerVT;
7932 SDValue OrigVec = Vec;
7933 SDValue AlignedIdx;
7934 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
7935 const unsigned OrigIdx = IdxC->getZExtValue();
7936 // Do we know an upper bound on LMUL?
7937 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
7938 DL, DAG, Subtarget)) {
7939 ContainerVT = *ShrunkVT;
7940 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
7943 // If we're compiling for an exact VLEN value, we can always perform
7944 // the insert in m1 as we can determine the register corresponding to
7945 // the index in the register group.
7946 const unsigned MinVLen = Subtarget.getRealMinVLen();
7947 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
7948 const MVT M1VT = getLMUL1VT(ContainerVT);
7949 if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) {
7950 EVT ElemVT = VecVT.getVectorElementType();
7951 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
7952 unsigned RemIdx = OrigIdx % ElemsPerVReg;
7953 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
7954 unsigned ExtractIdx =
7955 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
7956 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
7957 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
7958 ContainerVT = M1VT;
7961 if (AlignedIdx)
7962 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
7963 AlignedIdx);
7966 MVT XLenVT = Subtarget.getXLenVT();
7968 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
7969 // Even i64-element vectors on RV32 can be lowered without scalar
7970 // legalization if the most-significant 32 bits of the value are not affected
7971 // by the sign-extension of the lower 32 bits.
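// For example, constants such as -1 or 42, whose upper 32 bits equal the
// sign-extension of their lower 32 bits, can take the 32-bit path because
// vmv.s.x sign-extends the XLEN-wide scalar up to SEW.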
7972 // TODO: We could also catch sign extensions of a 32-bit value.
7973 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
7974 const auto *CVal = cast<ConstantSDNode>(Val);
7975 if (isInt<32>(CVal->getSExtValue())) {
7976 IsLegalInsert = true;
7977 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
7981 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7983 SDValue ValInVec;
7985 if (IsLegalInsert) {
7986 unsigned Opc =
7987 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
7988 if (isNullConstant(Idx)) {
7989 if (!VecVT.isFloatingPoint())
7990 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
7991 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
7993 if (AlignedIdx)
7994 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7995 Vec, AlignedIdx);
7996 if (!VecVT.isFixedLengthVector())
7997 return Vec;
7998 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8000 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8001 } else {
8002 // On RV32, i64-element vectors must be specially handled to place the
8003 // value at element 0, by using two vslide1down instructions in sequence on
8004 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8005 // this.
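// E.g. with VL limited to 2, sliding in ValLo and then ValHi leaves (lo, hi)
// in the first two i32 lanes, which is exactly the i64 value at element 0.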
8006 SDValue ValLo, ValHi;
8007 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8008 MVT I32ContainerVT =
8009 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8010 SDValue I32Mask =
8011 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8012 // Limit the active VL to two.
8013 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8014 // If the Idx is 0 we can insert directly into the vector.
8015 if (isNullConstant(Idx)) {
8016 // First slide in the lo value, then the hi above it. We use slide1down
8017 // to avoid the register group overlap constraint of vslide1up.
8018 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8019 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8020 // If the source vector is undef don't pass along the tail elements from
8021 // the previous slide1down.
8022 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8023 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8024 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8025 // Bitcast back to the right container type.
8026 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8028 if (AlignedIdx)
8029 ValInVec =
8030 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8031 ValInVec, AlignedIdx);
8032 if (!VecVT.isFixedLengthVector())
8033 return ValInVec;
8034 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8037 // First slide in the lo value, then the hi above it. We use slide1down
8038 // to avoid the register group overlap constraint of vslide1up.
8039 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8040 DAG.getUNDEF(I32ContainerVT),
8041 DAG.getUNDEF(I32ContainerVT), ValLo,
8042 I32Mask, InsertI64VL);
8043 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8044 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8045 I32Mask, InsertI64VL);
8046 // Bitcast back to the right container type.
8047 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8050 // Now that the value is in a vector, slide it into position.
8051 SDValue InsertVL =
8052 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8054 // Use tail agnostic policy if Idx is the last index of Vec.
8055 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8056 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8057 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8058 Policy = RISCVII::TAIL_AGNOSTIC;
8059 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8060 Idx, Mask, InsertVL, Policy);
8062 if (AlignedIdx)
8063 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8064 Slideup, AlignedIdx);
8065 if (!VecVT.isFixedLengthVector())
8066 return Slideup;
8067 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8070 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8071 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8072 // types this is done using VMV_X_S to allow us to glean information about the
8073 // sign bits of the result.
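// Roughly, an integer extract at index i becomes:
//   vslidedown vtmp, vsrc, i   ; with VL = 1
//   vmv.x.s    rd, vtmp
// (floating-point extracts use vfmv.f.s via TableGen patterns instead).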
8074 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8075 SelectionDAG &DAG) const {
8076 SDLoc DL(Op);
8077 SDValue Idx = Op.getOperand(1);
8078 SDValue Vec = Op.getOperand(0);
8079 EVT EltVT = Op.getValueType();
8080 MVT VecVT = Vec.getSimpleValueType();
8081 MVT XLenVT = Subtarget.getXLenVT();
8083 if (VecVT.getVectorElementType() == MVT::i1) {
8084 // Use vfirst.m to extract the first bit.
8085 if (isNullConstant(Idx)) {
8086 MVT ContainerVT = VecVT;
8087 if (VecVT.isFixedLengthVector()) {
8088 ContainerVT = getContainerForFixedLengthVector(VecVT);
8089 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8091 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8092 SDValue Vfirst =
8093 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8094 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8095 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8096 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8098 if (VecVT.isFixedLengthVector()) {
8099 unsigned NumElts = VecVT.getVectorNumElements();
8100 if (NumElts >= 8) {
8101 MVT WideEltVT;
8102 unsigned WidenVecLen;
8103 SDValue ExtractElementIdx;
8104 SDValue ExtractBitIdx;
8105 unsigned MaxEEW = Subtarget.getELen();
8106 MVT LargestEltVT = MVT::getIntegerVT(
8107 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8108 if (NumElts <= LargestEltVT.getSizeInBits()) {
8109 assert(isPowerOf2_32(NumElts) &&
8110 "the number of elements should be power of 2");
8111 WideEltVT = MVT::getIntegerVT(NumElts);
8112 WidenVecLen = 1;
8113 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8114 ExtractBitIdx = Idx;
8115 } else {
8116 WideEltVT = LargestEltVT;
8117 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8118 // extract element index = index / element width
8119 ExtractElementIdx = DAG.getNode(
8120 ISD::SRL, DL, XLenVT, Idx,
8121 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8122 // mask bit index = index % element width
8123 ExtractBitIdx = DAG.getNode(
8124 ISD::AND, DL, XLenVT, Idx,
8125 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8127 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8128 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8129 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8130 Vec, ExtractElementIdx);
8131 // Extract the bit from GPR.
8132 SDValue ShiftRight =
8133 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8134 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8135 DAG.getConstant(1, DL, XLenVT));
8136 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8139 // Otherwise, promote to an i8 vector and extract from that.
8140 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8141 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8142 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8145 // If this is a fixed vector, we need to convert it to a scalable vector.
8146 MVT ContainerVT = VecVT;
8147 if (VecVT.isFixedLengthVector()) {
8148 ContainerVT = getContainerForFixedLengthVector(VecVT);
8149 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8152 // If we're compiling for an exact VLEN value and we have a known
8153 // constant index, we can always perform the extract in m1 (or
8154 // smaller) as we can determine the register corresponding to
8155 // the index in the register group.
8156 const unsigned MinVLen = Subtarget.getRealMinVLen();
8157 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
8158 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8159 IdxC && MinVLen == MaxVLen &&
8160 VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
8161 MVT M1VT = getLMUL1VT(ContainerVT);
8162 unsigned OrigIdx = IdxC->getZExtValue();
8163 EVT ElemVT = VecVT.getVectorElementType();
8164 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
8165 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8166 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8167 unsigned ExtractIdx =
8168 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8169 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8170 DAG.getVectorIdxConstant(ExtractIdx, DL));
8171 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8172 ContainerVT = M1VT;
8175 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8176 // contains our index.
8177 std::optional<uint64_t> MaxIdx;
8178 if (VecVT.isFixedLengthVector())
8179 MaxIdx = VecVT.getVectorNumElements() - 1;
8180 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8181 MaxIdx = IdxC->getZExtValue();
8182 if (MaxIdx) {
8183 if (auto SmallerVT =
8184 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8185 ContainerVT = *SmallerVT;
8186 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8187 DAG.getConstant(0, DL, XLenVT));
8191 // If after narrowing, the required slide is still greater than LMUL2,
8192 // fallback to generic expansion and go through the stack. This is done
8193 // for a subtle reason: extracting *all* elements out of a vector is
8194 // widely expected to be linear in vector size, but because vslidedown
8195 // is linear in LMUL, performing N extracts using vslidedown becomes
8196 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8197 // seems to have the same problem (the store is linear in LMUL), but the
8198 // generic expansion *memoizes* the store, and thus for many extracts of
8199 // the same vector we end up with one store and a bunch of loads.
8200 // TODO: We don't have the same code for insert_vector_elt because we
8201 // have BUILD_VECTOR and handle the degenerate case there. Should we
8202 // consider adding an inverse BUILD_VECTOR node?
8203 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8204 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8205 return SDValue();
8207 // If the index is 0, the vector is already in the right position.
8208 if (!isNullConstant(Idx)) {
8209 // Use a VL of 1 to avoid processing more elements than we need.
8210 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8211 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8212 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8215 if (!EltVT.isInteger()) {
8216 // Floating-point extracts are handled in TableGen.
8217 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8218 DAG.getConstant(0, DL, XLenVT));
8221 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8222 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8225 // Some RVV intrinsics may claim that they want an integer operand to be
8226 // promoted or expanded.
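// E.g. a scalar operand narrower than XLEN is promoted to XLenVT, while an
// i64 scalar on RV32 (which does not fit in one GPR) is either truncated, if
// it is known to be sign-extended, or materialized as a splat vector first.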
8227 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8228 const RISCVSubtarget &Subtarget) {
8229 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8230 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8231 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8232 "Unexpected opcode");
8234 if (!Subtarget.hasVInstructions())
8235 return SDValue();
8237 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8238 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8239 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8241 SDLoc DL(Op);
8243 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8244 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8245 if (!II || !II->hasScalarOperand())
8246 return SDValue();
8248 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8249 assert(SplatOp < Op.getNumOperands());
8251 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8252 SDValue &ScalarOp = Operands[SplatOp];
8253 MVT OpVT = ScalarOp.getSimpleValueType();
8254 MVT XLenVT = Subtarget.getXLenVT();
8256 // If this isn't a scalar, or its type is XLenVT we're done.
8257 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8258 return SDValue();
8260 // Simplest case is that the operand needs to be promoted to XLenVT.
8261 if (OpVT.bitsLT(XLenVT)) {
8262 // If the operand is a constant, sign extend to increase our chances
8263 // of being able to use a .vi instruction. ANY_EXTEND would become a
8264 // zero extend and the simm5 check in isel would fail.
8265 // FIXME: Should we ignore the upper bits in isel instead?
8266 unsigned ExtOpc =
8267 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8268 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8269 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8272 // Use the previous operand to get the vXi64 VT. The result might be a mask
8273 // VT for compares. Using the previous operand assumes that the previous
8274 // operand will never have a smaller element size than a scalar operand and
8275 // that a widening operation never uses SEW=64.
8276 // NOTE: If this fails the below assert, we can probably just find the
8277 // element count from any operand or result and use it to construct the VT.
8278 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8279 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8281 // The more complex case is when the scalar is larger than XLenVT.
8282 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8283 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8285 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8286 // instruction to sign-extend since SEW>XLEN.
8287 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8288 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8289 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8292 switch (IntNo) {
8293 case Intrinsic::riscv_vslide1up:
8294 case Intrinsic::riscv_vslide1down:
8295 case Intrinsic::riscv_vslide1up_mask:
8296 case Intrinsic::riscv_vslide1down_mask: {
8297 // We need to special case these when the scalar is larger than XLen.
8298 unsigned NumOps = Op.getNumOperands();
8299 bool IsMasked = NumOps == 7;
8301 // Convert the vector source to the equivalent nxvXi32 vector.
8302 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8303 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8304 SDValue ScalarLo, ScalarHi;
8305 std::tie(ScalarLo, ScalarHi) =
8306 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8308 // Double the VL since we halved SEW.
8309 SDValue AVL = getVLOperand(Op);
8310 SDValue I32VL;
8312 // Optimize for constant AVL
8313 if (isa<ConstantSDNode>(AVL)) {
8314 const auto [MinVLMAX, MaxVLMAX] =
8315 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8317 uint64_t AVLInt = AVL->getAsZExtVal();
8318 if (AVLInt <= MinVLMAX) {
8319 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8320 } else if (AVLInt >= 2 * MaxVLMAX) {
8321 // Just set vl to VLMAX in this situation
8322 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8323 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8324 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8325 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8326 SDValue SETVLMAX = DAG.getTargetConstant(
8327 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8328 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8329 LMUL);
8330 } else {
8331 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8332 // is related to the hardware implementation.
8333 // So let the following code handle it.
8336 if (!I32VL) {
8337 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8338 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8339 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8340 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8341 SDValue SETVL =
8342 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8343 // Use the vsetvli instruction to get the actual length used, which is
8344 // related to the hardware implementation.
8345 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8346 SEW, LMUL);
8347 I32VL =
8348 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8351 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8353 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8354 // instructions.
8355 SDValue Passthru;
8356 if (IsMasked)
8357 Passthru = DAG.getUNDEF(I32VT);
8358 else
8359 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8361 if (IntNo == Intrinsic::riscv_vslide1up ||
8362 IntNo == Intrinsic::riscv_vslide1up_mask) {
8363 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8364 ScalarHi, I32Mask, I32VL);
8365 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8366 ScalarLo, I32Mask, I32VL);
8367 } else {
8368 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8369 ScalarLo, I32Mask, I32VL);
8370 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8371 ScalarHi, I32Mask, I32VL);
8374 // Convert back to nxvXi64.
8375 Vec = DAG.getBitcast(VT, Vec);
8377 if (!IsMasked)
8378 return Vec;
8379 // Apply mask after the operation.
8380 SDValue Mask = Operands[NumOps - 3];
8381 SDValue MaskedOff = Operands[1];
8382 // Assume Policy operand is the last operand.
8383 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8384 // We don't need to select maskedoff if it's undef.
8385 if (MaskedOff.isUndef())
8386 return Vec;
8387 // TAMU
8388 if (Policy == RISCVII::TAIL_AGNOSTIC)
8389 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8390 DAG.getUNDEF(VT), AVL);
8391 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8392 // It's fine because vmerge does not care about the mask policy.
8393 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8394 MaskedOff, AVL);
8398 // We need to convert the scalar to a splat vector.
8399 SDValue VL = getVLOperand(Op);
8400 assert(VL.getValueType() == XLenVT);
8401 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8402 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8405 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8406 // scalable vector llvm.get.vector.length for now.
8408 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
8409 // (vscale * VF). The vscale and VF are independent of element width. We use
8410 // SEW=8 for the vsetvli because it is the only element width that supports all
8411 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8412 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8413 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8414 // SEW and LMUL are better for the surrounding vector instructions.
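// E.g. for a scalable VF of 2 this emits roughly "vsetvli rd, avl, e8, mf4":
// with SEW=8 and LMUL=1/4, VLMax = VLEN/32 = 2 * vscale, since vscale is
// VLEN/RVVBitsPerBlock = VLEN/64.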
8415 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8416 const RISCVSubtarget &Subtarget) {
8417 MVT XLenVT = Subtarget.getXLenVT();
8419 // The smallest LMUL is only valid for the smallest element width.
8420 const unsigned ElementWidth = 8;
8422 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8423 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8424 // We don't support VF==1 with ELEN==32.
8425 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
8427 unsigned VF = N->getConstantOperandVal(2);
8428 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8429 "Unexpected VF");
8430 (void)MinVF;
8432 bool Fractional = VF < LMul1VF;
8433 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8434 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8435 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8437 SDLoc DL(N);
8439 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8440 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8442 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8444 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8445 SDValue Res =
8446 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8447 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8450 static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
8451 SmallVector<SDValue> &Ops) {
8452 SDLoc DL(Op);
8454 const RISCVSubtarget &Subtarget =
8455 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8456 for (const SDValue &V : Op->op_values()) {
8457 EVT ValType = V.getValueType();
8458 if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
8459 MVT InterimIVT =
8460 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8461 ValType.getVectorElementCount());
8462 Ops.push_back(DAG.getBitcast(InterimIVT, V));
8463 } else if (ValType.isFixedLengthVector()) {
8464 MVT OpContainerVT = getContainerForFixedLengthVector(
8465 DAG, V.getSimpleValueType(), Subtarget);
8466 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8467 } else
8468 Ops.push_back(V);
8472 // LMUL * VLEN should be greater than or equal to EGS * SEW
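// E.g. with Zvl128b, an nxv4i32 operand guarantees LMUL * VLEN >= 256 >=
// 4 * 32, so EGS=4 is accepted, whereas nxv1i32 only guarantees 64 bits and
// would be rejected.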
8473 static inline bool isValidEGW(int EGS, EVT VT,
8474 const RISCVSubtarget &Subtarget) {
8475 return (Subtarget.getRealMinVLen() *
8476 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8477 EGS * VT.getScalarSizeInBits();
8480 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8481 SelectionDAG &DAG) const {
8482 unsigned IntNo = Op.getConstantOperandVal(0);
8483 SDLoc DL(Op);
8484 MVT XLenVT = Subtarget.getXLenVT();
8486 switch (IntNo) {
8487 default:
8488 break; // Don't custom lower most intrinsics.
8489 case Intrinsic::thread_pointer: {
8490 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8491 return DAG.getRegister(RISCV::X4, PtrVT);
8493 case Intrinsic::riscv_orc_b:
8494 case Intrinsic::riscv_brev8:
8495 case Intrinsic::riscv_sha256sig0:
8496 case Intrinsic::riscv_sha256sig1:
8497 case Intrinsic::riscv_sha256sum0:
8498 case Intrinsic::riscv_sha256sum1:
8499 case Intrinsic::riscv_sm3p0:
8500 case Intrinsic::riscv_sm3p1: {
8501 unsigned Opc;
8502 switch (IntNo) {
8503 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8504 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8505 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8506 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8507 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8508 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8509 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8510 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8513 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8514 SDValue NewOp =
8515 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8516 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8517 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8520 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8522 case Intrinsic::riscv_sm4ks:
8523 case Intrinsic::riscv_sm4ed: {
8524 unsigned Opc =
8525 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8527 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8528 SDValue NewOp0 =
8529 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8530 SDValue NewOp1 =
8531 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8532 SDValue Res =
8533 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8534 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8537 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8538 Op.getOperand(3));
8540 case Intrinsic::riscv_zip:
8541 case Intrinsic::riscv_unzip: {
8542 unsigned Opc =
8543 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8544 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8546 case Intrinsic::riscv_clmul:
8547 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8548 SDValue NewOp0 =
8549 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8550 SDValue NewOp1 =
8551 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8552 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8553 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8555 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8556 Op.getOperand(2));
8557 case Intrinsic::riscv_clmulh:
8558 case Intrinsic::riscv_clmulr: {
8559 unsigned Opc =
8560 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8561 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8562 SDValue NewOp0 =
8563 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8564 SDValue NewOp1 =
8565 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8566 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8567 DAG.getConstant(32, DL, MVT::i64));
8568 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8569 DAG.getConstant(32, DL, MVT::i64));
8570 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8571 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8572 DAG.getConstant(32, DL, MVT::i64));
8573 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8576 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8578 case Intrinsic::experimental_get_vector_length:
8579 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8580 case Intrinsic::riscv_vmv_x_s: {
8581 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8582 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8584 case Intrinsic::riscv_vfmv_f_s:
8585 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8586 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
8587 case Intrinsic::riscv_vmv_v_x:
8588 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8589 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8590 Subtarget);
8591 case Intrinsic::riscv_vfmv_v_f:
8592 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8593 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8594 case Intrinsic::riscv_vmv_s_x: {
8595 SDValue Scalar = Op.getOperand(2);
8597 if (Scalar.getValueType().bitsLE(XLenVT)) {
8598 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8599 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8600 Op.getOperand(1), Scalar, Op.getOperand(3));
8603 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8605 // This is an i64 value that lives in two scalar registers. We have to
8606 // insert this in a convoluted way. First we build a vXi64 splat containing
8607 // the two values that we assemble using some bit math. Next we'll use
8608 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8609 // to merge element 0 from our splat into the source vector.
8610 // FIXME: This is probably not the best way to do this, but it is
8611 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8612 // point.
8613 // sw lo, (a0)
8614 // sw hi, 4(a0)
8615 // vlse vX, (a0)
8617 // vid.v vVid
8618 // vmseq.vx mMask, vVid, 0
8619 // vmerge.vvm vDest, vSrc, vVal, mMask
8620 MVT VT = Op.getSimpleValueType();
8621 SDValue Vec = Op.getOperand(1);
8622 SDValue VL = getVLOperand(Op);
8624 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8625 if (Op.getOperand(1).isUndef())
8626 return SplattedVal;
8627 SDValue SplattedIdx =
8628 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8629 DAG.getConstant(0, DL, MVT::i32), VL);
8631 MVT MaskVT = getMaskTypeFor(VT);
8632 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
8633 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8634 SDValue SelectCond =
8635 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8636 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
8637 DAG.getUNDEF(MaskVT), Mask, VL});
8638 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
8639 Vec, DAG.getUNDEF(VT), VL);
8641 case Intrinsic::riscv_vfmv_s_f:
8642 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
8643 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8644 // EGS * EEW >= 128 bits
8645 case Intrinsic::riscv_vaesdf_vv:
8646 case Intrinsic::riscv_vaesdf_vs:
8647 case Intrinsic::riscv_vaesdm_vv:
8648 case Intrinsic::riscv_vaesdm_vs:
8649 case Intrinsic::riscv_vaesef_vv:
8650 case Intrinsic::riscv_vaesef_vs:
8651 case Intrinsic::riscv_vaesem_vv:
8652 case Intrinsic::riscv_vaesem_vs:
8653 case Intrinsic::riscv_vaeskf1:
8654 case Intrinsic::riscv_vaeskf2:
8655 case Intrinsic::riscv_vaesz_vs:
8656 case Intrinsic::riscv_vsm4k:
8657 case Intrinsic::riscv_vsm4r_vv:
8658 case Intrinsic::riscv_vsm4r_vs: {
8659 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8660 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8661 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8662 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8663 return Op;
8665 // EGS * EEW >= 256 bits
8666 case Intrinsic::riscv_vsm3c:
8667 case Intrinsic::riscv_vsm3me: {
8668 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
8669 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
8670 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
8671 return Op;
8673 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
8674 case Intrinsic::riscv_vsha2ch:
8675 case Intrinsic::riscv_vsha2cl:
8676 case Intrinsic::riscv_vsha2ms: {
8677 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
8678 !Subtarget.hasStdExtZvknhb())
8679 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
8680 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8681 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8682 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8683 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8684 return Op;
8686 case Intrinsic::riscv_sf_vc_v_x:
8687 case Intrinsic::riscv_sf_vc_v_i:
8688 case Intrinsic::riscv_sf_vc_v_xv:
8689 case Intrinsic::riscv_sf_vc_v_iv:
8690 case Intrinsic::riscv_sf_vc_v_vv:
8691 case Intrinsic::riscv_sf_vc_v_fv:
8692 case Intrinsic::riscv_sf_vc_v_xvv:
8693 case Intrinsic::riscv_sf_vc_v_ivv:
8694 case Intrinsic::riscv_sf_vc_v_vvv:
8695 case Intrinsic::riscv_sf_vc_v_fvv:
8696 case Intrinsic::riscv_sf_vc_v_xvw:
8697 case Intrinsic::riscv_sf_vc_v_ivw:
8698 case Intrinsic::riscv_sf_vc_v_vvw:
8699 case Intrinsic::riscv_sf_vc_v_fvw: {
8700 MVT VT = Op.getSimpleValueType();
8702 SmallVector<SDValue> Ops;
8703 getVCIXOperands(Op, DAG, Ops);
8705 MVT RetVT = VT;
8706 if (VT.isFixedLengthVector())
8707 RetVT = getContainerForFixedLengthVector(VT);
8708 else if (VT.isFloatingPoint())
8709 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
8710 VT.getVectorElementCount());
8712 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
8714 if (VT.isFixedLengthVector())
8715 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
8716 else if (VT.isFloatingPoint())
8717 NewNode = DAG.getBitcast(VT, NewNode);
8719 if (Op == NewNode)
8720 break;
8722 return NewNode;
8726 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8729 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
8730 SelectionDAG &DAG) const {
8731 unsigned IntNo = Op.getConstantOperandVal(1);
8732 switch (IntNo) {
8733 default:
8734 break;
8735 case Intrinsic::riscv_masked_strided_load: {
8736 SDLoc DL(Op);
8737 MVT XLenVT = Subtarget.getXLenVT();
8739 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8740 // the selection of the masked intrinsics doesn't do this for us.
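// For example (illustrative): with an all-ones mask this lowers to a plain
// vlse (e.g. vlse32.v for i32 elements) with no v0.t operand and an undef
// passthru, instead of the masked form.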
8741 SDValue Mask = Op.getOperand(5);
8742 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8744 MVT VT = Op->getSimpleValueType(0);
8745 MVT ContainerVT = VT;
8746 if (VT.isFixedLengthVector())
8747 ContainerVT = getContainerForFixedLengthVector(VT);
8749 SDValue PassThru = Op.getOperand(2);
8750 if (!IsUnmasked) {
8751 MVT MaskVT = getMaskTypeFor(ContainerVT);
8752 if (VT.isFixedLengthVector()) {
8753 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8754 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8758 auto *Load = cast<MemIntrinsicSDNode>(Op);
8759 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8760 SDValue Ptr = Op.getOperand(3);
8761 SDValue Stride = Op.getOperand(4);
8762 SDValue Result, Chain;
8764 // TODO: We currently restrict this to unmasked loads, in consideration of
8765 // the complexity of handling all-false masks.
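// For example (illustrative): an unmasked strided load with a zero stride only
// ever reads one element, so it can lower to a scalar load followed by a splat
// (roughly lw + vmv.v.x for i32 elements) rather than a zero-stride vlse.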
8766 if (IsUnmasked && isNullConstant(Stride)) {
8767 MVT ScalarVT = ContainerVT.getVectorElementType();
8768 SDValue ScalarLoad =
8769 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
8770 ScalarVT, Load->getMemOperand());
8771 Chain = ScalarLoad.getValue(1);
8772 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
8773 Subtarget);
8774 } else {
8775 SDValue IntID = DAG.getTargetConstant(
8776 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
8777 XLenVT);
8779 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
8780 if (IsUnmasked)
8781 Ops.push_back(DAG.getUNDEF(ContainerVT));
8782 else
8783 Ops.push_back(PassThru);
8784 Ops.push_back(Ptr);
8785 Ops.push_back(Stride);
8786 if (!IsUnmasked)
8787 Ops.push_back(Mask);
8788 Ops.push_back(VL);
8789 if (!IsUnmasked) {
8790 SDValue Policy =
8791 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
8792 Ops.push_back(Policy);
8795 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8796 Result =
8797 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8798 Load->getMemoryVT(), Load->getMemOperand());
8799 Chain = Result.getValue(1);
8801 if (VT.isFixedLengthVector())
8802 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8803 return DAG.getMergeValues({Result, Chain}, DL);
8805 case Intrinsic::riscv_seg2_load:
8806 case Intrinsic::riscv_seg3_load:
8807 case Intrinsic::riscv_seg4_load:
8808 case Intrinsic::riscv_seg5_load:
8809 case Intrinsic::riscv_seg6_load:
8810 case Intrinsic::riscv_seg7_load:
8811 case Intrinsic::riscv_seg8_load: {
8812 SDLoc DL(Op);
8813 static const Intrinsic::ID VlsegInts[7] = {
8814 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
8815 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
8816 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
8817 Intrinsic::riscv_vlseg8};
8818 unsigned NF = Op->getNumValues() - 1;
8819 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8820 MVT XLenVT = Subtarget.getXLenVT();
8821 MVT VT = Op->getSimpleValueType(0);
8822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8824 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
8825 Subtarget);
8826 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
8827 auto *Load = cast<MemIntrinsicSDNode>(Op);
8828 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
8829 ContainerVTs.push_back(MVT::Other);
8830 SDVTList VTs = DAG.getVTList(ContainerVTs);
8831 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
8832 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
8833 Ops.push_back(Op.getOperand(2));
8834 Ops.push_back(VL);
8835 SDValue Result =
8836 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8837 Load->getMemoryVT(), Load->getMemOperand());
8838 SmallVector<SDValue, 9> Results;
8839 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
8840 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
8841 DAG, Subtarget));
8842 Results.push_back(Result.getValue(NF));
8843 return DAG.getMergeValues(Results, DL);
8845 case Intrinsic::riscv_sf_vc_v_x_se:
8846 case Intrinsic::riscv_sf_vc_v_i_se:
8847 case Intrinsic::riscv_sf_vc_v_xv_se:
8848 case Intrinsic::riscv_sf_vc_v_iv_se:
8849 case Intrinsic::riscv_sf_vc_v_vv_se:
8850 case Intrinsic::riscv_sf_vc_v_fv_se:
8851 case Intrinsic::riscv_sf_vc_v_xvv_se:
8852 case Intrinsic::riscv_sf_vc_v_ivv_se:
8853 case Intrinsic::riscv_sf_vc_v_vvv_se:
8854 case Intrinsic::riscv_sf_vc_v_fvv_se:
8855 case Intrinsic::riscv_sf_vc_v_xvw_se:
8856 case Intrinsic::riscv_sf_vc_v_ivw_se:
8857 case Intrinsic::riscv_sf_vc_v_vvw_se:
8858 case Intrinsic::riscv_sf_vc_v_fvw_se: {
8859 MVT VT = Op.getSimpleValueType();
8860 SDLoc DL(Op);
8861 SmallVector<SDValue> Ops;
8862 getVCIXOperands(Op, DAG, Ops);
8864 MVT RetVT = VT;
8865 if (VT.isFixedLengthVector())
8866 RetVT = getContainerForFixedLengthVector(VT);
8867 else if (VT.isFloatingPoint())
8868 RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
8869 RetVT.getVectorElementCount());
8871 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
8872 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
8874 if (VT.isFixedLengthVector()) {
8875 SDValue FixedVector =
8876 convertFromScalableVector(VT, NewNode, DAG, Subtarget);
8877 NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
8878 } else if (VT.isFloatingPoint()) {
8879 SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
8880 NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
8883 if (Op == NewNode)
8884 break;
8886 return NewNode;
8890 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8893 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8894 SelectionDAG &DAG) const {
8895 unsigned IntNo = Op.getConstantOperandVal(1);
8896 switch (IntNo) {
8897 default:
8898 break;
8899 case Intrinsic::riscv_masked_strided_store: {
8900 SDLoc DL(Op);
8901 MVT XLenVT = Subtarget.getXLenVT();
8903 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8904 // the selection of the masked intrinsics doesn't do this for us.
8905 SDValue Mask = Op.getOperand(5);
8906 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8908 SDValue Val = Op.getOperand(2);
8909 MVT VT = Val.getSimpleValueType();
8910 MVT ContainerVT = VT;
8911 if (VT.isFixedLengthVector()) {
8912 ContainerVT = getContainerForFixedLengthVector(VT);
8913 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
8915 if (!IsUnmasked) {
8916 MVT MaskVT = getMaskTypeFor(ContainerVT);
8917 if (VT.isFixedLengthVector())
8918 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8921 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8923 SDValue IntID = DAG.getTargetConstant(
8924 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
8925 XLenVT);
8927 auto *Store = cast<MemIntrinsicSDNode>(Op);
8928 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
8929 Ops.push_back(Val);
8930 Ops.push_back(Op.getOperand(3)); // Ptr
8931 Ops.push_back(Op.getOperand(4)); // Stride
8932 if (!IsUnmasked)
8933 Ops.push_back(Mask);
8934 Ops.push_back(VL);
8936 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
8937 Ops, Store->getMemoryVT(),
8938 Store->getMemOperand());
8940 case Intrinsic::riscv_seg2_store:
8941 case Intrinsic::riscv_seg3_store:
8942 case Intrinsic::riscv_seg4_store:
8943 case Intrinsic::riscv_seg5_store:
8944 case Intrinsic::riscv_seg6_store:
8945 case Intrinsic::riscv_seg7_store:
8946 case Intrinsic::riscv_seg8_store: {
8947 SDLoc DL(Op);
8948 static const Intrinsic::ID VssegInts[] = {
8949 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
8950 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
8951 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
8952 Intrinsic::riscv_vsseg8};
8953 // Operands are (chain, int_id, vec*, ptr, vl)
8954 unsigned NF = Op->getNumOperands() - 4;
8955 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8956 MVT XLenVT = Subtarget.getXLenVT();
8957 MVT VT = Op->getOperand(2).getSimpleValueType();
8958 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8960 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
8961 Subtarget);
8962 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
8963 SDValue Ptr = Op->getOperand(NF + 2);
8965 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
8966 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
8967 for (unsigned i = 0; i < NF; i++)
8968 Ops.push_back(convertToScalableVector(
8969 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
8970 Ops.append({Ptr, VL});
8972 return DAG.getMemIntrinsicNode(
8973 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
8974 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
8976 case Intrinsic::riscv_sf_vc_x_se_e8mf8:
8977 case Intrinsic::riscv_sf_vc_x_se_e8mf4:
8978 case Intrinsic::riscv_sf_vc_x_se_e8mf2:
8979 case Intrinsic::riscv_sf_vc_x_se_e8m1:
8980 case Intrinsic::riscv_sf_vc_x_se_e8m2:
8981 case Intrinsic::riscv_sf_vc_x_se_e8m4:
8982 case Intrinsic::riscv_sf_vc_x_se_e8m8:
8983 case Intrinsic::riscv_sf_vc_x_se_e16mf4:
8984 case Intrinsic::riscv_sf_vc_x_se_e16mf2:
8985 case Intrinsic::riscv_sf_vc_x_se_e16m1:
8986 case Intrinsic::riscv_sf_vc_x_se_e16m2:
8987 case Intrinsic::riscv_sf_vc_x_se_e16m4:
8988 case Intrinsic::riscv_sf_vc_x_se_e16m8:
8989 case Intrinsic::riscv_sf_vc_x_se_e32mf2:
8990 case Intrinsic::riscv_sf_vc_x_se_e32m1:
8991 case Intrinsic::riscv_sf_vc_x_se_e32m2:
8992 case Intrinsic::riscv_sf_vc_x_se_e32m4:
8993 case Intrinsic::riscv_sf_vc_x_se_e32m8:
8994 case Intrinsic::riscv_sf_vc_x_se_e64m1:
8995 case Intrinsic::riscv_sf_vc_x_se_e64m2:
8996 case Intrinsic::riscv_sf_vc_x_se_e64m4:
8997 case Intrinsic::riscv_sf_vc_x_se_e64m8:
8998 case Intrinsic::riscv_sf_vc_i_se_e8mf8:
8999 case Intrinsic::riscv_sf_vc_i_se_e8mf4:
9000 case Intrinsic::riscv_sf_vc_i_se_e8mf2:
9001 case Intrinsic::riscv_sf_vc_i_se_e8m1:
9002 case Intrinsic::riscv_sf_vc_i_se_e8m2:
9003 case Intrinsic::riscv_sf_vc_i_se_e8m4:
9004 case Intrinsic::riscv_sf_vc_i_se_e8m8:
9005 case Intrinsic::riscv_sf_vc_i_se_e16mf4:
9006 case Intrinsic::riscv_sf_vc_i_se_e16mf2:
9007 case Intrinsic::riscv_sf_vc_i_se_e16m1:
9008 case Intrinsic::riscv_sf_vc_i_se_e16m2:
9009 case Intrinsic::riscv_sf_vc_i_se_e16m4:
9010 case Intrinsic::riscv_sf_vc_i_se_e16m8:
9011 case Intrinsic::riscv_sf_vc_i_se_e32mf2:
9012 case Intrinsic::riscv_sf_vc_i_se_e32m1:
9013 case Intrinsic::riscv_sf_vc_i_se_e32m2:
9014 case Intrinsic::riscv_sf_vc_i_se_e32m4:
9015 case Intrinsic::riscv_sf_vc_i_se_e32m8:
9016 case Intrinsic::riscv_sf_vc_i_se_e64m1:
9017 case Intrinsic::riscv_sf_vc_i_se_e64m2:
9018 case Intrinsic::riscv_sf_vc_i_se_e64m4:
9019 case Intrinsic::riscv_sf_vc_i_se_e64m8:
9020 case Intrinsic::riscv_sf_vc_xv_se:
9021 case Intrinsic::riscv_sf_vc_iv_se:
9022 case Intrinsic::riscv_sf_vc_vv_se:
9023 case Intrinsic::riscv_sf_vc_fv_se:
9024 case Intrinsic::riscv_sf_vc_xvv_se:
9025 case Intrinsic::riscv_sf_vc_ivv_se:
9026 case Intrinsic::riscv_sf_vc_vvv_se:
9027 case Intrinsic::riscv_sf_vc_fvv_se:
9028 case Intrinsic::riscv_sf_vc_xvw_se:
9029 case Intrinsic::riscv_sf_vc_ivw_se:
9030 case Intrinsic::riscv_sf_vc_vvw_se:
9031 case Intrinsic::riscv_sf_vc_fvw_se: {
9032 SmallVector<SDValue> Ops;
9033 getVCIXOperands(Op, DAG, Ops);
9035 SDValue NewNode =
9036 DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
9038 if (Op == NewNode)
9039 break;
9041 return NewNode;
9045 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9048 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9049 switch (ISDOpcode) {
9050 default:
9051 llvm_unreachable("Unhandled reduction");
9052 case ISD::VP_REDUCE_ADD:
9053 case ISD::VECREDUCE_ADD:
9054 return RISCVISD::VECREDUCE_ADD_VL;
9055 case ISD::VP_REDUCE_UMAX:
9056 case ISD::VECREDUCE_UMAX:
9057 return RISCVISD::VECREDUCE_UMAX_VL;
9058 case ISD::VP_REDUCE_SMAX:
9059 case ISD::VECREDUCE_SMAX:
9060 return RISCVISD::VECREDUCE_SMAX_VL;
9061 case ISD::VP_REDUCE_UMIN:
9062 case ISD::VECREDUCE_UMIN:
9063 return RISCVISD::VECREDUCE_UMIN_VL;
9064 case ISD::VP_REDUCE_SMIN:
9065 case ISD::VECREDUCE_SMIN:
9066 return RISCVISD::VECREDUCE_SMIN_VL;
9067 case ISD::VP_REDUCE_AND:
9068 case ISD::VECREDUCE_AND:
9069 return RISCVISD::VECREDUCE_AND_VL;
9070 case ISD::VP_REDUCE_OR:
9071 case ISD::VECREDUCE_OR:
9072 return RISCVISD::VECREDUCE_OR_VL;
9073 case ISD::VP_REDUCE_XOR:
9074 case ISD::VECREDUCE_XOR:
9075 return RISCVISD::VECREDUCE_XOR_VL;
9076 case ISD::VP_REDUCE_FADD:
9077 return RISCVISD::VECREDUCE_FADD_VL;
9078 case ISD::VP_REDUCE_SEQ_FADD:
9079 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9080 case ISD::VP_REDUCE_FMAX:
9081 return RISCVISD::VECREDUCE_FMAX_VL;
9082 case ISD::VP_REDUCE_FMIN:
9083 return RISCVISD::VECREDUCE_FMIN_VL;
9088 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9089 SelectionDAG &DAG,
9090 bool IsVP) const {
9091 SDLoc DL(Op);
9092 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9093 MVT VecVT = Vec.getSimpleValueType();
9094 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9095 Op.getOpcode() == ISD::VECREDUCE_OR ||
9096 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9097 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9098 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9099 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9100 "Unexpected reduction lowering");
9102 MVT XLenVT = Subtarget.getXLenVT();
9104 MVT ContainerVT = VecVT;
9105 if (VecVT.isFixedLengthVector()) {
9106 ContainerVT = getContainerForFixedLengthVector(VecVT);
9107 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9110 SDValue Mask, VL;
9111 if (IsVP) {
9112 Mask = Op.getOperand(2);
9113 VL = Op.getOperand(3);
9114 } else {
9115 std::tie(Mask, VL) =
9116 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9119 unsigned BaseOpc;
9120 ISD::CondCode CC;
9121 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9123 switch (Op.getOpcode()) {
9124 default:
9125 llvm_unreachable("Unhandled reduction");
9126 case ISD::VECREDUCE_AND:
9127 case ISD::VP_REDUCE_AND: {
9128 // vcpop ~x == 0
9129 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9130 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9131 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9132 CC = ISD::SETEQ;
9133 BaseOpc = ISD::AND;
9134 break;
9136 case ISD::VECREDUCE_OR:
9137 case ISD::VP_REDUCE_OR:
9138 // vcpop x != 0
9139 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9140 CC = ISD::SETNE;
9141 BaseOpc = ISD::OR;
9142 break;
9143 case ISD::VECREDUCE_XOR:
9144 case ISD::VP_REDUCE_XOR: {
9145 // ((vcpop x) & 1) != 0
9146 SDValue One = DAG.getConstant(1, DL, XLenVT);
9147 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9148 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9149 CC = ISD::SETNE;
9150 BaseOpc = ISD::XOR;
9151 break;
9155 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9156 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9158 if (!IsVP)
9159 return SetCC;
9161 // Now include the start value in the operation.
9162 // Note that we must return the start value when no elements are operated
9163 // upon. The vcpop instructions we've emitted in each case above will return
9164 // 0 for an inactive vector, and so we've already received the neutral value:
9165 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9166 // can simply include the start value.
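// For example (illustrative): vp.reduce.or with start value %s becomes
// (vcpop of the masked source != 0) OR'd with %s; with no active lanes vcpop
// returns 0, so the result is just %s as required.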
9167 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9170 static bool isNonZeroAVL(SDValue AVL) {
9171 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9172 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9173 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9174 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9177 /// Helper to lower a reduction sequence of the form:
9178 /// scalar = reduce_op vec, scalar_start
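/// For example (illustrative): a floating-point add reduction with a start
/// value is lowered roughly to inserting the start value into element 0 of an
/// LMUL=1 register, reducing the source vector onto it (vfredusum.vs), and
/// then reading back element 0 as the scalar result.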
9179 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9180 SDValue StartValue, SDValue Vec, SDValue Mask,
9181 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9182 const RISCVSubtarget &Subtarget) {
9183 const MVT VecVT = Vec.getSimpleValueType();
9184 const MVT M1VT = getLMUL1VT(VecVT);
9185 const MVT XLenVT = Subtarget.getXLenVT();
9186 const bool NonZeroAVL = isNonZeroAVL(VL);
9188 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9189 // or the original VT if fractional.
9190 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9191 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9192 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9193 // be the result of the reduction operation.
9194 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9195 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9196 DAG, Subtarget);
9197 if (M1VT != InnerVT)
9198 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
9199 DAG.getUNDEF(M1VT),
9200 InitialValue, DAG.getConstant(0, DL, XLenVT));
9201 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9202 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9203 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9204 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9205 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9206 DAG.getConstant(0, DL, XLenVT));
9209 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9210 SelectionDAG &DAG) const {
9211 SDLoc DL(Op);
9212 SDValue Vec = Op.getOperand(0);
9213 EVT VecEVT = Vec.getValueType();
9215 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9217 // Due to ordering in legalize types we may have a vector type that needs to
9218 // be split. Do that manually so we can get down to a legal type.
9219 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9220 TargetLowering::TypeSplitVector) {
9221 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9222 VecEVT = Lo.getValueType();
9223 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9226 // TODO: The type may need to be widened rather than split. Or widened before
9227 // it can be split.
9228 if (!isTypeLegal(VecEVT))
9229 return SDValue();
9231 MVT VecVT = VecEVT.getSimpleVT();
9232 MVT VecEltVT = VecVT.getVectorElementType();
9233 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9235 MVT ContainerVT = VecVT;
9236 if (VecVT.isFixedLengthVector()) {
9237 ContainerVT = getContainerForFixedLengthVector(VecVT);
9238 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9241 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9243 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9244 switch (BaseOpc) {
9245 case ISD::AND:
9246 case ISD::OR:
9247 case ISD::UMAX:
9248 case ISD::UMIN:
9249 case ISD::SMAX:
9250 case ISD::SMIN:
9251 MVT XLenVT = Subtarget.getXLenVT();
9252 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9253 DAG.getConstant(0, DL, XLenVT));
9255 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9256 Mask, VL, DL, DAG, Subtarget);
9259 // Given a reduction op, this function returns the matching reduction opcode,
9260 // the vector SDValue and the scalar SDValue required to lower this to a
9261 // RISCVISD node.
9262 static std::tuple<unsigned, SDValue, SDValue>
9263 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9264 const RISCVSubtarget &Subtarget) {
9265 SDLoc DL(Op);
9266 auto Flags = Op->getFlags();
9267 unsigned Opcode = Op.getOpcode();
9268 switch (Opcode) {
9269 default:
9270 llvm_unreachable("Unhandled reduction");
9271 case ISD::VECREDUCE_FADD: {
9272 // Use positive zero if we can. It is cheaper to materialize.
9273 SDValue Zero =
9274 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9275 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9277 case ISD::VECREDUCE_SEQ_FADD:
9278 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9279 Op.getOperand(0));
9280 case ISD::VECREDUCE_FMIN:
9281 case ISD::VECREDUCE_FMAX: {
9282 MVT XLenVT = Subtarget.getXLenVT();
9283 SDValue Front =
9284 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9285 DAG.getConstant(0, DL, XLenVT));
9286 unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
9287 ? RISCVISD::VECREDUCE_FMIN_VL
9288 : RISCVISD::VECREDUCE_FMAX_VL;
9289 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9294 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9295 SelectionDAG &DAG) const {
9296 SDLoc DL(Op);
9297 MVT VecEltVT = Op.getSimpleValueType();
9299 unsigned RVVOpcode;
9300 SDValue VectorVal, ScalarVal;
9301 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9302 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9303 MVT VecVT = VectorVal.getSimpleValueType();
9305 MVT ContainerVT = VecVT;
9306 if (VecVT.isFixedLengthVector()) {
9307 ContainerVT = getContainerForFixedLengthVector(VecVT);
9308 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9311 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9312 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
9313 VectorVal, Mask, VL, DL, DAG, Subtarget);
9316 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9317 SelectionDAG &DAG) const {
9318 SDLoc DL(Op);
9319 SDValue Vec = Op.getOperand(1);
9320 EVT VecEVT = Vec.getValueType();
9322 // TODO: The type may need to be widened rather than split. Or widened before
9323 // it can be split.
9324 if (!isTypeLegal(VecEVT))
9325 return SDValue();
9327 MVT VecVT = VecEVT.getSimpleVT();
9328 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9330 if (VecVT.isFixedLengthVector()) {
9331 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9332 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9335 SDValue VL = Op.getOperand(3);
9336 SDValue Mask = Op.getOperand(2);
9337 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9338 Vec, Mask, VL, DL, DAG, Subtarget);
9341 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9342 SelectionDAG &DAG) const {
9343 SDValue Vec = Op.getOperand(0);
9344 SDValue SubVec = Op.getOperand(1);
9345 MVT VecVT = Vec.getSimpleValueType();
9346 MVT SubVecVT = SubVec.getSimpleValueType();
9348 SDLoc DL(Op);
9349 MVT XLenVT = Subtarget.getXLenVT();
9350 unsigned OrigIdx = Op.getConstantOperandVal(2);
9351 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9353 // We don't have the ability to slide mask vectors up indexed by their i1
9354 // elements; the smallest we can do is i8. Often we are able to bitcast to
9355 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9356 // into a scalable one, we might not necessarily have enough scalable
9357 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
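// For example (illustrative): inserting v16i1 at index 16 into nxv64i1 can be
// re-expressed as inserting v2i8 at index 2 into nxv8i8, since the index and
// both element counts are divisible by 8.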
9358 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9359 (OrigIdx != 0 || !Vec.isUndef())) {
9360 if (VecVT.getVectorMinNumElements() >= 8 &&
9361 SubVecVT.getVectorMinNumElements() >= 8) {
9362 assert(OrigIdx % 8 == 0 && "Invalid index");
9363 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9364 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9365 "Unexpected mask vector lowering");
9366 OrigIdx /= 8;
9367 SubVecVT =
9368 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9369 SubVecVT.isScalableVector());
9370 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9371 VecVT.isScalableVector());
9372 Vec = DAG.getBitcast(VecVT, Vec);
9373 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9374 } else {
9375 // We can't slide this mask vector up indexed by its i1 elements.
9376 // This poses a problem when we wish to insert a scalable vector which
9377 // can't be re-expressed as a larger type. Just choose the slow path and
9378 // extend to a larger type, then truncate back down.
9379 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9380 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9381 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9382 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9383 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9384 Op.getOperand(2));
9385 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9386 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9390 // If the subvector is a fixed-length type, we cannot use subregister
9391 // manipulation to simplify the codegen; we don't know which register of a
9392 // LMUL group contains the specific subvector as we only know the minimum
9393 // register size. Therefore we must slide the vector group up the full
9394 // amount.
9395 if (SubVecVT.isFixedLengthVector()) {
9396 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9397 return Op;
9398 MVT ContainerVT = VecVT;
9399 if (VecVT.isFixedLengthVector()) {
9400 ContainerVT = getContainerForFixedLengthVector(VecVT);
9401 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9404 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9405 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9406 DAG.getUNDEF(ContainerVT), SubVec,
9407 DAG.getConstant(0, DL, XLenVT));
9408 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9409 return DAG.getBitcast(Op.getValueType(), SubVec);
9412 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9413 DAG.getUNDEF(ContainerVT), SubVec,
9414 DAG.getConstant(0, DL, XLenVT));
9415 SDValue Mask =
9416 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9417 // Set the vector length to only the number of elements we care about. Note
9418 // that for slideup this includes the offset.
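// For example (illustrative): inserting a 4-element subvector at index 8 uses
// VL = 8 + 4 = 12, so the slideup writes only elements [8, 12).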
9419 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9420 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9422 // Use tail agnostic policy if we're inserting over Vec's tail.
9423 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9424 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9425 Policy = RISCVII::TAIL_AGNOSTIC;
9427 // If we're inserting into the lowest elements, use a tail undisturbed
9428 // vmv.v.v.
9429 if (OrigIdx == 0) {
9430 SubVec =
9431 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9432 } else {
9433 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9434 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9435 SlideupAmt, Mask, VL, Policy);
9438 if (VecVT.isFixedLengthVector())
9439 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9440 return DAG.getBitcast(Op.getValueType(), SubVec);
9443 unsigned SubRegIdx, RemIdx;
9444 std::tie(SubRegIdx, RemIdx) =
9445 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9446 VecVT, SubVecVT, OrigIdx, TRI);
9448 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9449 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9450 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9451 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9453 // 1. If the Idx has been completely eliminated and this subvector's size is
9454 // a vector register or a multiple thereof, or the surrounding elements are
9455 // undef, then this is a subvector insert which naturally aligns to a vector
9456 // register. These can easily be handled using subregister manipulation.
9457 // 2. If the subvector is smaller than a vector register, then the insertion
9458 // must preserve the undisturbed elements of the register. We do this by
9459 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9460 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9461 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9462 // LMUL=1 type back into the larger vector (resolving to another subregister
9463 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9464 // to avoid allocating a large register group to hold our subvector.
9465 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9466 return Op;
9468 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9469 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9470 // (in our case undisturbed). This means we can set up a subvector insertion
9471 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9472 // size of the subvector.
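// For example (illustrative): inserting an nxv1i32 subvector at a remainder
// index of 2 uses OFFSET = 2 * vscale and VL = 3 * vscale, so the slideup
// writes elements [2*vscale, 3*vscale) and leaves the lower elements and the
// tail undisturbed.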
9473 MVT InterSubVT = VecVT;
9474 SDValue AlignedExtract = Vec;
9475 unsigned AlignedIdx = OrigIdx - RemIdx;
9476 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9477 InterSubVT = getLMUL1VT(VecVT);
9478 // Extract a subvector equal to the nearest full vector register type. This
9479 // should resolve to a EXTRACT_SUBREG instruction.
9480 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9481 DAG.getConstant(AlignedIdx, DL, XLenVT));
9484 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9485 DAG.getUNDEF(InterSubVT), SubVec,
9486 DAG.getConstant(0, DL, XLenVT));
9488 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9490 VL = computeVLMax(SubVecVT, DL, DAG);
9492 // If we're inserting into the lowest elements, use a tail undisturbed
9493 // vmv.v.v.
9494 if (RemIdx == 0) {
9495 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9496 SubVec, VL);
9497 } else {
9498 SDValue SlideupAmt =
9499 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9501 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9502 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9504 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9505 SlideupAmt, Mask, VL);
9508 // If required, insert this subvector back into the correct vector register.
9509 // This should resolve to an INSERT_SUBREG instruction.
9510 if (VecVT.bitsGT(InterSubVT))
9511 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9512 DAG.getConstant(AlignedIdx, DL, XLenVT));
9514 // We might have bitcast from a mask type: cast back to the original type if
9515 // required.
9516 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9519 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9520 SelectionDAG &DAG) const {
9521 SDValue Vec = Op.getOperand(0);
9522 MVT SubVecVT = Op.getSimpleValueType();
9523 MVT VecVT = Vec.getSimpleValueType();
9525 SDLoc DL(Op);
9526 MVT XLenVT = Subtarget.getXLenVT();
9527 unsigned OrigIdx = Op.getConstantOperandVal(1);
9528 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9530 // We don't have the ability to slide mask vectors down indexed by their i1
9531 // elements; the smallest we can do is i8. Often we are able to bitcast to
9532 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9533 // from a scalable one, we might not necessarily have enough scalable
9534 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9535 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9536 if (VecVT.getVectorMinNumElements() >= 8 &&
9537 SubVecVT.getVectorMinNumElements() >= 8) {
9538 assert(OrigIdx % 8 == 0 && "Invalid index");
9539 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9540 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9541 "Unexpected mask vector lowering");
9542 OrigIdx /= 8;
9543 SubVecVT =
9544 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9545 SubVecVT.isScalableVector());
9546 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9547 VecVT.isScalableVector());
9548 Vec = DAG.getBitcast(VecVT, Vec);
9549 } else {
9550 // We can't slide this mask vector down, indexed by its i1 elements.
9551 // This poses a problem when we wish to extract a scalable vector which
9552 // can't be re-expressed as a larger type. Just choose the slow path and
9553 // extend to a larger type, then truncate back down.
9554 // TODO: We could probably improve this when extracting a fixed-length
9555 // vector from a fixed-length vector, where we can extract as i8 and shift
9556 // the correct element right to reach the desired subvector.
9557 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9558 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9559 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9560 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9561 Op.getOperand(1));
9562 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9563 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9567 // With an index of 0 this is a cast-like subvector extract, which can be performed
9568 // with subregister operations.
9569 if (OrigIdx == 0)
9570 return Op;
9572 // If the subvector is a fixed-length type, we cannot use subregister
9573 // manipulation to simplify the codegen; we don't know which register of a
9574 // LMUL group contains the specific subvector as we only know the minimum
9575 // register size. Therefore we must slide the vector group down the full
9576 // amount.
9577 if (SubVecVT.isFixedLengthVector()) {
9578 MVT ContainerVT = VecVT;
9579 if (VecVT.isFixedLengthVector()) {
9580 ContainerVT = getContainerForFixedLengthVector(VecVT);
9581 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9584 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9585 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9586 if (auto ShrunkVT =
9587 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9588 ContainerVT = *ShrunkVT;
9589 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9590 DAG.getVectorIdxConstant(0, DL));
9593 SDValue Mask =
9594 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9595 // Set the vector length to only the number of elements we care about. This
9596 // avoids sliding down elements we're going to discard straight away.
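// For example (illustrative): extracting a 4-element subvector starting at
// index 8 slides the source down by 8 with VL = 4, so only the four elements
// we keep are actually moved.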
9597 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9598 Subtarget);
9599 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9600 SDValue Slidedown =
9601 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9602 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9603 // Now we can use a cast-like subvector extract to get the result.
9604 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9605 DAG.getConstant(0, DL, XLenVT));
9606 return DAG.getBitcast(Op.getValueType(), Slidedown);
9609 unsigned SubRegIdx, RemIdx;
9610 std::tie(SubRegIdx, RemIdx) =
9611 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9612 VecVT, SubVecVT, OrigIdx, TRI);
9614 // If the Idx has been completely eliminated then this is a subvector extract
9615 // which naturally aligns to a vector register. These can easily be handled
9616 // using subregister manipulation.
9617 if (RemIdx == 0)
9618 return Op;
9620 // Else SubVecVT is a fractional LMUL and may need to be slid down.
9621 assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);
9623 // If the vector type is an LMUL-group type, extract a subvector equal to the
9624 // nearest full vector register type.
9625 MVT InterSubVT = VecVT;
9626 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9627 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
9628 // we should have successfully decomposed the extract into a subregister.
9629 assert(SubRegIdx != RISCV::NoSubRegister);
9630 InterSubVT = getLMUL1VT(VecVT);
9631 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
9634 // Slide this vector register down by the desired number of elements in order
9635 // to place the desired subvector starting at element 0.
9636 SDValue SlidedownAmt =
9637 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9639 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
9640 SDValue Slidedown =
9641 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
9642 Vec, SlidedownAmt, Mask, VL);
9644 // Now the vector is in the right position, extract our final subvector. This
9645 // should resolve to a COPY.
9646 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9647 DAG.getConstant(0, DL, XLenVT));
9649 // We might have bitcast from a mask type: cast back to the original type if
9650 // required.
9651 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
9654 // Widen a vector operation's operands to i8, then truncate its results back to
9655 // the original type, typically i1. All operand and result types must be the same.
9656 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
9657 SelectionDAG &DAG) {
9658 MVT VT = N.getSimpleValueType();
9659 MVT WideVT = VT.changeVectorElementType(MVT::i8);
9660 SmallVector<SDValue, 4> WideOps;
9661 for (SDValue Op : N->ops()) {
9662 assert(Op.getSimpleValueType() == VT &&
9663 "Operands and result must be same type");
9664 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
9667 unsigned NumVals = N->getNumValues();
9669 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
9670 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
9671 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
9672 SmallVector<SDValue, 4> TruncVals;
9673 for (unsigned I = 0; I < NumVals; I++) {
9674 TruncVals.push_back(
9675 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
9676 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
9679 if (TruncVals.size() > 1)
9680 return DAG.getMergeValues(TruncVals, DL);
9681 return TruncVals.front();
9684 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
9685 SelectionDAG &DAG) const {
9686 SDLoc DL(Op);
9687 MVT VecVT = Op.getSimpleValueType();
9688 MVT XLenVT = Subtarget.getXLenVT();
9690 assert(VecVT.isScalableVector() &&
9691 "vector_interleave on non-scalable vector!");
9693 // 1 bit element vectors need to be widened to e8
9694 if (VecVT.getVectorElementType() == MVT::i1)
9695 return widenVectorOpsToi8(Op, DL, DAG);
9697 // If the VT is LMUL=8, we need to split and reassemble.
9698 if (VecVT.getSizeInBits().getKnownMinValue() ==
9699 (8 * RISCV::RVVBitsPerBlock)) {
9700 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9701 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9702 EVT SplitVT = Op0Lo.getValueType();
9704 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9705 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
9706 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9707 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
9709 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9710 ResLo.getValue(0), ResHi.getValue(0));
9711 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
9712 ResHi.getValue(1));
9713 return DAG.getMergeValues({Even, Odd}, DL);
9716 // Concatenate the two vectors as one vector to deinterleave
9717 MVT ConcatVT =
9718 MVT::getVectorVT(VecVT.getVectorElementType(),
9719 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9720 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9721 Op.getOperand(0), Op.getOperand(1));
9723 // We want to operate on all lanes, so get the mask and VL for it
9724 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
9725 SDValue Passthru = DAG.getUNDEF(ConcatVT);
9727 // We can deinterleave through vnsrl.wi if the element type is smaller than
9728 // ELEN
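// For example (illustrative): for SEW=16 the concatenated vector is viewed as
// a vector of 32-bit pairs; vnsrl with a shift of 0 keeps the even (low)
// halves and vnsrl with a shift of 16 keeps the odd (high) halves.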
9729 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9730 SDValue Even =
9731 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
9732 SDValue Odd =
9733 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
9734 return DAG.getMergeValues({Even, Odd}, DL);
9737 // For the indices, use the same SEW to avoid an extra vsetvli
9738 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
9739 // Create a vector of even indices {0, 2, 4, ...}
9740 SDValue EvenIdx =
9741 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
9742 // Create a vector of odd indices {1, 3, 5, ... }
9743 SDValue OddIdx =
9744 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
9746 // Gather the even and odd elements into two separate vectors
9747 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9748 Concat, EvenIdx, Passthru, Mask, VL);
9749 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9750 Concat, OddIdx, Passthru, Mask, VL);
9752 // Extract the result half of the gather for even and odd
9753 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
9754 DAG.getConstant(0, DL, XLenVT));
9755 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
9756 DAG.getConstant(0, DL, XLenVT));
9758 return DAG.getMergeValues({Even, Odd}, DL);
9761 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
9762 SelectionDAG &DAG) const {
9763 SDLoc DL(Op);
9764 MVT VecVT = Op.getSimpleValueType();
9766 assert(VecVT.isScalableVector() &&
9767 "vector_interleave on non-scalable vector!");
9769 // i1 vectors need to be widened to i8
9770 if (VecVT.getVectorElementType() == MVT::i1)
9771 return widenVectorOpsToi8(Op, DL, DAG);
9773 MVT XLenVT = Subtarget.getXLenVT();
9774 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
9776 // If the VT is LMUL=8, we need to split and reassemble.
9777 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
9778 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9779 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9780 EVT SplitVT = Op0Lo.getValueType();
9782 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9783 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
9784 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9785 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
9787 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9788 ResLo.getValue(0), ResLo.getValue(1));
9789 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9790 ResHi.getValue(0), ResHi.getValue(1));
9791 return DAG.getMergeValues({Lo, Hi}, DL);
9794 SDValue Interleaved;
9796 // If the element type is smaller than ELEN, then we can interleave with
9797 // vwaddu.vv and vwmaccu.vx
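// For example (illustrative): vwaddu.vv computes zext(a) + zext(b), and
// vwmaccu.vx with an all-ones scalar then adds (2^SEW - 1) * zext(b), giving
// zext(a) + (zext(b) << SEW); reinterpreted at the original SEW this is the
// interleave of the two sources.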
9798 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9799 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
9800 DAG, Subtarget);
9801 } else {
9802 // Otherwise, fallback to using vrgathere16.vv
9803 MVT ConcatVT =
9804 MVT::getVectorVT(VecVT.getVectorElementType(),
9805 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9806 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9807 Op.getOperand(0), Op.getOperand(1));
9809 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
9811 // 0 1 2 3 4 5 6 7 ...
9812 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
9814 // 1 1 1 1 1 1 1 1 ...
9815 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
9817 // 1 0 1 0 1 0 1 0 ...
9818 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
9819 OddMask = DAG.getSetCC(
9820 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
9821 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
9822 ISD::CondCode::SETNE);
9824 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
9826 // Build up the index vector for interleaving the concatenated vector
9827 // 0 0 1 1 2 2 3 3 ...
9828 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
9829 // 0 n 1 n+1 2 n+2 3 n+3 ...
9830 Idx =
9831 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
9833 // Then perform the interleave
9834 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
9835 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
9836 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
9837 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
9840 // Extract the two halves from the interleaved result
9841 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9842 DAG.getVectorIdxConstant(0, DL));
9843 SDValue Hi = DAG.getNode(
9844 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9845 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
9847 return DAG.getMergeValues({Lo, Hi}, DL);
9850 // Lower step_vector to the vid instruction. Any non-identity step value must
9851 // be accounted for by manual expansion.
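// For example (illustrative): a step of 4 lowers to vid.v followed by a shift
// left by 2, while a non-power-of-two step such as 3 lowers to vid.v followed
// by a multiply by the splatted step value.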
9852 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
9853 SelectionDAG &DAG) const {
9854 SDLoc DL(Op);
9855 MVT VT = Op.getSimpleValueType();
9856 assert(VT.isScalableVector() && "Expected scalable vector");
9857 MVT XLenVT = Subtarget.getXLenVT();
9858 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
9859 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9860 uint64_t StepValImm = Op.getConstantOperandVal(0);
9861 if (StepValImm != 1) {
9862 if (isPowerOf2_64(StepValImm)) {
9863 SDValue StepVal =
9864 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9865 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
9866 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
9867 } else {
9868 SDValue StepVal = lowerScalarSplat(
9869 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
9870 VL, VT, DL, DAG, Subtarget);
9871 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
9874 return StepVec;
9877 // Implement vector_reverse using vrgather.vv with indices determined by
9878 // subtracting the id of each element from (VLMAX-1). This will convert
9879 // the indices like so:
9880 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
9881 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
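// For example (illustrative): for a SEW=32 vector this emits vid.v to produce
// (0, 1, ...), subtracts each element index from the splatted VLMAX-1, and
// feeds the resulting indices to vrgather.vv (or vrgatherei16.vv for SEW=8
// with a large VLMAX).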
9882 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
9883 SelectionDAG &DAG) const {
9884 SDLoc DL(Op);
9885 MVT VecVT = Op.getSimpleValueType();
9886 if (VecVT.getVectorElementType() == MVT::i1) {
9887 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9888 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
9889 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
9890 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
9892 unsigned EltSize = VecVT.getScalarSizeInBits();
9893 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
9894 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
9895 unsigned MaxVLMAX =
9896 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
9898 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
9899 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
9901 // If this is SEW=8 and VLMAX is potentially more than 256, we need
9902 // to use vrgatherei16.vv.
9903 // TODO: It's also possible to use vrgatherei16.vv for other types to
9904 // decrease register width for the index calculation.
9905 if (MaxVLMAX > 256 && EltSize == 8) {
9906 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
9907 // Reverse each half, then reassemble them in reverse order.
9908 // NOTE: It's also possible that after splitting that VLMAX no longer
9909 // requires vrgatherei16.vv.
9910 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
9911 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9912 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
9913 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
9914 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
9915 // Reassemble the low and high pieces reversed.
9916 // FIXME: This is a CONCAT_VECTORS.
9917 SDValue Res =
9918 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
9919 DAG.getIntPtrConstant(0, DL));
9920 return DAG.getNode(
9921 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
9922 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
9925 // Just promote the int type to i16 which will double the LMUL.
9926 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
9927 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
9930 MVT XLenVT = Subtarget.getXLenVT();
9931 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9933 // Calculate VLMAX-1 for the desired SEW.
9934 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
9935 computeVLMax(VecVT, DL, DAG),
9936 DAG.getConstant(1, DL, XLenVT));
9938 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
9939 bool IsRV32E64 =
9940 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
9941 SDValue SplatVL;
9942 if (!IsRV32E64)
9943 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
9944 else
9945 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
9946 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
9948 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
9949 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
9950 DAG.getUNDEF(IntVT), Mask, VL);
9952 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
9953 DAG.getUNDEF(VecVT), Mask, VL);
9956 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
9957 SelectionDAG &DAG) const {
9958 SDLoc DL(Op);
9959 SDValue V1 = Op.getOperand(0);
9960 SDValue V2 = Op.getOperand(1);
9961 MVT XLenVT = Subtarget.getXLenVT();
9962 MVT VecVT = Op.getSimpleValueType();
9964 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
9966 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
9967 SDValue DownOffset, UpOffset;
9968 if (ImmValue >= 0) {
9969 // The operand is a TargetConstant, we need to rebuild it as a regular
9970 // constant.
9971 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
9972 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
9973 } else {
9974 // The operand is a TargetConstant, we need to rebuild it as a regular
9975 // constant rather than negating the original operand.
9976 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
9977 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
9980 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
9982 SDValue SlideDown =
9983 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
9984 DownOffset, TrueMask, UpOffset);
9985 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
9986 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
9987 RISCVII::TAIL_AGNOSTIC);
9990 SDValue
9991 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
9992 SelectionDAG &DAG) const {
9993 SDLoc DL(Op);
9994 auto *Load = cast<LoadSDNode>(Op);
9996 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9997 Load->getMemoryVT(),
9998 *Load->getMemOperand()) &&
9999 "Expecting a correctly-aligned load");
10001 MVT VT = Op.getSimpleValueType();
10002 MVT XLenVT = Subtarget.getXLenVT();
10003 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10005 // If we know the exact VLEN and our fixed length vector completely fills
10006 // the container, use a whole register load instead.
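// For example (illustrative): with an exact VLEN of 128, a v4i32 load exactly
// fills its nxv4i32 container, so a regular load of the container type (which
// selects to a whole-register load) can be used with no explicit VL.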
10007 const auto [MinVLMAX, MaxVLMAX] =
10008 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10009 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10010 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10011 SDValue NewLoad =
10012 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10013 Load->getMemOperand());
10014 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10015 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10018 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10020 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10021 SDValue IntID = DAG.getTargetConstant(
10022 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10023 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10024 if (!IsMaskOp)
10025 Ops.push_back(DAG.getUNDEF(ContainerVT));
10026 Ops.push_back(Load->getBasePtr());
10027 Ops.push_back(VL);
10028 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10029 SDValue NewLoad =
10030 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10031 Load->getMemoryVT(), Load->getMemOperand());
10033 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10034 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10037 SDValue
10038 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10039 SelectionDAG &DAG) const {
10040 SDLoc DL(Op);
10041 auto *Store = cast<StoreSDNode>(Op);
10043 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10044 Store->getMemoryVT(),
10045 *Store->getMemOperand()) &&
10046 "Expecting a correctly-aligned store");
10048 SDValue StoreVal = Store->getValue();
10049 MVT VT = StoreVal.getSimpleValueType();
10050 MVT XLenVT = Subtarget.getXLenVT();
10052 // If the size is less than a byte, we need to pad with zeros to make a byte.
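// For example (illustrative): a v2i1 mask value is first inserted into an
// all-zero v8i1 vector so that the resulting store (vsm.v for mask types)
// writes a full byte.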
10053 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10054 VT = MVT::v8i1;
10055 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
10056 DAG.getConstant(0, DL, VT), StoreVal,
10057 DAG.getIntPtrConstant(0, DL));
10060 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10062 SDValue NewValue =
10063 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10066 // If we know the exact VLEN and our fixed length vector completely fills
10067 // the container, use a whole register store instead.
10068 const auto [MinVLMAX, MaxVLMAX] =
10069 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10070 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10071 getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
10072 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10073 Store->getMemOperand());
10075 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10076 Subtarget);
10078 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10079 SDValue IntID = DAG.getTargetConstant(
10080 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10081 return DAG.getMemIntrinsicNode(
10082 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10083 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10084 Store->getMemoryVT(), Store->getMemOperand());
10087 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10088 SelectionDAG &DAG) const {
10089 SDLoc DL(Op);
10090 MVT VT = Op.getSimpleValueType();
10092 const auto *MemSD = cast<MemSDNode>(Op);
10093 EVT MemVT = MemSD->getMemoryVT();
10094 MachineMemOperand *MMO = MemSD->getMemOperand();
10095 SDValue Chain = MemSD->getChain();
10096 SDValue BasePtr = MemSD->getBasePtr();
10098 SDValue Mask, PassThru, VL;
10099 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10100 Mask = VPLoad->getMask();
10101 PassThru = DAG.getUNDEF(VT);
10102 VL = VPLoad->getVectorLength();
10103 } else {
10104 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10105 Mask = MLoad->getMask();
10106 PassThru = MLoad->getPassThru();
10109 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10111 MVT XLenVT = Subtarget.getXLenVT();
10113 MVT ContainerVT = VT;
10114 if (VT.isFixedLengthVector()) {
10115 ContainerVT = getContainerForFixedLengthVector(VT);
10116 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10117 if (!IsUnmasked) {
10118 MVT MaskVT = getMaskTypeFor(ContainerVT);
10119 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10123 if (!VL)
10124 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10126 unsigned IntID =
10127 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10128 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10129 if (IsUnmasked)
10130 Ops.push_back(DAG.getUNDEF(ContainerVT));
10131 else
10132 Ops.push_back(PassThru);
10133 Ops.push_back(BasePtr);
10134 if (!IsUnmasked)
10135 Ops.push_back(Mask);
10136 Ops.push_back(VL);
10137 if (!IsUnmasked)
10138 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10140 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10142 SDValue Result =
10143 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10144 Chain = Result.getValue(1);
10146 if (VT.isFixedLengthVector())
10147 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10149 return DAG.getMergeValues({Result, Chain}, DL);
10152 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10153 SelectionDAG &DAG) const {
10154 SDLoc DL(Op);
10156 const auto *MemSD = cast<MemSDNode>(Op);
10157 EVT MemVT = MemSD->getMemoryVT();
10158 MachineMemOperand *MMO = MemSD->getMemOperand();
10159 SDValue Chain = MemSD->getChain();
10160 SDValue BasePtr = MemSD->getBasePtr();
10161 SDValue Val, Mask, VL;
10163 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10164 Val = VPStore->getValue();
10165 Mask = VPStore->getMask();
10166 VL = VPStore->getVectorLength();
10167 } else {
10168 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10169 Val = MStore->getValue();
10170 Mask = MStore->getMask();
10173 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10175 MVT VT = Val.getSimpleValueType();
10176 MVT XLenVT = Subtarget.getXLenVT();
10178 MVT ContainerVT = VT;
10179 if (VT.isFixedLengthVector()) {
10180 ContainerVT = getContainerForFixedLengthVector(VT);
10182 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10183 if (!IsUnmasked) {
10184 MVT MaskVT = getMaskTypeFor(ContainerVT);
10185 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10189 if (!VL)
10190 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10192 unsigned IntID =
10193 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10194 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10195 Ops.push_back(Val);
10196 Ops.push_back(BasePtr);
10197 if (!IsUnmasked)
10198 Ops.push_back(Mask);
10199 Ops.push_back(VL);
10201 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10202 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10205 SDValue
10206 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10207 SelectionDAG &DAG) const {
10208 MVT InVT = Op.getOperand(0).getSimpleValueType();
10209 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10211 MVT VT = Op.getSimpleValueType();
10213 SDValue Op1 =
10214 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10215 SDValue Op2 =
10216 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10218 SDLoc DL(Op);
10219 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10220 DAG, Subtarget);
10221 MVT MaskVT = getMaskTypeFor(ContainerVT);
10223 SDValue Cmp =
10224 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10225 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10227 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10230 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10231 SelectionDAG &DAG) const {
10232 unsigned Opc = Op.getOpcode();
10233 SDLoc DL(Op);
10234 SDValue Chain = Op.getOperand(0);
10235 SDValue Op1 = Op.getOperand(1);
10236 SDValue Op2 = Op.getOperand(2);
10237 SDValue CC = Op.getOperand(3);
10238 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10239 MVT VT = Op.getSimpleValueType();
10240 MVT InVT = Op1.getSimpleValueType();
10242 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
10243 // condition codes.
10244 if (Opc == ISD::STRICT_FSETCCS) {
10245 // Expand strict_fsetccs(x, y, oeq) to
10246 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10247 SDVTList VTList = Op->getVTList();
10248 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10249 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10250 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10251 Op2, OLECCVal);
10252 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10253 Op1, OLECCVal);
10254 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10255 Tmp1.getValue(1), Tmp2.getValue(1));
10256 // Tmp1 and Tmp2 might be the same node.
10257 if (Tmp1 != Tmp2)
10258 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10259 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10262 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10263 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10264 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10265 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10266 Op2, OEQCCVal);
10267 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10268 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10272 MVT ContainerInVT = InVT;
10273 if (InVT.isFixedLengthVector()) {
10274 ContainerInVT = getContainerForFixedLengthVector(InVT);
10275 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10276 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10278 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10280 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10282 SDValue Res;
10283 if (Opc == ISD::STRICT_FSETCC &&
10284 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10285 CCVal == ISD::SETOLE)) {
10286 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
10287 // only active when both input elements are ordered.
10288 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10289 SDValue OrderMask1 = DAG.getNode(
10290 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10291 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10292 True, VL});
10293 SDValue OrderMask2 = DAG.getNode(
10294 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10295 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10296 True, VL});
10297 Mask =
10298 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10299 // Use Mask as the merge operand to let the result be 0 if either of the
10300 // inputs is unordered.
10301 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10302 DAG.getVTList(MaskVT, MVT::Other),
10303 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10304 } else {
10305 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10306 : RISCVISD::STRICT_FSETCCS_VL;
10307 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10308 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10311 if (VT.isFixedLengthVector()) {
10312 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10313 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10315 return Res;
10318 // Lower vector ABS to smax(X, sub(0, X)).
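// For example, for an i32 element holding -3 this computes
// smax(-3, 0 - (-3)) = smax(-3, 3) = 3; for INT_MIN the subtraction wraps and
// the result stays INT_MIN, matching ISD::ABS semantics.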
10319 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10320 SDLoc DL(Op);
10321 MVT VT = Op.getSimpleValueType();
10322 SDValue X = Op.getOperand(0);
10324 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10325 "Unexpected type for ISD::ABS");
10327 MVT ContainerVT = VT;
10328 if (VT.isFixedLengthVector()) {
10329 ContainerVT = getContainerForFixedLengthVector(VT);
10330 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10333 SDValue Mask, VL;
10334 if (Op->getOpcode() == ISD::VP_ABS) {
10335 Mask = Op->getOperand(1);
10336 if (VT.isFixedLengthVector())
10337 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10338 Subtarget);
10339 VL = Op->getOperand(2);
10340 } else
10341 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10343 SDValue SplatZero = DAG.getNode(
10344 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10345 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10346 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10347 DAG.getUNDEF(ContainerVT), Mask, VL);
10348 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10349 DAG.getUNDEF(ContainerVT), Mask, VL);
10351 if (VT.isFixedLengthVector())
10352 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10353 return Max;
10356 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10357 SDValue Op, SelectionDAG &DAG) const {
10358 SDLoc DL(Op);
10359 MVT VT = Op.getSimpleValueType();
10360 SDValue Mag = Op.getOperand(0);
10361 SDValue Sign = Op.getOperand(1);
10362 assert(Mag.getValueType() == Sign.getValueType() &&
10363 "Can only handle COPYSIGN with matching types.");
10365 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10366 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10367 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10369 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10371 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10372 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10374 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10377 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10378 SDValue Op, SelectionDAG &DAG) const {
10379 MVT VT = Op.getSimpleValueType();
10380 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10382 MVT I1ContainerVT =
10383 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10385 SDValue CC =
10386 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10387 SDValue Op1 =
10388 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10389 SDValue Op2 =
10390 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10392 SDLoc DL(Op);
10393 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10395 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10396 Op2, DAG.getUNDEF(ContainerVT), VL);
10398 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10401 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10402 SelectionDAG &DAG) const {
10403 unsigned NewOpc = getRISCVVLOp(Op);
10404 bool HasMergeOp = hasMergeOp(NewOpc);
10405 bool HasMask = hasMaskOp(NewOpc);
10407 MVT VT = Op.getSimpleValueType();
10408 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10410 // Create list of operands by converting existing ones to scalable types.
10411 SmallVector<SDValue, 6> Ops;
10412 for (const SDValue &V : Op->op_values()) {
10413 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10415 // Pass through non-vector operands.
10416 if (!V.getValueType().isVector()) {
10417 Ops.push_back(V);
10418 continue;
10421 // "cast" fixed length vector to a scalable vector.
10422 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10423 "Only fixed length vectors are supported!");
10424 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10427 SDLoc DL(Op);
10428 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10429 if (HasMergeOp)
10430 Ops.push_back(DAG.getUNDEF(ContainerVT));
10431 if (HasMask)
10432 Ops.push_back(Mask);
10433 Ops.push_back(VL);
10435 // StrictFP operations have two result values. Their lowered result should
10436 // have the same result count.
10437 if (Op->isStrictFPOpcode()) {
10438 SDValue ScalableRes =
10439 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10440 Op->getFlags());
10441 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10442 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10445 SDValue ScalableRes =
10446 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10447 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10450 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10451 // * Operands of each node are assumed to be in the same order.
10452 // * The EVL operand is promoted from i32 to i64 on RV64.
10453 // * Fixed-length vectors are converted to their scalable-vector container
10454 // types.
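// As an informal sketch (concrete types chosen only for illustration, assuming
// the usual v4i32 -> nxv2i32 container mapping): a fixed-length vp.add such as
//   VP_ADD x:v4i32, y:v4i32, mask:v4i1, evl
// is emitted as
//   RISCVISD::ADD_VL x', y', undef-merge, mask', evl
// where x', y' and mask' are the operands "cast" to their scalable container
// types.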
10455 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10456 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10457 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10459 SDLoc DL(Op);
10460 MVT VT = Op.getSimpleValueType();
10461 SmallVector<SDValue, 4> Ops;
10463 MVT ContainerVT = VT;
10464 if (VT.isFixedLengthVector())
10465 ContainerVT = getContainerForFixedLengthVector(VT);
10467 for (const auto &OpIdx : enumerate(Op->ops())) {
10468 SDValue V = OpIdx.value();
10469 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10470 // Add a dummy merge value before the mask, or, if there isn't a mask, before
10471 // the EVL.
10472 if (HasMergeOp) {
10473 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10474 if (MaskIdx) {
10475 if (*MaskIdx == OpIdx.index())
10476 Ops.push_back(DAG.getUNDEF(ContainerVT));
10477 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10478 OpIdx.index()) {
10479 if (Op.getOpcode() == ISD::VP_MERGE) {
10480 // For VP_MERGE, copy the false operand instead of an undef value.
10481 Ops.push_back(Ops.back());
10482 } else {
10483 assert(Op.getOpcode() == ISD::VP_SELECT);
10484 // For VP_SELECT, add an undef value.
10485 Ops.push_back(DAG.getUNDEF(ContainerVT));
10489 // Pass through operands which aren't fixed-length vectors.
10490 if (!V.getValueType().isFixedLengthVector()) {
10491 Ops.push_back(V);
10492 continue;
10494 // "cast" fixed length vector to a scalable vector.
10495 MVT OpVT = V.getSimpleValueType();
10496 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10497 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10498 "Only fixed length vectors are supported!");
10499 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10502 if (!VT.isFixedLengthVector())
10503 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10505 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10507 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10510 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10511 SelectionDAG &DAG) const {
10512 SDLoc DL(Op);
10513 MVT VT = Op.getSimpleValueType();
10515 SDValue Src = Op.getOperand(0);
10516 // NOTE: Mask is dropped.
10517 SDValue VL = Op.getOperand(2);
10519 MVT ContainerVT = VT;
10520 if (VT.isFixedLengthVector()) {
10521 ContainerVT = getContainerForFixedLengthVector(VT);
10522 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10523 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10526 MVT XLenVT = Subtarget.getXLenVT();
10527 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10528 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10529 DAG.getUNDEF(ContainerVT), Zero, VL);
10531 SDValue SplatValue = DAG.getConstant(
10532 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10533 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10534 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10536 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10537 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10538 if (!VT.isFixedLengthVector())
10539 return Result;
10540 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10543 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10544 SelectionDAG &DAG) const {
10545 SDLoc DL(Op);
10546 MVT VT = Op.getSimpleValueType();
10548 SDValue Op1 = Op.getOperand(0);
10549 SDValue Op2 = Op.getOperand(1);
10550 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10551 // NOTE: Mask is dropped.
10552 SDValue VL = Op.getOperand(4);
10554 MVT ContainerVT = VT;
10555 if (VT.isFixedLengthVector()) {
10556 ContainerVT = getContainerForFixedLengthVector(VT);
10557 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10558 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10561 SDValue Result;
10562 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
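// Note: these mask-vector identities rely on an i1 element being 1 when true
// and 0 when false (or -1 when interpreted as signed), e.g. X <u Y can only
// hold when X == 0 and Y == 1, which is exactly ~X & Y.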
10564 switch (Condition) {
10565 default:
10566 break;
10567 // X != Y --> (X^Y)
10568 case ISD::SETNE:
10569 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10570 break;
10571 // X == Y --> ~(X^Y)
10572 case ISD::SETEQ: {
10573 SDValue Temp =
10574 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10575 Result =
10576 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
10577 break;
10579 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
10580 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
10581 case ISD::SETGT:
10582 case ISD::SETULT: {
10583 SDValue Temp =
10584 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10585 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
10586 break;
10588 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
10589 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
10590 case ISD::SETLT:
10591 case ISD::SETUGT: {
10592 SDValue Temp =
10593 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10594 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
10595 break;
10597 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
10598 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
10599 case ISD::SETGE:
10600 case ISD::SETULE: {
10601 SDValue Temp =
10602 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10603 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
10604 break;
10606 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
10607 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
10608 case ISD::SETLE:
10609 case ISD::SETUGE: {
10610 SDValue Temp =
10611 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10612 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
10613 break;
10617 if (!VT.isFixedLengthVector())
10618 return Result;
10619 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10622 // Lower Floating-Point/Integer Type-Convert VP SDNodes
10623 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
10624 SelectionDAG &DAG) const {
10625 SDLoc DL(Op);
10627 SDValue Src = Op.getOperand(0);
10628 SDValue Mask = Op.getOperand(1);
10629 SDValue VL = Op.getOperand(2);
10630 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10632 MVT DstVT = Op.getSimpleValueType();
10633 MVT SrcVT = Src.getSimpleValueType();
10634 if (DstVT.isFixedLengthVector()) {
10635 DstVT = getContainerForFixedLengthVector(DstVT);
10636 SrcVT = getContainerForFixedLengthVector(SrcVT);
10637 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10638 MVT MaskVT = getMaskTypeFor(DstVT);
10639 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10642 unsigned DstEltSize = DstVT.getScalarSizeInBits();
10643 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
10645 SDValue Result;
10646 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
10647 if (SrcVT.isInteger()) {
10648 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10650 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
10651 ? RISCVISD::VSEXT_VL
10652 : RISCVISD::VZEXT_VL;
10654 // Do we need to do any pre-widening before converting?
10655 if (SrcEltSize == 1) {
10656 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
10657 MVT XLenVT = Subtarget.getXLenVT();
10658 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10659 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10660 DAG.getUNDEF(IntVT), Zero, VL);
10661 SDValue One = DAG.getConstant(
10662 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
10663 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10664 DAG.getUNDEF(IntVT), One, VL);
10665 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
10666 ZeroSplat, DAG.getUNDEF(IntVT), VL);
10667 } else if (DstEltSize > (2 * SrcEltSize)) {
10668 // Widen before converting.
10669 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
10670 DstVT.getVectorElementCount());
10671 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
10674 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10675 } else {
10676 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10677 "Wrong input/output vector types");
10679 // Convert f16 to f32 then convert f32 to i64.
10680 if (DstEltSize > (2 * SrcEltSize)) {
10681 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10682 MVT InterimFVT =
10683 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10684 Src =
10685 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
10688 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10690 } else { // Narrowing + Conversion
10691 if (SrcVT.isInteger()) {
10692 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10693 // First do a narrowing conversion to an FP type half the source size, then
10694 // round that FP type down to a smaller FP type if needed.
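// For example, an i64 -> f16 conversion is emitted as a narrowing i64 -> f32
// convert followed by an f32 -> f16 FP_ROUND_VL.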
10696 MVT InterimFVT = DstVT;
10697 if (SrcEltSize > (2 * DstEltSize)) {
10698 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
10699 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10700 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10703 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
10705 if (InterimFVT != DstVT) {
10706 Src = Result;
10707 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
10709 } else {
10710 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10711 "Wrong input/output vector types");
10712 // First do a narrowing conversion to an integer half the size, then
10713 // truncate if needed.
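// For example, an f64 -> i8 conversion is emitted as a narrowing f64 -> i32
// convert followed by i32 -> i16 and i16 -> i8 TRUNCATE_VECTOR_VL steps in the
// loop below.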
10715 if (DstEltSize == 1) {
10716 // First convert to the same size integer, then convert to mask using
10717 // setcc.
10718 assert(SrcEltSize >= 16 && "Unexpected FP type!");
10719 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
10720 DstVT.getVectorElementCount());
10721 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10723 // Compare the integer result to 0. The integer should be 0 or 1/-1,
10724 // otherwise the conversion was undefined.
10725 MVT XLenVT = Subtarget.getXLenVT();
10726 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10727 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
10728 DAG.getUNDEF(InterimIVT), SplatZero, VL);
10729 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
10730 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
10731 DAG.getUNDEF(DstVT), Mask, VL});
10732 } else {
10733 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10734 DstVT.getVectorElementCount());
10736 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10738 while (InterimIVT != DstVT) {
10739 SrcEltSize /= 2;
10740 Src = Result;
10741 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10742 DstVT.getVectorElementCount());
10743 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
10744 Src, Mask, VL);
10750 MVT VT = Op.getSimpleValueType();
10751 if (!VT.isFixedLengthVector())
10752 return Result;
10753 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10756 SDValue
10757 RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
10758 SelectionDAG &DAG) const {
10759 SDLoc DL(Op);
10761 SDValue Op1 = Op.getOperand(0);
10762 SDValue Op2 = Op.getOperand(1);
10763 SDValue Offset = Op.getOperand(2);
10764 SDValue Mask = Op.getOperand(3);
10765 SDValue EVL1 = Op.getOperand(4);
10766 SDValue EVL2 = Op.getOperand(5);
10768 const MVT XLenVT = Subtarget.getXLenVT();
10769 MVT VT = Op.getSimpleValueType();
10770 MVT ContainerVT = VT;
10771 if (VT.isFixedLengthVector()) {
10772 ContainerVT = getContainerForFixedLengthVector(VT);
10773 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10774 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10775 MVT MaskVT = getMaskTypeFor(ContainerVT);
10776 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10779 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
10780 if (IsMaskVector) {
10781 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
10783 // Expand input operands
10784 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10785 DAG.getUNDEF(ContainerVT),
10786 DAG.getConstant(1, DL, XLenVT), EVL1);
10787 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10788 DAG.getUNDEF(ContainerVT),
10789 DAG.getConstant(0, DL, XLenVT), EVL1);
10790 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
10791 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
10793 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10794 DAG.getUNDEF(ContainerVT),
10795 DAG.getConstant(1, DL, XLenVT), EVL2);
10796 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10797 DAG.getUNDEF(ContainerVT),
10798 DAG.getConstant(0, DL, XLenVT), EVL2);
10799 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
10800 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
10803 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
10804 SDValue DownOffset, UpOffset;
10805 if (ImmValue >= 0) {
10806 // The operand is a TargetConstant, we need to rebuild it as a regular
10807 // constant.
10808 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10809 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
10810 } else {
10811 // The operand is a TargetConstant, we need to rebuild it as a regular
10812 // constant rather than negating the original operand.
10813 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10814 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
10817 SDValue SlideDown =
10818 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10819 Op1, DownOffset, Mask, UpOffset);
10820 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
10821 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
10823 if (IsMaskVector) {
10824 // Truncate Result back to a mask vector (Result has same EVL as Op2)
10825 Result = DAG.getNode(
10826 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
10827 {Result, DAG.getConstant(0, DL, ContainerVT),
10828 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
10829 Mask, EVL2});
10832 if (!VT.isFixedLengthVector())
10833 return Result;
10834 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10837 SDValue
10838 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
10839 SelectionDAG &DAG) const {
10840 SDLoc DL(Op);
10841 MVT VT = Op.getSimpleValueType();
10842 MVT XLenVT = Subtarget.getXLenVT();
10844 SDValue Op1 = Op.getOperand(0);
10845 SDValue Mask = Op.getOperand(1);
10846 SDValue EVL = Op.getOperand(2);
10848 MVT ContainerVT = VT;
10849 if (VT.isFixedLengthVector()) {
10850 ContainerVT = getContainerForFixedLengthVector(VT);
10851 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10852 MVT MaskVT = getMaskTypeFor(ContainerVT);
10853 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10856 MVT GatherVT = ContainerVT;
10857 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
10858 // Check if we are working with mask vectors
10859 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
10860 if (IsMaskVector) {
10861 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
10863 // Expand input operand
10864 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
10865 DAG.getUNDEF(IndicesVT),
10866 DAG.getConstant(1, DL, XLenVT), EVL);
10867 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
10868 DAG.getUNDEF(IndicesVT),
10869 DAG.getConstant(0, DL, XLenVT), EVL);
10870 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
10871 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
10874 unsigned EltSize = GatherVT.getScalarSizeInBits();
10875 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
10876 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10877 unsigned MaxVLMAX =
10878 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10880 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10881 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
10882 // to use vrgatherei16.vv.
10883 // TODO: It's also possible to use vrgatherei16.vv for other types to
10884 // decrease register width for the index calculation.
10885 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10886 if (MaxVLMAX > 256 && EltSize == 8) {
10887 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
10888 // Split the vector in half and reverse each half using a full register
10889 // reverse.
10890 // Swap the halves and concatenate them.
10891 // Slide the concatenated result by (VLMax - VL).
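// A small worked example (element names chosen only for illustration): with
// VLMAX = 8 and EVL = 3, input <e0 e1 e2 _ _ _ _ _> splits into <e0 e1 e2 _>
// and <_ _ _ _>; reversing each half and concatenating them swapped gives
// <_ _ _ _ _ e2 e1 e0>, and sliding down by VLMAX - EVL = 5 leaves
// <e2 e1 e0 ...> as required.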
10892 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10893 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
10894 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
10896 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10897 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10899 // Reassemble the low and high pieces reversed.
10900 // NOTE: this Result is unmasked (because we do not need masks for
10901 // shuffles). If in the future this has to change, we can use a SELECT_VL
10902 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
10903 SDValue Result =
10904 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
10906 // Slide off any elements from past EVL that were reversed into the low
10907 // elements.
10908 unsigned MinElts = GatherVT.getVectorMinNumElements();
10909 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
10910 DAG.getConstant(MinElts, DL, XLenVT));
10911 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
10913 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
10914 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
10916 if (IsMaskVector) {
10917 // Truncate Result back to a mask vector
10918 Result =
10919 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
10920 {Result, DAG.getConstant(0, DL, GatherVT),
10921 DAG.getCondCode(ISD::SETNE),
10922 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
10925 if (!VT.isFixedLengthVector())
10926 return Result;
10927 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10930 // Just promote the int type to i16 which will double the LMUL.
10931 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
10932 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10935 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
10936 SDValue VecLen =
10937 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
10938 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
10939 DAG.getUNDEF(IndicesVT), VecLen, EVL);
10940 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
10941 DAG.getUNDEF(IndicesVT), Mask, EVL);
10942 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
10943 DAG.getUNDEF(GatherVT), Mask, EVL);
10945 if (IsMaskVector) {
10946 // Truncate Result back to a mask vector
10947 Result = DAG.getNode(
10948 RISCVISD::SETCC_VL, DL, ContainerVT,
10949 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
10950 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
10953 if (!VT.isFixedLengthVector())
10954 return Result;
10955 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10958 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
10959 SelectionDAG &DAG) const {
10960 MVT VT = Op.getSimpleValueType();
10961 if (VT.getVectorElementType() != MVT::i1)
10962 return lowerVPOp(Op, DAG);
10964 // It is safe to drop the mask parameter as the masked-off elements are undef.
10965 SDValue Op1 = Op->getOperand(0);
10966 SDValue Op2 = Op->getOperand(1);
10967 SDValue VL = Op->getOperand(3);
10969 MVT ContainerVT = VT;
10970 const bool IsFixed = VT.isFixedLengthVector();
10971 if (IsFixed) {
10972 ContainerVT = getContainerForFixedLengthVector(VT);
10973 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10974 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10977 SDLoc DL(Op);
10978 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
10979 if (!IsFixed)
10980 return Val;
10981 return convertFromScalableVector(VT, Val, DAG, Subtarget);
10984 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
10985 SelectionDAG &DAG) const {
10986 SDLoc DL(Op);
10987 MVT XLenVT = Subtarget.getXLenVT();
10988 MVT VT = Op.getSimpleValueType();
10989 MVT ContainerVT = VT;
10990 if (VT.isFixedLengthVector())
10991 ContainerVT = getContainerForFixedLengthVector(VT);
10993 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10995 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
10996 // Check if the mask is known to be all ones
10997 SDValue Mask = VPNode->getMask();
10998 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11000 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11001 : Intrinsic::riscv_vlse_mask,
11002 DL, XLenVT);
11003 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11004 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11005 VPNode->getStride()};
11006 if (!IsUnmasked) {
11007 if (VT.isFixedLengthVector()) {
11008 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11009 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11011 Ops.push_back(Mask);
11013 Ops.push_back(VPNode->getVectorLength());
11014 if (!IsUnmasked) {
11015 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11016 Ops.push_back(Policy);
11019 SDValue Result =
11020 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11021 VPNode->getMemoryVT(), VPNode->getMemOperand());
11022 SDValue Chain = Result.getValue(1);
11024 if (VT.isFixedLengthVector())
11025 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11027 return DAG.getMergeValues({Result, Chain}, DL);
11030 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11031 SelectionDAG &DAG) const {
11032 SDLoc DL(Op);
11033 MVT XLenVT = Subtarget.getXLenVT();
11035 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11036 SDValue StoreVal = VPNode->getValue();
11037 MVT VT = StoreVal.getSimpleValueType();
11038 MVT ContainerVT = VT;
11039 if (VT.isFixedLengthVector()) {
11040 ContainerVT = getContainerForFixedLengthVector(VT);
11041 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11044 // Check if the mask is known to be all ones
11045 SDValue Mask = VPNode->getMask();
11046 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11048 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11049 : Intrinsic::riscv_vsse_mask,
11050 DL, XLenVT);
11051 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11052 VPNode->getBasePtr(), VPNode->getStride()};
11053 if (!IsUnmasked) {
11054 if (VT.isFixedLengthVector()) {
11055 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11056 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11058 Ops.push_back(Mask);
11060 Ops.push_back(VPNode->getVectorLength());
11062 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11063 Ops, VPNode->getMemoryVT(),
11064 VPNode->getMemOperand());
11067 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11068 // matched to an RVV indexed load. The RVV indexed load instructions only
11069 // support the "unsigned unscaled" addressing mode; indices are implicitly
11070 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
11071 // signed or scaled indexing is extended to the XLEN value type and scaled
11072 // accordingly.
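// As an informal sketch (types chosen only for illustration): a v4i32 gather
// whose mask is known to be all ones is emitted as an unmasked riscv_vluxei
// intrinsic on the scalable container type; otherwise riscv_vluxei_mask is
// used, with the index vector truncated on RV32 if its elements are wider
// than XLEN.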
11073 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11074 SelectionDAG &DAG) const {
11075 SDLoc DL(Op);
11076 MVT VT = Op.getSimpleValueType();
11078 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11079 EVT MemVT = MemSD->getMemoryVT();
11080 MachineMemOperand *MMO = MemSD->getMemOperand();
11081 SDValue Chain = MemSD->getChain();
11082 SDValue BasePtr = MemSD->getBasePtr();
11084 ISD::LoadExtType LoadExtType;
11085 SDValue Index, Mask, PassThru, VL;
11087 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11088 Index = VPGN->getIndex();
11089 Mask = VPGN->getMask();
11090 PassThru = DAG.getUNDEF(VT);
11091 VL = VPGN->getVectorLength();
11092 // VP doesn't support extending loads.
11093 LoadExtType = ISD::NON_EXTLOAD;
11094 } else {
11095 // Else it must be a MGATHER.
11096 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11097 Index = MGN->getIndex();
11098 Mask = MGN->getMask();
11099 PassThru = MGN->getPassThru();
11100 LoadExtType = MGN->getExtensionType();
11103 MVT IndexVT = Index.getSimpleValueType();
11104 MVT XLenVT = Subtarget.getXLenVT();
11106 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11107 "Unexpected VTs!");
11108 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11109 // Targets have to explicitly opt in to extending vector loads.
11110 assert(LoadExtType == ISD::NON_EXTLOAD &&
11111 "Unexpected extending MGATHER/VP_GATHER");
11112 (void)LoadExtType;
11114 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11115 // the selection of the masked intrinsics doesn't do this for us.
11116 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11118 MVT ContainerVT = VT;
11119 if (VT.isFixedLengthVector()) {
11120 ContainerVT = getContainerForFixedLengthVector(VT);
11121 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11122 ContainerVT.getVectorElementCount());
11124 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11126 if (!IsUnmasked) {
11127 MVT MaskVT = getMaskTypeFor(ContainerVT);
11128 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11129 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11133 if (!VL)
11134 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11136 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11137 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11138 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11141 unsigned IntID =
11142 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11143 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11144 if (IsUnmasked)
11145 Ops.push_back(DAG.getUNDEF(ContainerVT));
11146 else
11147 Ops.push_back(PassThru);
11148 Ops.push_back(BasePtr);
11149 Ops.push_back(Index);
11150 if (!IsUnmasked)
11151 Ops.push_back(Mask);
11152 Ops.push_back(VL);
11153 if (!IsUnmasked)
11154 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11156 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11157 SDValue Result =
11158 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11159 Chain = Result.getValue(1);
11161 if (VT.isFixedLengthVector())
11162 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11164 return DAG.getMergeValues({Result, Chain}, DL);
11167 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11168 // matched to an RVV indexed store. The RVV indexed store instructions only
11169 // support the "unsigned unscaled" addressing mode; indices are implicitly
11170 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
11171 // signed or scaled indexing is extended to the XLEN value type and scaled
11172 // accordingly.
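// As with the gather lowering above, a scatter whose mask is known to be all
// ones is emitted as an unmasked riscv_vsoxei intrinsic; otherwise
// riscv_vsoxei_mask is used.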
11173 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11174 SelectionDAG &DAG) const {
11175 SDLoc DL(Op);
11176 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11177 EVT MemVT = MemSD->getMemoryVT();
11178 MachineMemOperand *MMO = MemSD->getMemOperand();
11179 SDValue Chain = MemSD->getChain();
11180 SDValue BasePtr = MemSD->getBasePtr();
11182 bool IsTruncatingStore = false;
11183 SDValue Index, Mask, Val, VL;
11185 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11186 Index = VPSN->getIndex();
11187 Mask = VPSN->getMask();
11188 Val = VPSN->getValue();
11189 VL = VPSN->getVectorLength();
11190 // VP doesn't support truncating stores.
11191 IsTruncatingStore = false;
11192 } else {
11193 // Else it must be a MSCATTER.
11194 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11195 Index = MSN->getIndex();
11196 Mask = MSN->getMask();
11197 Val = MSN->getValue();
11198 IsTruncatingStore = MSN->isTruncatingStore();
11201 MVT VT = Val.getSimpleValueType();
11202 MVT IndexVT = Index.getSimpleValueType();
11203 MVT XLenVT = Subtarget.getXLenVT();
11205 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11206 "Unexpected VTs!");
11207 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11208 // Targets have to explicitly opt in to extending vector loads and
11209 // truncating vector stores.
11210 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11211 (void)IsTruncatingStore;
11213 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11214 // the selection of the masked intrinsics doesn't do this for us.
11215 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11217 MVT ContainerVT = VT;
11218 if (VT.isFixedLengthVector()) {
11219 ContainerVT = getContainerForFixedLengthVector(VT);
11220 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11221 ContainerVT.getVectorElementCount());
11223 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11224 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11226 if (!IsUnmasked) {
11227 MVT MaskVT = getMaskTypeFor(ContainerVT);
11228 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11232 if (!VL)
11233 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11235 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11236 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11237 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11240 unsigned IntID =
11241 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11242 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11243 Ops.push_back(Val);
11244 Ops.push_back(BasePtr);
11245 Ops.push_back(Index);
11246 if (!IsUnmasked)
11247 Ops.push_back(Mask);
11248 Ops.push_back(VL);
11250 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11251 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11254 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11255 SelectionDAG &DAG) const {
11256 const MVT XLenVT = Subtarget.getXLenVT();
11257 SDLoc DL(Op);
11258 SDValue Chain = Op->getOperand(0);
11259 SDValue SysRegNo = DAG.getTargetConstant(
11260 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11261 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11262 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11264 // The encoding used for the rounding mode in RISC-V differs from that used by
11265 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
11266 // a table, which consists of a sequence of 4-bit fields, each representing the
11267 // corresponding FLT_ROUNDS mode.
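// For example, if FRM currently holds RISCVFPRndMode::RTZ, the shift below
// selects the 4-bit field at bit position 4 * RTZ, which was populated with
// int(RoundingMode::TowardZero), i.e. the FLT_ROUNDS value for
// round-toward-zero.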
11268 static const int Table =
11269 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11270 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11271 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11272 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11273 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11275 SDValue Shift =
11276 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11277 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11278 DAG.getConstant(Table, DL, XLenVT), Shift);
11279 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11280 DAG.getConstant(7, DL, XLenVT));
11282 return DAG.getMergeValues({Masked, Chain}, DL);
11285 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11286 SelectionDAG &DAG) const {
11287 const MVT XLenVT = Subtarget.getXLenVT();
11288 SDLoc DL(Op);
11289 SDValue Chain = Op->getOperand(0);
11290 SDValue RMValue = Op->getOperand(1);
11291 SDValue SysRegNo = DAG.getTargetConstant(
11292 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11294 // The encoding used for the rounding mode in RISC-V differs from that used by
11295 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into a
11296 // table, which consists of a sequence of 4-bit fields, each representing the
11297 // corresponding RISC-V mode.
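// For example, a requested RoundingMode::TowardPositive selects the 4-bit
// field at bit position 4 * int(TowardPositive), which holds
// RISCVFPRndMode::RUP, the value then written to FRM.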
11298 static const unsigned Table =
11299 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11300 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11301 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11302 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11303 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11305 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11307 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11308 DAG.getConstant(2, DL, XLenVT));
11309 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11310 DAG.getConstant(Table, DL, XLenVT), Shift);
11311 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11312 DAG.getConstant(0x7, DL, XLenVT));
11313 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11314 RMValue);
11317 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11318 SelectionDAG &DAG) const {
11319 MachineFunction &MF = DAG.getMachineFunction();
11321 bool isRISCV64 = Subtarget.is64Bit();
11322 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11324 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11325 return DAG.getFrameIndex(FI, PtrVT);
11328 // Returns the opcode of the target-specific SDNode that implements the 32-bit
11329 // form of the given Opcode.
11330 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11331 switch (Opcode) {
11332 default:
11333 llvm_unreachable("Unexpected opcode");
11334 case ISD::SHL:
11335 return RISCVISD::SLLW;
11336 case ISD::SRA:
11337 return RISCVISD::SRAW;
11338 case ISD::SRL:
11339 return RISCVISD::SRLW;
11340 case ISD::SDIV:
11341 return RISCVISD::DIVW;
11342 case ISD::UDIV:
11343 return RISCVISD::DIVUW;
11344 case ISD::UREM:
11345 return RISCVISD::REMUW;
11346 case ISD::ROTL:
11347 return RISCVISD::ROLW;
11348 case ISD::ROTR:
11349 return RISCVISD::RORW;
11353 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11354 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11355 // otherwise be promoted to i64, making it difficult to select the
11356 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally
11357 // of type i8/i16/i32 is lost.
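// For example, with the default ANY_EXTEND this turns (i32 (srl x, y)) into
// (trunc i32 (SRLW (any_extend i64 x), (any_extend i64 y))).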
11358 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11359 unsigned ExtOpc = ISD::ANY_EXTEND) {
11360 SDLoc DL(N);
11361 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11362 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11363 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11364 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11365 // ReplaceNodeResults requires we maintain the same type for the return value.
11366 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11369 // Converts the given 32-bit operation to an i64 operation with sign-extension
11370 // semantics to reduce the number of sign-extension instructions.
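// For example, (i32 (add x, y)) becomes
// (trunc i32 (sext_inreg (add i64 (any_extend x), (any_extend y)), i32)).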
11371 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11372 SDLoc DL(N);
11373 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11374 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11375 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11376 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11377 DAG.getValueType(MVT::i32));
11378 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11381 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11382 SmallVectorImpl<SDValue> &Results,
11383 SelectionDAG &DAG) const {
11384 SDLoc DL(N);
11385 switch (N->getOpcode()) {
11386 default:
11387 llvm_unreachable("Don't know how to custom type legalize this operation!");
11388 case ISD::STRICT_FP_TO_SINT:
11389 case ISD::STRICT_FP_TO_UINT:
11390 case ISD::FP_TO_SINT:
11391 case ISD::FP_TO_UINT: {
11392 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11393 "Unexpected custom legalisation");
11394 bool IsStrict = N->isStrictFPOpcode();
11395 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11396 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11397 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11398 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11399 TargetLowering::TypeSoftenFloat) {
11400 if (!isTypeLegal(Op0.getValueType()))
11401 return;
11402 if (IsStrict) {
11403 SDValue Chain = N->getOperand(0);
11404 // In the absence of Zfh, promote f16 to f32, then convert.
11405 if (Op0.getValueType() == MVT::f16 &&
11406 !Subtarget.hasStdExtZfhOrZhinx()) {
11407 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11408 {Chain, Op0});
11409 Chain = Op0.getValue(1);
11411 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11412 : RISCVISD::STRICT_FCVT_WU_RV64;
11413 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11414 SDValue Res = DAG.getNode(
11415 Opc, DL, VTs, Chain, Op0,
11416 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11417 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11418 Results.push_back(Res.getValue(1));
11419 return;
11421 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11422 // convert.
11423 if ((Op0.getValueType() == MVT::f16 &&
11424 !Subtarget.hasStdExtZfhOrZhinx()) ||
11425 Op0.getValueType() == MVT::bf16)
11426 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11428 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11429 SDValue Res =
11430 DAG.getNode(Opc, DL, MVT::i64, Op0,
11431 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11432 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11433 return;
11435 // If the FP type needs to be softened, emit a library call using the 'si'
11436 // version. If we left it to default legalization we'd end up with 'di'. If
11437 // the FP type doesn't need to be softened just let generic type
11438 // legalization promote the result type.
11439 RTLIB::Libcall LC;
11440 if (IsSigned)
11441 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11442 else
11443 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11444 MakeLibCallOptions CallOptions;
11445 EVT OpVT = Op0.getValueType();
11446 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11447 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11448 SDValue Result;
11449 std::tie(Result, Chain) =
11450 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11451 Results.push_back(Result);
11452 if (IsStrict)
11453 Results.push_back(Chain);
11454 break;
11456 case ISD::LROUND: {
11457 SDValue Op0 = N->getOperand(0);
11458 EVT Op0VT = Op0.getValueType();
11459 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11460 TargetLowering::TypeSoftenFloat) {
11461 if (!isTypeLegal(Op0VT))
11462 return;
11464 // In the absence of Zfh, promote f16 to f32, then convert.
11465 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11466 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11468 SDValue Res =
11469 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11470 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11472 return;
11474 // If the FP type needs to be softened, emit a library call to lround. We'll
11475 // need to truncate the result. We assume that for any value which doesn't fit
11476 // in i32, the call is allowed to return an unspecified value.
11477 RTLIB::Libcall LC =
11478 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11479 MakeLibCallOptions CallOptions;
11480 EVT OpVT = Op0.getValueType();
11481 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11482 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11483 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11484 Results.push_back(Result);
11485 break;
11487 case ISD::READCYCLECOUNTER: {
11488 assert(!Subtarget.is64Bit() &&
11489 "READCYCLECOUNTER only has custom type legalization on riscv32");
11491 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11492 SDValue RCW =
11493 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
11495 Results.push_back(
11496 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11497 Results.push_back(RCW.getValue(2));
11498 break;
11500 case ISD::LOAD: {
11501 if (!ISD::isNON_EXTLoad(N))
11502 return;
11504 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11505 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11506 LoadSDNode *Ld = cast<LoadSDNode>(N);
11508 SDLoc dl(N);
11509 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11510 Ld->getBasePtr(), Ld->getMemoryVT(),
11511 Ld->getMemOperand());
11512 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11513 Results.push_back(Res.getValue(1));
11514 return;
11516 case ISD::MUL: {
11517 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11518 unsigned XLen = Subtarget.getXLen();
11519 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11520 if (Size > XLen) {
11521 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11522 SDValue LHS = N->getOperand(0);
11523 SDValue RHS = N->getOperand(1);
11524 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11526 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11527 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11528 // We need exactly one side to be unsigned.
11529 if (LHSIsU == RHSIsU)
11530 return;
11532 auto MakeMULPair = [&](SDValue S, SDValue U) {
11533 MVT XLenVT = Subtarget.getXLenVT();
11534 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11535 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11536 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11537 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11538 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11541 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11542 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
11544 // The other operand should be signed, but still prefer MULH when
11545 // possible.
11546 if (RHSIsU && LHSIsS && !RHSIsS)
11547 Results.push_back(MakeMULPair(LHS, RHS));
11548 else if (LHSIsU && RHSIsS && !LHSIsS)
11549 Results.push_back(MakeMULPair(RHS, LHS));
11551 return;
11553 [[fallthrough]];
11555 case ISD::ADD:
11556 case ISD::SUB:
11557 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11558 "Unexpected custom legalisation");
11559 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
11560 break;
11561 case ISD::SHL:
11562 case ISD::SRA:
11563 case ISD::SRL:
11564 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11565 "Unexpected custom legalisation");
11566 if (N->getOperand(1).getOpcode() != ISD::Constant) {
11567 // If we can use a BSET instruction, allow default promotion to apply.
11568 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
11569 isOneConstant(N->getOperand(0)))
11570 break;
11571 Results.push_back(customLegalizeToWOp(N, DAG));
11572 break;
11575 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
11576 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
11577 // shift amount.
11578 if (N->getOpcode() == ISD::SHL) {
11579 SDLoc DL(N);
11580 SDValue NewOp0 =
11581 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11582 SDValue NewOp1 =
11583 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
11584 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
11585 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11586 DAG.getValueType(MVT::i32));
11587 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11590 break;
11591 case ISD::ROTL:
11592 case ISD::ROTR:
11593 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11594 "Unexpected custom legalisation");
11595 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
11596 Subtarget.hasVendorXTHeadBb()) &&
11597 "Unexpected custom legalization");
11598 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
11599 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
11600 return;
11601 Results.push_back(customLegalizeToWOp(N, DAG));
11602 break;
11603 case ISD::CTTZ:
11604 case ISD::CTTZ_ZERO_UNDEF:
11605 case ISD::CTLZ:
11606 case ISD::CTLZ_ZERO_UNDEF: {
11607 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11608 "Unexpected custom legalisation");
11610 SDValue NewOp0 =
11611 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11612 bool IsCTZ =
11613 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
11614 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
11615 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
11616 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11617 return;
11619 case ISD::SDIV:
11620 case ISD::UDIV:
11621 case ISD::UREM: {
11622 MVT VT = N->getSimpleValueType(0);
11623 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
11624 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
11625 "Unexpected custom legalisation");
11626 // Don't promote division/remainder by a constant since we should expand those
11627 // to a multiply by a magic constant.
11628 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
11629 if (N->getOperand(1).getOpcode() == ISD::Constant &&
11630 !isIntDivCheap(N->getValueType(0), Attr))
11631 return;
11633 // If the input is i32, use ANY_EXTEND since the W instructions don't read
11634 // the upper 32 bits. For other types we need to sign or zero extend
11635 // based on the opcode.
11636 unsigned ExtOpc = ISD::ANY_EXTEND;
11637 if (VT != MVT::i32)
11638 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
11639 : ISD::ZERO_EXTEND;
11641 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
11642 break;
11644 case ISD::SADDO: {
11645 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11646 "Unexpected custom legalisation");
11648 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
11649 // use the default legalization.
11650 if (!isa<ConstantSDNode>(N->getOperand(1)))
11651 return;
11653 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11654 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11655 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
11656 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11657 DAG.getValueType(MVT::i32));
11659 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
11661 // For an addition, the result should be less than one of the operands (LHS)
11662 // if and only if the other operand (RHS) is negative, otherwise there will
11663 // be overflow.
11664 // For a subtraction, the result should be less than one of the operands
11665 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11666 // otherwise there will be overflow.
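// Worked example (illustrative): for i32 LHS = 1 and RHS = INT32_MAX, Res
// wraps to INT32_MIN, so Res < LHS is true while RHS < 0 is false; the XOR
// of the two conditions below is 1 and overflow is reported. For RHS = -1,
// Res = 0 < LHS and RHS < 0 are both true, the XOR is 0, and no overflow
// is reported.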
11667 EVT OType = N->getValueType(1);
11668 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
11669 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
11671 SDValue Overflow =
11672 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
11673 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11674 Results.push_back(Overflow);
11675 return;
11677 case ISD::UADDO:
11678 case ISD::USUBO: {
11679 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11680 "Unexpected custom legalisation");
11681 bool IsAdd = N->getOpcode() == ISD::UADDO;
11682 // Create an ADDW or SUBW.
11683 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11684 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11685 SDValue Res =
11686 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
11687 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11688 DAG.getValueType(MVT::i32));
11690 SDValue Overflow;
11691 if (IsAdd && isOneConstant(RHS)) {
11692 // Special case: uaddo(X, 1) overflows iff the addition result is 0.
11693 // The general case (X + C) < C is not necessarily beneficial. Although we
11694 // reduce the live range of X, we may introduce the materialization of
11695 // constant C, especially when the setcc result is used by a branch. We have
11696 // no compare-with-constant-and-branch instructions.
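// Worked example (illustrative): uaddo(X, 1) on i32 overflows only for
// X = 0xffffffff, which is exactly the case where the 32-bit result is 0,
// so a single seteq against zero is sufficient.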
11697 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
11698 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
11699 } else if (IsAdd && isAllOnesConstant(RHS)) {
11700 // Special case: uaddo(X, -1) overflows iff X != 0.
11701 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
11702 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
11703 } else {
11704 // Sign extend the LHS and perform an unsigned compare with the ADDW
11705 // result. Since the inputs are sign extended from i32, this is equivalent
11706 // to comparing the lower 32 bits.
11707 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11708 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
11709 IsAdd ? ISD::SETULT : ISD::SETUGT);
11712 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11713 Results.push_back(Overflow);
11714 return;
11716 case ISD::UADDSAT:
11717 case ISD::USUBSAT: {
11718 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11719 "Unexpected custom legalisation");
11720 if (Subtarget.hasStdExtZbb()) {
11721 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
11722 // sign extend allows overflow of the lower 32 bits to be detected on
11723 // the promoted size.
11724 SDValue LHS =
11725 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11726 SDValue RHS =
11727 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11728 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
11729 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11730 return;
11733 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
11734 // promotion for UADDO/USUBO.
11735 Results.push_back(expandAddSubSat(N, DAG));
11736 return;
11738 case ISD::ABS: {
11739 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11740 "Unexpected custom legalisation");
11742 if (Subtarget.hasStdExtZbb()) {
11743 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
11744 // This allows us to remember that the result is sign extended. Expanding
11745 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
11746 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
11747 N->getOperand(0));
11748 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
11749 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
11750 return;
11753 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
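// Worked example (illustrative): for X = -5, Y = X >> 31 = -1 (all ones),
// X ^ Y = 4, and (X ^ Y) - Y = 4 - (-1) = 5. For non-negative X, Y = 0 and
// the expression reduces to X itself.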
11754 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11756 // Freeze the source so we can increase its use count.
11757 Src = DAG.getFreeze(Src);
11759 // Copy sign bit to all bits using the sraiw pattern.
11760 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
11761 DAG.getValueType(MVT::i32));
11762 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
11763 DAG.getConstant(31, DL, MVT::i64));
11765 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
11766 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
11768 // NOTE: The result is only required to be anyextended, but sext is
11769 // consistent with type legalization of sub.
11770 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
11771 DAG.getValueType(MVT::i32));
11772 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11773 return;
11775 case ISD::BITCAST: {
11776 EVT VT = N->getValueType(0);
11777 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
11778 SDValue Op0 = N->getOperand(0);
11779 EVT Op0VT = Op0.getValueType();
11780 MVT XLenVT = Subtarget.getXLenVT();
11781 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
11782 Subtarget.hasStdExtZfhminOrZhinxmin()) {
11783 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11784 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11785 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
11786 Subtarget.hasStdExtZfbfmin()) {
11787 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11788 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11789 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
11790 Subtarget.hasStdExtFOrZfinx()) {
11791 SDValue FPConv =
11792 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
11793 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
11794 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
11795 Subtarget.hasStdExtZfa()) {
11796 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
11797 DAG.getVTList(MVT::i32, MVT::i32), Op0);
11798 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
11799 NewReg.getValue(0), NewReg.getValue(1));
11800 Results.push_back(RetReg);
11801 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
11802 isTypeLegal(Op0VT)) {
11803 // Custom-legalize bitcasts from fixed-length vector types to illegal
11804 // scalar types in order to improve codegen. Bitcast the vector to a
11805 // one-element vector type whose element type is the same as the result
11806 // type, and extract the first element.
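// Illustrative sketch (informal): on RV32 with vectors enabled, a bitcast
// of a legal v8i8 to the illegal scalar type i64 would be rewritten as
// (extract_vector_elt (bitcast v8i8 to v1i64), 0), provided v1i64 is
// itself a legal type.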
11807 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
11808 if (isTypeLegal(BVT)) {
11809 SDValue BVec = DAG.getBitcast(BVT, Op0);
11810 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
11811 DAG.getConstant(0, DL, XLenVT)));
11814 break;
11816 case RISCVISD::BREV8: {
11817 MVT VT = N->getSimpleValueType(0);
11818 MVT XLenVT = Subtarget.getXLenVT();
11819 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
11820 "Unexpected custom legalisation");
11821 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11822 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
11823 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
11824 // ReplaceNodeResults requires we maintain the same type for the return
11825 // value.
11826 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
11827 break;
11829 case ISD::EXTRACT_VECTOR_ELT: {
11830 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
11831 // type is illegal (currently only vXi64 RV32).
11832 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
11833 // transferred to the destination register. We issue two of these from the
11834 // upper- and lower- halves of the SEW-bit vector element, slid down to the
11835 // first element.
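// Illustrative sketch (informal): extracting element Idx of a vXi64 vector
// on RV32 becomes roughly
//   v   = vslidedown v, Idx    // skipped when Idx is 0
//   lo  = vmv.x.s v            // low 32 bits
//   v'  = vsrl.vx v, 32
//   hi  = vmv.x.s v'           // high 32 bits
//   res = build_pair lo, hi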
11836 SDValue Vec = N->getOperand(0);
11837 SDValue Idx = N->getOperand(1);
11839 // The vector type hasn't been legalized yet so we can't issue target
11840 // specific nodes if it needs legalization.
11841 // FIXME: We would manually legalize if it's important.
11842 if (!isTypeLegal(Vec.getValueType()))
11843 return;
11845 MVT VecVT = Vec.getSimpleValueType();
11847 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
11848 VecVT.getVectorElementType() == MVT::i64 &&
11849 "Unexpected EXTRACT_VECTOR_ELT legalization");
11851 // If this is a fixed vector, we need to convert it to a scalable vector.
11852 MVT ContainerVT = VecVT;
11853 if (VecVT.isFixedLengthVector()) {
11854 ContainerVT = getContainerForFixedLengthVector(VecVT);
11855 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11858 MVT XLenVT = Subtarget.getXLenVT();
11860 // Use a VL of 1 to avoid processing more elements than we need.
11861 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
11863 // Unless the index is known to be 0, we must slide the vector down to get
11864 // the desired element into index 0.
11865 if (!isNullConstant(Idx)) {
11866 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11867 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
11870 // Extract the lower XLEN bits of the correct vector element.
11871 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11873 // To extract the upper XLEN bits of the vector element, shift the first
11874 // element right by 32 bits and re-extract the lower XLEN bits.
11875 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11876 DAG.getUNDEF(ContainerVT),
11877 DAG.getConstant(32, DL, XLenVT), VL);
11878 SDValue LShr32 =
11879 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
11880 DAG.getUNDEF(ContainerVT), Mask, VL);
11882 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11884 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11885 break;
11887 case ISD::INTRINSIC_WO_CHAIN: {
11888 unsigned IntNo = N->getConstantOperandVal(0);
11889 switch (IntNo) {
11890 default:
11891 llvm_unreachable(
11892 "Don't know how to custom type legalize this intrinsic!");
11893 case Intrinsic::experimental_get_vector_length: {
11894 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
11895 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11896 return;
11898 case Intrinsic::riscv_orc_b:
11899 case Intrinsic::riscv_brev8:
11900 case Intrinsic::riscv_sha256sig0:
11901 case Intrinsic::riscv_sha256sig1:
11902 case Intrinsic::riscv_sha256sum0:
11903 case Intrinsic::riscv_sha256sum1:
11904 case Intrinsic::riscv_sm3p0:
11905 case Intrinsic::riscv_sm3p1: {
11906 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11907 return;
11908 unsigned Opc;
11909 switch (IntNo) {
11910 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11911 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11912 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11913 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11914 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11915 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11916 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11917 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11920 SDValue NewOp =
11921 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11922 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
11923 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11924 return;
11926 case Intrinsic::riscv_sm4ks:
11927 case Intrinsic::riscv_sm4ed: {
11928 unsigned Opc =
11929 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11930 SDValue NewOp0 =
11931 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11932 SDValue NewOp1 =
11933 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11934 SDValue Res =
11935 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
11936 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11937 return;
11939 case Intrinsic::riscv_clmul: {
11940 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11941 return;
11943 SDValue NewOp0 =
11944 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11945 SDValue NewOp1 =
11946 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11947 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
11948 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11949 return;
11951 case Intrinsic::riscv_clmulh:
11952 case Intrinsic::riscv_clmulr: {
11953 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11954 return;
11956 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
11957 // to the full 128-bit clmul result of multiplying two xlen values.
11958 // Perform clmulr or clmulh on the shifted values. Finally, extract the
11959 // upper 32 bits.
11961 // The alternative is to mask the inputs to 32 bits and use clmul, but
11962 // that requires two shifts to mask each input without zext.w.
11963 // FIXME: If the inputs are known zero extended or could be freely
11964 // zero extended, the mask form would be better.
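// Width sketch (illustrative): for 32-bit x and y,
//   (x << 32) clmul (y << 32) == clmul(x, y) << 64,
// so the upper 64 bits of that 128-bit product are exactly clmul(x, y);
// taking CLMULH or CLMULR of the shifted operands and then shifting right
// by 32 recovers the bits the 32-bit clmulh/clmulr operations return.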
11965 SDValue NewOp0 =
11966 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11967 SDValue NewOp1 =
11968 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11969 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
11970 DAG.getConstant(32, DL, MVT::i64));
11971 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
11972 DAG.getConstant(32, DL, MVT::i64));
11973 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
11974 : RISCVISD::CLMULR;
11975 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
11976 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
11977 DAG.getConstant(32, DL, MVT::i64));
11978 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11979 return;
11981 case Intrinsic::riscv_vmv_x_s: {
11982 EVT VT = N->getValueType(0);
11983 MVT XLenVT = Subtarget.getXLenVT();
11984 if (VT.bitsLT(XLenVT)) {
11985 // Simple case just extract using vmv.x.s and truncate.
11986 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
11987 Subtarget.getXLenVT(), N->getOperand(1));
11988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
11989 return;
11992 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
11993 "Unexpected custom legalization");
11995 // We need to do the move in two steps.
11996 SDValue Vec = N->getOperand(1);
11997 MVT VecVT = Vec.getSimpleValueType();
11999 // First extract the lower XLEN bits of the element.
12000 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12002 // To extract the upper XLEN bits of the vector element, shift the first
12003 // element right by 32 bits and re-extract the lower XLEN bits.
12004 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12006 SDValue ThirtyTwoV =
12007 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12008 DAG.getConstant(32, DL, XLenVT), VL);
12009 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12010 DAG.getUNDEF(VecVT), Mask, VL);
12011 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12013 Results.push_back(
12014 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12015 break;
12018 break;
12020 case ISD::VECREDUCE_ADD:
12021 case ISD::VECREDUCE_AND:
12022 case ISD::VECREDUCE_OR:
12023 case ISD::VECREDUCE_XOR:
12024 case ISD::VECREDUCE_SMAX:
12025 case ISD::VECREDUCE_UMAX:
12026 case ISD::VECREDUCE_SMIN:
12027 case ISD::VECREDUCE_UMIN:
12028 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12029 Results.push_back(V);
12030 break;
12031 case ISD::VP_REDUCE_ADD:
12032 case ISD::VP_REDUCE_AND:
12033 case ISD::VP_REDUCE_OR:
12034 case ISD::VP_REDUCE_XOR:
12035 case ISD::VP_REDUCE_SMAX:
12036 case ISD::VP_REDUCE_UMAX:
12037 case ISD::VP_REDUCE_SMIN:
12038 case ISD::VP_REDUCE_UMIN:
12039 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12040 Results.push_back(V);
12041 break;
12042 case ISD::GET_ROUNDING: {
12043 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12044 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12045 Results.push_back(Res.getValue(0));
12046 Results.push_back(Res.getValue(1));
12047 break;
12052 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12053 /// which corresponds to it.
12054 static unsigned getVecReduceOpcode(unsigned Opc) {
12055 switch (Opc) {
12056 default:
12057 llvm_unreachable("Unhandled binary to transform reduction");
12058 case ISD::ADD:
12059 return ISD::VECREDUCE_ADD;
12060 case ISD::UMAX:
12061 return ISD::VECREDUCE_UMAX;
12062 case ISD::SMAX:
12063 return ISD::VECREDUCE_SMAX;
12064 case ISD::UMIN:
12065 return ISD::VECREDUCE_UMIN;
12066 case ISD::SMIN:
12067 return ISD::VECREDUCE_SMIN;
12068 case ISD::AND:
12069 return ISD::VECREDUCE_AND;
12070 case ISD::OR:
12071 return ISD::VECREDUCE_OR;
12072 case ISD::XOR:
12073 return ISD::VECREDUCE_XOR;
12074 case ISD::FADD:
12075 // Note: This is the associative form of the generic reduction opcode.
12076 return ISD::VECREDUCE_FADD;
12080 /// Perform two related transforms whose purpose is to incrementally recognize
12081 /// an explode_vector followed by scalar reduction as a vector reduction node.
12082 /// This exists to recover from a deficiency in SLP which can't handle
12083 /// forests with multiple roots sharing common nodes. In some cases, one
12084 /// of the trees will be vectorized, and the other will remain (unprofitably)
12085 /// scalarized.
12086 static SDValue
12087 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12088 const RISCVSubtarget &Subtarget) {
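// Illustrative sketch (informal): the two patterns handled below turn, e.g.,
//   add (extractelement %v, 0), (extractelement %v, 1)
// into a vecreduce.add over a 2-element subvector of %v, and then grow an
// existing (add (vecreduce (extract_subvector %v, elements 0..k-1)),
//               (extractelement %v, k))
// into a vecreduce over the first k+1 elements.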
12090 // This transform needs to run before all integer types have been legalized
12091 // to i64 (so that the vector element type matches the add type), and while
12092 // it's safe to introduce odd sized vector types.
12093 if (DAG.NewNodesMustHaveLegalTypes)
12094 return SDValue();
12096 // Without V, this transform isn't useful. We could form the (illegal)
12097 // operations and let them be scalarized again, but there's really no point.
12098 if (!Subtarget.hasVInstructions())
12099 return SDValue();
12101 const SDLoc DL(N);
12102 const EVT VT = N->getValueType(0);
12103 const unsigned Opc = N->getOpcode();
12105 // For FADD, we only handle the case with reassociation allowed. We
12106 // could handle strict reduction order, but at the moment, there's no
12107 // known reason to, and the complexity isn't worth it.
12108 // TODO: Handle fminnum and fmaxnum here
12109 if (!VT.isInteger() &&
12110 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12111 return SDValue();
12113 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12114 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12115 "Inconsistent mappings");
12116 SDValue LHS = N->getOperand(0);
12117 SDValue RHS = N->getOperand(1);
12119 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12120 return SDValue();
12122 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12123 std::swap(LHS, RHS);
12125 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12126 !isa<ConstantSDNode>(RHS.getOperand(1)))
12127 return SDValue();
12129 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12130 SDValue SrcVec = RHS.getOperand(0);
12131 EVT SrcVecVT = SrcVec.getValueType();
12132 assert(SrcVecVT.getVectorElementType() == VT);
12133 if (SrcVecVT.isScalableVector())
12134 return SDValue();
12136 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12137 return SDValue();
12139 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12140 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12141 // root of our reduction tree. TODO: We could extend this to any two
12142 // adjacent aligned constant indices if desired.
12143 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12144 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12145 uint64_t LHSIdx =
12146 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12147 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12148 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12149 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12150 DAG.getVectorIdxConstant(0, DL));
12151 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12155 // Match (binop (reduce (extract_subvector V, 0),
12156 // (extract_vector_elt V, sizeof(SubVec))))
12157 // into a reduction of one more element from the original vector V.
12158 if (LHS.getOpcode() != ReduceOpc)
12159 return SDValue();
12161 SDValue ReduceVec = LHS.getOperand(0);
12162 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12163 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12164 isNullConstant(ReduceVec.getOperand(1)) &&
12165 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12166 // For illegal types (e.g. 3xi32), most will be combined again into a
12167 // wider (hopefully legal) type. If this is a terminal state, we are
12168 // relying on type legalization here to produce something reasonable
12169 // and this lowering quality could probably be improved. (TODO)
12170 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12171 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12172 DAG.getVectorIdxConstant(0, DL));
12173 auto Flags = ReduceVec->getFlags();
12174 Flags.intersectWith(N->getFlags());
12175 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12178 return SDValue();
12182 // Try to fold (<bop> x, (reduction.<bop> vec, start))
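// Illustrative sketch (informal): when the reduction's start value is the
// neutral element (0 for add, all-ones for and, ...),
//   (add x, (vecreduce_add_vl vec, start=0, ...))
// can instead feed x in as the start value of the reduction, removing the
// outer scalar operation.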
12183 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12184 const RISCVSubtarget &Subtarget) {
12185 auto BinOpToRVVReduce = [](unsigned Opc) {
12186 switch (Opc) {
12187 default:
12188 llvm_unreachable("Unhandled binary to transform reduction");
12189 case ISD::ADD:
12190 return RISCVISD::VECREDUCE_ADD_VL;
12191 case ISD::UMAX:
12192 return RISCVISD::VECREDUCE_UMAX_VL;
12193 case ISD::SMAX:
12194 return RISCVISD::VECREDUCE_SMAX_VL;
12195 case ISD::UMIN:
12196 return RISCVISD::VECREDUCE_UMIN_VL;
12197 case ISD::SMIN:
12198 return RISCVISD::VECREDUCE_SMIN_VL;
12199 case ISD::AND:
12200 return RISCVISD::VECREDUCE_AND_VL;
12201 case ISD::OR:
12202 return RISCVISD::VECREDUCE_OR_VL;
12203 case ISD::XOR:
12204 return RISCVISD::VECREDUCE_XOR_VL;
12205 case ISD::FADD:
12206 return RISCVISD::VECREDUCE_FADD_VL;
12207 case ISD::FMAXNUM:
12208 return RISCVISD::VECREDUCE_FMAX_VL;
12209 case ISD::FMINNUM:
12210 return RISCVISD::VECREDUCE_FMIN_VL;
12214 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12215 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12216 isNullConstant(V.getOperand(1)) &&
12217 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12220 unsigned Opc = N->getOpcode();
12221 unsigned ReduceIdx;
12222 if (IsReduction(N->getOperand(0), Opc))
12223 ReduceIdx = 0;
12224 else if (IsReduction(N->getOperand(1), Opc))
12225 ReduceIdx = 1;
12226 else
12227 return SDValue();
12229 // Skip if FADD disallows reassociation but the combiner needs it.
12230 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12231 return SDValue();
12233 SDValue Extract = N->getOperand(ReduceIdx);
12234 SDValue Reduce = Extract.getOperand(0);
12235 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12236 return SDValue();
12238 SDValue ScalarV = Reduce.getOperand(2);
12239 EVT ScalarVT = ScalarV.getValueType();
12240 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12241 ScalarV.getOperand(0)->isUndef() &&
12242 isNullConstant(ScalarV.getOperand(2)))
12243 ScalarV = ScalarV.getOperand(1);
12245 // Make sure that ScalarV is a splat with VL=1.
12246 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12247 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12248 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12249 return SDValue();
12251 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12252 return SDValue();
12254 // Check that the scalar of ScalarV is the neutral element.
12255 // TODO: Deal with values other than the neutral element.
12256 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12257 0))
12258 return SDValue();
12260 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12261 // FIXME: We might be able to improve this if operand 0 is undef.
12262 if (!isNonZeroAVL(Reduce.getOperand(5)))
12263 return SDValue();
12265 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12267 SDLoc DL(N);
12268 SDValue NewScalarV =
12269 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12270 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12272 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12273 if (ScalarVT != ScalarV.getValueType())
12274 NewScalarV =
12275 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12276 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
12278 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12279 NewScalarV, Reduce.getOperand(3),
12280 Reduce.getOperand(4), Reduce.getOperand(5)};
12281 SDValue NewReduce =
12282 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12283 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12284 Extract.getOperand(1));
12287 // Optimize (add (shl x, c0), (shl y, c1)) ->
12288 // (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3.
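// Worked example (illustrative): with Zba,
//   (add (shl x, 1), (shl y, 3))
// rewrites as ((y << 2) + x) << 1, i.e. SLLI (SH2ADD y, x), 1, since the
// shift-amount difference is 2 and the common shift of 1 is factored out.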
12289 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12290 const RISCVSubtarget &Subtarget) {
12291 // Perform this optimization only if the Zba extension is enabled.
12292 if (!Subtarget.hasStdExtZba())
12293 return SDValue();
12295 // Skip for vector types and larger types.
12296 EVT VT = N->getValueType(0);
12297 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12298 return SDValue();
12300 // The two operand nodes must be SHL and have no other use.
12301 SDValue N0 = N->getOperand(0);
12302 SDValue N1 = N->getOperand(1);
12303 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12304 !N0->hasOneUse() || !N1->hasOneUse())
12305 return SDValue();
12307 // Check c0 and c1.
12308 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12309 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12310 if (!N0C || !N1C)
12311 return SDValue();
12312 int64_t C0 = N0C->getSExtValue();
12313 int64_t C1 = N1C->getSExtValue();
12314 if (C0 <= 0 || C1 <= 0)
12315 return SDValue();
12317 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12318 int64_t Bits = std::min(C0, C1);
12319 int64_t Diff = std::abs(C0 - C1);
12320 if (Diff != 1 && Diff != 2 && Diff != 3)
12321 return SDValue();
12323 // Build nodes.
12324 SDLoc DL(N);
12325 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12326 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12327 SDValue NA0 =
12328 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12329 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12330 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12333 // Combine a constant select operand into its use:
12335 // (and (select cond, -1, c), x)
12336 // -> (select cond, x, (and x, c)) [AllOnes=1]
12337 // (or (select cond, 0, c), x)
12338 // -> (select cond, x, (or x, c)) [AllOnes=0]
12339 // (xor (select cond, 0, c), x)
12340 // -> (select cond, x, (xor x, c)) [AllOnes=0]
12341 // (add (select cond, 0, c), x)
12342 // -> (select cond, x, (add x, c)) [AllOnes=0]
12343 // (sub x, (select cond, 0, c))
12344 // -> (select cond, x, (sub x, c)) [AllOnes=0]
12345 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12346 SelectionDAG &DAG, bool AllOnes,
12347 const RISCVSubtarget &Subtarget) {
12348 EVT VT = N->getValueType(0);
12350 // Skip vectors.
12351 if (VT.isVector())
12352 return SDValue();
12354 if (!Subtarget.hasConditionalMoveFusion()) {
12355 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12356 if ((!Subtarget.hasStdExtZicond() &&
12357 !Subtarget.hasVendorXVentanaCondOps()) ||
12358 N->getOpcode() != ISD::AND)
12359 return SDValue();
12361 // Maybe harmful when the condition code has multiple uses.
12362 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12363 return SDValue();
12365 // Maybe harmful when VT is wider than XLen.
12366 if (VT.getSizeInBits() > Subtarget.getXLen())
12367 return SDValue();
12370 if ((Slct.getOpcode() != ISD::SELECT &&
12371 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12372 !Slct.hasOneUse())
12373 return SDValue();
12375 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12376 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12379 bool SwapSelectOps;
12380 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12381 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12382 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12383 SDValue NonConstantVal;
12384 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12385 SwapSelectOps = false;
12386 NonConstantVal = FalseVal;
12387 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12388 SwapSelectOps = true;
12389 NonConstantVal = TrueVal;
12390 } else
12391 return SDValue();
12393 // Slct is now known to be the desired identity constant when CC is true.
12394 TrueVal = OtherOp;
12395 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12396 // Unless SwapSelectOps says the condition should be false.
12397 if (SwapSelectOps)
12398 std::swap(TrueVal, FalseVal);
12400 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12401 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12402 {Slct.getOperand(0), Slct.getOperand(1),
12403 Slct.getOperand(2), TrueVal, FalseVal});
12405 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12406 {Slct.getOperand(0), TrueVal, FalseVal});
12409 // Attempt combineSelectAndUse on each operand of a commutative operator N.
12410 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12411 bool AllOnes,
12412 const RISCVSubtarget &Subtarget) {
12413 SDValue N0 = N->getOperand(0);
12414 SDValue N1 = N->getOperand(1);
12415 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12416 return Result;
12417 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12418 return Result;
12419 return SDValue();
12422 // Transform (add (mul x, c0), c1) ->
12423 // (add (mul (add x, c1/c0), c0), c1%c0),
12424 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12425 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
12426 // to an infinite loop in DAGCombine if transformed.
12427 // Or transform (add (mul x, c0), c1) ->
12428 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12429 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12430 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12431 // lead to an infinite loop in DAGCombine if transformed.
12432 // Or transform (add (mul x, c0), c1) ->
12433 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12434 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12435 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12436 // lead to an infinite loop in DAGCombine if transformed.
12437 // Or transform (add (mul x, c0), c1) ->
12438 // (mul (add x, c1/c0), c0),
12439 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
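// Worked example (illustrative): for (add (mul x, 100), 4099), c1 = 4099 is
// not simm12, but c1/c0 = 40 and c1%c0 = 99 are, and c0*(c1/c0) = 4000 is
// not, so the node becomes (add (mul (add x, 40), 100), 99), whose
// immediates all fit in an ADDI.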
12440 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12441 const RISCVSubtarget &Subtarget) {
12442 // Skip for vector types and larger types.
12443 EVT VT = N->getValueType(0);
12444 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12445 return SDValue();
12446 // The first operand node must be a MUL and have no other use.
12447 SDValue N0 = N->getOperand(0);
12448 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12449 return SDValue();
12450 // Check if c0 and c1 match above conditions.
12451 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12452 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12453 if (!N0C || !N1C)
12454 return SDValue();
12455 // If N0C has multiple uses it's possible one of the cases in
12456 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12457 // in an infinite loop.
12458 if (!N0C->hasOneUse())
12459 return SDValue();
12460 int64_t C0 = N0C->getSExtValue();
12461 int64_t C1 = N1C->getSExtValue();
12462 int64_t CA, CB;
12463 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12464 return SDValue();
12465 // Search for proper CA (non-zero) and CB that both are simm12.
12466 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12467 !isInt<12>(C0 * (C1 / C0))) {
12468 CA = C1 / C0;
12469 CB = C1 % C0;
12470 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12471 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12472 CA = C1 / C0 + 1;
12473 CB = C1 % C0 - C0;
12474 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12475 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12476 CA = C1 / C0 - 1;
12477 CB = C1 % C0 + C0;
12478 } else
12479 return SDValue();
12480 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12481 SDLoc DL(N);
12482 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12483 DAG.getConstant(CA, DL, VT));
12484 SDValue New1 =
12485 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12486 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12489 // Try to turn (add (xor bool, 1), -1) into (neg bool).
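// Worked example (illustrative): for bool in {0, 1}, (xor bool, 1) + (-1)
// evaluates to 0 for bool = 0 and to -1 for bool = 1, i.e. exactly -bool,
// which is the negate emitted below.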
12490 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
12491 SDValue N0 = N->getOperand(0);
12492 SDValue N1 = N->getOperand(1);
12493 EVT VT = N->getValueType(0);
12494 SDLoc DL(N);
12496 // RHS should be -1.
12497 if (!isAllOnesConstant(N1))
12498 return SDValue();
12500 // Look for (xor X, 1).
12501 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
12502 return SDValue();
12504 // First xor input should be 0 or 1.
12505 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12506 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
12507 return SDValue();
12509 // Emit a negate of the setcc.
12510 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
12511 N0.getOperand(0));
12514 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
12515 const RISCVSubtarget &Subtarget) {
12516 if (SDValue V = combineAddOfBooleanXor(N, DAG))
12517 return V;
12518 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
12519 return V;
12520 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
12521 return V;
12522 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12523 return V;
12524 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12525 return V;
12527 // fold (add (select lhs, rhs, cc, 0, y), x) ->
12528 // (select lhs, rhs, cc, x, (add x, y))
12529 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12532 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
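// Worked example (illustrative): (sub 5, (setcc x, y, eq)) becomes
// (add (setcc x, y, ne), 4): when the original setcc is 1 both forms give
// 4, and when it is 0 both give 5.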
12533 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
12534 SDValue N0 = N->getOperand(0);
12535 SDValue N1 = N->getOperand(1);
12536 EVT VT = N->getValueType(0);
12537 SDLoc DL(N);
12539 // Require a constant LHS.
12540 auto *N0C = dyn_cast<ConstantSDNode>(N0);
12541 if (!N0C)
12542 return SDValue();
12544 // All our optimizations involve subtracting 1 from the immediate and forming
12545 // an ADDI. Make sure the new immediate is valid for an ADDI.
12546 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
12547 if (!ImmValMinus1.isSignedIntN(12))
12548 return SDValue();
12550 SDValue NewLHS;
12551 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
12552 // (sub constant, (setcc x, y, eq/neq)) ->
12553 // (add (setcc x, y, neq/eq), constant - 1)
12554 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
12555 EVT SetCCOpVT = N1.getOperand(0).getValueType();
12556 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
12557 return SDValue();
12558 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
12559 NewLHS =
12560 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
12561 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
12562 N1.getOperand(0).getOpcode() == ISD::SETCC) {
12563 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
12564 // Since setcc returns a bool the xor is equivalent to 1-setcc.
12565 NewLHS = N1.getOperand(0);
12566 } else
12567 return SDValue();
12569 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
12570 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
12573 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
12574 const RISCVSubtarget &Subtarget) {
12575 if (SDValue V = combineSubOfBoolean(N, DAG))
12576 return V;
12578 SDValue N0 = N->getOperand(0);
12579 SDValue N1 = N->getOperand(1);
12580 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
12581 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
12582 isNullConstant(N1.getOperand(1))) {
12583 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
12584 if (CCVal == ISD::SETLT) {
12585 EVT VT = N->getValueType(0);
12586 SDLoc DL(N);
12587 unsigned ShAmt = N0.getValueSizeInBits() - 1;
12588 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
12589 DAG.getConstant(ShAmt, DL, VT));
12593 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
12594 // (select lhs, rhs, cc, x, (sub x, y))
12595 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
12598 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
12599 // Legalizing setcc can introduce xors like this. Doing this transform reduces
12600 // the number of xors and may allow the xor to fold into a branch condition.
12601 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
12602 SDValue N0 = N->getOperand(0);
12603 SDValue N1 = N->getOperand(1);
12604 bool IsAnd = N->getOpcode() == ISD::AND;
12606 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
12607 return SDValue();
12609 if (!N0.hasOneUse() || !N1.hasOneUse())
12610 return SDValue();
12612 SDValue N01 = N0.getOperand(1);
12613 SDValue N11 = N1.getOperand(1);
12615 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
12616 // (xor X, -1) based on the upper bits of the other operand being 0. If the
12617 // operation is And, allow one of the Xors to use -1.
12618 if (isOneConstant(N01)) {
12619 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
12620 return SDValue();
12621 } else if (isOneConstant(N11)) {
12622 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
12623 if (!(IsAnd && isAllOnesConstant(N01)))
12624 return SDValue();
12625 } else
12626 return SDValue();
12628 EVT VT = N->getValueType(0);
12630 SDValue N00 = N0.getOperand(0);
12631 SDValue N10 = N1.getOperand(0);
12633 // The LHS of the xors needs to be 0/1.
12634 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12635 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
12636 return SDValue();
12638 // Invert the opcode and insert a new xor.
12639 SDLoc DL(N);
12640 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12641 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
12642 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
12645 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
12646 const RISCVSubtarget &Subtarget) {
12647 SDValue N0 = N->getOperand(0);
12648 EVT VT = N->getValueType(0);
12650 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
12651 // extending X. This is safe since we only need the LSB after the shift and
12652 // shift amounts larger than 31 would produce poison. If we wait until
12653 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12654 // to use a BEXT instruction.
12655 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
12656 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
12657 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12658 SDLoc DL(N0);
12659 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12660 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12661 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12662 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
12665 return SDValue();
12668 // Combines two comparison operations and a logic operation into one selection
12669 // operation (min, max) and a logic operation. Returns the newly constructed
12670 // node if the conditions for the optimization are satisfied.
12671 static SDValue performANDCombine(SDNode *N,
12672 TargetLowering::DAGCombinerInfo &DCI,
12673 const RISCVSubtarget &Subtarget) {
12674 SelectionDAG &DAG = DCI.DAG;
12676 SDValue N0 = N->getOperand(0);
12677 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
12678 // extending X. This is safe since we only need the LSB after the shift and
12679 // shift amounts larger than 31 would produce poison. If we wait until
12680 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12681 // to use a BEXT instruction.
12682 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12683 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
12684 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
12685 N0.hasOneUse()) {
12686 SDLoc DL(N);
12687 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12688 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12689 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12690 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
12691 DAG.getConstant(1, DL, MVT::i64));
12692 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12695 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12696 return V;
12697 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12698 return V;
12700 if (DCI.isAfterLegalizeDAG())
12701 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12702 return V;
12704 // fold (and (select lhs, rhs, cc, -1, y), x) ->
12705 // (select lhs, rhs, cc, x, (and x, y))
12706 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
12709 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
12710 // FIXME: Generalize to other binary operators with same operand.
12711 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
12712 SelectionDAG &DAG) {
12713 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
12715 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
12716 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
12717 !N0.hasOneUse() || !N1.hasOneUse())
12718 return SDValue();
12720 // Should have the same condition.
12721 SDValue Cond = N0.getOperand(1);
12722 if (Cond != N1.getOperand(1))
12723 return SDValue();
12725 SDValue TrueV = N0.getOperand(0);
12726 SDValue FalseV = N1.getOperand(0);
12728 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
12729 TrueV.getOperand(1) != FalseV.getOperand(1) ||
12730 !isOneConstant(TrueV.getOperand(1)) ||
12731 !TrueV.hasOneUse() || !FalseV.hasOneUse())
12732 return SDValue();
12734 EVT VT = N->getValueType(0);
12735 SDLoc DL(N);
12737 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
12738 Cond);
12739 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
12740 Cond);
12741 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
12742 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
12745 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
12746 const RISCVSubtarget &Subtarget) {
12747 SelectionDAG &DAG = DCI.DAG;
12749 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12750 return V;
12751 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12752 return V;
12754 if (DCI.isAfterLegalizeDAG())
12755 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12756 return V;
12758 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
12759 // We may be able to pull a common operation out of the true and false value.
12760 SDValue N0 = N->getOperand(0);
12761 SDValue N1 = N->getOperand(1);
12762 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
12763 return V;
12764 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
12765 return V;
12767 // fold (or (select cond, 0, y), x) ->
12768 // (select cond, x, (or x, y))
12769 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12772 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
12773 const RISCVSubtarget &Subtarget) {
12774 SDValue N0 = N->getOperand(0);
12775 SDValue N1 = N->getOperand(1);
12777 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
12778 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
12779 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
12780 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12781 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
12782 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
12783 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12784 SDLoc DL(N);
12785 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12786 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12787 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
12788 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
12789 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12792 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
12793 // NOTE: Assumes ROL being legal means ROLW is legal.
12794 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12795 if (N0.getOpcode() == RISCVISD::SLLW &&
12796 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
12797 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
12798 SDLoc DL(N);
12799 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
12800 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
12803 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
12804 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
12805 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
12806 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12807 if (ConstN00 && CC == ISD::SETLT) {
12808 EVT VT = N0.getValueType();
12809 SDLoc DL(N0);
12810 const APInt &Imm = ConstN00->getAPIntValue();
12811 if ((Imm + 1).isSignedIntN(12))
12812 return DAG.getSetCC(DL, VT, N0.getOperand(1),
12813 DAG.getConstant(Imm + 1, DL, VT), CC);
12817 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12818 return V;
12819 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12820 return V;
12822 // fold (xor (select cond, 0, y), x) ->
12823 // (select cond, x, (xor x, y))
12824 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12827 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
12828 EVT VT = N->getValueType(0);
12829 if (!VT.isVector())
12830 return SDValue();
12832 SDLoc DL(N);
12833 SDValue N0 = N->getOperand(0);
12834 SDValue N1 = N->getOperand(1);
12835 SDValue MulOper;
12836 unsigned AddSubOpc;
12838 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
12839 // (mul x, add (y, 1)) -> (add x, (mul x, y))
12840 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
12841 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
12842 auto IsAddSubWith1 = [&](SDValue V) -> bool {
12843 AddSubOpc = V->getOpcode();
12844 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
12845 SDValue Opnd = V->getOperand(1);
12846 MulOper = V->getOperand(0);
12847 if (AddSubOpc == ISD::SUB)
12848 std::swap(Opnd, MulOper);
12849 if (isOneOrOneSplat(Opnd))
12850 return true;
12852 return false;
12855 if (IsAddSubWith1(N0)) {
12856 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
12857 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
12860 if (IsAddSubWith1(N1)) {
12861 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
12862 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
12865 return SDValue();
12868 /// According to the property that indexed load/store instructions zero-extend
12869 /// their indices, try to narrow the type of the index operand.
12870 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
12871 if (isIndexTypeSigned(IndexType))
12872 return false;
12874 if (!N->hasOneUse())
12875 return false;
12877 EVT VT = N.getValueType();
12878 SDLoc DL(N);
12880 // In general, what we're doing here is seeing if we can sink a truncate to
12881 // a smaller element type into the expression tree building our index.
12882 // TODO: We can generalize this and handle a bunch more cases if useful.
12884 // Narrow a buildvector to the narrowest element type. This requires less
12885 // work and less register pressure at high LMUL, and creates smaller constants
12886 // which may be cheaper to materialize.
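// Illustrative sketch (informal): a constant index vector such as
// <i64 0, i64 8, i64 16, i64 24> has at most 5 active bits, which rounds up
// to the minimum element width of 8, so it would be truncated to
// <i8 0, i8 8, i8 16, i8 24> here.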
12887 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
12888 KnownBits Known = DAG.computeKnownBits(N);
12889 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
12890 LLVMContext &C = *DAG.getContext();
12891 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
12892 if (ResultVT.bitsLT(VT.getVectorElementType())) {
12893 N = DAG.getNode(ISD::TRUNCATE, DL,
12894 VT.changeVectorElementType(ResultVT), N);
12895 return true;
12899 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
12900 if (N.getOpcode() != ISD::SHL)
12901 return false;
12903 SDValue N0 = N.getOperand(0);
12904 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
12905 N0.getOpcode() != RISCVISD::VZEXT_VL)
12906 return false;
12907 if (!N0->hasOneUse())
12908 return false;
12910 APInt ShAmt;
12911 SDValue N1 = N.getOperand(1);
12912 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
12913 return false;
12915 SDValue Src = N0.getOperand(0);
12916 EVT SrcVT = Src.getValueType();
12917 unsigned SrcElen = SrcVT.getScalarSizeInBits();
12918 unsigned ShAmtV = ShAmt.getZExtValue();
12919 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
12920 NewElen = std::max(NewElen, 8U);
12922 // Skip if NewElen is not narrower than the original extended type.
12923 if (NewElen >= N0.getValueType().getScalarSizeInBits())
12924 return false;
12926 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
12927 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
12929 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
12930 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
12931 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
12932 return true;
12935 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
12936 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
12937 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
12938 // can become a sext.w instead of a shift pair.
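// Illustrative sketch (informal): for (seteq (and X, 0xffffffff), 0xffffffff),
// C1' becomes -1, so the compare turns into (seteq (sext_inreg X, i32), -1);
// a sext.w plus an immediate of -1 is typically cheaper than materializing
// 0xffffffff and zero-extending X with a shift pair.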
12939 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
12940 const RISCVSubtarget &Subtarget) {
12941 SDValue N0 = N->getOperand(0);
12942 SDValue N1 = N->getOperand(1);
12943 EVT VT = N->getValueType(0);
12944 EVT OpVT = N0.getValueType();
12946 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
12947 return SDValue();
12949 // RHS needs to be a constant.
12950 auto *N1C = dyn_cast<ConstantSDNode>(N1);
12951 if (!N1C)
12952 return SDValue();
12954 // LHS needs to be (and X, 0xffffffff).
12955 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
12956 !isa<ConstantSDNode>(N0.getOperand(1)) ||
12957 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
12958 return SDValue();
12960 // Looking for an equality compare.
12961 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12962 if (!isIntEqualitySetCC(Cond))
12963 return SDValue();
12965 // Don't do this if the sign bit is provably zero; it will be turned back into
12966 // an AND.
12967 APInt SignMask = APInt::getOneBitSet(64, 31);
12968 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
12969 return SDValue();
12971 const APInt &C1 = N1C->getAPIntValue();
12973 SDLoc dl(N);
12974 // If the constant is larger than 2^32 - 1 it is impossible for both sides
12975 // to be equal.
12976 if (C1.getActiveBits() > 32)
12977 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
12979 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
12980 N0.getOperand(0), DAG.getValueType(MVT::i32));
12981 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
12982 dl, OpVT), Cond);
12985 static SDValue
12986 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
12987 const RISCVSubtarget &Subtarget) {
12988 SDValue Src = N->getOperand(0);
12989 EVT VT = N->getValueType(0);
12991 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
12992 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12993 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
12994 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
12995 Src.getOperand(0));
12997 return SDValue();
13000 namespace {
13001 // Forward declaration of the structure holding the necessary information to
13002 // apply a combine.
13003 struct CombineResult;
13005 /// Helper class for folding sign/zero extensions.
13006 /// In particular, this class is used for the following combines:
13007 /// add | add_vl -> vwadd(u) | vwadd(u)_w
13008 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13009 /// mul | mul_vl -> vwmul(u) | vwmul_su
13011 /// An object of this class represents an operand of the operation we want to
13012 /// combine.
13013 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13014 /// NodeExtensionHelper for `a` and one for `b`.
13016 /// This class abstracts away how the extension is materialized and
13017 /// how its Mask, VL, and number of users affect the combines.
13019 /// In particular:
13020 /// - VWADD_W is conceptually == add(op0, sext(op1))
13021 /// - VWADDU_W == add(op0, zext(op1))
13022 /// - VWSUB_W == sub(op0, sext(op1))
13023 /// - VWSUBU_W == sub(op0, zext(op1))
13025 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13026 /// zext|sext(smaller_value).
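/// For illustration (simplified; passthru, mask and VL omitted), given
///   mul_vl (vsext_vl a), (vsext_vl b)
/// one NodeExtensionHelper is built for each multiplicand; both report
/// SupportsSExt, so the whole expression can be rewritten as
///   vwmul_vl a, b
/// operating directly on the narrow sources.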
13027 struct NodeExtensionHelper {
13028 /// Records if this operand behaves as if it were zero extended.
13029 bool SupportsZExt;
13030 /// Records if this operand behaves as if it were sign extended.
13031 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13032 /// instance, a splat constant (e.g., 3) would support being both sign and
13033 /// zero extended.
13034 bool SupportsSExt;
13035 /// This boolean captures whether we care if this operand would still be
13036 /// around after the folding happens.
13037 bool EnforceOneUse;
13038 /// Records if this operand's mask needs to match the mask of the operation
13039 /// that it will fold into.
13040 bool CheckMask;
13041 /// Value of the Mask for this operand.
13042 /// It may be SDValue().
13043 SDValue Mask;
13044 /// Value of the vector length operand.
13045 /// It may be SDValue().
13046 SDValue VL;
13047 /// Original value that this NodeExtensionHelper represents.
13048 SDValue OrigOperand;
13050 /// Get the value feeding the extension or the value itself.
13051 /// E.g., for zext(a), this would return a.
13052 SDValue getSource() const {
13053 switch (OrigOperand.getOpcode()) {
13054 case ISD::ZERO_EXTEND:
13055 case ISD::SIGN_EXTEND:
13056 case RISCVISD::VSEXT_VL:
13057 case RISCVISD::VZEXT_VL:
13058 return OrigOperand.getOperand(0);
13059 default:
13060 return OrigOperand;
13064 /// Check if this instance represents a splat.
13065 bool isSplat() const {
13066 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
13069 /// Get or create a value that can feed \p Root with the given extension \p
13070 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
13071 /// \see ::getSource().
13072 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13073 const RISCVSubtarget &Subtarget,
13074 std::optional<bool> SExt) const {
13075 if (!SExt.has_value())
13076 return OrigOperand;
13078 MVT NarrowVT = getNarrowType(Root);
13080 SDValue Source = getSource();
13081 if (Source.getValueType() == NarrowVT)
13082 return Source;
13084 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
13086 // If we need an extension, we should be changing the type.
13087 SDLoc DL(Root);
13088 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13089 switch (OrigOperand.getOpcode()) {
13090 case ISD::ZERO_EXTEND:
13091 case ISD::SIGN_EXTEND:
13092 case RISCVISD::VSEXT_VL:
13093 case RISCVISD::VZEXT_VL:
13094 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13095 case RISCVISD::VMV_V_X_VL:
13096 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13097 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13098 default:
13099 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13100 // and that operand should already have the right NarrowVT so no
13101 // extension should be required at this point.
13102 llvm_unreachable("Unsupported opcode");
13106 /// Helper function to get the narrow type for \p Root.
13107 /// The narrow type is the type of \p Root where we divided the size of each
13108 /// element by 2. E.g., if Root's type is <2xi16>, the narrow type is <2xi8>.
13109 /// \pre The size of the type of the elements of Root must be a multiple of 2
13110 /// and be at least 16.
13111 static MVT getNarrowType(const SDNode *Root) {
13112 MVT VT = Root->getSimpleValueType(0);
13114 // Determine the narrow size.
13115 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13116 assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
13117 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
13118 VT.getVectorElementCount());
13119 return NarrowVT;
13122 /// Return the opcode required to materialize the folding of the sign
13123 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
13124 /// both operands for \p Opcode.
13125 /// Put differently, get the opcode to materialize:
13126 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13127 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13128 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
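/// For example (sketch): with \p Opcode == RISCVISD::ADD_VL and IsSExt == true
/// this returns RISCVISD::VWADD_VL, i.e. add_vl(sext(a), sext(b)) is
/// materialized as vwadd_vl(a, b).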
13129 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
13130 switch (Opcode) {
13131 case ISD::ADD:
13132 case RISCVISD::ADD_VL:
13133 case RISCVISD::VWADD_W_VL:
13134 case RISCVISD::VWADDU_W_VL:
13135 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
13136 case ISD::MUL:
13137 case RISCVISD::MUL_VL:
13138 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
13139 case ISD::SUB:
13140 case RISCVISD::SUB_VL:
13141 case RISCVISD::VWSUB_W_VL:
13142 case RISCVISD::VWSUBU_W_VL:
13143 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
13144 default:
13145 llvm_unreachable("Unexpected opcode");
13149 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13150 /// newOpcode(a, b).
13151 static unsigned getSUOpcode(unsigned Opcode) {
13152 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13153 "SU is only supported for MUL");
13154 return RISCVISD::VWMULSU_VL;
13157 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
13158 /// newOpcode(a, b).
13159 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
13160 switch (Opcode) {
13161 case ISD::ADD:
13162 case RISCVISD::ADD_VL:
13163 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
13164 case ISD::SUB:
13165 case RISCVISD::SUB_VL:
13166 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
13167 default:
13168 llvm_unreachable("Unexpected opcode");
13172 using CombineToTry = std::function<std::optional<CombineResult>(
13173 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13174 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13175 const RISCVSubtarget &)>;
13177 /// Check if this node needs to be fully folded or extended for all users.
13178 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13180 /// Helper method to set the various fields of this struct based on the
13181 /// type of \p Root.
13182 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13183 const RISCVSubtarget &Subtarget) {
13184 SupportsZExt = false;
13185 SupportsSExt = false;
13186 EnforceOneUse = true;
13187 CheckMask = true;
13188 unsigned Opc = OrigOperand.getOpcode();
13189 switch (Opc) {
13190 case ISD::ZERO_EXTEND:
13191 case ISD::SIGN_EXTEND: {
13192 MVT VT = OrigOperand.getSimpleValueType();
13193 if (!VT.isVector())
13194 break;
13196 SDValue NarrowElt = OrigOperand.getOperand(0);
13197 MVT NarrowVT = NarrowElt.getSimpleValueType();
13199 unsigned ScalarBits = VT.getScalarSizeInBits();
13200 unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();
13202 // Ensure the narrowing element type is legal
13203 if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType()))
13204 break;
13206 // Ensure the extension's semantics are equivalent to RVV vzext or vsext.
13207 if (ScalarBits != NarrowScalarBits * 2)
13208 break;
13210 SupportsZExt = Opc == ISD::ZERO_EXTEND;
13211 SupportsSExt = Opc == ISD::SIGN_EXTEND;
13213 SDLoc DL(Root);
13214 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13215 break;
13217 case RISCVISD::VZEXT_VL:
13218 SupportsZExt = true;
13219 Mask = OrigOperand.getOperand(1);
13220 VL = OrigOperand.getOperand(2);
13221 break;
13222 case RISCVISD::VSEXT_VL:
13223 SupportsSExt = true;
13224 Mask = OrigOperand.getOperand(1);
13225 VL = OrigOperand.getOperand(2);
13226 break;
13227 case RISCVISD::VMV_V_X_VL: {
13228 // Historically, we didn't care about splat values not disappearing during
13229 // combines.
13230 EnforceOneUse = false;
13231 CheckMask = false;
13232 VL = OrigOperand.getOperand(2);
13234 // The operand is a splat of a scalar.
13236 // The passthru must be undef for tail agnostic.
13237 if (!OrigOperand.getOperand(0).isUndef())
13238 break;
13240 // Get the scalar value.
13241 SDValue Op = OrigOperand.getOperand(1);
13243 // See if we have enough sign bits or zero bits in the scalar to use a
13244 // widening opcode by splatting to smaller element size.
13245 MVT VT = Root->getSimpleValueType(0);
13246 unsigned EltBits = VT.getScalarSizeInBits();
13247 unsigned ScalarBits = Op.getValueSizeInBits();
13248 // Make sure we're getting all element bits from the scalar register.
13249 // FIXME: Support implicit sign extension of vmv.v.x?
13250 if (ScalarBits < EltBits)
13251 break;
13253 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13254 // If the narrow type cannot be expressed with a legal VMV,
13255 // this is not a valid candidate.
13256 if (NarrowSize < 8)
13257 break;
13259 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13260 SupportsSExt = true;
13261 if (DAG.MaskedValueIsZero(Op,
13262 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13263 SupportsZExt = true;
13264 break;
13266 default:
13267 break;
13271 /// Check if \p Root supports any extension folding combines.
13272 static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) {
13273 switch (Root->getOpcode()) {
13274 case ISD::ADD:
13275 case ISD::SUB:
13276 case ISD::MUL: {
13277 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13278 if (!TLI.isTypeLegal(Root->getValueType(0)))
13279 return false;
13280 return Root->getValueType(0).isScalableVector();
13282 case RISCVISD::ADD_VL:
13283 case RISCVISD::MUL_VL:
13284 case RISCVISD::VWADD_W_VL:
13285 case RISCVISD::VWADDU_W_VL:
13286 case RISCVISD::SUB_VL:
13287 case RISCVISD::VWSUB_W_VL:
13288 case RISCVISD::VWSUBU_W_VL:
13289 return true;
13290 default:
13291 return false;
13295 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
13296 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
13297 const RISCVSubtarget &Subtarget) {
13298 assert(isSupportedRoot(Root, DAG) && "Trying to build a helper with an "
13299 "unsupported root");
13300 assert(OperandIdx < 2 && "Requesting something other than LHS or RHS");
13301 OrigOperand = Root->getOperand(OperandIdx);
13303 unsigned Opc = Root->getOpcode();
13304 switch (Opc) {
13305 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
13306 // <ADD|SUB>(LHS, S|ZEXT(RHS))
13307 case RISCVISD::VWADD_W_VL:
13308 case RISCVISD::VWADDU_W_VL:
13309 case RISCVISD::VWSUB_W_VL:
13310 case RISCVISD::VWSUBU_W_VL:
13311 if (OperandIdx == 1) {
13312 SupportsZExt =
13313 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
13314 SupportsSExt = !SupportsZExt;
13315 std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
13316 CheckMask = true;
13317 // There's no existing extension here, so we don't have to worry about
13318 // making sure it gets removed.
13319 EnforceOneUse = false;
13320 break;
13322 [[fallthrough]];
13323 default:
13324 fillUpExtensionSupport(Root, DAG, Subtarget);
13325 break;
13329 /// Check if this operand is compatible with the given vector length \p VL.
13330 bool isVLCompatible(SDValue VL) const {
13331 return this->VL != SDValue() && this->VL == VL;
13334 /// Check if this operand is compatible with the given \p Mask.
13335 bool isMaskCompatible(SDValue Mask) const {
13336 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
13339 /// Helper function to get the Mask and VL from \p Root.
13340 static std::pair<SDValue, SDValue>
13341 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
13342 const RISCVSubtarget &Subtarget) {
13343 assert(isSupportedRoot(Root, DAG) && "Unexpected root");
13344 switch (Root->getOpcode()) {
13345 case ISD::ADD:
13346 case ISD::SUB:
13347 case ISD::MUL: {
13348 SDLoc DL(Root);
13349 MVT VT = Root->getSimpleValueType(0);
13350 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13352 default:
13353 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
13357 /// Check if the Mask and VL of this operand are compatible with \p Root.
13358 bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
13359 const RISCVSubtarget &Subtarget) const {
13360 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13361 return isMaskCompatible(Mask) && isVLCompatible(VL);
13364 /// Helper function to check if \p N is commutative with respect to the
13365 /// foldings that are supported by this class.
13366 static bool isCommutative(const SDNode *N) {
13367 switch (N->getOpcode()) {
13368 case ISD::ADD:
13369 case ISD::MUL:
13370 case RISCVISD::ADD_VL:
13371 case RISCVISD::MUL_VL:
13372 case RISCVISD::VWADD_W_VL:
13373 case RISCVISD::VWADDU_W_VL:
13374 return true;
13375 case ISD::SUB:
13376 case RISCVISD::SUB_VL:
13377 case RISCVISD::VWSUB_W_VL:
13378 case RISCVISD::VWSUBU_W_VL:
13379 return false;
13380 default:
13381 llvm_unreachable("Unexpected opcode");
13385 /// Get a list of combine to try for folding extensions in \p Root.
13386 /// Note that each returned CombineToTry function doesn't actually modify
13387 /// anything. Instead, they produce an optional CombineResult that, if not
13388 /// std::nullopt, needs to be materialized for the combine to be applied.
13389 /// \see CombineResult::materialize.
13390 /// If the related CombineToTry function returns std::nullopt, that means the
13391 /// combine didn't match.
13392 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
13395 /// Helper structure that holds all the necessary information to materialize a
13396 /// combine that does some extension folding.
13397 struct CombineResult {
13398 /// Opcode to be generated when materializing the combine.
13399 unsigned TargetOpcode;
13400 // No value means no extension is needed. If extension is needed, the value
13401 // indicates if it needs to be sign extended.
13402 std::optional<bool> SExtLHS;
13403 std::optional<bool> SExtRHS;
13404 /// Root of the combine.
13405 SDNode *Root;
13406 /// LHS of the TargetOpcode.
13407 NodeExtensionHelper LHS;
13408 /// RHS of the TargetOpcode.
13409 NodeExtensionHelper RHS;
13411 CombineResult(unsigned TargetOpcode, SDNode *Root,
13412 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
13413 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
13414 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
13415 Root(Root), LHS(LHS), RHS(RHS) {}
13417 /// Return a value that uses TargetOpcode and that can be used to replace
13418 /// Root.
13419 /// The actual replacement is *not* done in that method.
13420 SDValue materialize(SelectionDAG &DAG,
13421 const RISCVSubtarget &Subtarget) const {
13422 SDValue Mask, VL, Merge;
13423 std::tie(Mask, VL) =
13424 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
13425 switch (Root->getOpcode()) {
13426 default:
13427 Merge = Root->getOperand(2);
13428 break;
13429 case ISD::ADD:
13430 case ISD::SUB:
13431 case ISD::MUL:
13432 Merge = DAG.getUNDEF(Root->getValueType(0));
13433 break;
13435 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
13436 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS),
13437 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS),
13438 Merge, Mask, VL);
13442 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
13443 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
13444 /// are zext) and LHS and RHS can be folded into Root.
13445 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
13447 /// \note If the pattern can match with both zext and sext, the returned
13448 /// CombineResult will feature the zext result.
13450 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13451 /// can be used to apply the pattern.
13452 static std::optional<CombineResult>
13453 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
13454 const NodeExtensionHelper &RHS, bool AllowSExt,
13455 bool AllowZExt, SelectionDAG &DAG,
13456 const RISCVSubtarget &Subtarget) {
13457 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
13458 if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
13459 !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
13460 return std::nullopt;
13461 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
13462 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
13463 Root->getOpcode(), /*IsSExt=*/false),
13464 Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false);
13465 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
13466 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
13467 Root->getOpcode(), /*IsSExt=*/true),
13468 Root, LHS, /*SExtLHS=*/true, RHS,
13469 /*SExtRHS=*/true);
13470 return std::nullopt;
13473 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
13474 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
13475 /// are zext) and LHS and RHS can be folded into Root.
13477 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13478 /// can be used to apply the pattern.
13479 static std::optional<CombineResult>
13480 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
13481 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
13482 const RISCVSubtarget &Subtarget) {
13483 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
13484 /*AllowZExt=*/true, DAG, Subtarget);
13487 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
13489 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13490 /// can be used to apply the pattern.
13491 static std::optional<CombineResult>
13492 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
13493 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
13494 const RISCVSubtarget &Subtarget) {
13495 if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
13496 return std::nullopt;
13498 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
13499 // sext/zext?
13500 // Control this behavior behind an option (AllowSplatInVW_W) for testing
13501 // purposes.
13502 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
13503 return CombineResult(
13504 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
13505 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
13506 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
13507 return CombineResult(
13508 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
13509 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
13510 return std::nullopt;
13513 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
13515 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13516 /// can be used to apply the pattern.
13517 static std::optional<CombineResult>
13518 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
13519 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
13520 const RISCVSubtarget &Subtarget) {
13521 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
13522 /*AllowZExt=*/false, DAG, Subtarget);
13525 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
13527 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13528 /// can be used to apply the pattern.
13529 static std::optional<CombineResult>
13530 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
13531 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
13532 const RISCVSubtarget &Subtarget) {
13533 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
13534 /*AllowZExt=*/true, DAG, Subtarget);
13537 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
13539 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13540 /// can be used to apply the pattern.
13541 static std::optional<CombineResult>
13542 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
13543 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
13544 const RISCVSubtarget &Subtarget) {
13546 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
13547 return std::nullopt;
13548 if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
13549 !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
13550 return std::nullopt;
13551 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
13552 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
13555 SmallVector<NodeExtensionHelper::CombineToTry>
13556 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
13557 SmallVector<CombineToTry> Strategies;
13558 switch (Root->getOpcode()) {
13559 case ISD::ADD:
13560 case ISD::SUB:
13561 case RISCVISD::ADD_VL:
13562 case RISCVISD::SUB_VL:
13563 // add|sub -> vwadd(u)|vwsub(u)
13564 Strategies.push_back(canFoldToVWWithSameExtension);
13565 // add|sub -> vwadd(u)_w|vwsub(u)_w
13566 Strategies.push_back(canFoldToVW_W);
13567 break;
13568 case ISD::MUL:
13569 case RISCVISD::MUL_VL:
13570 // mul -> vwmul(u)
13571 Strategies.push_back(canFoldToVWWithSameExtension);
13572 // mul -> vwmulsu
13573 Strategies.push_back(canFoldToVW_SU);
13574 break;
13575 case RISCVISD::VWADD_W_VL:
13576 case RISCVISD::VWSUB_W_VL:
13577 // vwadd_w|vwsub_w -> vwadd|vwsub
13578 Strategies.push_back(canFoldToVWWithSEXT);
13579 break;
13580 case RISCVISD::VWADDU_W_VL:
13581 case RISCVISD::VWSUBU_W_VL:
13582 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
13583 Strategies.push_back(canFoldToVWWithZEXT);
13584 break;
13585 default:
13586 llvm_unreachable("Unexpected opcode");
13588 return Strategies;
13590 } // End anonymous namespace.
13592 /// Combine a binary operation to its equivalent VW or VW_W form.
13593 /// The supported combines are:
13594 /// add_vl -> vwadd(u) | vwadd(u)_w
13595 /// sub_vl -> vwsub(u) | vwsub(u)_w
13596 /// mul_vl -> vwmul(u) | vwmulsu
13597 /// vwadd(u)_w -> vwadd(u)
13598 /// vwsub(u)_w -> vwsub(u)
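/// Illustrative sketch (assumed IR, requires the V extension; not taken from a
/// test):
///   %a = sext <vscale x 2 x i32> %x to <vscale x 2 x i64>
///   %b = sext <vscale x 2 x i32> %y to <vscale x 2 x i64>
///   %r = add <vscale x 2 x i64> %a, %b
/// can be selected as a single vwadd.vv on the i32 sources instead of two
/// vsext.vf2 plus a vadd.vv.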
13599 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
13600 TargetLowering::DAGCombinerInfo &DCI,
13601 const RISCVSubtarget &Subtarget) {
13602 SelectionDAG &DAG = DCI.DAG;
13604 if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
13605 return SDValue();
13607 SmallVector<SDNode *> Worklist;
13608 SmallSet<SDNode *, 8> Inserted;
13609 Worklist.push_back(N);
13610 Inserted.insert(N);
13611 SmallVector<CombineResult> CombinesToApply;
13613 while (!Worklist.empty()) {
13614 SDNode *Root = Worklist.pop_back_val();
13615 if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
13616 return SDValue();
13618 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
13619 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
13620 auto AppendUsersIfNeeded = [&Worklist,
13621 &Inserted](const NodeExtensionHelper &Op) {
13622 if (Op.needToPromoteOtherUsers()) {
13623 for (SDNode *TheUse : Op.OrigOperand->uses()) {
13624 if (Inserted.insert(TheUse).second)
13625 Worklist.push_back(TheUse);
13630 // Control the compile time by limiting the number of nodes we look at in
13631 // total.
13632 if (Inserted.size() > ExtensionMaxWebSize)
13633 return SDValue();
13635 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
13636 NodeExtensionHelper::getSupportedFoldings(N);
13638 assert(!FoldingStrategies.empty() && "Nothing to be folded");
13639 bool Matched = false;
13640 for (int Attempt = 0;
13641 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
13642 ++Attempt) {
13644 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
13645 FoldingStrategies) {
13646 std::optional<CombineResult> Res =
13647 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
13648 if (Res) {
13649 Matched = true;
13650 CombinesToApply.push_back(*Res);
13651 // All the inputs that are extended need to be folded, otherwise
13652 // we would be leaving the old input (since it may still be used),
13653 // and the new one.
13654 if (Res->SExtLHS.has_value())
13655 AppendUsersIfNeeded(LHS);
13656 if (Res->SExtRHS.has_value())
13657 AppendUsersIfNeeded(RHS);
13658 break;
13661 std::swap(LHS, RHS);
13663 // Right now we do an all-or-nothing approach.
13664 if (!Matched)
13665 return SDValue();
13667 // Store the value for the replacement of the input node separately.
13668 SDValue InputRootReplacement;
13669 // We do the RAUW after we materialize all the combines, because some replaced
13670 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
13671 // some of these nodes may appear in the NodeExtensionHelpers of some of the
13672 // yet-to-be-visited CombinesToApply roots.
13673 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
13674 ValuesToReplace.reserve(CombinesToApply.size());
13675 for (CombineResult Res : CombinesToApply) {
13676 SDValue NewValue = Res.materialize(DAG, Subtarget);
13677 if (!InputRootReplacement) {
13678 assert(Res.Root == N &&
13679 "First element is expected to be the current node");
13680 InputRootReplacement = NewValue;
13681 } else {
13682 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
13685 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
13686 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
13687 DCI.AddToWorklist(OldNewValues.second.getNode());
13689 return InputRootReplacement;
13692 // Helper function for performMemPairCombine.
13693 // Try to combine the memory loads/stores LSNode1 and LSNode2
13694 // into a single memory pair operation.
13695 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
13696 LSBaseSDNode *LSNode2, SDValue BasePtr,
13697 uint64_t Imm) {
13698 SmallPtrSet<const SDNode *, 32> Visited;
13699 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
13701 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
13702 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
13703 return SDValue();
13705 MachineFunction &MF = DAG.getMachineFunction();
13706 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
13708 // The new operation has twice the width.
13709 MVT XLenVT = Subtarget.getXLenVT();
13710 EVT MemVT = LSNode1->getMemoryVT();
13711 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
13712 MachineMemOperand *MMO = LSNode1->getMemOperand();
13713 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
13714 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
13716 if (LSNode1->getOpcode() == ISD::LOAD) {
13717 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
13718 unsigned Opcode;
13719 if (MemVT == MVT::i32)
13720 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
13721 else
13722 Opcode = RISCVISD::TH_LDD;
13724 SDValue Res = DAG.getMemIntrinsicNode(
13725 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
13726 {LSNode1->getChain(), BasePtr,
13727 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13728 NewMemVT, NewMMO);
13730 SDValue Node1 =
13731 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
13732 SDValue Node2 =
13733 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
13735 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
13736 return Node1;
13737 } else {
13738 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
13740 SDValue Res = DAG.getMemIntrinsicNode(
13741 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
13742 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
13743 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13744 NewMemVT, NewMMO);
13746 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
13747 return Res;
13751 // Try to combine two adjacent loads/stores to a single pair instruction from
13752 // the XTHeadMemPair vendor extension.
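// For example (sketch): two simple i64 loads from Base+16 and Base+24 satisfy
// the shifted 2-bit index constraint checked below and are merged by
// tryMemPairCombine into one TH_LDD node whose two results replace the
// original loads.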
13753 static SDValue performMemPairCombine(SDNode *N,
13754 TargetLowering::DAGCombinerInfo &DCI) {
13755 SelectionDAG &DAG = DCI.DAG;
13756 MachineFunction &MF = DAG.getMachineFunction();
13757 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
13759 // Target does not support load/store pair.
13760 if (!Subtarget.hasVendorXTHeadMemPair())
13761 return SDValue();
13763 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
13764 EVT MemVT = LSNode1->getMemoryVT();
13765 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
13767 // No volatile, indexed or atomic loads/stores.
13768 if (!LSNode1->isSimple() || LSNode1->isIndexed())
13769 return SDValue();
13771 // Function to get a base + constant representation from a memory value.
13772 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
13773 if (Ptr->getOpcode() == ISD::ADD)
13774 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
13775 return {Ptr->getOperand(0), C1->getZExtValue()};
13776 return {Ptr, 0};
13779 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
13781 SDValue Chain = N->getOperand(0);
13782 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
13783 UI != UE; ++UI) {
13784 SDUse &Use = UI.getUse();
13785 if (Use.getUser() != N && Use.getResNo() == 0 &&
13786 Use.getUser()->getOpcode() == N->getOpcode()) {
13787 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
13789 // No volatile, indexed or atomic loads/stores.
13790 if (!LSNode2->isSimple() || LSNode2->isIndexed())
13791 continue;
13793 // Check if LSNode1 and LSNode2 have the same type and extension.
13794 if (LSNode1->getOpcode() == ISD::LOAD)
13795 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
13796 cast<LoadSDNode>(LSNode1)->getExtensionType())
13797 continue;
13799 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
13800 continue;
13802 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
13804 // Check if the base pointer is the same for both instructions.
13805 if (Base1 != Base2)
13806 continue;
13808 // Check if the offsets match the XTHeadMemPair encoding constraints.
13809 bool Valid = false;
13810 if (MemVT == MVT::i32) {
13811 // Check for adjacent i32 values and a 2-bit index.
13812 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
13813 Valid = true;
13814 } else if (MemVT == MVT::i64) {
13815 // Check for adjacent i64 values and a 2-bit index.
13816 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
13817 Valid = true;
13820 if (!Valid)
13821 continue;
13823 // Try to combine.
13824 if (SDValue Res =
13825 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
13826 return Res;
13830 return SDValue();
13833 // Fold
13834 // (fp_to_int (froundeven X)) -> fcvt X, rne
13835 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
13836 // (fp_to_int (ffloor X)) -> fcvt X, rdn
13837 // (fp_to_int (fceil X)) -> fcvt X, rup
13838 // (fp_to_int (fround X)) -> fcvt X, rmm
13839 // (fp_to_int (frint X)) -> fcvt X
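// For example (scalar sketch, RV64 with D): (i64 (fp_to_sint (ftrunc f64 %x)))
// becomes a single fcvt.l.d using the static rtz rounding mode, so no separate
// truncation of %x is emitted.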
13840 static SDValue performFP_TO_INTCombine(SDNode *N,
13841 TargetLowering::DAGCombinerInfo &DCI,
13842 const RISCVSubtarget &Subtarget) {
13843 SelectionDAG &DAG = DCI.DAG;
13844 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13845 MVT XLenVT = Subtarget.getXLenVT();
13847 SDValue Src = N->getOperand(0);
13849 // Don't do this for strict-fp Src.
13850 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13851 return SDValue();
13853 // Ensure the FP type is legal.
13854 if (!TLI.isTypeLegal(Src.getValueType()))
13855 return SDValue();
13857 // Don't do this for f16 with Zfhmin and not Zfh.
13858 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13859 return SDValue();
13861 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13862 // If the result is invalid, we didn't find a foldable instruction.
13863 if (FRM == RISCVFPRndMode::Invalid)
13864 return SDValue();
13866 SDLoc DL(N);
13867 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13868 EVT VT = N->getValueType(0);
13870 if (VT.isVector() && TLI.isTypeLegal(VT)) {
13871 MVT SrcVT = Src.getSimpleValueType();
13872 MVT SrcContainerVT = SrcVT;
13873 MVT ContainerVT = VT.getSimpleVT();
13874 SDValue XVal = Src.getOperand(0);
13876 // For widening and narrowing conversions we just combine it into a
13877 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
13878 // end up getting lowered to their appropriate pseudo instructions based on
13879 // their operand types
13880 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
13881 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
13882 return SDValue();
13884 // Make fixed-length vectors scalable first
13885 if (SrcVT.isFixedLengthVector()) {
13886 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
13887 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
13888 ContainerVT =
13889 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
13892 auto [Mask, VL] =
13893 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
13895 SDValue FpToInt;
13896 if (FRM == RISCVFPRndMode::RTZ) {
13897 // Use the dedicated trunc static rounding mode if we're truncating so we
13898 // don't need to generate calls to fsrmi/fsrm
13899 unsigned Opc =
13900 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
13901 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
13902 } else if (FRM == RISCVFPRndMode::DYN) {
13903 unsigned Opc =
13904 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
13905 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
13906 } else {
13907 unsigned Opc =
13908 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
13909 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
13910 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
13913 // If converted from fixed-length to scalable, convert back
13914 if (VT.isFixedLengthVector())
13915 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
13917 return FpToInt;
13920 // Only handle XLen or i32 types. Other types narrower than XLen will
13921 // eventually be legalized to XLenVT.
13922 if (VT != MVT::i32 && VT != XLenVT)
13923 return SDValue();
13925 unsigned Opc;
13926 if (VT == XLenVT)
13927 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13928 else
13929 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13931 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
13932 DAG.getTargetConstant(FRM, DL, XLenVT));
13933 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
13936 // Fold
13937 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
13938 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
13939 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
13940 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
13941 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
13942 // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
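// For example (sketch, RV64 with D): (i64 (fp_to_sint_sat (ffloor f64 %x)))
// becomes fcvt.l.d with the rdn rounding mode followed by a select that forces
// the result to 0 when %x is NaN, since fcvt saturates but does not return 0
// for NaN.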
13943 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
13944 TargetLowering::DAGCombinerInfo &DCI,
13945 const RISCVSubtarget &Subtarget) {
13946 SelectionDAG &DAG = DCI.DAG;
13947 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13948 MVT XLenVT = Subtarget.getXLenVT();
13950 // Only handle XLen types. Other types narrower than XLen will eventually be
13951 // legalized to XLenVT.
13952 EVT DstVT = N->getValueType(0);
13953 if (DstVT != XLenVT)
13954 return SDValue();
13956 SDValue Src = N->getOperand(0);
13958 // Don't do this for strict-fp Src.
13959 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13960 return SDValue();
13962 // Ensure the FP type is also legal.
13963 if (!TLI.isTypeLegal(Src.getValueType()))
13964 return SDValue();
13966 // Don't do this for f16 with Zfhmin and not Zfh.
13967 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13968 return SDValue();
13970 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13972 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13973 if (FRM == RISCVFPRndMode::Invalid)
13974 return SDValue();
13976 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
13978 unsigned Opc;
13979 if (SatVT == DstVT)
13980 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13981 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
13982 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13983 else
13984 return SDValue();
13985 // FIXME: Support other SatVTs by clamping before or after the conversion.
13987 Src = Src.getOperand(0);
13989 SDLoc DL(N);
13990 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
13991 DAG.getTargetConstant(FRM, DL, XLenVT));
13993 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
13994 // extend.
13995 if (Opc == RISCVISD::FCVT_WU_RV64)
13996 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
13998 // RISC-V FP-to-int conversions saturate to the destination register size, but
13999 // don't produce 0 for nan.
14000 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14001 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14004 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14005 // smaller than XLenVT.
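// For example (sketch): for an i16 value, (bitreverse (bswap X)) reverses the
// bits within each byte while keeping the byte order, which matches the brev8
// semantics, so it lowers to a single BREV8 node.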
14006 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14007 const RISCVSubtarget &Subtarget) {
14008 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14010 SDValue Src = N->getOperand(0);
14011 if (Src.getOpcode() != ISD::BSWAP)
14012 return SDValue();
14014 EVT VT = N->getValueType(0);
14015 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14016 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14017 return SDValue();
14019 SDLoc DL(N);
14020 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14023 // Convert from one FMA opcode to another based on whether we are negating the
14024 // multiply result and/or the accumulator.
14025 // NOTE: Only supports RVV operations with VL.
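// For example (sketch): VFMADD_VL computes (a * b) + c. Negating the multiply
// result gives -(a * b) + c, i.e. VFNMSUB_VL; additionally negating the
// accumulator gives -(a * b) - c, i.e. VFNMADD_VL.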
14026 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14027 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14028 if (NegMul) {
14029 // clang-format off
14030 switch (Opcode) {
14031 default: llvm_unreachable("Unexpected opcode");
14032 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14033 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14034 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14035 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14036 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14037 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14038 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14039 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14041 // clang-format on
14044 // Negating the accumulator changes ADD<->SUB.
14045 if (NegAcc) {
14046 // clang-format off
14047 switch (Opcode) {
14048 default: llvm_unreachable("Unexpected opcode");
14049 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14050 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14051 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14052 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14053 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14054 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14055 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14056 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14058 // clang-format on
14061 return Opcode;
14064 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14065 // Fold FNEG_VL into FMA opcodes.
14066 // The first operand of strict-fp is chain.
14067 unsigned Offset = N->isTargetStrictFPOpcode();
14068 SDValue A = N->getOperand(0 + Offset);
14069 SDValue B = N->getOperand(1 + Offset);
14070 SDValue C = N->getOperand(2 + Offset);
14071 SDValue Mask = N->getOperand(3 + Offset);
14072 SDValue VL = N->getOperand(4 + Offset);
14074 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14075 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
14076 V.getOperand(2) == VL) {
14077 // Return the negated input.
14078 V = V.getOperand(0);
14079 return true;
14082 return false;
14085 bool NegA = invertIfNegative(A);
14086 bool NegB = invertIfNegative(B);
14087 bool NegC = invertIfNegative(C);
14089 // If no operands are negated, we're done.
14090 if (!NegA && !NegB && !NegC)
14091 return SDValue();
14093 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
14094 if (N->isTargetStrictFPOpcode())
14095 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
14096 {N->getOperand(0), A, B, C, Mask, VL});
14097 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
14098 VL);
14101 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14102 const RISCVSubtarget &Subtarget) {
14103 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14104 return V;
14106 if (N->getValueType(0).isScalableVector() &&
14107 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14108 (Subtarget.hasVInstructionsF16Minimal() &&
14109 !Subtarget.hasVInstructionsF16())) {
14110 return SDValue();
14113 // FIXME: Ignore strict opcodes for now.
14114 if (N->isTargetStrictFPOpcode())
14115 return SDValue();
14117 // Try to form widening FMA.
14118 SDValue Op0 = N->getOperand(0);
14119 SDValue Op1 = N->getOperand(1);
14120 SDValue Mask = N->getOperand(3);
14121 SDValue VL = N->getOperand(4);
14123 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14124 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14125 return SDValue();
14127 // TODO: Refactor to handle more complex cases similar to
14128 // combineBinOp_VLToVWBinOp_VL.
14129 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14130 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14131 return SDValue();
14133 // Check the mask and VL are the same.
14134 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
14135 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
14136 return SDValue();
14138 unsigned NewOpc;
14139 switch (N->getOpcode()) {
14140 default:
14141 llvm_unreachable("Unexpected opcode");
14142 case RISCVISD::VFMADD_VL:
14143 NewOpc = RISCVISD::VFWMADD_VL;
14144 break;
14145 case RISCVISD::VFNMSUB_VL:
14146 NewOpc = RISCVISD::VFWNMSUB_VL;
14147 break;
14148 case RISCVISD::VFNMADD_VL:
14149 NewOpc = RISCVISD::VFWNMADD_VL;
14150 break;
14151 case RISCVISD::VFMSUB_VL:
14152 NewOpc = RISCVISD::VFWMSUB_VL;
14153 break;
14156 Op0 = Op0.getOperand(0);
14157 Op1 = Op1.getOperand(0);
14159 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
14160 N->getOperand(2), Mask, VL);
14163 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
14164 const RISCVSubtarget &Subtarget) {
14165 if (N->getValueType(0).isScalableVector() &&
14166 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14167 (Subtarget.hasVInstructionsF16Minimal() &&
14168 !Subtarget.hasVInstructionsF16())) {
14169 return SDValue();
14172 // FIXME: Ignore strict opcodes for now.
14173 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
14175 // Try to form widening multiply.
14176 SDValue Op0 = N->getOperand(0);
14177 SDValue Op1 = N->getOperand(1);
14178 SDValue Merge = N->getOperand(2);
14179 SDValue Mask = N->getOperand(3);
14180 SDValue VL = N->getOperand(4);
14182 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14183 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14184 return SDValue();
14186 // TODO: Refactor to handle more complex cases similar to
14187 // combineBinOp_VLToVWBinOp_VL.
14188 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14189 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14190 return SDValue();
14192 // Check the mask and VL are the same.
14193 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
14194 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
14195 return SDValue();
14197 Op0 = Op0.getOperand(0);
14198 Op1 = Op1.getOperand(0);
14200 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
14201 Op1, Merge, Mask, VL);
14204 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
14205 const RISCVSubtarget &Subtarget) {
14206 if (N->getValueType(0).isScalableVector() &&
14207 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14208 (Subtarget.hasVInstructionsF16Minimal() &&
14209 !Subtarget.hasVInstructionsF16())) {
14210 return SDValue();
14213 SDValue Op0 = N->getOperand(0);
14214 SDValue Op1 = N->getOperand(1);
14215 SDValue Merge = N->getOperand(2);
14216 SDValue Mask = N->getOperand(3);
14217 SDValue VL = N->getOperand(4);
14219 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
14221 // Look for foldable FP_EXTENDS.
14222 bool Op0IsExtend =
14223 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
14224 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
14225 bool Op1IsExtend =
14226 (Op0 == Op1 && Op0IsExtend) ||
14227 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
14229 // Check the mask and VL.
14230 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
14231 Op0IsExtend = false;
14232 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
14233 Op1IsExtend = false;
14235 // Canonicalize.
14236 if (!Op1IsExtend) {
14237 // Sub requires at least operand 1 to be an extend.
14238 if (!IsAdd)
14239 return SDValue();
14241 // Add is commutable; if the other operand is foldable, swap them.
14242 if (!Op0IsExtend)
14243 return SDValue();
14245 std::swap(Op0, Op1);
14246 std::swap(Op0IsExtend, Op1IsExtend);
14249 // Op1 is a foldable extend. Op0 might be foldable.
14250 Op1 = Op1.getOperand(0);
14251 if (Op0IsExtend)
14252 Op0 = Op0.getOperand(0);
14254 unsigned Opc;
14255 if (IsAdd)
14256 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
14257 else
14258 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
14260 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
14261 VL);
14264 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
14265 const RISCVSubtarget &Subtarget) {
14266 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
14268 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
14269 return SDValue();
14271 if (!isa<ConstantSDNode>(N->getOperand(1)))
14272 return SDValue();
14273 uint64_t ShAmt = N->getConstantOperandVal(1);
14274 if (ShAmt > 32)
14275 return SDValue();
14277 SDValue N0 = N->getOperand(0);
14279 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
14280 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
14281 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
14282 if (ShAmt < 32 &&
14283 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
14284 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
14285 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
14286 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
14287 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
14288 if (LShAmt < 32) {
14289 SDLoc ShlDL(N0.getOperand(0));
14290 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
14291 N0.getOperand(0).getOperand(0),
14292 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
14293 SDLoc DL(N);
14294 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
14295 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
14299 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
14300 // FIXME: Should this be a generic combine? There's a similar combine on X86.
14302 // Also try these folds where an add or sub is in the middle.
14303 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
14304 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
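// For example (hypothetical C == 12): (sra (shl X, 32), 20) is rewritten to
// (shl (sext_inreg X, i32), 12); both compute the low 32 bits of X sign
// extended and then shifted left by 12.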
14305 SDValue Shl;
14306 ConstantSDNode *AddC = nullptr;
14308 // We might have an ADD or SUB between the SRA and SHL.
14309 bool IsAdd = N0.getOpcode() == ISD::ADD;
14310 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
14311 // Other operand needs to be a constant we can modify.
14312 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
14313 if (!AddC)
14314 return SDValue();
14316 // AddC needs to have at least 32 trailing zeros.
14317 if (AddC->getAPIntValue().countr_zero() < 32)
14318 return SDValue();
14320 // All users should be a shift by constant less than or equal to 32. This
14321 // ensures we'll do this optimization for each of them to produce an
14322 // add/sub+sext_inreg they can all share.
14323 for (SDNode *U : N0->uses()) {
14324 if (U->getOpcode() != ISD::SRA ||
14325 !isa<ConstantSDNode>(U->getOperand(1)) ||
14326 U->getConstantOperandVal(1) > 32)
14327 return SDValue();
14330 Shl = N0.getOperand(IsAdd ? 0 : 1);
14331 } else {
14332 // Not an ADD or SUB.
14333 Shl = N0;
14336 // Look for a shift left by 32.
14337 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
14338 Shl.getConstantOperandVal(1) != 32)
14339 return SDValue();
14341 // If we didn't look through an add/sub, then the shl should have one use.
14342 // If we did look through an add/sub, the sext_inreg we create is free so
14343 // we're only creating 2 new instructions. It's enough to only remove the
14344 // original sra+add/sub.
14345 if (!AddC && !Shl.hasOneUse())
14346 return SDValue();
14348 SDLoc DL(N);
14349 SDValue In = Shl.getOperand(0);
14351 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
14352 // constant.
14353 if (AddC) {
14354 SDValue ShiftedAddC =
14355 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
14356 if (IsAdd)
14357 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
14358 else
14359 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
14362 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
14363 DAG.getValueType(MVT::i32));
14364 if (ShAmt == 32)
14365 return SExt;
14367 return DAG.getNode(
14368 ISD::SHL, DL, MVT::i64, SExt,
14369 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
14372 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
14373 // the result is used as the condition of a br_cc or select_cc we can invert,
14374 // inverting the setcc is free, and Z is 0/1. The caller will invert the
14375 // br_cc/select_cc.
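// For example (sketch): with Z known to be 0/1,
//   (and (seteq A, B), (xor Z, 1))
// becomes
//   (or (setne A, B), Z)
// which computes the inverse of the original condition; the caller then
// inverts the br_cc/select_cc, so no extra xori is needed for Z.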
14376 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
14377 bool IsAnd = Cond.getOpcode() == ISD::AND;
14378 if (!IsAnd && Cond.getOpcode() != ISD::OR)
14379 return SDValue();
14381 if (!Cond.hasOneUse())
14382 return SDValue();
14384 SDValue Setcc = Cond.getOperand(0);
14385 SDValue Xor = Cond.getOperand(1);
14386 // Canonicalize setcc to LHS.
14387 if (Setcc.getOpcode() != ISD::SETCC)
14388 std::swap(Setcc, Xor);
14389 // LHS should be a setcc and RHS should be an xor.
14390 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
14391 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
14392 return SDValue();
14394 // If the condition is an And, SimplifyDemandedBits may have changed
14395 // (xor Z, 1) to (not Z).
14396 SDValue Xor1 = Xor.getOperand(1);
14397 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
14398 return SDValue();
14400 EVT VT = Cond.getValueType();
14401 SDValue Xor0 = Xor.getOperand(0);
14403 // The LHS of the xor needs to be 0/1.
14404 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14405 if (!DAG.MaskedValueIsZero(Xor0, Mask))
14406 return SDValue();
14408 // We can only invert integer setccs.
14409 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
14410 if (!SetCCOpVT.isScalarInteger())
14411 return SDValue();
14413 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
14414 if (ISD::isIntEqualitySetCC(CCVal)) {
14415 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14416 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
14417 Setcc.getOperand(1), CCVal);
14418 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
14419 // Invert (setlt 0, X) by converting to (setlt X, 1).
14420 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
14421 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
14422 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
14423 // Invert (setlt X, 1) by converting to (setlt 0, X).
14424 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
14425 DAG.getConstant(0, SDLoc(Setcc), VT),
14426 Setcc.getOperand(0), CCVal);
14427 } else
14428 return SDValue();
14430 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14431 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
14434 // Perform common combines for BR_CC and SELECT_CC conditions.
14435 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
14436 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
14437 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
14439 // Since an arithmetic right shift always preserves the sign bit, the shift
14440 // can be omitted when comparing against zero:
14441 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
14442 // setge (sra X, N), 0 -> setge X, 0
14443 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
14444 LHS.getOpcode() == ISD::SRA) {
14445 LHS = LHS.getOperand(0);
14446 return true;
14449 if (!ISD::isIntEqualitySetCC(CCVal))
14450 return false;
14452 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
14453 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
14454 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
14455 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
14456 // If we're looking for eq 0 instead of ne 0, we need to invert the
14457 // condition.
14458 bool Invert = CCVal == ISD::SETEQ;
14459 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
14460 if (Invert)
14461 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
14463 RHS = LHS.getOperand(1);
14464 LHS = LHS.getOperand(0);
14465 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
14467 CC = DAG.getCondCode(CCVal);
14468 return true;
14471 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
14472 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
14473 RHS = LHS.getOperand(1);
14474 LHS = LHS.getOperand(0);
14475 return true;
14478 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
14479 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
14480 LHS.getOperand(1).getOpcode() == ISD::Constant) {
14481 SDValue LHS0 = LHS.getOperand(0);
14482 if (LHS0.getOpcode() == ISD::AND &&
14483 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
14484 uint64_t Mask = LHS0.getConstantOperandVal(1);
14485 uint64_t ShAmt = LHS.getConstantOperandVal(1);
14486 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
14487 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
14488 CC = DAG.getCondCode(CCVal);
14490 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
14491 LHS = LHS0.getOperand(0);
14492 if (ShAmt != 0)
14493 LHS =
14494 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
14495 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
14496 return true;
14501 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
14502 // This can occur when legalizing some floating point comparisons.
14503 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
14504 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
14505 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
14506 CC = DAG.getCondCode(CCVal);
14507 RHS = DAG.getConstant(0, DL, LHS.getValueType());
14508 return true;
14511 if (isNullConstant(RHS)) {
14512 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
14513 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
14514 CC = DAG.getCondCode(CCVal);
14515 LHS = NewCond;
14516 return true;
14520 return false;
14523 // Fold
14524 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
14525 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
14526 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
14527 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
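// For example (sketch): (select C, (add Y, X), Y) becomes
// (add Y, (select C, X, 0)); when C is false the inner select produces the
// identity value 0 and the add simply returns Y.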
14528 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
14529 SDValue TrueVal, SDValue FalseVal,
14530 bool Swapped) {
14531 bool Commutative = true;
14532 unsigned Opc = TrueVal.getOpcode();
14533 switch (Opc) {
14534 default:
14535 return SDValue();
14536 case ISD::SHL:
14537 case ISD::SRA:
14538 case ISD::SRL:
14539 case ISD::SUB:
14540 Commutative = false;
14541 break;
14542 case ISD::ADD:
14543 case ISD::OR:
14544 case ISD::XOR:
14545 break;
14548 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
14549 return SDValue();
14551 unsigned OpToFold;
14552 if (FalseVal == TrueVal.getOperand(0))
14553 OpToFold = 0;
14554 else if (Commutative && FalseVal == TrueVal.getOperand(1))
14555 OpToFold = 1;
14556 else
14557 return SDValue();
14559 EVT VT = N->getValueType(0);
14560 SDLoc DL(N);
14561 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
14562 EVT OtherOpVT = OtherOp->getValueType(0);
14563 SDValue IdentityOperand =
14564 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
14565 if (!Commutative)
14566 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
14567 assert(IdentityOperand && "No identity operand!");
14569 if (Swapped)
14570 std::swap(OtherOp, IdentityOperand);
14571 SDValue NewSel =
14572 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
14573 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
14576 // This tries to get rid of `select` and `icmp` that are being used to handle
14577 // targets that do not support `cttz(0)`/`ctlz(0)`.
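// For example, for i32 this turns
//   select (icmp eq i32 %x, 0), i32 0, i32 (cttz %x)
// into (cttz %x) & 31: when %x is zero, cttz (with defined zero behaviour)
// yields 32 and 32 & 31 == 0, so the compare and select are no longer needed.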
14578 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
14579 SDValue Cond = N->getOperand(0);
14581 // This represents either CTTZ or CTLZ instruction.
14582 SDValue CountZeroes;
14584 SDValue ValOnZero;
14586 if (Cond.getOpcode() != ISD::SETCC)
14587 return SDValue();
14589 if (!isNullConstant(Cond->getOperand(1)))
14590 return SDValue();
14592 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
14593 if (CCVal == ISD::CondCode::SETEQ) {
14594 CountZeroes = N->getOperand(2);
14595 ValOnZero = N->getOperand(1);
14596 } else if (CCVal == ISD::CondCode::SETNE) {
14597 CountZeroes = N->getOperand(1);
14598 ValOnZero = N->getOperand(2);
14599 } else {
14600 return SDValue();
14603 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
14604 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
14605 CountZeroes = CountZeroes.getOperand(0);
14607 if (CountZeroes.getOpcode() != ISD::CTTZ &&
14608 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
14609 CountZeroes.getOpcode() != ISD::CTLZ &&
14610 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
14611 return SDValue();
14613 if (!isNullConstant(ValOnZero))
14614 return SDValue();
14616 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
14617 if (Cond->getOperand(0) != CountZeroesArgument)
14618 return SDValue();
14620 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
14621 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
14622 CountZeroes.getValueType(), CountZeroesArgument);
14623 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
14624 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
14625 CountZeroes.getValueType(), CountZeroesArgument);
14628 unsigned BitWidth = CountZeroes.getValueSizeInBits();
14629 SDValue BitWidthMinusOne =
14630 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
14632 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
14633 CountZeroes, BitWidthMinusOne);
14634 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
14637 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
14638 const RISCVSubtarget &Subtarget) {
14639 SDValue Cond = N->getOperand(0);
14640 SDValue True = N->getOperand(1);
14641 SDValue False = N->getOperand(2);
14642 SDLoc DL(N);
14643 EVT VT = N->getValueType(0);
14644 EVT CondVT = Cond.getValueType();
14646 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
14647 return SDValue();
14649 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
14650 // BEXTI, where C is power of 2.
14651 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
14652 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
14653 SDValue LHS = Cond.getOperand(0);
14654 SDValue RHS = Cond.getOperand(1);
14655 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14656 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
14657 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
14658 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
14659 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
14660 return DAG.getSelect(DL, VT,
14661 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
14662 False, True);
14665 return SDValue();
14668 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
14669 const RISCVSubtarget &Subtarget) {
14670 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
14671 return Folded;
14673 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
14674 return V;
14676 if (Subtarget.hasConditionalMoveFusion())
14677 return SDValue();
14679 SDValue TrueVal = N->getOperand(1);
14680 SDValue FalseVal = N->getOperand(2);
14681 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
14682 return V;
14683 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
14686 /// If we have a build_vector where each lane is binop X, C, where C
14687 /// is a constant (but not necessarily the same constant on all lanes),
14688 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
14689 /// We assume that materializing a constant build vector will be no more
14690 /// expensive that performing O(n) binops.
14691 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
14692 const RISCVSubtarget &Subtarget,
14693 const RISCVTargetLowering &TLI) {
14694 SDLoc DL(N);
14695 EVT VT = N->getValueType(0);
14697 assert(!VT.isScalableVector() && "unexpected build vector");
14699 if (VT.getVectorNumElements() == 1)
14700 return SDValue();
14702 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
14703 if (!TLI.isBinOp(Opcode))
14704 return SDValue();
14706 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
14707 return SDValue();
14709 SmallVector<SDValue> LHSOps;
14710 SmallVector<SDValue> RHSOps;
14711 for (SDValue Op : N->ops()) {
14712 if (Op.isUndef()) {
14713 // We can't form a divide or remainder from undef.
14714 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
14715 return SDValue();
14717 LHSOps.push_back(Op);
14718 RHSOps.push_back(Op);
14719 continue;
14722 // TODO: We can handle operations which have a neutral RHS value
14723 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
14724 // of profitability in a more explicit manner.
14725 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
14726 return SDValue();
14728 LHSOps.push_back(Op.getOperand(0));
14729 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
14730 !isa<ConstantFPSDNode>(Op.getOperand(1)))
14731 return SDValue();
14732 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
14733 // have different LHS and RHS types.
14734 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
14735 return SDValue();
14736 RHSOps.push_back(Op.getOperand(1));
14739 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
14740 DAG.getBuildVector(VT, DL, RHSOps));
14743 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
14744 const RISCVSubtarget &Subtarget,
14745 const RISCVTargetLowering &TLI) {
14746 SDValue InVec = N->getOperand(0);
14747 SDValue InVal = N->getOperand(1);
14748 SDValue EltNo = N->getOperand(2);
14749 SDLoc DL(N);
14751 EVT VT = InVec.getValueType();
14752 if (VT.isScalableVector())
14753 return SDValue();
14755 if (!InVec.hasOneUse())
14756 return SDValue();
14758 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
14759 // move the insert_vector_elts into the arms of the binop. Note that
14760 // the new RHS must be a constant.
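// For example, insert_vector_elt (add A, VecC), (add b, C2), Elt becomes
// add (insert_vector_elt A, b, Elt), (insert_vector_elt VecC, C2, Elt),
// keeping the RHS a build_vector of constants.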
14761 const unsigned InVecOpcode = InVec->getOpcode();
14762 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
14763 InVal.hasOneUse()) {
14764 SDValue InVecLHS = InVec->getOperand(0);
14765 SDValue InVecRHS = InVec->getOperand(1);
14766 SDValue InValLHS = InVal->getOperand(0);
14767 SDValue InValRHS = InVal->getOperand(1);
14769 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
14770 return SDValue();
14771 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
14772 return SDValue();
14773 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
14774 // have different LHS and RHS types.
14775 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
14776 return SDValue();
14777 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14778 InVecLHS, InValLHS, EltNo);
14779 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14780 InVecRHS, InValRHS, EltNo);
14781 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
14784 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
14785 // move the insert_vector_elt to the source operand of the concat_vector.
14786 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
14787 return SDValue();
14789 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14790 if (!IndexC)
14791 return SDValue();
14792 unsigned Elt = IndexC->getZExtValue();
14794 EVT ConcatVT = InVec.getOperand(0).getValueType();
14795 if (ConcatVT.getVectorElementType() != InVal.getValueType())
14796 return SDValue();
14797 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
14798 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
14799 EltNo.getValueType());
14801 unsigned ConcatOpIdx = Elt / ConcatNumElts;
14802 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
14803 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
14804 ConcatOp, InVal, NewIdx);
14806 SmallVector<SDValue> ConcatOps;
14807 ConcatOps.append(InVec->op_begin(), InVec->op_end());
14808 ConcatOps[ConcatOpIdx] = ConcatOp;
14809 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14812 // If we're concatenating a series of vector loads like
14813 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
14814 // then we can turn this into a strided load by widening the vector elements:
14815 //   vlse32 p, stride=n
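// For example, four v4i8 loads from p, p+16, p+32 and p+48 become a single
// v4i32 strided load (vlse32) from p with stride 16, bitcast back to v16i8.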
14816 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
14817 const RISCVSubtarget &Subtarget,
14818 const RISCVTargetLowering &TLI) {
14819 SDLoc DL(N);
14820 EVT VT = N->getValueType(0);
14822 // Only perform this combine on legal MVTs.
14823 if (!TLI.isTypeLegal(VT))
14824 return SDValue();
14826 // TODO: Potentially extend this to scalable vectors
14827 if (VT.isScalableVector())
14828 return SDValue();
14830 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
14831 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
14832 !SDValue(BaseLd, 0).hasOneUse())
14833 return SDValue();
14835 EVT BaseLdVT = BaseLd->getValueType(0);
14837 // Go through the loads and check that they're strided
14838 SmallVector<LoadSDNode *> Lds;
14839 Lds.push_back(BaseLd);
14840 Align Align = BaseLd->getAlign();
14841 for (SDValue Op : N->ops().drop_front()) {
14842 auto *Ld = dyn_cast<LoadSDNode>(Op);
14843 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
14844 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
14845 Ld->getValueType(0) != BaseLdVT)
14846 return SDValue();
14848 Lds.push_back(Ld);
14850 // The common alignment is the most restrictive (smallest) of all the loads
14851 Align = std::min(Align, Ld->getAlign());
14854 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
14855 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
14856 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
14857 // If the load ptrs can be decomposed into a common (Base + Index) with a
14858 // common constant stride, then return the constant stride.
14859 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
14860 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
14861 if (BIO1.equalBaseIndex(BIO2, DAG))
14862 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
14864 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
14865 SDValue P1 = Ld1->getBasePtr();
14866 SDValue P2 = Ld2->getBasePtr();
14867 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
14868 return {{P2.getOperand(1), false}};
14869 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
14870 return {{P1.getOperand(1), true}};
14872 return std::nullopt;
14875 // Get the distance between the first and second loads
14876 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
14877 if (!BaseDiff)
14878 return SDValue();
14880 // Check all the loads are the same distance apart
14881 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
14882 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
14883 return SDValue();
14885 // TODO: At this point, we've successfully matched a generalized gather
14886 // load. Maybe we should emit that, and then move the specialized
14887 // matchers above and below into a DAG combine?
14889 // Get the widened scalar type, e.g. v4i8 -> i32
14890 unsigned WideScalarBitWidth =
14891 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
14892 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
14894 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
14895 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
14896 if (!TLI.isTypeLegal(WideVecVT))
14897 return SDValue();
14899 // Check that the operation is legal
14900 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
14901 return SDValue();
14903 auto [StrideVariant, MustNegateStride] = *BaseDiff;
14904 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
14905 ? std::get<SDValue>(StrideVariant)
14906 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
14907 Lds[0]->getOffset().getValueType());
14908 if (MustNegateStride)
14909 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
14911 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
14912 SDValue IntID =
14913 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
14914 Subtarget.getXLenVT());
14916 SDValue AllOneMask =
14917 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
14918 DAG.getConstant(1, DL, MVT::i1));
14920 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
14921 BaseLd->getBasePtr(), Stride, AllOneMask};
14923 uint64_t MemSize;
14924 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
14925 ConstStride && ConstStride->getSExtValue() >= 0)
14926 // total size = (elsize * n) + (stride - elsize) * (n-1)
14927 // = elsize + stride * (n-1)
14928 MemSize = WideScalarVT.getSizeInBits() +
14929 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
14930 else
14931 // If Stride isn't constant, then we can't know how much it will load
14932 MemSize = MemoryLocation::UnknownSize;
14934 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
14935 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
14936 Align);
14938 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
14939 Ops, WideVecVT, MMO);
14940 for (SDValue Ld : N->ops())
14941 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
14943 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
14946 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
14947 const RISCVSubtarget &Subtarget) {
14949 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
14951 if (N->getValueType(0).isFixedLengthVector())
14952 return SDValue();
14954 SDValue Addend = N->getOperand(0);
14955 SDValue MulOp = N->getOperand(1);
14957 if (N->getOpcode() == RISCVISD::ADD_VL) {
14958 SDValue AddMergeOp = N->getOperand(2);
14959 if (!AddMergeOp.isUndef())
14960 return SDValue();
14963 auto IsVWMulOpc = [](unsigned Opc) {
14964 switch (Opc) {
14965 case RISCVISD::VWMUL_VL:
14966 case RISCVISD::VWMULU_VL:
14967 case RISCVISD::VWMULSU_VL:
14968 return true;
14969 default:
14970 return false;
14974 if (!IsVWMulOpc(MulOp.getOpcode()))
14975 std::swap(Addend, MulOp);
14977 if (!IsVWMulOpc(MulOp.getOpcode()))
14978 return SDValue();
14980 SDValue MulMergeOp = MulOp.getOperand(2);
14982 if (!MulMergeOp.isUndef())
14983 return SDValue();
14985 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
14986 const RISCVSubtarget &Subtarget) {
14987 if (N->getOpcode() == ISD::ADD) {
14988 SDLoc DL(N);
14989 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
14990 Subtarget);
14992 return std::make_pair(N->getOperand(3), N->getOperand(4));
14993 }(N, DAG, Subtarget);
14995 SDValue MulMask = MulOp.getOperand(3);
14996 SDValue MulVL = MulOp.getOperand(4);
14998 if (AddMask != MulMask || AddVL != MulVL)
14999 return SDValue();
15001 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15002 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15003 "Unexpected opcode after VWMACC_VL");
15004 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15005 "Unexpected opcode after VWMACC_VL!");
15006 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15007 "Unexpected opcode after VWMUL_VL!");
15008 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15009 "Unexpected opcode after VWMUL_VL!");
15011 SDLoc DL(N);
15012 EVT VT = N->getValueType(0);
15013 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15014 AddVL};
15015 return DAG.getNode(Opc, DL, VT, Ops);
15018 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15019 ISD::MemIndexType &IndexType,
15020 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15021 if (!DCI.isBeforeLegalize())
15022 return false;
15024 SelectionDAG &DAG = DCI.DAG;
15025 const MVT XLenVT =
15026 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15028 const EVT IndexVT = Index.getValueType();
15030 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15031 // mode, so anything else must be manually legalized.
15032 if (!isIndexTypeSigned(IndexType))
15033 return false;
15035 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15036 // Any index legalization should first promote to XLenVT, so we don't lose
15037 // bits when scaling. This may create an illegal index type so we let
15038 // LLVM's legalization take care of the splitting.
15039 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15040 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15041 IndexVT.changeVectorElementType(XLenVT), Index);
15043 IndexType = ISD::UNSIGNED_SCALED;
15044 return true;
15047 /// Match the index vector of a scatter or gather node as the shuffle mask
15048 /// which performs the rearrangement if possible. Will only match if
15049 /// all lanes are touched, and thus replacing the scatter or gather with
15050 /// a unit strided access and shuffle is legal.
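/// For example, a gather of v4i32 from base B with constant index
/// <12, 8, 4, 0> (byte offsets) touches every lane of a 16-byte block, so it
/// can be done as a unit-strided masked load from B followed by the shuffle
/// <3, 2, 1, 0>.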
15051 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15052 SmallVector<int> &ShuffleMask) {
15053 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15054 return false;
15055 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
15056 return false;
15058 const unsigned ElementSize = VT.getScalarStoreSize();
15059 const unsigned NumElems = VT.getVectorNumElements();
15061 // Create the shuffle mask and check all bits active
15062 assert(ShuffleMask.empty());
15063 BitVector ActiveLanes(NumElems);
15064 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15065 // TODO: We've found an active bit of UB, and could be
15066 // more aggressive here if desired.
15067 if (Index->getOperand(i)->isUndef())
15068 return false;
15069 uint64_t C = Index->getConstantOperandVal(i);
15070 if (C % ElementSize != 0)
15071 return false;
15072 C = C / ElementSize;
15073 if (C >= NumElems)
15074 return false;
15075 ShuffleMask.push_back(C);
15076 ActiveLanes.set(C);
15078 return ActiveLanes.all();
15081 /// Match the index of a gather or scatter operation as an operation
15082 /// with twice the element width and half the number of elements. This is
15083 /// generally profitable (if legal) because these operations are linear
15084 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15085 /// come out ahead.
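/// For example, an i32 gather with byte-offset index <0, 4, 32, 36> pairs up
/// as (0,4) and (32,36); each pair is two adjacent i32s, so (given sufficient
/// alignment or fast unaligned access) the same data can be gathered as two
/// i64 elements with index <0, 32>.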
15086 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15087 Align BaseAlign, const RISCVSubtarget &ST) {
15088 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15089 return false;
15090 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
15091 return false;
15093 // Attempt a doubling. If we can use an element type 4x or 8x in
15094 // size, this will happen via multiple iterations of the transform.
15095 const unsigned NumElems = VT.getVectorNumElements();
15096 if (NumElems % 2 != 0)
15097 return false;
15099 const unsigned ElementSize = VT.getScalarStoreSize();
15100 const unsigned WiderElementSize = ElementSize * 2;
15101 if (WiderElementSize > ST.getELen()/8)
15102 return false;
15104 if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
15105 return false;
15107 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15108 // TODO: We've found an active bit of UB, and could be
15109 // more aggressive here if desired.
15110 if (Index->getOperand(i)->isUndef())
15111 return false;
15112 // TODO: This offset check is too strict if we support fully
15113 // misaligned memory operations.
15114 uint64_t C = Index->getConstantOperandVal(i);
15115 if (i % 2 == 0) {
15116 if (C % WiderElementSize != 0)
15117 return false;
15118 continue;
15120 uint64_t Last = Index->getConstantOperandVal(i-1);
15121 if (C != Last + ElementSize)
15122 return false;
15124 return true;
15128 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15129 DAGCombinerInfo &DCI) const {
15130 SelectionDAG &DAG = DCI.DAG;
15131 const MVT XLenVT = Subtarget.getXLenVT();
15132 SDLoc DL(N);
15134 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15135 // bits are demanded. N will be added to the Worklist if it was not deleted.
15136 // Caller should return SDValue(N, 0) if this returns true.
15137 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15138 SDValue Op = N->getOperand(OpNo);
15139 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15140 if (!SimplifyDemandedBits(Op, Mask, DCI))
15141 return false;
15143 if (N->getOpcode() != ISD::DELETED_NODE)
15144 DCI.AddToWorklist(N);
15145 return true;
15148 switch (N->getOpcode()) {
15149 default:
15150 break;
15151 case RISCVISD::SplitF64: {
15152 SDValue Op0 = N->getOperand(0);
15153 // If the input to SplitF64 is just BuildPairF64 then the operation is
15154 // redundant. Instead, use BuildPairF64's operands directly.
15155 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15156 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15158 if (Op0->isUndef()) {
15159 SDValue Lo = DAG.getUNDEF(MVT::i32);
15160 SDValue Hi = DAG.getUNDEF(MVT::i32);
15161 return DCI.CombineTo(N, Lo, Hi);
15164 // It's cheaper to materialise two 32-bit integers than to load a double
15165 // from the constant pool and transfer it to integer registers through the
15166 // stack.
15167 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
15168 APInt V = C->getValueAPF().bitcastToAPInt();
15169 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15170 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15171 return DCI.CombineTo(N, Lo, Hi);
15174 // This is a target-specific version of a DAGCombine performed in
15175 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15176 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15177 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
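// Here the f64 is already split into (Lo, Hi) i32 halves, so only the Hi
// word needs the sign-bit xor (for fneg) or the and with ~signbit (for
// fabs).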
15178 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15179 !Op0.getNode()->hasOneUse())
15180 break;
15181 SDValue NewSplitF64 =
15182 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15183 Op0.getOperand(0));
15184 SDValue Lo = NewSplitF64.getValue(0);
15185 SDValue Hi = NewSplitF64.getValue(1);
15186 APInt SignBit = APInt::getSignMask(32);
15187 if (Op0.getOpcode() == ISD::FNEG) {
15188 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15189 DAG.getConstant(SignBit, DL, MVT::i32));
15190 return DCI.CombineTo(N, Lo, NewHi);
15192 assert(Op0.getOpcode() == ISD::FABS);
15193 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15194 DAG.getConstant(~SignBit, DL, MVT::i32));
15195 return DCI.CombineTo(N, Lo, NewHi);
15197 case RISCVISD::SLLW:
15198 case RISCVISD::SRAW:
15199 case RISCVISD::SRLW:
15200 case RISCVISD::RORW:
15201 case RISCVISD::ROLW: {
15202 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15203 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15204 SimplifyDemandedLowBitsHelper(1, 5))
15205 return SDValue(N, 0);
15207 break;
15209 case RISCVISD::CLZW:
15210 case RISCVISD::CTZW: {
15211 // Only the lower 32 bits of the first operand are read
15212 if (SimplifyDemandedLowBitsHelper(0, 32))
15213 return SDValue(N, 0);
15214 break;
15216 case RISCVISD::FMV_W_X_RV64: {
15217 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15218 // conversion is unnecessary and can be replaced with the
15219 // FMV_X_ANYEXTW_RV64 operand.
15220 SDValue Op0 = N->getOperand(0);
15221 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15222 return Op0.getOperand(0);
15223 break;
15225 case RISCVISD::FMV_X_ANYEXTH:
15226 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15227 SDLoc DL(N);
15228 SDValue Op0 = N->getOperand(0);
15229 MVT VT = N->getSimpleValueType(0);
15230 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15231 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
15232 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
15233 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
15234 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
15235 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
15236 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
15237 assert(Op0.getOperand(0).getValueType() == VT &&
15238 "Unexpected value type!");
15239 return Op0.getOperand(0);
15242 // This is a target-specific version of a DAGCombine performed in
15243 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15244 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15245 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
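// Here the FP value already lives in a GPR, so fneg/fabs become a single
// xor/and with the sign-bit mask of the FP type (bit 31 for f32, bit 15 for
// f16) applied to the integer value.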
15246 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15247 !Op0.getNode()->hasOneUse())
15248 break;
15249 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
15250 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
15251 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
15252 if (Op0.getOpcode() == ISD::FNEG)
15253 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
15254 DAG.getConstant(SignBit, DL, VT));
15256 assert(Op0.getOpcode() == ISD::FABS);
15257 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
15258 DAG.getConstant(~SignBit, DL, VT));
15260 case ISD::ADD: {
15261 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15262 return V;
15263 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
15264 return V;
15265 return performADDCombine(N, DAG, Subtarget);
15267 case ISD::SUB: {
15268 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15269 return V;
15270 return performSUBCombine(N, DAG, Subtarget);
15272 case ISD::AND:
15273 return performANDCombine(N, DCI, Subtarget);
15274 case ISD::OR:
15275 return performORCombine(N, DCI, Subtarget);
15276 case ISD::XOR:
15277 return performXORCombine(N, DAG, Subtarget);
15278 case ISD::MUL:
15279 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15280 return V;
15281 return performMULCombine(N, DAG);
15282 case ISD::FADD:
15283 case ISD::UMAX:
15284 case ISD::UMIN:
15285 case ISD::SMAX:
15286 case ISD::SMIN:
15287 case ISD::FMAXNUM:
15288 case ISD::FMINNUM: {
15289 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15290 return V;
15291 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15292 return V;
15293 return SDValue();
15295 case ISD::SETCC:
15296 return performSETCCCombine(N, DAG, Subtarget);
15297 case ISD::SIGN_EXTEND_INREG:
15298 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
15299 case ISD::ZERO_EXTEND:
15300 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
15301 // type legalization. This is safe because fp_to_uint produces poison if
15302 // it overflows.
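// For example, rather than fcvt.wu.s followed by a zero-extension of the
// 32-bit result, this emits the conversion directly to i64 (fcvt.lu.s for
// an f32 source); any input on which the two forms would differ overflows
// the narrower convert and is poison anyway.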
15303 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
15304 SDValue Src = N->getOperand(0);
15305 if (Src.getOpcode() == ISD::FP_TO_UINT &&
15306 isTypeLegal(Src.getOperand(0).getValueType()))
15307 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
15308 Src.getOperand(0));
15309 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
15310 isTypeLegal(Src.getOperand(1).getValueType())) {
15311 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
15312 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
15313 Src.getOperand(0), Src.getOperand(1));
15314 DCI.CombineTo(N, Res);
15315 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
15316 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
15317 return SDValue(N, 0); // Return N so it doesn't get rechecked.
15320 return SDValue();
15321 case RISCVISD::TRUNCATE_VECTOR_VL: {
15322 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
15323 // This benefits the cases where X and Y are both the same low-precision
15324 // vector value type. Since the truncate would be lowered into n levels of
15325 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
15326 // restriction, such a pattern would be expanded into a series of "vsetvli"
15327 // and "vnsrl" instructions later to reach this point.
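// For example, when X and Y are i8 vectors that were extended to i32 for the
// shift, the shift amount is clamped with smin(Y, 7) and the arithmetic
// shift is performed directly on the i8 operands, avoiding the widening and
// the vnsrl narrowing chain.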
15328 auto IsTruncNode = [](SDValue V) {
15329 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
15330 return false;
15331 SDValue VL = V.getOperand(2);
15332 auto *C = dyn_cast<ConstantSDNode>(VL);
15333 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
15334 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
15335 (isa<RegisterSDNode>(VL) &&
15336 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
15337 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
15338 IsVLMAXForVMSET;
15341 SDValue Op = N->getOperand(0);
15343 // We need to first find the innermost TRUNCATE_VECTOR_VL node
15344 // to distinguish such a pattern.
15345 while (IsTruncNode(Op)) {
15346 if (!Op.hasOneUse())
15347 return SDValue();
15348 Op = Op.getOperand(0);
15351 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
15352 SDValue N0 = Op.getOperand(0);
15353 SDValue N1 = Op.getOperand(1);
15354 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
15355 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
15356 SDValue N00 = N0.getOperand(0);
15357 SDValue N10 = N1.getOperand(0);
15358 if (N00.getValueType().isVector() &&
15359 N00.getValueType() == N10.getValueType() &&
15360 N->getValueType(0) == N10.getValueType()) {
15361 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
15362 SDValue SMin = DAG.getNode(
15363 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
15364 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
15365 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
15369 break;
15371 case ISD::TRUNCATE:
15372 return performTRUNCATECombine(N, DAG, Subtarget);
15373 case ISD::SELECT:
15374 return performSELECTCombine(N, DAG, Subtarget);
15375 case RISCVISD::CZERO_EQZ:
15376 case RISCVISD::CZERO_NEZ:
15377 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
15378 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
15379 if (N->getOperand(1).getOpcode() == ISD::XOR &&
15380 isOneConstant(N->getOperand(1).getOperand(1))) {
15381 SDValue Cond = N->getOperand(1).getOperand(0);
15382 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
15383 if (DAG.MaskedValueIsZero(Cond, Mask)) {
15384 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
15385 ? RISCVISD::CZERO_NEZ
15386 : RISCVISD::CZERO_EQZ;
15387 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
15388 N->getOperand(0), Cond);
15391 return SDValue();
15393 case RISCVISD::SELECT_CC: {
15394 // Transform
15395 SDValue LHS = N->getOperand(0);
15396 SDValue RHS = N->getOperand(1);
15397 SDValue CC = N->getOperand(2);
15398 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15399 SDValue TrueV = N->getOperand(3);
15400 SDValue FalseV = N->getOperand(4);
15401 SDLoc DL(N);
15402 EVT VT = N->getValueType(0);
15404 // If the True and False values are the same, we don't need a select_cc.
15405 if (TrueV == FalseV)
15406 return TrueV;
15408 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
15409 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
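// For example, with x in a1, y == 5 and z == 3 on RV64 (registers illustrative):
//   srai a0, a1, 63   # a0 = (x < 0) ? -1 : 0
//   andi a0, a0, 2    # a0 = (x < 0) ? (5 - 3) : 0
//   addi a0, a0, 3    # a0 = (x < 0) ? 5 : 3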
15410 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
15411 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
15412 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
15413 if (CCVal == ISD::CondCode::SETGE)
15414 std::swap(TrueV, FalseV);
15416 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
15417 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
15418 // Only handle simm12; if it is not in this range, it can be treated as
15419 // a register.
15420 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
15421 isInt<12>(TrueSImm - FalseSImm)) {
15422 SDValue SRA =
15423 DAG.getNode(ISD::SRA, DL, VT, LHS,
15424 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
15425 SDValue AND =
15426 DAG.getNode(ISD::AND, DL, VT, SRA,
15427 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
15428 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
15431 if (CCVal == ISD::CondCode::SETGE)
15432 std::swap(TrueV, FalseV);
15435 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
15436 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
15437 {LHS, RHS, CC, TrueV, FalseV});
15439 if (!Subtarget.hasConditionalMoveFusion()) {
15440 // (select c, -1, y) -> -c | y
15441 if (isAllOnesConstant(TrueV)) {
15442 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
15443 SDValue Neg = DAG.getNegative(C, DL, VT);
15444 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
15446 // (select c, y, -1) -> -!c | y
15447 if (isAllOnesConstant(FalseV)) {
15448 SDValue C =
15449 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
15450 SDValue Neg = DAG.getNegative(C, DL, VT);
15451 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
15454 // (select c, 0, y) -> -!c & y
15455 if (isNullConstant(TrueV)) {
15456 SDValue C =
15457 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
15458 SDValue Neg = DAG.getNegative(C, DL, VT);
15459 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
15461 // (select c, y, 0) -> -c & y
15462 if (isNullConstant(FalseV)) {
15463 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
15464 SDValue Neg = DAG.getNegative(C, DL, VT);
15465 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
15467 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
15468 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
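// Both forms compute x when x != 0 and 1 when x == 0, which is exactly
// x + (x == 0), i.e. an add of x and a seqz of x.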
15469 if (((isOneConstant(FalseV) && LHS == TrueV &&
15470 CCVal == ISD::CondCode::SETNE) ||
15471 (isOneConstant(TrueV) && LHS == FalseV &&
15472 CCVal == ISD::CondCode::SETEQ)) &&
15473 isNullConstant(RHS)) {
15474 // freeze it to be safe.
15475 LHS = DAG.getFreeze(LHS);
15476 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
15477 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
15481 // If both true/false are an xor with 1, pull through the select.
15482 // This can occur after op legalization if both operands are setccs that
15483 // require an xor to invert.
15484 // FIXME: Generalize to other binary ops with identical operand?
15485 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
15486 TrueV.getOperand(1) == FalseV.getOperand(1) &&
15487 isOneConstant(TrueV.getOperand(1)) &&
15488 TrueV.hasOneUse() && FalseV.hasOneUse()) {
15489 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
15490 TrueV.getOperand(0), FalseV.getOperand(0));
15491 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
15494 return SDValue();
15496 case RISCVISD::BR_CC: {
15497 SDValue LHS = N->getOperand(1);
15498 SDValue RHS = N->getOperand(2);
15499 SDValue CC = N->getOperand(3);
15500 SDLoc DL(N);
15502 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
15503 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
15504 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
15506 return SDValue();
15508 case ISD::BITREVERSE:
15509 return performBITREVERSECombine(N, DAG, Subtarget);
15510 case ISD::FP_TO_SINT:
15511 case ISD::FP_TO_UINT:
15512 return performFP_TO_INTCombine(N, DCI, Subtarget);
15513 case ISD::FP_TO_SINT_SAT:
15514 case ISD::FP_TO_UINT_SAT:
15515 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
15516 case ISD::FCOPYSIGN: {
15517 EVT VT = N->getValueType(0);
15518 if (!VT.isVector())
15519 break;
15520 // There is a form of VFSGNJ which injects the negated sign of its second
15521 // operand. Try to bubble any FNEG up past the extend/round to produce
15522 // this optimized pattern. Avoid the case where the FP_ROUND has its
15523 // truncating flag set (TRUNC=1).
15524 SDValue In2 = N->getOperand(1);
15525 // Avoid cases where the extend/round has multiple uses, as duplicating
15526 // those is typically more expensive than removing a fneg.
15527 if (!In2.hasOneUse())
15528 break;
15529 if (In2.getOpcode() != ISD::FP_EXTEND &&
15530 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
15531 break;
15532 In2 = In2.getOperand(0);
15533 if (In2.getOpcode() != ISD::FNEG)
15534 break;
15535 SDLoc DL(N);
15536 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
15537 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
15538 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
15540 case ISD::MGATHER: {
15541 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
15542 const EVT VT = N->getValueType(0);
15543 SDValue Index = MGN->getIndex();
15544 SDValue ScaleOp = MGN->getScale();
15545 ISD::MemIndexType IndexType = MGN->getIndexType();
15546 assert(!MGN->isIndexScaled() &&
15547 "Scaled gather/scatter should not be formed");
15549 SDLoc DL(N);
15550 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15551 return DAG.getMaskedGather(
15552 N->getVTList(), MGN->getMemoryVT(), DL,
15553 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
15554 MGN->getBasePtr(), Index, ScaleOp},
15555 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
15557 if (narrowIndex(Index, IndexType, DAG))
15558 return DAG.getMaskedGather(
15559 N->getVTList(), MGN->getMemoryVT(), DL,
15560 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
15561 MGN->getBasePtr(), Index, ScaleOp},
15562 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
15564 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
15565 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
15566 // The sequence will be XLenVT, not the type of Index. Tell
15567 // isSimpleVIDSequence this so we avoid overflow.
15568 if (std::optional<VIDSequence> SimpleVID =
15569 isSimpleVIDSequence(Index, Subtarget.getXLen());
15570 SimpleVID && SimpleVID->StepDenominator == 1) {
15571 const int64_t StepNumerator = SimpleVID->StepNumerator;
15572 const int64_t Addend = SimpleVID->Addend;
15574 // Note: We don't need to check alignment here since (by assumption
15575 // from the existence of the gather), our offsets must be sufficiently
15576 // aligned.
15578 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
15579 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
15580 assert(IndexType == ISD::UNSIGNED_SCALED);
15581 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
15582 DAG.getConstant(Addend, DL, PtrVT));
15584 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
15585 SDValue IntID =
15586 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15587 XLenVT);
15588 SDValue Ops[] =
15589 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
15590 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
15591 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15592 Ops, VT, MGN->getMemOperand());
15596 SmallVector<int> ShuffleMask;
15597 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
15598 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
15599 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
15600 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
15601 MGN->getMask(), DAG.getUNDEF(VT),
15602 MGN->getMemoryVT(), MGN->getMemOperand(),
15603 ISD::UNINDEXED, ISD::NON_EXTLOAD);
15604 SDValue Shuffle =
15605 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
15606 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
15609 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
15610 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
15611 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
15612 SmallVector<SDValue> NewIndices;
15613 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
15614 NewIndices.push_back(Index.getOperand(i));
15615 EVT IndexVT = Index.getValueType()
15616 .getHalfNumVectorElementsVT(*DAG.getContext());
15617 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
15619 unsigned ElementSize = VT.getScalarStoreSize();
15620 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
15621 auto EltCnt = VT.getVectorElementCount();
15622 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
15623 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
15624 EltCnt.divideCoefficientBy(2));
15625 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
15626 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
15627 EltCnt.divideCoefficientBy(2));
15628 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
15630 SDValue Gather =
15631 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
15632 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
15633 Index, ScaleOp},
15634 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
15635 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
15636 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
15638 break;
15640 case ISD::MSCATTER:{
15641 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
15642 SDValue Index = MSN->getIndex();
15643 SDValue ScaleOp = MSN->getScale();
15644 ISD::MemIndexType IndexType = MSN->getIndexType();
15645 assert(!MSN->isIndexScaled() &&
15646 "Scaled gather/scatter should not be formed");
15648 SDLoc DL(N);
15649 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15650 return DAG.getMaskedScatter(
15651 N->getVTList(), MSN->getMemoryVT(), DL,
15652 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
15653 Index, ScaleOp},
15654 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
15656 if (narrowIndex(Index, IndexType, DAG))
15657 return DAG.getMaskedScatter(
15658 N->getVTList(), MSN->getMemoryVT(), DL,
15659 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
15660 Index, ScaleOp},
15661 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
15663 EVT VT = MSN->getValue()->getValueType(0);
15664 SmallVector<int> ShuffleMask;
15665 if (!MSN->isTruncatingStore() &&
15666 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
15667 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
15668 DAG.getUNDEF(VT), ShuffleMask);
15669 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
15670 DAG.getUNDEF(XLenVT), MSN->getMask(),
15671 MSN->getMemoryVT(), MSN->getMemOperand(),
15672 ISD::UNINDEXED, false);
15674 break;
15676 case ISD::VP_GATHER: {
15677 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
15678 SDValue Index = VPGN->getIndex();
15679 SDValue ScaleOp = VPGN->getScale();
15680 ISD::MemIndexType IndexType = VPGN->getIndexType();
15681 assert(!VPGN->isIndexScaled() &&
15682 "Scaled gather/scatter should not be formed");
15684 SDLoc DL(N);
15685 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15686 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
15687 {VPGN->getChain(), VPGN->getBasePtr(), Index,
15688 ScaleOp, VPGN->getMask(),
15689 VPGN->getVectorLength()},
15690 VPGN->getMemOperand(), IndexType);
15692 if (narrowIndex(Index, IndexType, DAG))
15693 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
15694 {VPGN->getChain(), VPGN->getBasePtr(), Index,
15695 ScaleOp, VPGN->getMask(),
15696 VPGN->getVectorLength()},
15697 VPGN->getMemOperand(), IndexType);
15699 break;
15701 case ISD::VP_SCATTER: {
15702 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
15703 SDValue Index = VPSN->getIndex();
15704 SDValue ScaleOp = VPSN->getScale();
15705 ISD::MemIndexType IndexType = VPSN->getIndexType();
15706 assert(!VPSN->isIndexScaled() &&
15707 "Scaled gather/scatter should not be formed");
15709 SDLoc DL(N);
15710 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15711 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
15712 {VPSN->getChain(), VPSN->getValue(),
15713 VPSN->getBasePtr(), Index, ScaleOp,
15714 VPSN->getMask(), VPSN->getVectorLength()},
15715 VPSN->getMemOperand(), IndexType);
15717 if (narrowIndex(Index, IndexType, DAG))
15718 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
15719 {VPSN->getChain(), VPSN->getValue(),
15720 VPSN->getBasePtr(), Index, ScaleOp,
15721 VPSN->getMask(), VPSN->getVectorLength()},
15722 VPSN->getMemOperand(), IndexType);
15723 break;
15725 case RISCVISD::SRA_VL:
15726 case RISCVISD::SRL_VL:
15727 case RISCVISD::SHL_VL: {
15728 SDValue ShAmt = N->getOperand(1);
15729 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
15730 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
15731 SDLoc DL(N);
15732 SDValue VL = N->getOperand(4);
15733 EVT VT = N->getValueType(0);
15734 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
15735 ShAmt.getOperand(1), VL);
15736 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
15737 N->getOperand(2), N->getOperand(3), N->getOperand(4));
15739 break;
15741 case ISD::SRA:
15742 if (SDValue V = performSRACombine(N, DAG, Subtarget))
15743 return V;
15744 [[fallthrough]];
15745 case ISD::SRL:
15746 case ISD::SHL: {
15747 SDValue ShAmt = N->getOperand(1);
15748 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
15749 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
15750 SDLoc DL(N);
15751 EVT VT = N->getValueType(0);
15752 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
15753 ShAmt.getOperand(1),
15754 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
15755 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
15757 break;
15759 case RISCVISD::ADD_VL:
15760 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15761 return V;
15762 return combineToVWMACC(N, DAG, Subtarget);
15763 case RISCVISD::SUB_VL:
15764 case RISCVISD::VWADD_W_VL:
15765 case RISCVISD::VWADDU_W_VL:
15766 case RISCVISD::VWSUB_W_VL:
15767 case RISCVISD::VWSUBU_W_VL:
15768 case RISCVISD::MUL_VL:
15769 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
15770 case RISCVISD::VFMADD_VL:
15771 case RISCVISD::VFNMADD_VL:
15772 case RISCVISD::VFMSUB_VL:
15773 case RISCVISD::VFNMSUB_VL:
15774 case RISCVISD::STRICT_VFMADD_VL:
15775 case RISCVISD::STRICT_VFNMADD_VL:
15776 case RISCVISD::STRICT_VFMSUB_VL:
15777 case RISCVISD::STRICT_VFNMSUB_VL:
15778 return performVFMADD_VLCombine(N, DAG, Subtarget);
15779 case RISCVISD::FMUL_VL:
15780 return performVFMUL_VLCombine(N, DAG, Subtarget);
15781 case RISCVISD::FADD_VL:
15782 case RISCVISD::FSUB_VL:
15783 return performFADDSUB_VLCombine(N, DAG, Subtarget);
15784 case ISD::LOAD:
15785 case ISD::STORE: {
15786 if (DCI.isAfterLegalizeDAG())
15787 if (SDValue V = performMemPairCombine(N, DCI))
15788 return V;
15790 if (N->getOpcode() != ISD::STORE)
15791 break;
15793 auto *Store = cast<StoreSDNode>(N);
15794 SDValue Chain = Store->getChain();
15795 EVT MemVT = Store->getMemoryVT();
15796 SDValue Val = Store->getValue();
15797 SDLoc DL(N);
15799 bool IsScalarizable =
15800 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
15801 Store->isSimple() &&
15802 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
15803 isPowerOf2_64(MemVT.getSizeInBits()) &&
15804 MemVT.getSizeInBits() <= Subtarget.getXLen();
15806 // If sufficiently aligned we can scalarize stores of constant vectors of
15807 // any power-of-two size up to XLen bits, provided that they aren't too
15808 // expensive to materialize.
15809 // vsetivli zero, 2, e8, m1, ta, ma
15810 // vmv.v.i v8, 4
15811 // vse8.v v8, (a0)
15812 // ->
15813 // li a1, 1028
15814 // sh a1, 0(a0)
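// Here the two e8 elements of value 4 pack into the 16-bit constant
// 0x0404 == 1028, which is cheap to materialize and can be stored with a
// single sh.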
15815 if (DCI.isBeforeLegalize() && IsScalarizable &&
15816 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
15817 // Get the constant vector bits
15818 APInt NewC(Val.getValueSizeInBits(), 0);
15819 uint64_t EltSize = Val.getScalarValueSizeInBits();
15820 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
15821 if (Val.getOperand(i).isUndef())
15822 continue;
15823 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
15824 i * EltSize);
15826 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
15828 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
15829 true) <= 2 &&
15830 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
15831 NewVT, *Store->getMemOperand())) {
15832 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
15833 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
15834 Store->getPointerInfo(), Store->getOriginalAlign(),
15835 Store->getMemOperand()->getFlags());
15839 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
15840 // vsetivli zero, 2, e16, m1, ta, ma
15841 // vle16.v v8, (a0)
15842 // vse16.v v8, (a1)
15843 if (auto *L = dyn_cast<LoadSDNode>(Val);
15844 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
15845 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
15846 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
15847 L->getMemoryVT() == MemVT) {
15848 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
15849 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
15850 NewVT, *Store->getMemOperand()) &&
15851 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
15852 NewVT, *L->getMemOperand())) {
15853 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
15854 L->getPointerInfo(), L->getOriginalAlign(),
15855 L->getMemOperand()->getFlags());
15856 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
15857 Store->getPointerInfo(), Store->getOriginalAlign(),
15858 Store->getMemOperand()->getFlags());
15862 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
15863 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
15864 // any illegal types.
15865 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
15866 (DCI.isAfterLegalizeDAG() &&
15867 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15868 isNullConstant(Val.getOperand(1)))) {
15869 SDValue Src = Val.getOperand(0);
15870 MVT VecVT = Src.getSimpleValueType();
15871 // VecVT should be scalable and memory VT should match the element type.
15872 if (!Store->isIndexed() && VecVT.isScalableVector() &&
15873 MemVT == VecVT.getVectorElementType()) {
15874 SDLoc DL(N);
15875 MVT MaskVT = getMaskTypeFor(VecVT);
15876 return DAG.getStoreVP(
15877 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
15878 DAG.getConstant(1, DL, MaskVT),
15879 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
15880 Store->getMemOperand(), Store->getAddressingMode(),
15881 Store->isTruncatingStore(), /*IsCompress*/ false);
15885 break;
15887 case ISD::SPLAT_VECTOR: {
15888 EVT VT = N->getValueType(0);
15889 // Only perform this combine on legal MVT types.
15890 if (!isTypeLegal(VT))
15891 break;
15892 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
15893 DAG, Subtarget))
15894 return Gather;
15895 break;
15897 case ISD::BUILD_VECTOR:
15898 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
15899 return V;
15900 break;
15901 case ISD::CONCAT_VECTORS:
15902 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
15903 return V;
15904 break;
15905 case ISD::INSERT_VECTOR_ELT:
15906 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
15907 return V;
15908 break;
15909 case RISCVISD::VFMV_V_F_VL: {
15910 const MVT VT = N->getSimpleValueType(0);
15911 SDValue Passthru = N->getOperand(0);
15912 SDValue Scalar = N->getOperand(1);
15913 SDValue VL = N->getOperand(2);
15915 // If VL is 1, we can use vfmv.s.f.
15916 if (isOneConstant(VL))
15917 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
15918 break;
15920 case RISCVISD::VMV_V_X_VL: {
15921 const MVT VT = N->getSimpleValueType(0);
15922 SDValue Passthru = N->getOperand(0);
15923 SDValue Scalar = N->getOperand(1);
15924 SDValue VL = N->getOperand(2);
15926 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
15927 // scalar input.
15928 unsigned ScalarSize = Scalar.getValueSizeInBits();
15929 unsigned EltWidth = VT.getScalarSizeInBits();
15930 if (ScalarSize > EltWidth && Passthru.isUndef())
15931 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
15932 return SDValue(N, 0);
15934 // If VL is 1 and the scalar value won't benefit from an immediate, we can
15935 // use vmv.s.x.
15936 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15937 if (isOneConstant(VL) &&
15938 (!Const || Const->isZero() ||
15939 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
15940 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
15942 break;
15944 case RISCVISD::VFMV_S_F_VL: {
15945 SDValue Src = N->getOperand(1);
15946 // Try to remove vector->scalar->vector if the scalar->vector is inserting
15947 // into an undef vector.
15948 // TODO: Could use a vslide or vmv.v.v for non-undef.
15949 if (N->getOperand(0).isUndef() &&
15950 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15951 isNullConstant(Src.getOperand(1)) &&
15952 Src.getOperand(0).getValueType().isScalableVector()) {
15953 EVT VT = N->getValueType(0);
15954 EVT SrcVT = Src.getOperand(0).getValueType();
15955 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
15956 // Widths match, just return the original vector.
15957 if (SrcVT == VT)
15958 return Src.getOperand(0);
15959 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
15961 [[fallthrough]];
15963 case RISCVISD::VMV_S_X_VL: {
15964 const MVT VT = N->getSimpleValueType(0);
15965 SDValue Passthru = N->getOperand(0);
15966 SDValue Scalar = N->getOperand(1);
15967 SDValue VL = N->getOperand(2);
15969 // Use M1 or smaller to avoid over constraining register allocation
15970 const MVT M1VT = getLMUL1VT(VT);
15971 if (M1VT.bitsLT(VT)) {
15972 SDValue M1Passthru =
15973 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
15974 DAG.getVectorIdxConstant(0, DL));
15975 SDValue Result =
15976 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
15977 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
15978 DAG.getConstant(0, DL, XLenVT));
15979 return Result;
15982 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
15983 // higher would involve overly constraining the register allocator for
15984 // no purpose.
15985 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15986 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
15987 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
15988 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
15990 break;
15992 case RISCVISD::VMV_X_S: {
15993 SDValue Vec = N->getOperand(0);
15994 MVT VecVT = N->getOperand(0).getSimpleValueType();
15995 const MVT M1VT = getLMUL1VT(VecVT);
15996 if (M1VT.bitsLT(VecVT)) {
15997 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
15998 DAG.getVectorIdxConstant(0, DL));
15999 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16001 break;
16003 case ISD::INTRINSIC_VOID:
16004 case ISD::INTRINSIC_W_CHAIN:
16005 case ISD::INTRINSIC_WO_CHAIN: {
16006 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16007 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16008 switch (IntNo) {
16009 // By default we do not combine any intrinsic.
16010 default:
16011 return SDValue();
16012 case Intrinsic::riscv_masked_strided_load: {
16013 MVT VT = N->getSimpleValueType(0);
16014 auto *Load = cast<MemIntrinsicSDNode>(N);
16015 SDValue PassThru = N->getOperand(2);
16016 SDValue Base = N->getOperand(3);
16017 SDValue Stride = N->getOperand(4);
16018 SDValue Mask = N->getOperand(5);
16020 // If the stride is equal to the element size in bytes, we can use
16021 // a masked.load.
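// (A stride equal to the element size in bytes means the elements are
// consecutive in memory, i.e. a unit-stride access; e.g. a stride of 4 for a
// vector of i32.)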
16022 const unsigned ElementSize = VT.getScalarStoreSize();
16023 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16024 StrideC && StrideC->getZExtValue() == ElementSize)
16025 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16026 DAG.getUNDEF(XLenVT), Mask, PassThru,
16027 Load->getMemoryVT(), Load->getMemOperand(),
16028 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16029 return SDValue();
16031 case Intrinsic::riscv_masked_strided_store: {
16032 auto *Store = cast<MemIntrinsicSDNode>(N);
16033 SDValue Value = N->getOperand(2);
16034 SDValue Base = N->getOperand(3);
16035 SDValue Stride = N->getOperand(4);
16036 SDValue Mask = N->getOperand(5);
16038 // If the stride is equal to the element size in bytes, we can use
16039 // a masked.store.
16040 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16041 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16042 StrideC && StrideC->getZExtValue() == ElementSize)
16043 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16044 DAG.getUNDEF(XLenVT), Mask,
16045 Store->getMemoryVT(), Store->getMemOperand(),
16046 ISD::UNINDEXED, false);
16047 return SDValue();
16049 case Intrinsic::riscv_vcpop:
16050 case Intrinsic::riscv_vcpop_mask:
16051 case Intrinsic::riscv_vfirst:
16052 case Intrinsic::riscv_vfirst_mask: {
16053 SDValue VL = N->getOperand(2);
16054 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16055 IntNo == Intrinsic::riscv_vfirst_mask)
16056 VL = N->getOperand(3);
16057 if (!isNullConstant(VL))
16058 return SDValue();
16059 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16060 SDLoc DL(N);
16061 EVT VT = N->getValueType(0);
16062 if (IntNo == Intrinsic::riscv_vfirst ||
16063 IntNo == Intrinsic::riscv_vfirst_mask)
16064 return DAG.getConstant(-1, DL, VT);
16065 return DAG.getConstant(0, DL, VT);
16069 case ISD::BITCAST: {
16070 assert(Subtarget.useRVVForFixedLengthVectors());
16071 SDValue N0 = N->getOperand(0);
16072 EVT VT = N->getValueType(0);
16073 EVT SrcVT = N0.getValueType();
16074 // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
16075 // type, widen both sides to avoid a trip through memory.
16076 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16077 VT.isScalarInteger()) {
16078 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16079 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
16080 Ops[0] = N0;
16081 SDLoc DL(N);
16082 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16083 N0 = DAG.getBitcast(MVT::i8, N0);
16084 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
16087 return SDValue();
16091 return SDValue();
16094 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16095 EVT XVT, unsigned KeptBits) const {
16096 // For vectors, we don't have a preference.
16097 if (XVT.isVector())
16098 return false;
16100 if (XVT != MVT::i32 && XVT != MVT::i64)
16101 return false;
16103 // We can use sext.w for RV64 or an srai 31 on RV32.
16104 if (KeptBits == 32 || KeptBits == 64)
16105 return true;
16107 // With Zbb we can use sext.h/sext.b.
16108 return Subtarget.hasStdExtZbb() &&
16109 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16110 KeptBits == 16);
16113 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16114 const SDNode *N, CombineLevel Level) const {
16115 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16116 N->getOpcode() == ISD::SRL) &&
16117 "Expected shift op");
16119 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16120 // materialised in fewer instructions than `(OP _, c1)`:
16122 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16123 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
16124 SDValue N0 = N->getOperand(0);
16125 EVT Ty = N0.getValueType();
16126 if (Ty.isScalarInteger() &&
16127 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16128 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16129 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16130 if (C1 && C2) {
16131 const APInt &C1Int = C1->getAPIntValue();
16132 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16134 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16135 // and the combine should happen, to potentially allow further combines
16136 // later.
16137 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16138 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
16139 return true;
16141 // We can materialise `c1` in an add immediate, so it's "free", and the
16142 // combine should be prevented.
16143 if (C1Int.getSignificantBits() <= 64 &&
16144 isLegalAddImmediate(C1Int.getSExtValue()))
16145 return false;
16147 // Neither constant will fit into an immediate, so find materialisation
16148 // costs.
16149 int C1Cost =
16150 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16151 /*CompressionCost*/ true);
16152 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16153 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16154 /*CompressionCost*/ true);
16156 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16157 // combine should be prevented.
16158 if (C1Cost < ShiftedC1Cost)
16159 return false;
16162 return true;
16165 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16166 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16167 TargetLoweringOpt &TLO) const {
16168 // Delay this optimization as late as possible.
16169 if (!TLO.LegalOps)
16170 return false;
16172 EVT VT = Op.getValueType();
16173 if (VT.isVector())
16174 return false;
16176 unsigned Opcode = Op.getOpcode();
16177 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16178 return false;
16180 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16181 if (!C)
16182 return false;
16184 const APInt &Mask = C->getAPIntValue();
16186 // Clear all non-demanded bits initially.
16187 APInt ShrunkMask = Mask & DemandedBits;
16189 // Try to make a smaller immediate by setting undemanded bits.
16191 APInt ExpandedMask = Mask | ~DemandedBits;
16193 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
16194 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
16196 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
16197 if (NewMask == Mask)
16198 return true;
16199 SDLoc DL(Op);
16200 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
16201 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
16202 Op.getOperand(0), NewC);
16203 return TLO.CombineTo(Op, NewOp);
16206 // If the shrunk mask fits in sign extended 12 bits, let the target
16207 // independent code apply it.
16208 if (ShrunkMask.isSignedIntN(12))
16209 return false;
16211 // AND has a few special cases for zext.
16212 if (Opcode == ISD::AND) {
16213 // Preserve (and X, 0xffff); if zext.h exists use zext.h,
16214 // otherwise use SLLI + SRLI.
16215 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
16216 if (IsLegalMask(NewMask))
16217 return UseMask(NewMask);
16219 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
16220 if (VT == MVT::i64) {
16221 APInt NewMask = APInt(64, 0xffffffff);
16222 if (IsLegalMask(NewMask))
16223 return UseMask(NewMask);
16227 // For the remaining optimizations, we need to be able to make a negative
16228 // number through a combination of mask and undemanded bits.
16229 if (!ExpandedMask.isNegative())
16230 return false;
16232 // The fewest number of bits we need to represent the negative number.
16233 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
16235 // Try to make a 12-bit negative immediate. If that fails, try to make a
16236 // 32-bit negative immediate unless the shrunk immediate already fits in 32 bits.
16237 // If we can't create a simm12, we shouldn't change opaque constants.
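// For example (illustrative): on RV64, (and X, 0xF800) with only the low 16
// bits demanded can use the mask 0xFFFFFFFFFFFFF800 instead, which is the
// simm12 value -2048 and therefore fits in an ANDI.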
16238 APInt NewMask = ShrunkMask;
16239 if (MinSignedBits <= 12)
16240 NewMask.setBitsFrom(11);
16241 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
16242 NewMask.setBitsFrom(31);
16243 else
16244 return false;
16246 // Check that our new mask is a subset of the demanded mask.
16247 assert(IsLegalMask(NewMask));
16248 return UseMask(NewMask);
16251 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
16252 static const uint64_t GREVMasks[] = {
16253 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
16254 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
16256 for (unsigned Stage = 0; Stage != 6; ++Stage) {
16257 unsigned Shift = 1 << Stage;
16258 if (ShAmt & Shift) {
16259 uint64_t Mask = GREVMasks[Stage];
16260 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
16261 if (IsGORC)
16262 Res |= x;
16263 x = Res;
16267 return x;
16270 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16271 KnownBits &Known,
16272 const APInt &DemandedElts,
16273 const SelectionDAG &DAG,
16274 unsigned Depth) const {
16275 unsigned BitWidth = Known.getBitWidth();
16276 unsigned Opc = Op.getOpcode();
16277 assert((Opc >= ISD::BUILTIN_OP_END ||
16278 Opc == ISD::INTRINSIC_WO_CHAIN ||
16279 Opc == ISD::INTRINSIC_W_CHAIN ||
16280 Opc == ISD::INTRINSIC_VOID) &&
16281 "Should use MaskedValueIsZero if you don't know whether Op"
16282 " is a target node!");
16284 Known.resetAll();
16285 switch (Opc) {
16286 default: break;
16287 case RISCVISD::SELECT_CC: {
16288 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
16289 // If we don't know any bits, early out.
16290 if (Known.isUnknown())
16291 break;
16292 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
16294 // Only known if known in both the LHS and RHS.
16295 Known = Known.intersectWith(Known2);
16296 break;
16298 case RISCVISD::CZERO_EQZ:
16299 case RISCVISD::CZERO_NEZ:
16300 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16301 // Result is either all zero or operand 0. We can propagate zeros, but not
16302 // ones.
16303 Known.One.clearAllBits();
16304 break;
16305 case RISCVISD::REMUW: {
16306 KnownBits Known2;
16307 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16308 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16309 // We only care about the lower 32 bits.
16310 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
16311 // Restore the original width by sign extending.
16312 Known = Known.sext(BitWidth);
16313 break;
16315 case RISCVISD::DIVUW: {
16316 KnownBits Known2;
16317 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16318 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16319 // We only care about the lower 32 bits.
16320 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
16321 // Restore the original width by sign extending.
16322 Known = Known.sext(BitWidth);
16323 break;
16325 case RISCVISD::SLLW: {
16326 KnownBits Known2;
16327 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16328 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16329 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
16330 // Restore the original width by sign extending.
16331 Known = Known.sext(BitWidth);
16332 break;
16334 case RISCVISD::CTZW: {
16335 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16336 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
16337 unsigned LowBits = llvm::bit_width(PossibleTZ);
16338 Known.Zero.setBitsFrom(LowBits);
16339 break;
16341 case RISCVISD::CLZW: {
16342 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16343 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
16344 unsigned LowBits = llvm::bit_width(PossibleLZ);
16345 Known.Zero.setBitsFrom(LowBits);
16346 break;
16348 case RISCVISD::BREV8:
16349 case RISCVISD::ORC_B: {
16350 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
16351 // control value of 7 is equivalent to brev8 and orc.b.
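// For example, orc.b maps each non-zero byte to 0xff and each zero byte to
// 0x00 (0x00120000 -> 0x00ff0000), while brev8 reverses the bits within each
// byte.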
16352 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16353 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
16354 // To compute zeros, we need to invert the value and invert it back after.
16355 Known.Zero =
16356 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
16357 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
16358 break;
16360 case RISCVISD::READ_VLENB: {
16361 // We can use the minimum and maximum VLEN values to bound VLENB. We
16362 // know VLEN must be a power of two.
16363 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
16364 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
16365 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
16366 Known.Zero.setLowBits(Log2_32(MinVLenB));
16367 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
16368 if (MaxVLenB == MinVLenB)
16369 Known.One.setBit(Log2_32(MinVLenB));
16370 break;
16372 case RISCVISD::FCLASS: {
16373 // fclass will only set one of the low 10 bits.
16374 Known.Zero.setBitsFrom(10);
16375 break;
16377 case ISD::INTRINSIC_W_CHAIN:
16378 case ISD::INTRINSIC_WO_CHAIN: {
16379 unsigned IntNo =
16380 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
16381 switch (IntNo) {
16382 default:
16383 // We can't do anything for most intrinsics.
16384 break;
16385 case Intrinsic::riscv_vsetvli:
16386 case Intrinsic::riscv_vsetvlimax: {
16387 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
16388 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
16389 RISCVII::VLMUL VLMUL =
16390 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
16391 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
16392 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
16393 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
16394 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
16396 // The result of vsetvli must not be larger than AVL.
16397 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
16398 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
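// For example (illustrative): with a maximum VLEN of 512, SEW=32 and LMUL=2
// give MaxVL = (512 / 32) * 2 = 32, so bits 6 and above of the result are
// known to be zero.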
16400 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
16401 if (BitWidth > KnownZeroFirstBit)
16402 Known.Zero.setBitsFrom(KnownZeroFirstBit);
16403 break;
16406 break;
16411 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
16412 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
16413 unsigned Depth) const {
16414 switch (Op.getOpcode()) {
16415 default:
16416 break;
16417 case RISCVISD::SELECT_CC: {
16418 unsigned Tmp =
16419 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
16420 if (Tmp == 1) return 1; // Early out.
16421 unsigned Tmp2 =
16422 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
16423 return std::min(Tmp, Tmp2);
16425 case RISCVISD::CZERO_EQZ:
16426 case RISCVISD::CZERO_NEZ:
16427 // Output is either all zero or operand 0. We can propagate sign bit count
16428 // from operand 0.
16429 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
16430 case RISCVISD::ABSW: {
16431 // We expand this at isel to negw+max. The result will have 33 sign bits
16432 // if the input has at least 33 sign bits.
16433 unsigned Tmp =
16434 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
16435 if (Tmp < 33) return 1;
16436 return 33;
16438 case RISCVISD::SLLW:
16439 case RISCVISD::SRAW:
16440 case RISCVISD::SRLW:
16441 case RISCVISD::DIVW:
16442 case RISCVISD::DIVUW:
16443 case RISCVISD::REMUW:
16444 case RISCVISD::ROLW:
16445 case RISCVISD::RORW:
16446 case RISCVISD::FCVT_W_RV64:
16447 case RISCVISD::FCVT_WU_RV64:
16448 case RISCVISD::STRICT_FCVT_W_RV64:
16449 case RISCVISD::STRICT_FCVT_WU_RV64:
16450 // TODO: As the result is sign-extended, this is conservatively correct. A
16451 // more precise answer could be calculated for SRAW depending on known
16452 // bits in the shift amount.
16453 return 33;
16454 case RISCVISD::VMV_X_S: {
16455 // The number of sign bits of the scalar result is computed by obtaining the
16456 // element type of the input vector operand, subtracting its width from the
16457 // XLEN, and then adding one (sign bit within the element type). If the
16458 // element type is wider than XLen, the least-significant XLEN bits are
16459 // taken.
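// For example, extracting from a vector of i8 elements on RV64 gives
// 64 - 8 + 1 = 57 known sign bits, since vmv.x.s sign-extends the element to
// XLEN.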
16460 unsigned XLen = Subtarget.getXLen();
16461 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
16462 if (EltBits <= XLen)
16463 return XLen - EltBits + 1;
16464 break;
16466 case ISD::INTRINSIC_W_CHAIN: {
16467 unsigned IntNo = Op.getConstantOperandVal(1);
16468 switch (IntNo) {
16469 default:
16470 break;
16471 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
16472 case Intrinsic::riscv_masked_atomicrmw_add_i64:
16473 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
16474 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
16475 case Intrinsic::riscv_masked_atomicrmw_max_i64:
16476 case Intrinsic::riscv_masked_atomicrmw_min_i64:
16477 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
16478 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
16479 case Intrinsic::riscv_masked_cmpxchg_i64:
16480 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
16481 // narrow atomic operation. These are implemented using atomic
16482 // operations at the minimum supported atomicrmw/cmpxchg width whose
16483 // result is then sign extended to XLEN. With +A, the minimum width is
16484 // 32 for both 64 and 32.
16485 assert(Subtarget.getXLen() == 64);
16486 assert(getMinCmpXchgSizeInBits() == 32);
16487 assert(Subtarget.hasStdExtA());
16488 return 33;
16490 break;
16494 return 1;
16497 const Constant *
16498 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
16499 assert(Ld && "Unexpected null LoadSDNode");
16500 if (!ISD::isNormalLoad(Ld))
16501 return nullptr;
16503 SDValue Ptr = Ld->getBasePtr();
16505 // Only constant pools with no offset are supported.
16506 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
16507 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
16508 if (!CNode || CNode->isMachineConstantPoolEntry() ||
16509 CNode->getOffset() != 0)
16510 return nullptr;
16512 return CNode;
16515 // Simple case, LLA.
16516 if (Ptr.getOpcode() == RISCVISD::LLA) {
16517 auto *CNode = GetSupportedConstantPool(Ptr);
16518 if (!CNode || CNode->getTargetFlags() != 0)
16519 return nullptr;
16521 return CNode->getConstVal();
16524 // Look for a HI and ADD_LO pair.
16525 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
16526 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
16527 return nullptr;
16529 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
16530 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
16532 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
16533 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
16534 return nullptr;
16536 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
16537 return nullptr;
16539 return CNodeLo->getConstVal();
16542 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
16543 MachineBasicBlock *BB) {
16544 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
16546 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
16547 // Should the count have wrapped while it was being read, we need to try
16548 // again.
16549 // ...
16550 // read:
16551 // rdcycleh x3 # load high word of cycle
16552 // rdcycle x2 # load low word of cycle
16553 // rdcycleh x4 # load high word of cycle
16554 // bne x3, x4, read # check if high word reads match, otherwise try again
16555 // ...
16557 MachineFunction &MF = *BB->getParent();
16558 const BasicBlock *LLVM_BB = BB->getBasicBlock();
16559 MachineFunction::iterator It = ++BB->getIterator();
16561 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
16562 MF.insert(It, LoopMBB);
16564 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
16565 MF.insert(It, DoneMBB);
16567 // Transfer the remainder of BB and its successor edges to DoneMBB.
16568 DoneMBB->splice(DoneMBB->begin(), BB,
16569 std::next(MachineBasicBlock::iterator(MI)), BB->end());
16570 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
16572 BB->addSuccessor(LoopMBB);
16574 MachineRegisterInfo &RegInfo = MF.getRegInfo();
16575 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
16576 Register LoReg = MI.getOperand(0).getReg();
16577 Register HiReg = MI.getOperand(1).getReg();
16578 DebugLoc DL = MI.getDebugLoc();
16580 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
16581 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
16582 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
16583 .addReg(RISCV::X0);
16584 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
16585 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
16586 .addReg(RISCV::X0);
16587 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
16588 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
16589 .addReg(RISCV::X0);
16591 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
16592 .addReg(HiReg)
16593 .addReg(ReadAgainReg)
16594 .addMBB(LoopMBB);
16596 LoopMBB->addSuccessor(LoopMBB);
16597 LoopMBB->addSuccessor(DoneMBB);
16599 MI.eraseFromParent();
16601 return DoneMBB;
16604 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
16605 MachineBasicBlock *BB,
16606 const RISCVSubtarget &Subtarget) {
16607 assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
16608 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
16609 "Unexpected instruction");
16611 MachineFunction &MF = *BB->getParent();
16612 DebugLoc DL = MI.getDebugLoc();
16613 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
16614 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
16615 Register LoReg = MI.getOperand(0).getReg();
16616 Register HiReg = MI.getOperand(1).getReg();
16617 Register SrcReg = MI.getOperand(2).getReg();
16619 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
16620 ? &RISCV::GPRPairRegClass
16621 : &RISCV::FPR64RegClass;
16622 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
16624 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
16625 RI, Register());
16626 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
16627 MachineMemOperand *MMOLo =
16628 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
16629 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
16630 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
16631 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
16632 .addFrameIndex(FI)
16633 .addImm(0)
16634 .addMemOperand(MMOLo);
16635 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
16636 .addFrameIndex(FI)
16637 .addImm(4)
16638 .addMemOperand(MMOHi);
16639 MI.eraseFromParent(); // The pseudo instruction is gone now.
16640 return BB;
16643 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
16644 MachineBasicBlock *BB,
16645 const RISCVSubtarget &Subtarget) {
16646 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
16647 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
16648 "Unexpected instruction");
16650 MachineFunction &MF = *BB->getParent();
16651 DebugLoc DL = MI.getDebugLoc();
16652 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
16653 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
16654 Register DstReg = MI.getOperand(0).getReg();
16655 Register LoReg = MI.getOperand(1).getReg();
16656 Register HiReg = MI.getOperand(2).getReg();
16658 const TargetRegisterClass *DstRC =
16659 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass
16660 : &RISCV::FPR64RegClass;
16661 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
16663 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
16664 MachineMemOperand *MMOLo =
16665 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
16666 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
16667 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
16668 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
16669 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
16670 .addFrameIndex(FI)
16671 .addImm(0)
16672 .addMemOperand(MMOLo);
16673 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
16674 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
16675 .addFrameIndex(FI)
16676 .addImm(4)
16677 .addMemOperand(MMOHi);
16678 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
16679 MI.eraseFromParent(); // The pseudo instruction is gone now.
16680 return BB;
16683 static bool isSelectPseudo(MachineInstr &MI) {
16684 switch (MI.getOpcode()) {
16685 default:
16686 return false;
16687 case RISCV::Select_GPR_Using_CC_GPR:
16688 case RISCV::Select_FPR16_Using_CC_GPR:
16689 case RISCV::Select_FPR16INX_Using_CC_GPR:
16690 case RISCV::Select_FPR32_Using_CC_GPR:
16691 case RISCV::Select_FPR32INX_Using_CC_GPR:
16692 case RISCV::Select_FPR64_Using_CC_GPR:
16693 case RISCV::Select_FPR64INX_Using_CC_GPR:
16694 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
16695 return true;
16699 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
16700 unsigned RelOpcode, unsigned EqOpcode,
16701 const RISCVSubtarget &Subtarget) {
16702 DebugLoc DL = MI.getDebugLoc();
16703 Register DstReg = MI.getOperand(0).getReg();
16704 Register Src1Reg = MI.getOperand(1).getReg();
16705 Register Src2Reg = MI.getOperand(2).getReg();
16706 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16707 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16708 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16710 // Save the current FFLAGS.
16711 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
16713 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
16714 .addReg(Src1Reg)
16715 .addReg(Src2Reg);
16716 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16717 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16719 // Restore the FFLAGS.
16720 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
16721 .addReg(SavedFFlags, RegState::Kill);
16723 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
16724 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
16725 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
16726 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
16727 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16728 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
16730 // Erase the pseudoinstruction.
16731 MI.eraseFromParent();
16732 return BB;
16735 static MachineBasicBlock *
16736 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
16737 MachineBasicBlock *ThisMBB,
16738 const RISCVSubtarget &Subtarget) {
16739 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
16740 // Without this, the custom inserter would have generated:
16742 // A
16743 // | \
16744 // | B
16745 // | /
16746 // C
16747 // | \
16748 // | D
16749 // | /
16750 // E
16752 // A: X = ...; Y = ...
16753 // B: empty
16754 // C: Z = PHI [X, A], [Y, B]
16755 // D: empty
16756 // E: PHI [X, C], [Z, D]
16758 // If we lower both Select_FPRX_ in a single step, we can instead generate:
16760 // A
16761 // | \
16762 // | C
16763 // | /|
16764 // |/ |
16765 // | |
16766 // | D
16767 // | /
16768 // E
16770 // A: X = ...; Y = ...
16771 // D: empty
16772 // E: PHI [X, A], [X, C], [Y, D]
16774 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16775 const DebugLoc &DL = First.getDebugLoc();
16776 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
16777 MachineFunction *F = ThisMBB->getParent();
16778 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
16779 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
16780 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
16781 MachineFunction::iterator It = ++ThisMBB->getIterator();
16782 F->insert(It, FirstMBB);
16783 F->insert(It, SecondMBB);
16784 F->insert(It, SinkMBB);
16786 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
16787 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
16788 std::next(MachineBasicBlock::iterator(First)),
16789 ThisMBB->end());
16790 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
16792 // Fallthrough block for ThisMBB.
16793 ThisMBB->addSuccessor(FirstMBB);
16794 // Fallthrough block for FirstMBB.
16795 FirstMBB->addSuccessor(SecondMBB);
16796 ThisMBB->addSuccessor(SinkMBB);
16797 FirstMBB->addSuccessor(SinkMBB);
16798 // This is fallthrough.
16799 SecondMBB->addSuccessor(SinkMBB);
16801 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
16802 Register FLHS = First.getOperand(1).getReg();
16803 Register FRHS = First.getOperand(2).getReg();
16804 // Insert appropriate branch.
16805 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
16806 .addReg(FLHS)
16807 .addReg(FRHS)
16808 .addMBB(SinkMBB);
16810 Register SLHS = Second.getOperand(1).getReg();
16811 Register SRHS = Second.getOperand(2).getReg();
16812 Register Op1Reg4 = First.getOperand(4).getReg();
16813 Register Op1Reg5 = First.getOperand(5).getReg();
16815 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
16816 // Insert appropriate branch.
16817 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
16818 .addReg(SLHS)
16819 .addReg(SRHS)
16820 .addMBB(SinkMBB);
16822 Register DestReg = Second.getOperand(0).getReg();
16823 Register Op2Reg4 = Second.getOperand(4).getReg();
16824 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
16825 .addReg(Op2Reg4)
16826 .addMBB(ThisMBB)
16827 .addReg(Op1Reg4)
16828 .addMBB(FirstMBB)
16829 .addReg(Op1Reg5)
16830 .addMBB(SecondMBB);
16832 // Now remove the Select_FPRX_s.
16833 First.eraseFromParent();
16834 Second.eraseFromParent();
16835 return SinkMBB;
16838 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
16839 MachineBasicBlock *BB,
16840 const RISCVSubtarget &Subtarget) {
16841 // To "insert" Select_* instructions, we actually have to insert the triangle
16842 // control-flow pattern. The incoming instructions know the destination vreg
16843 // to set, the condition code register to branch on, the true/false values to
16844 // select between, and the condcode to use to select the appropriate branch.
16846 // We produce the following control flow:
16847 // HeadMBB
16848 // | \
16849 // | IfFalseMBB
16850 // | /
16851 // TailMBB
16853 // When we find a sequence of selects we attempt to optimize their emission
16854 // by sharing the control flow. Currently we only handle cases where we have
16855 // multiple selects with the exact same condition (same LHS, RHS and CC).
16856 // The selects may be interleaved with other instructions if the other
16857 // instructions meet some requirements we deem safe:
16858 // - They are not pseudo instructions.
16859 // - They are debug instructions, or otherwise:
16860 // - They do not have side-effects, do not access memory, and their inputs do
16861 // not depend on the results of the select pseudo-instructions.
16862 // The TrueV/FalseV operands of the selects cannot depend on the result of
16863 // previous selects in the sequence.
16864 // These conditions could be further relaxed. See the X86 target for a
16865 // related approach and more information.
16867 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
16868 // is checked here and handled by a separate function -
16869 // EmitLoweredCascadedSelect.
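// For example (illustrative), two selects with the same condition:
//   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
//   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t2, %f2
// share a single conditional branch and become two PHIs in TailMBB:
//   %a = PHI [%t1, HeadMBB], [%f1, IfFalseMBB]
//   %b = PHI [%t2, HeadMBB], [%f2, IfFalseMBB]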
16870 Register LHS = MI.getOperand(1).getReg();
16871 Register RHS = MI.getOperand(2).getReg();
16872 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
16874 SmallVector<MachineInstr *, 4> SelectDebugValues;
16875 SmallSet<Register, 4> SelectDests;
16876 SelectDests.insert(MI.getOperand(0).getReg());
16878 MachineInstr *LastSelectPseudo = &MI;
16879 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
16880 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
16881 Next->getOpcode() == MI.getOpcode() &&
16882 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
16883 Next->getOperand(5).isKill()) {
16884 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
16887 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
16888 SequenceMBBI != E; ++SequenceMBBI) {
16889 if (SequenceMBBI->isDebugInstr())
16890 continue;
16891 if (isSelectPseudo(*SequenceMBBI)) {
16892 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
16893 SequenceMBBI->getOperand(2).getReg() != RHS ||
16894 SequenceMBBI->getOperand(3).getImm() != CC ||
16895 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
16896 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
16897 break;
16898 LastSelectPseudo = &*SequenceMBBI;
16899 SequenceMBBI->collectDebugValues(SelectDebugValues);
16900 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
16901 continue;
16903 if (SequenceMBBI->hasUnmodeledSideEffects() ||
16904 SequenceMBBI->mayLoadOrStore() ||
16905 SequenceMBBI->usesCustomInsertionHook())
16906 break;
16907 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
16908 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
16910 break;
16913 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16914 const BasicBlock *LLVM_BB = BB->getBasicBlock();
16915 DebugLoc DL = MI.getDebugLoc();
16916 MachineFunction::iterator I = ++BB->getIterator();
16918 MachineBasicBlock *HeadMBB = BB;
16919 MachineFunction *F = BB->getParent();
16920 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
16921 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
16923 F->insert(I, IfFalseMBB);
16924 F->insert(I, TailMBB);
16926 // Transfer debug instructions associated with the selects to TailMBB.
16927 for (MachineInstr *DebugInstr : SelectDebugValues) {
16928 TailMBB->push_back(DebugInstr->removeFromParent());
16931 // Move all instructions after the sequence to TailMBB.
16932 TailMBB->splice(TailMBB->end(), HeadMBB,
16933 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
16934 // Update machine-CFG edges by transferring all successors of the current
16935 // block to the new block which will contain the Phi nodes for the selects.
16936 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
16937 // Set the successors for HeadMBB.
16938 HeadMBB->addSuccessor(IfFalseMBB);
16939 HeadMBB->addSuccessor(TailMBB);
16941 // Insert appropriate branch.
16942 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
16943 .addReg(LHS)
16944 .addReg(RHS)
16945 .addMBB(TailMBB);
16947 // IfFalseMBB just falls through to TailMBB.
16948 IfFalseMBB->addSuccessor(TailMBB);
16950 // Create PHIs for all of the select pseudo-instructions.
16951 auto SelectMBBI = MI.getIterator();
16952 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
16953 auto InsertionPoint = TailMBB->begin();
16954 while (SelectMBBI != SelectEnd) {
16955 auto Next = std::next(SelectMBBI);
16956 if (isSelectPseudo(*SelectMBBI)) {
16957 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
16958 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
16959 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
16960 .addReg(SelectMBBI->getOperand(4).getReg())
16961 .addMBB(HeadMBB)
16962 .addReg(SelectMBBI->getOperand(5).getReg())
16963 .addMBB(IfFalseMBB);
16964 SelectMBBI->eraseFromParent();
16966 SelectMBBI = Next;
16969 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
16970 return TailMBB;
16973 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
16974 MachineBasicBlock *BB,
16975 unsigned CVTXOpc,
16976 unsigned CVTFOpc) {
16977 DebugLoc DL = MI.getDebugLoc();
16979 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16981 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16982 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16984 // Save the old value of FFLAGS.
16985 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
16987 assert(MI.getNumOperands() == 7);
16989 // Emit a VFCVT_X_F
16990 const TargetRegisterInfo *TRI =
16991 BB->getParent()->getSubtarget().getRegisterInfo();
16992 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
16993 Register Tmp = MRI.createVirtualRegister(RC);
16994 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
16995 .add(MI.getOperand(1))
16996 .add(MI.getOperand(2))
16997 .add(MI.getOperand(3))
16998 .add(MachineOperand::CreateImm(7)) // frm = DYN
16999 .add(MI.getOperand(4))
17000 .add(MI.getOperand(5))
17001 .add(MI.getOperand(6))
17002 .add(MachineOperand::CreateReg(RISCV::FRM,
17003 /*IsDef*/ false,
17004 /*IsImp*/ true));
17006 // Emit a VFCVT_F_X
17007 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17008 .add(MI.getOperand(0))
17009 .add(MI.getOperand(1))
17010 .addReg(Tmp)
17011 .add(MI.getOperand(3))
17012 .add(MachineOperand::CreateImm(7)) // frm = DYN
17013 .add(MI.getOperand(4))
17014 .add(MI.getOperand(5))
17015 .add(MI.getOperand(6))
17016 .add(MachineOperand::CreateReg(RISCV::FRM,
17017 /*IsDef*/ false,
17018 /*IsImp*/ true));
17020 // Restore FFLAGS.
17021 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17022 .addReg(SavedFFLAGS, RegState::Kill);
17024 // Erase the pseudoinstruction.
17025 MI.eraseFromParent();
17026 return BB;
17029 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17030 const RISCVSubtarget &Subtarget) {
17031 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17032 const TargetRegisterClass *RC;
17033 switch (MI.getOpcode()) {
17034 default:
17035 llvm_unreachable("Unexpected opcode");
17036 case RISCV::PseudoFROUND_H:
17037 CmpOpc = RISCV::FLT_H;
17038 F2IOpc = RISCV::FCVT_W_H;
17039 I2FOpc = RISCV::FCVT_H_W;
17040 FSGNJOpc = RISCV::FSGNJ_H;
17041 FSGNJXOpc = RISCV::FSGNJX_H;
17042 RC = &RISCV::FPR16RegClass;
17043 break;
17044 case RISCV::PseudoFROUND_H_INX:
17045 CmpOpc = RISCV::FLT_H_INX;
17046 F2IOpc = RISCV::FCVT_W_H_INX;
17047 I2FOpc = RISCV::FCVT_H_W_INX;
17048 FSGNJOpc = RISCV::FSGNJ_H_INX;
17049 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17050 RC = &RISCV::GPRF16RegClass;
17051 break;
17052 case RISCV::PseudoFROUND_S:
17053 CmpOpc = RISCV::FLT_S;
17054 F2IOpc = RISCV::FCVT_W_S;
17055 I2FOpc = RISCV::FCVT_S_W;
17056 FSGNJOpc = RISCV::FSGNJ_S;
17057 FSGNJXOpc = RISCV::FSGNJX_S;
17058 RC = &RISCV::FPR32RegClass;
17059 break;
17060 case RISCV::PseudoFROUND_S_INX:
17061 CmpOpc = RISCV::FLT_S_INX;
17062 F2IOpc = RISCV::FCVT_W_S_INX;
17063 I2FOpc = RISCV::FCVT_S_W_INX;
17064 FSGNJOpc = RISCV::FSGNJ_S_INX;
17065 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17066 RC = &RISCV::GPRF32RegClass;
17067 break;
17068 case RISCV::PseudoFROUND_D:
17069 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17070 CmpOpc = RISCV::FLT_D;
17071 F2IOpc = RISCV::FCVT_L_D;
17072 I2FOpc = RISCV::FCVT_D_L;
17073 FSGNJOpc = RISCV::FSGNJ_D;
17074 FSGNJXOpc = RISCV::FSGNJX_D;
17075 RC = &RISCV::FPR64RegClass;
17076 break;
17077 case RISCV::PseudoFROUND_D_INX:
17078 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17079 CmpOpc = RISCV::FLT_D_INX;
17080 F2IOpc = RISCV::FCVT_L_D_INX;
17081 I2FOpc = RISCV::FCVT_D_L_INX;
17082 FSGNJOpc = RISCV::FSGNJ_D_INX;
17083 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17084 RC = &RISCV::GPRRegClass;
17085 break;
17088 const BasicBlock *BB = MBB->getBasicBlock();
17089 DebugLoc DL = MI.getDebugLoc();
17090 MachineFunction::iterator I = ++MBB->getIterator();
17092 MachineFunction *F = MBB->getParent();
17093 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17094 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17096 F->insert(I, CvtMBB);
17097 F->insert(I, DoneMBB);
17098 // Move all instructions after the sequence to DoneMBB.
17099 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
17100 MBB->end());
17101 // Update machine-CFG edges by transferring all successors of the current
17102 // block to the new block, which will contain the Phi node merging the results.
17103 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
17104 // Set the successors for MBB.
17105 MBB->addSuccessor(CvtMBB);
17106 MBB->addSuccessor(DoneMBB);
17108 Register DstReg = MI.getOperand(0).getReg();
17109 Register SrcReg = MI.getOperand(1).getReg();
17110 Register MaxReg = MI.getOperand(2).getReg();
17111 int64_t FRM = MI.getOperand(3).getImm();
17113 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17114 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
17116 Register FabsReg = MRI.createVirtualRegister(RC);
17117 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17119 // Compare the FP value to the max value.
17120 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17121 auto MIB =
17122 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17123 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17124 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17126 // Insert branch.
17127 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17128 .addReg(CmpReg)
17129 .addReg(RISCV::X0)
17130 .addMBB(DoneMBB);
17132 CvtMBB->addSuccessor(DoneMBB);
17134 // Convert to integer.
17135 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17136 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17137 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17138 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17140 // Convert back to FP.
17141 Register I2FReg = MRI.createVirtualRegister(RC);
17142 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
17143 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17144 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17146 // Restore the sign bit.
17147 Register CvtReg = MRI.createVirtualRegister(RC);
17148 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
17150 // Merge the results.
17151 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
17152 .addReg(SrcReg)
17153 .addMBB(MBB)
17154 .addReg(CvtReg)
17155 .addMBB(CvtMBB);
17157 MI.eraseFromParent();
17158 return DoneMBB;
17161 MachineBasicBlock *
17162 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17163 MachineBasicBlock *BB) const {
17164 switch (MI.getOpcode()) {
17165 default:
17166 llvm_unreachable("Unexpected instr type to insert");
17167 case RISCV::ReadCycleWide:
17168 assert(!Subtarget.is64Bit() &&
17169 "ReadCycleWrite is only to be used on riscv32");
17170 return emitReadCycleWidePseudo(MI, BB);
17171 case RISCV::Select_GPR_Using_CC_GPR:
17172 case RISCV::Select_FPR16_Using_CC_GPR:
17173 case RISCV::Select_FPR16INX_Using_CC_GPR:
17174 case RISCV::Select_FPR32_Using_CC_GPR:
17175 case RISCV::Select_FPR32INX_Using_CC_GPR:
17176 case RISCV::Select_FPR64_Using_CC_GPR:
17177 case RISCV::Select_FPR64INX_Using_CC_GPR:
17178 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17179 return emitSelectPseudo(MI, BB, Subtarget);
17180 case RISCV::BuildPairF64Pseudo:
17181 case RISCV::BuildPairF64Pseudo_INX:
17182 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
17183 case RISCV::SplitF64Pseudo:
17184 case RISCV::SplitF64Pseudo_INX:
17185 return emitSplitF64Pseudo(MI, BB, Subtarget);
17186 case RISCV::PseudoQuietFLE_H:
17187 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
17188 case RISCV::PseudoQuietFLE_H_INX:
17189 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
17190 case RISCV::PseudoQuietFLT_H:
17191 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
17192 case RISCV::PseudoQuietFLT_H_INX:
17193 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
17194 case RISCV::PseudoQuietFLE_S:
17195 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
17196 case RISCV::PseudoQuietFLE_S_INX:
17197 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
17198 case RISCV::PseudoQuietFLT_S:
17199 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
17200 case RISCV::PseudoQuietFLT_S_INX:
17201 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
17202 case RISCV::PseudoQuietFLE_D:
17203 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
17204 case RISCV::PseudoQuietFLE_D_INX:
17205 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
17206 case RISCV::PseudoQuietFLE_D_IN32X:
17207 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
17208 Subtarget);
17209 case RISCV::PseudoQuietFLT_D:
17210 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
17211 case RISCV::PseudoQuietFLT_D_INX:
17212 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
17213 case RISCV::PseudoQuietFLT_D_IN32X:
17214 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
17215 Subtarget);
17217 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
17218 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
17219 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
17220 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
17221 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
17222 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
17223 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
17224 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
17225 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
17226 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
17227 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
17228 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
17229 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
17230 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
17231 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
17232 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
17233 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
17234 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
17235 case RISCV::PseudoFROUND_H:
17236 case RISCV::PseudoFROUND_H_INX:
17237 case RISCV::PseudoFROUND_S:
17238 case RISCV::PseudoFROUND_S_INX:
17239 case RISCV::PseudoFROUND_D:
17240 case RISCV::PseudoFROUND_D_INX:
17241 case RISCV::PseudoFROUND_D_IN32X:
17242 return emitFROUND(MI, BB, Subtarget);
17243 case TargetOpcode::STATEPOINT:
17244 case TargetOpcode::STACKMAP:
17245 case TargetOpcode::PATCHPOINT:
17246 if (!Subtarget.is64Bit())
17247 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
17248 "supported on 64-bit targets");
17249 return emitPatchPoint(MI, BB);
17253 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
17254 SDNode *Node) const {
17255 // Add FRM dependency to any instructions with dynamic rounding mode.
17256 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
17257 if (Idx < 0) {
17258 // Vector pseudos have FRM index indicated by TSFlags.
17259 Idx = RISCVII::getFRMOpNum(MI.getDesc());
17260 if (Idx < 0)
17261 return;
17263 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
17264 return;
17265 // If the instruction already reads FRM, don't add another read.
17266 if (MI.readsRegister(RISCV::FRM))
17267 return;
17268 MI.addOperand(
17269 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
17272 // Calling Convention Implementation.
17273 // The expectations for frontend ABI lowering vary from target to target.
17274 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
17275 // details, but this is a longer term goal. For now, we simply try to keep the
17276 // role of the frontend as simple and well-defined as possible. The rules can
17277 // be summarised as:
17278 // * Never split up large scalar arguments. We handle them here.
17279 // * If a hardfloat calling convention is being used, and the struct may be
17280 // passed in a pair of registers (fp+fp, int+fp), and both registers are
17281 // available, then pass as two separate arguments. If either the GPRs or FPRs
17282 // are exhausted, then pass according to the rule below.
17283 // * If a struct could never be passed in registers or directly in a stack
17284 // slot (as it is larger than 2*XLEN and the floating point rules don't
17285 // apply), then pass it using a pointer with the byval attribute.
17286 // * If a struct is less than 2*XLEN, then coerce to either a two-element
17287 // word-sized array or a 2*XLEN scalar (depending on alignment).
17288 // * The frontend can determine whether a struct is returned by reference or
17289 // not based on its size and fields. If it will be returned by reference, the
17290 // frontend must modify the prototype so a pointer with the sret annotation is
17291 // passed as the first argument. This is not necessary for large scalar
17292 // returns.
17293 // * Struct return values and varargs should be coerced to structs containing
17294 // register-size fields in the same situations they would be for fixed
17295 // arguments.
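// For example (illustrative), under the LP64D hard-float ABI a small struct
// such as { double d; int32_t i; } is flattened and passed as a separate FPR
// and GPR (e.g. fa0 + a0 for the first argument) while both register classes
// still have free registers; once either is exhausted, the fallback rules
// above apply.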
17297 static const MCPhysReg ArgFPR16s[] = {
17298 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
17299 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
17301 static const MCPhysReg ArgFPR32s[] = {
17302 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
17303 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
17305 static const MCPhysReg ArgFPR64s[] = {
17306 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
17307 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
17309 // This is an interim calling convention and it may be changed in the future.
17310 static const MCPhysReg ArgVRs[] = {
17311 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
17312 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
17313 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
17314 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
17315 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
17316 RISCV::V20M2, RISCV::V22M2};
17317 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
17318 RISCV::V20M4};
17319 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
17321 ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
17322 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
17323 // the ILP32E ABI.
17324 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17325 RISCV::X13, RISCV::X14, RISCV::X15,
17326 RISCV::X16, RISCV::X17};
17327 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
17328 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17329 RISCV::X13, RISCV::X14, RISCV::X15};
17331 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
17332 return ArrayRef(ArgEGPRs);
17334 return ArrayRef(ArgIGPRs);
17337 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
17338 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used
17339 // by the save-restore libcalls, so we don't use them.
17340 static const MCPhysReg FastCCIGPRs[] = {
17341 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
17342 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
17343 RISCV::X29, RISCV::X30, RISCV::X31};
17345 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
17346 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17347 RISCV::X13, RISCV::X14, RISCV::X15,
17348 RISCV::X7};
17350 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
17351 return ArrayRef(FastCCEGPRs);
17353 return ArrayRef(FastCCIGPRs);
17356 // Pass a 2*XLEN argument that has been split into two XLEN values through
17357 // registers or the stack as necessary.
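// For example (illustrative), an i64 argument on RV32 is split into two i32
// halves: with a6 and a7 free they take the low and high halves; with only a7
// free, the low half goes in a7 and the high half goes on the stack.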
17358 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
17359 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
17360 MVT ValVT2, MVT LocVT2,
17361 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
17362 unsigned XLenInBytes = XLen / 8;
17363 const RISCVSubtarget &STI =
17364 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
17365 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());
17367 if (Register Reg = State.AllocateReg(ArgGPRs)) {
17368 // At least one half can be passed via register.
17369 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
17370 VA1.getLocVT(), CCValAssign::Full));
17371 } else {
17372 // Both halves must be passed on the stack, with proper alignment.
17373 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
17374 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
17375 Align StackAlign(XLenInBytes);
17376 if (!EABI || XLen != 32)
17377 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
17378 State.addLoc(
17379 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
17380 State.AllocateStack(XLenInBytes, StackAlign),
17381 VA1.getLocVT(), CCValAssign::Full));
17382 State.addLoc(CCValAssign::getMem(
17383 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
17384 LocVT2, CCValAssign::Full));
17385 return false;
17388 if (Register Reg = State.AllocateReg(ArgGPRs)) {
17389 // The second half can also be passed via register.
17390 State.addLoc(
17391 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
17392 } else {
17393 // The second half is passed via the stack, without additional alignment.
17394 State.addLoc(CCValAssign::getMem(
17395 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
17396 LocVT2, CCValAssign::Full));
17399 return false;
17402 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
17403 std::optional<unsigned> FirstMaskArgument,
17404 CCState &State, const RISCVTargetLowering &TLI) {
17405 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
17406 if (RC == &RISCV::VRRegClass) {
17407 // Assign the first mask argument to V0.
17408 // This is an interim calling convention and it may be changed in the
17409 // future.
17410 if (FirstMaskArgument && ValNo == *FirstMaskArgument)
17411 return State.AllocateReg(RISCV::V0);
17412 return State.AllocateReg(ArgVRs);
17414 if (RC == &RISCV::VRM2RegClass)
17415 return State.AllocateReg(ArgVRM2s);
17416 if (RC == &RISCV::VRM4RegClass)
17417 return State.AllocateReg(ArgVRM4s);
17418 if (RC == &RISCV::VRM8RegClass)
17419 return State.AllocateReg(ArgVRM8s);
17420 llvm_unreachable("Unhandled register class for ValueType");
17423 // Implements the RISC-V calling convention. Returns true upon failure.
17424 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
17425 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
17426 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
17427 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
17428 std::optional<unsigned> FirstMaskArgument) {
17429 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
17430 assert(XLen == 32 || XLen == 64);
17431 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
17433 // The static chain parameter must not be passed in normal argument registers,
17434 // so we assign t2 to it, as done in GCC's __builtin_call_with_static_chain.
17435 if (ArgFlags.isNest()) {
17436 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
17437 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17438 return false;
17442 // Any return value split into more than two values can't be returned
17443 // directly. Vectors are returned via the available vector registers.
17444 if (!LocVT.isVector() && IsRet && ValNo > 1)
17445 return true;
17447 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing
17448 // a variadic argument, or if no F16/F32 argument registers are available.
17449 bool UseGPRForF16_F32 = true;
17450 // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if
17451 // passing a variadic argument, or if no F64 argument registers are available.
17452 bool UseGPRForF64 = true;
17454 switch (ABI) {
17455 default:
17456 llvm_unreachable("Unexpected ABI");
17457 case RISCVABI::ABI_ILP32:
17458 case RISCVABI::ABI_ILP32E:
17459 case RISCVABI::ABI_LP64:
17460 case RISCVABI::ABI_LP64E:
17461 break;
17462 case RISCVABI::ABI_ILP32F:
17463 case RISCVABI::ABI_LP64F:
17464 UseGPRForF16_F32 = !IsFixed;
17465 break;
17466 case RISCVABI::ABI_ILP32D:
17467 case RISCVABI::ABI_LP64D:
17468 UseGPRForF16_F32 = !IsFixed;
17469 UseGPRForF64 = !IsFixed;
17470 break;
17473 // FPR16, FPR32, and FPR64 alias each other.
17474 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
17475 UseGPRForF16_F32 = true;
17476 UseGPRForF64 = true;
17479 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
17480 // similar local variables rather than directly checking against the target
17481 // ABI.
17483 if (UseGPRForF16_F32 &&
17484 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
17485 LocVT = XLenVT;
17486 LocInfo = CCValAssign::BCvt;
17487 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
17488 LocVT = MVT::i64;
17489 LocInfo = CCValAssign::BCvt;
17492 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
17494 // If this is a variadic argument, the RISC-V calling convention requires
17495 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
17496 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
17497 // be used regardless of whether the original argument was split during
17498 // legalisation or not. The argument will not be passed by registers if the
17499 // original type is larger than 2*XLEN, so the register alignment rule does
17500 // not apply.
17501 // TODO: To be compatible with GCC's behaviors, we don't align registers
17502 // currently if we are using ILP32E calling convention. This behavior may be
17503 // changed when RV32E/ILP32E is ratified.
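// Illustrative example (assuming the ILP32 ABI): for a call like
// printf("%lld", x), a0 holds the format string, and the 8-byte-aligned i64
// vararg x skips a1 and is passed in the aligned register pair a2/a3.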
17504 unsigned TwoXLenInBytes = (2 * XLen) / 8;
17505 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
17506 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
17507 ABI != RISCVABI::ABI_ILP32E) {
17508 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
17509 // Skip 'odd' register if necessary.
17510 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
17511 State.AllocateReg(ArgGPRs);
17514 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
17515 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
17516 State.getPendingArgFlags();
17518 assert(PendingLocs.size() == PendingArgFlags.size() &&
17519 "PendingLocs and PendingArgFlags out of sync");
17521 // Handle passing f64 on RV32D with a soft float ABI or when floating point
17522 // registers are exhausted.
17523 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
17524 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
17525 // Depending on available argument GPRs, f64 may be passed in a pair of
17526 // GPRs, split between a GPR and the stack, or passed completely on the
17527 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
17528 // cases.
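// Illustrative example: with a0/a1 free, the f64 is passed in that GPR pair;
// with only a7 free, the low half goes in a7 and the high half into a 4-byte
// stack slot; with no GPRs free, the whole value takes an 8-byte stack slot.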
17529 Register Reg = State.AllocateReg(ArgGPRs);
17530 if (!Reg) {
17531 unsigned StackOffset = State.AllocateStack(8, Align(8));
17532 State.addLoc(
17533 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17534 return false;
17536 LocVT = MVT::i32;
17537 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17538 Register HiReg = State.AllocateReg(ArgGPRs);
17539 if (HiReg) {
17540 State.addLoc(
17541 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
17542 } else {
17543 unsigned StackOffset = State.AllocateStack(4, Align(4));
17544 State.addLoc(
17545 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17547 return false;
17550 // Fixed-length vectors are located in the corresponding scalable-vector
17551 // container types.
17552 if (ValVT.isFixedLengthVector())
17553 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
17555 // Split arguments might be passed indirectly, so keep track of the pending
17556 // values. Split vectors are passed via a mix of registers and indirectly, so
17557 // treat them as we would any other argument.
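// For example, an i128 argument on RV32 is legalised into four i32 parts;
// since that is more than two parts, only its address ends up being passed
// (in a GPR or on the stack) and the parts are accessed through that address.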
17558 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
17559 LocVT = XLenVT;
17560 LocInfo = CCValAssign::Indirect;
17561 PendingLocs.push_back(
17562 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
17563 PendingArgFlags.push_back(ArgFlags);
17564 if (!ArgFlags.isSplitEnd()) {
17565 return false;
17569 // If the split argument only had two elements, it should be passed directly
17570 // in registers or on the stack.
17571 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
17572 PendingLocs.size() <= 2) {
17573 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
17574 // Apply the normal calling convention rules to the first half of the
17575 // split argument.
17576 CCValAssign VA = PendingLocs[0];
17577 ISD::ArgFlagsTy AF = PendingArgFlags[0];
17578 PendingLocs.clear();
17579 PendingArgFlags.clear();
17580 return CC_RISCVAssign2XLen(
17581 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
17582 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
17585 // Allocate to a register if possible, or else a stack slot.
17586 Register Reg;
17587 unsigned StoreSizeBytes = XLen / 8;
17588 Align StackAlign = Align(XLen / 8);
17590 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
17591 Reg = State.AllocateReg(ArgFPR16s);
17592 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
17593 Reg = State.AllocateReg(ArgFPR32s);
17594 else if (ValVT == MVT::f64 && !UseGPRForF64)
17595 Reg = State.AllocateReg(ArgFPR64s);
17596 else if (ValVT.isVector()) {
17597 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
17598 if (!Reg) {
17599 // For return values, the vector must be passed fully via registers or
17600 // via the stack.
17601 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
17602 // but we're using all of them.
17603 if (IsRet)
17604 return true;
17605 // Try using a GPR to pass the address
17606 if ((Reg = State.AllocateReg(ArgGPRs))) {
17607 LocVT = XLenVT;
17608 LocInfo = CCValAssign::Indirect;
17609 } else if (ValVT.isScalableVector()) {
17610 LocVT = XLenVT;
17611 LocInfo = CCValAssign::Indirect;
17612 } else {
17613 // Pass fixed-length vectors on the stack.
17614 LocVT = ValVT;
17615 StoreSizeBytes = ValVT.getStoreSize();
17616 // Align vectors to their element sizes, being careful for vXi1
17617 // vectors.
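// (For a vXi1 vector the element is smaller than a byte, so the computed
// MaybeAlign(0) is empty and valueOrOne() clamps the alignment to 1.)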
17618 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
17621 } else {
17622 Reg = State.AllocateReg(ArgGPRs);
17625 unsigned StackOffset =
17626 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
17628 // If we reach this point and PendingLocs is non-empty, we must be at the
17629 // end of a split argument that must be passed indirectly.
17630 if (!PendingLocs.empty()) {
17631 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
17632 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
17634 for (auto &It : PendingLocs) {
17635 if (Reg)
17636 It.convertToReg(Reg);
17637 else
17638 It.convertToMem(StackOffset);
17639 State.addLoc(It);
17641 PendingLocs.clear();
17642 PendingArgFlags.clear();
17643 return false;
17646 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
17647 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
17648 "Expected an XLenVT or vector types at this stage");
17650 if (Reg) {
17651 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17652 return false;
17655 // When a scalar floating-point value is passed on the stack, no
17656 // bit-conversion is needed.
17657 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
17658 assert(!ValVT.isVector());
17659 LocVT = ValVT;
17660 LocInfo = CCValAssign::Full;
17662 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17663 return false;
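// Return the index of the first vector-of-i1 (mask) argument, if any, so that
// allocateRVVReg can pre-assign it to V0.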
17666 template <typename ArgTy>
17667 static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
17668 for (const auto &ArgIdx : enumerate(Args)) {
17669 MVT ArgVT = ArgIdx.value().VT;
17670 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
17671 return ArgIdx.index();
17673 return std::nullopt;
17676 void RISCVTargetLowering::analyzeInputArgs(
17677 MachineFunction &MF, CCState &CCInfo,
17678 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
17679 RISCVCCAssignFn Fn) const {
17680 unsigned NumArgs = Ins.size();
17681 FunctionType *FType = MF.getFunction().getFunctionType();
17683 std::optional<unsigned> FirstMaskArgument;
17684 if (Subtarget.hasVInstructions())
17685 FirstMaskArgument = preAssignMask(Ins);
17687 for (unsigned i = 0; i != NumArgs; ++i) {
17688 MVT ArgVT = Ins[i].VT;
17689 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
17691 Type *ArgTy = nullptr;
17692 if (IsRet)
17693 ArgTy = FType->getReturnType();
17694 else if (Ins[i].isOrigArg())
17695 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
17697 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
17698 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
17699 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
17700 FirstMaskArgument)) {
17701 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
17702 << ArgVT << '\n');
17703 llvm_unreachable(nullptr);
17708 void RISCVTargetLowering::analyzeOutputArgs(
17709 MachineFunction &MF, CCState &CCInfo,
17710 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
17711 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
17712 unsigned NumArgs = Outs.size();
17714 std::optional<unsigned> FirstMaskArgument;
17715 if (Subtarget.hasVInstructions())
17716 FirstMaskArgument = preAssignMask(Outs);
17718 for (unsigned i = 0; i != NumArgs; i++) {
17719 MVT ArgVT = Outs[i].VT;
17720 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
17721 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
17723 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
17724 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
17725 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
17726 FirstMaskArgument)) {
17727 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
17728 << ArgVT << "\n");
17729 llvm_unreachable(nullptr);
17734 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
17735 // values.
17736 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
17737 const CCValAssign &VA, const SDLoc &DL,
17738 const RISCVSubtarget &Subtarget) {
17739 switch (VA.getLocInfo()) {
17740 default:
17741 llvm_unreachable("Unexpected CCValAssign::LocInfo");
17742 case CCValAssign::Full:
17743 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
17744 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
17745 break;
17746 case CCValAssign::BCvt:
17747 if (VA.getLocVT().isInteger() &&
17748 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
17749 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
17750 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
17751 if (RV64LegalI32) {
17752 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
17753 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
17754 } else {
17755 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
17757 } else {
17758 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
17760 break;
17762 return Val;
17765 // The caller is responsible for loading the full value if the argument is
17766 // passed with CCValAssign::Indirect.
17767 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
17768 const CCValAssign &VA, const SDLoc &DL,
17769 const ISD::InputArg &In,
17770 const RISCVTargetLowering &TLI) {
17771 MachineFunction &MF = DAG.getMachineFunction();
17772 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17773 EVT LocVT = VA.getLocVT();
17774 SDValue Val;
17775 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
17776 Register VReg = RegInfo.createVirtualRegister(RC);
17777 RegInfo.addLiveIn(VA.getLocReg(), VReg);
17778 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
17780 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
17781 if (In.isOrigArg()) {
17782 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
17783 if (OrigArg->getType()->isIntegerTy()) {
17784 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
17785 // An input zero extended from i31 can also be considered sign extended.
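// (A value zero-extended from fewer than 32 bits has bit 31 equal to zero,
// so it is also sign-extended from 32 bits.)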
17786 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
17787 (BitWidth < 32 && In.Flags.isZExt())) {
17788 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
17789 RVFI->addSExt32Register(VReg);
17794 if (VA.getLocInfo() == CCValAssign::Indirect)
17795 return Val;
17797 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
17800 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
17801 const CCValAssign &VA, const SDLoc &DL,
17802 const RISCVSubtarget &Subtarget) {
17803 EVT LocVT = VA.getLocVT();
17805 switch (VA.getLocInfo()) {
17806 default:
17807 llvm_unreachable("Unexpected CCValAssign::LocInfo");
17808 case CCValAssign::Full:
17809 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
17810 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
17811 break;
17812 case CCValAssign::BCvt:
17813 if (LocVT.isInteger() &&
17814 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
17815 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
17816 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
17817 if (RV64LegalI32) {
17818 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
17819 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
17820 } else {
17821 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
17823 } else {
17824 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
17826 break;
17828 return Val;
17831 // The caller is responsible for loading the full value if the argument is
17832 // passed with CCValAssign::Indirect.
17833 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
17834 const CCValAssign &VA, const SDLoc &DL) {
17835 MachineFunction &MF = DAG.getMachineFunction();
17836 MachineFrameInfo &MFI = MF.getFrameInfo();
17837 EVT LocVT = VA.getLocVT();
17838 EVT ValVT = VA.getValVT();
17839 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
17840 if (ValVT.isScalableVector()) {
17841 // When the value is a scalable vector, what is saved on the stack is a
17842 // pointer to the scalable-vector value rather than the value itself, so the
17843 // ValVT to load is the pointer type instead of the scalable vector type.
17844 ValVT = LocVT;
17846 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
17847 /*IsImmutable=*/true);
17848 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
17849 SDValue Val;
17851 ISD::LoadExtType ExtType;
17852 switch (VA.getLocInfo()) {
17853 default:
17854 llvm_unreachable("Unexpected CCValAssign::LocInfo");
17855 case CCValAssign::Full:
17856 case CCValAssign::Indirect:
17857 case CCValAssign::BCvt:
17858 ExtType = ISD::NON_EXTLOAD;
17859 break;
17861 Val = DAG.getExtLoad(
17862 ExtType, DL, LocVT, Chain, FIN,
17863 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
17864 return Val;
17867 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
17868 const CCValAssign &VA,
17869 const CCValAssign &HiVA,
17870 const SDLoc &DL) {
17871 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
17872 "Unexpected VA");
17873 MachineFunction &MF = DAG.getMachineFunction();
17874 MachineFrameInfo &MFI = MF.getFrameInfo();
17875 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17877 assert(VA.isRegLoc() && "Expected register VA assignment");
17879 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17880 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
17881 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
17882 SDValue Hi;
17883 if (HiVA.isMemLoc()) {
17884 // Second half of f64 is passed on the stack.
17885 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
17886 /*IsImmutable=*/true);
17887 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
17888 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
17889 MachinePointerInfo::getFixedStack(MF, FI));
17890 } else {
17891 // Second half of f64 is passed in another GPR.
17892 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17893 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
17894 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
17896 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
17899 // FastCC gives less than a 1% performance improvement on some particular
17900 // benchmarks, but it may theoretically benefit other cases.
17901 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
17902 unsigned ValNo, MVT ValVT, MVT LocVT,
17903 CCValAssign::LocInfo LocInfo,
17904 ISD::ArgFlagsTy ArgFlags, CCState &State,
17905 bool IsFixed, bool IsRet, Type *OrigTy,
17906 const RISCVTargetLowering &TLI,
17907 std::optional<unsigned> FirstMaskArgument) {
17908 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17909 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
17910 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17911 return false;
17915 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
17917 if (LocVT == MVT::f16 &&
17918 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
17919 static const MCPhysReg FPR16List[] = {
17920 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
17921 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
17922 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
17923 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
17924 if (unsigned Reg = State.AllocateReg(FPR16List)) {
17925 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17926 return false;
17930 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17931 static const MCPhysReg FPR32List[] = {
17932 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
17933 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
17934 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
17935 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
17936 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17937 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17938 return false;
17942 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17943 static const MCPhysReg FPR64List[] = {
17944 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
17945 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
17946 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
17947 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
17948 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17949 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17950 return false;
17954 // Check if there is an available GPR before hitting the stack.
17955 if ((LocVT == MVT::f16 &&
17956 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
17957 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17958 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
17959 Subtarget.hasStdExtZdinx())) {
17960 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
17961 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17962 return false;
17966 if (LocVT == MVT::f16) {
17967 unsigned Offset2 = State.AllocateStack(2, Align(2));
17968 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
17969 return false;
17972 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
17973 unsigned Offset4 = State.AllocateStack(4, Align(4));
17974 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
17975 return false;
17978 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
17979 unsigned Offset5 = State.AllocateStack(8, Align(8));
17980 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
17981 return false;
17984 if (LocVT.isVector()) {
17985 if (unsigned Reg =
17986 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
17987 // Fixed-length vectors are located in the corresponding scalable-vector
17988 // container types.
17989 if (ValVT.isFixedLengthVector())
17990 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
17991 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17992 } else {
17993 // Try to pass the address via a "fast" GPR.
17994 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
17995 LocInfo = CCValAssign::Indirect;
17996 LocVT = TLI.getSubtarget().getXLenVT();
17997 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
17998 } else if (ValVT.isFixedLengthVector()) {
17999 auto StackAlign =
18000 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18001 unsigned StackOffset =
18002 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18003 State.addLoc(
18004 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18005 } else {
18006 // Can't pass scalable vectors on the stack.
18007 return true;
18011 return false;
18014 return true; // CC didn't match.
18017 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18018 CCValAssign::LocInfo LocInfo,
18019 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18020 if (ArgFlags.isNest()) {
18021 report_fatal_error(
18022 "Attribute 'nest' is not supported in GHC calling convention");
18025 static const MCPhysReg GPRList[] = {
18026 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18027 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18029 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18030 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18031 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18032 if (unsigned Reg = State.AllocateReg(GPRList)) {
18033 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18034 return false;
18038 const RISCVSubtarget &Subtarget =
18039 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18041 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18042 // Pass in STG registers: F1, ..., F6
18043 // fs0 ... fs5
18044 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18045 RISCV::F18_F, RISCV::F19_F,
18046 RISCV::F20_F, RISCV::F21_F};
18047 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18048 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18049 return false;
18053 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18054 // Pass in STG registers: D1, ..., D6
18055 // fs6 ... fs11
18056 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18057 RISCV::F24_D, RISCV::F25_D,
18058 RISCV::F26_D, RISCV::F27_D};
18059 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18060 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18061 return false;
18065 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18066 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18067 Subtarget.is64Bit())) {
18068 if (unsigned Reg = State.AllocateReg(GPRList)) {
18069 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18070 return false;
18074 report_fatal_error("No registers left in GHC calling convention");
18075 return true;
18078 // Transform physical registers into virtual registers.
18079 SDValue RISCVTargetLowering::LowerFormalArguments(
18080 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18081 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18082 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18084 MachineFunction &MF = DAG.getMachineFunction();
18086 switch (CallConv) {
18087 default:
18088 report_fatal_error("Unsupported calling convention");
18089 case CallingConv::C:
18090 case CallingConv::Fast:
18091 case CallingConv::SPIR_KERNEL:
18092 case CallingConv::GRAAL:
18093 break;
18094 case CallingConv::GHC:
18095 if (Subtarget.isRVE())
18096 report_fatal_error("GHC calling convention is not supported on RVE!");
18097 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18098 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
18099 "(Zdinx/D) instruction set extensions");
18102 const Function &Func = MF.getFunction();
18103 if (Func.hasFnAttribute("interrupt")) {
18104 if (!Func.arg_empty())
18105 report_fatal_error(
18106 "Functions with the interrupt attribute cannot have arguments!");
18108 StringRef Kind =
18109 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18111 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18112 report_fatal_error(
18113 "Function interrupt attribute argument not supported!");
18116 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18117 MVT XLenVT = Subtarget.getXLenVT();
18118 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18119 // Used with varargs to accumulate store chains.
18120 std::vector<SDValue> OutChains;
18122 // Assign locations to all of the incoming arguments.
18123 SmallVector<CCValAssign, 16> ArgLocs;
18124 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18126 if (CallConv == CallingConv::GHC)
18127 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
18128 else
18129 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18130 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18131 : RISCV::CC_RISCV);
18133 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18134 CCValAssign &VA = ArgLocs[i];
18135 SDValue ArgValue;
18136 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18137 // case.
18138 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18139 assert(VA.needsCustom());
18140 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
18141 } else if (VA.isRegLoc())
18142 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
18143 else
18144 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
18146 if (VA.getLocInfo() == CCValAssign::Indirect) {
18147 // If the original argument was split and passed by reference (e.g. i128
18148 // on RV32), we need to load all parts of it here (using the same
18149 // address). Vectors may be partly split to registers and partly to the
18150 // stack, in which case the base address is partly offset and subsequent
18151 // loads are relative to that.
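// Illustrative example: for an i128 argument on RV32 passed indirectly, the
// first load below reads the low i32 at offset 0 and the loop then loads the
// remaining parts at offsets 4, 8 and 12 from the same base address.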
18152 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
18153 MachinePointerInfo()));
18154 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
18155 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
18156 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18157 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
18158 CCValAssign &PartVA = ArgLocs[i + 1];
18159 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
18160 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
18161 if (PartVA.getValVT().isScalableVector())
18162 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
18163 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
18164 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
18165 MachinePointerInfo()));
18166 ++i;
18167 ++InsIdx;
18169 continue;
18171 InVals.push_back(ArgValue);
18174 if (any_of(ArgLocs,
18175 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
18176 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
18178 if (IsVarArg) {
18179 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
18180 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
18181 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
18182 MachineFrameInfo &MFI = MF.getFrameInfo();
18183 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18184 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18186 // Size of the vararg save area. For now, the varargs save area is either
18187 // zero or large enough to hold a0-a7.
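// E.g. on RV64 (non-E ABI) with two named GPR arguments, a2-a7 still need to
// be saved, giving a save area of 6 * 8 = 48 bytes.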
18188 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
18189 int FI;
18191 // If all registers are allocated, then all varargs must be passed on the
18192 // stack and we don't need to save any argregs.
18193 if (VarArgsSaveSize == 0) {
18194 int VaArgOffset = CCInfo.getStackSize();
18195 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
18196 } else {
18197 int VaArgOffset = -VarArgsSaveSize;
18198 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
18200 // If saving an odd number of registers then create an extra stack slot to
18201 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
18202 // offsets to even-numbered registers remain 2*XLEN-aligned.
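// E.g. on RV32 with three named GPR arguments (Idx == 3), a3-a7 (20 bytes)
// are saved; the extra 4-byte slot below keeps the save area 8-byte aligned.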
18203 if (Idx % 2) {
18204 MFI.CreateFixedObject(
18205 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
18206 VarArgsSaveSize += XLenInBytes;
18209 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18211 // Copy the integer registers that may have been used for passing varargs
18212 // to the vararg save area.
18213 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
18214 const Register Reg = RegInfo.createVirtualRegister(RC);
18215 RegInfo.addLiveIn(ArgRegs[I], Reg);
18216 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
18217 SDValue Store = DAG.getStore(
18218 Chain, DL, ArgValue, FIN,
18219 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
18220 OutChains.push_back(Store);
18221 FIN =
18222 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
18226 // Record the frame index of the first variable argument,
18227 // which is needed by VASTART.
18228 RVFI->setVarArgsFrameIndex(FI);
18229 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
18232 // All stores are grouped into one token-factor node so that the sizes of Ins
18233 // and InVals still match. This only happens for vararg functions.
18234 if (!OutChains.empty()) {
18235 OutChains.push_back(Chain);
18236 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
18239 return Chain;
18242 /// isEligibleForTailCallOptimization - Check whether the call is eligible
18243 /// for tail call optimization.
18244 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
18245 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
18246 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
18247 const SmallVector<CCValAssign, 16> &ArgLocs) const {
18249 auto CalleeCC = CLI.CallConv;
18250 auto &Outs = CLI.Outs;
18251 auto &Caller = MF.getFunction();
18252 auto CallerCC = Caller.getCallingConv();
18254 // Exception-handling functions need a special set of instructions to
18255 // indicate a return to the hardware. Tail-calling another function would
18256 // probably break this.
18257 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
18258 // should be expanded as new function attributes are introduced.
18259 if (Caller.hasFnAttribute("interrupt"))
18260 return false;
18262 // Do not tail call opt if the stack is used to pass parameters.
18263 if (CCInfo.getStackSize() != 0)
18264 return false;
18266 // Do not tail call opt if any parameters need to be passed indirectly.
18267 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
18268 // passed indirectly. The address of the value is then passed in a register,
18269 // or placed on the stack if no register is available. Passing indirectly
18270 // often requires allocating stack space to hold the value, so the
18271 // CCInfo.getStackSize() != 0 check above is not enough on its own; we also
18272 // need to check whether any CCValAssign in ArgLocs is
18273 // CCValAssign::Indirect.
18274 for (auto &VA : ArgLocs)
18275 if (VA.getLocInfo() == CCValAssign::Indirect)
18276 return false;
18278 // Do not tail call opt if either caller or callee uses struct return
18279 // semantics.
18280 auto IsCallerStructRet = Caller.hasStructRetAttr();
18281 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
18282 if (IsCallerStructRet || IsCalleeStructRet)
18283 return false;
18285 // The callee has to preserve all registers the caller needs to preserve.
18286 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
18287 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
18288 if (CalleeCC != CallerCC) {
18289 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
18290 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
18291 return false;
18294 // Byval parameters hand the function a pointer directly into the stack area
18295 // we want to reuse during a tail call. Working around this *is* possible
18296 // but less efficient and uglier in LowerCall.
18297 for (auto &Arg : Outs)
18298 if (Arg.Flags.isByVal())
18299 return false;
18301 return true;
18304 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
18305 return DAG.getDataLayout().getPrefTypeAlign(
18306 VT.getTypeForEVT(*DAG.getContext()));
18309 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
18310 // and output parameter nodes.
18311 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
18312 SmallVectorImpl<SDValue> &InVals) const {
18313 SelectionDAG &DAG = CLI.DAG;
18314 SDLoc &DL = CLI.DL;
18315 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
18316 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
18317 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
18318 SDValue Chain = CLI.Chain;
18319 SDValue Callee = CLI.Callee;
18320 bool &IsTailCall = CLI.IsTailCall;
18321 CallingConv::ID CallConv = CLI.CallConv;
18322 bool IsVarArg = CLI.IsVarArg;
18323 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18324 MVT XLenVT = Subtarget.getXLenVT();
18326 MachineFunction &MF = DAG.getMachineFunction();
18328 // Analyze the operands of the call, assigning locations to each operand.
18329 SmallVector<CCValAssign, 16> ArgLocs;
18330 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18332 if (CallConv == CallingConv::GHC) {
18333 if (Subtarget.isRVE())
18334 report_fatal_error("GHC calling convention is not supported on RVE!");
18335 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
18336 } else
18337 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
18338 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18339 : RISCV::CC_RISCV);
18341 // Check if it's really possible to do a tail call.
18342 if (IsTailCall)
18343 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
18345 if (IsTailCall)
18346 ++NumTailCalls;
18347 else if (CLI.CB && CLI.CB->isMustTailCall())
18348 report_fatal_error("failed to perform tail call elimination on a call "
18349 "site marked musttail");
18351 // Get a count of how many bytes are to be pushed on the stack.
18352 unsigned NumBytes = ArgCCInfo.getStackSize();
18354 // Create local copies for byval args
18355 SmallVector<SDValue, 8> ByValArgs;
18356 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
18357 ISD::ArgFlagsTy Flags = Outs[i].Flags;
18358 if (!Flags.isByVal())
18359 continue;
18361 SDValue Arg = OutVals[i];
18362 unsigned Size = Flags.getByValSize();
18363 Align Alignment = Flags.getNonZeroByValAlign();
18365 int FI =
18366 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
18367 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
18368 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
18370 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
18371 /*IsVolatile=*/false,
18372 /*AlwaysInline=*/false, IsTailCall,
18373 MachinePointerInfo(), MachinePointerInfo());
18374 ByValArgs.push_back(FIPtr);
18377 if (!IsTailCall)
18378 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
18380 // Copy argument values to their designated locations.
18381 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
18382 SmallVector<SDValue, 8> MemOpChains;
18383 SDValue StackPtr;
18384 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
18385 ++i, ++OutIdx) {
18386 CCValAssign &VA = ArgLocs[i];
18387 SDValue ArgValue = OutVals[OutIdx];
18388 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
18390 // Handle passing f64 on RV32D with a soft float ABI as a special case.
18391 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18392 assert(VA.isRegLoc() && "Expected register VA assignment");
18393 assert(VA.needsCustom());
18394 SDValue SplitF64 = DAG.getNode(
18395 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
18396 SDValue Lo = SplitF64.getValue(0);
18397 SDValue Hi = SplitF64.getValue(1);
18399 Register RegLo = VA.getLocReg();
18400 RegsToPass.push_back(std::make_pair(RegLo, Lo));
18402 // Get the CCValAssign for the Hi part.
18403 CCValAssign &HiVA = ArgLocs[++i];
18405 if (HiVA.isMemLoc()) {
18406 // Second half of f64 is passed on the stack.
18407 if (!StackPtr.getNode())
18408 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
18409 SDValue Address =
18410 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
18411 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
18412 // Emit the store.
18413 MemOpChains.push_back(
18414 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
18415 } else {
18416 // Second half of f64 is passed in another GPR.
18417 Register RegHigh = HiVA.getLocReg();
18418 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
18420 continue;
18423 // Promote the value if needed.
18424 // For now, only handle fully promoted and indirect arguments.
18425 if (VA.getLocInfo() == CCValAssign::Indirect) {
18426 // Store the argument in a stack slot and pass its address.
18427 Align StackAlign =
18428 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
18429 getPrefTypeAlign(ArgValue.getValueType(), DAG));
18430 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
18431 // If the original argument was split (e.g. i128), we need
18432 // to store the required parts of it here (and pass just one address).
18433 // Vectors may be partly split to registers and partly to the stack, in
18434 // which case the base address is partly offset and subsequent stores are
18435 // relative to that.
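// Illustrative example: for an i128 argument on RV32, a 16-byte stack
// temporary is created below, the four i32 parts are stored at offsets 0, 4,
// 8 and 12, and only the temporary's address is passed.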
18436 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
18437 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
18438 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18439 // Calculate the total size to store. We don't know what we're actually
18440 // storing ahead of time, so walk the remaining parts in this loop and
18441 // collect that info.
18442 SmallVector<std::pair<SDValue, SDValue>> Parts;
18443 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
18444 SDValue PartValue = OutVals[OutIdx + 1];
18445 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
18446 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
18447 EVT PartVT = PartValue.getValueType();
18448 if (PartVT.isScalableVector())
18449 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
18450 StoredSize += PartVT.getStoreSize();
18451 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
18452 Parts.push_back(std::make_pair(PartValue, Offset));
18453 ++i;
18454 ++OutIdx;
18456 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
18457 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
18458 MemOpChains.push_back(
18459 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
18460 MachinePointerInfo::getFixedStack(MF, FI)));
18461 for (const auto &Part : Parts) {
18462 SDValue PartValue = Part.first;
18463 SDValue PartOffset = Part.second;
18464 SDValue Address =
18465 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
18466 MemOpChains.push_back(
18467 DAG.getStore(Chain, DL, PartValue, Address,
18468 MachinePointerInfo::getFixedStack(MF, FI)));
18470 ArgValue = SpillSlot;
18471 } else {
18472 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
18475 // Use local copy if it is a byval arg.
18476 if (Flags.isByVal())
18477 ArgValue = ByValArgs[j++];
18479 if (VA.isRegLoc()) {
18480 // Queue up the argument copies and emit them at the end.
18481 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
18482 } else {
18483 assert(VA.isMemLoc() && "Argument not register or memory");
18484 assert(!IsTailCall && "Tail call not allowed if stack is used "
18485 "for passing parameters");
18487 // Work out the address of the stack slot.
18488 if (!StackPtr.getNode())
18489 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
18490 SDValue Address =
18491 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
18492 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
18494 // Emit the store.
18495 MemOpChains.push_back(
18496 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
18500 // Join the stores, which are independent of one another.
18501 if (!MemOpChains.empty())
18502 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
18504 SDValue Glue;
18506 // Build a sequence of copy-to-reg nodes, chained and glued together.
18507 for (auto &Reg : RegsToPass) {
18508 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
18509 Glue = Chain.getValue(1);
18512 // Validate that none of the argument registers have been marked as
18513 // reserved; if any have, report an error. Do the same for the return address
18514 // if this is not a tail call.
18515 validateCCReservedRegs(RegsToPass, MF);
18516 if (!IsTailCall &&
18517 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
18518 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
18519 MF.getFunction(),
18520 "Return address register required, but has been reserved."});
18522 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
18523 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
18524 // split it, and so the direct call can be matched by PseudoCALL.
18525 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
18526 const GlobalValue *GV = S->getGlobal();
18527 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
18528 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
18529 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
18532 // The first call operand is the chain and the second is the target address.
18533 SmallVector<SDValue, 8> Ops;
18534 Ops.push_back(Chain);
18535 Ops.push_back(Callee);
18537 // Add argument registers to the end of the list so that they are
18538 // known live into the call.
18539 for (auto &Reg : RegsToPass)
18540 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
18542 if (!IsTailCall) {
18543 // Add a register mask operand representing the call-preserved registers.
18544 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
18545 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
18546 assert(Mask && "Missing call preserved mask for calling convention");
18547 Ops.push_back(DAG.getRegisterMask(Mask));
18550 // Glue the call to the argument copies, if any.
18551 if (Glue.getNode())
18552 Ops.push_back(Glue);
18554 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
18555 "Unexpected CFI type for a direct call");
18557 // Emit the call.
18558 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
18560 if (IsTailCall) {
18561 MF.getFrameInfo().setHasTailCall();
18562 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
18563 if (CLI.CFIType)
18564 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
18565 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
18566 return Ret;
18569 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
18570 if (CLI.CFIType)
18571 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
18572 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
18573 Glue = Chain.getValue(1);
18575 // Mark the end of the call, which is glued to the call itself.
18576 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
18577 Glue = Chain.getValue(1);
18579 // Assign locations to each value returned by this call.
18580 SmallVector<CCValAssign, 16> RVLocs;
18581 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
18582 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
18584 // Copy all of the result registers out of their specified physreg.
18585 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
18586 auto &VA = RVLocs[i];
18587 // Copy the value out
18588 SDValue RetValue =
18589 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
18590 // Glue the RetValue to the end of the call sequence
18591 Chain = RetValue.getValue(1);
18592 Glue = RetValue.getValue(2);
18594 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18595 assert(VA.needsCustom());
18596 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
18597 MVT::i32, Glue);
18598 Chain = RetValue2.getValue(1);
18599 Glue = RetValue2.getValue(2);
18600 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
18601 RetValue2);
18604 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
18606 InVals.push_back(RetValue);
18609 return Chain;
18612 bool RISCVTargetLowering::CanLowerReturn(
18613 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
18614 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
18615 SmallVector<CCValAssign, 16> RVLocs;
18616 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
18618 std::optional<unsigned> FirstMaskArgument;
18619 if (Subtarget.hasVInstructions())
18620 FirstMaskArgument = preAssignMask(Outs);
18622 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
18623 MVT VT = Outs[i].VT;
18624 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18625 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18626 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
18627 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
18628 *this, FirstMaskArgument))
18629 return false;
18631 return true;
18634 SDValue
18635 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
18636 bool IsVarArg,
18637 const SmallVectorImpl<ISD::OutputArg> &Outs,
18638 const SmallVectorImpl<SDValue> &OutVals,
18639 const SDLoc &DL, SelectionDAG &DAG) const {
18640 MachineFunction &MF = DAG.getMachineFunction();
18641 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
18643 // Stores the assignment of the return value to a location.
18644 SmallVector<CCValAssign, 16> RVLocs;
18646 // Info about the registers and stack slot.
18647 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
18648 *DAG.getContext());
18650 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
18651 nullptr, RISCV::CC_RISCV);
18653 if (CallConv == CallingConv::GHC && !RVLocs.empty())
18654 report_fatal_error("GHC functions return void only");
18656 SDValue Glue;
18657 SmallVector<SDValue, 4> RetOps(1, Chain);
18659 // Copy the result values into the output registers.
18660 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
18661 SDValue Val = OutVals[OutIdx];
18662 CCValAssign &VA = RVLocs[i];
18663 assert(VA.isRegLoc() && "Can only return in registers!");
18665 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18666 // Handle returning f64 on RV32D with a soft float ABI.
18667 assert(VA.isRegLoc() && "Expected return via registers");
18668 assert(VA.needsCustom());
18669 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
18670 DAG.getVTList(MVT::i32, MVT::i32), Val);
18671 SDValue Lo = SplitF64.getValue(0);
18672 SDValue Hi = SplitF64.getValue(1);
18673 Register RegLo = VA.getLocReg();
18674 Register RegHi = RVLocs[++i].getLocReg();
18676 if (STI.isRegisterReservedByUser(RegLo) ||
18677 STI.isRegisterReservedByUser(RegHi))
18678 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
18679 MF.getFunction(),
18680 "Return value register required, but has been reserved."});
18682 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
18683 Glue = Chain.getValue(1);
18684 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
18685 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
18686 Glue = Chain.getValue(1);
18687 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
18688 } else {
18689 // Handle a 'normal' return.
18690 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
18691 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
18693 if (STI.isRegisterReservedByUser(VA.getLocReg()))
18694 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
18695 MF.getFunction(),
18696 "Return value register required, but has been reserved."});
18698 // Guarantee that all emitted copies are stuck together.
18699 Glue = Chain.getValue(1);
18700 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
18704 RetOps[0] = Chain; // Update chain.
18706 // Add the glue node if we have it.
18707 if (Glue.getNode()) {
18708 RetOps.push_back(Glue);
18711 if (any_of(RVLocs,
18712 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
18713 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
18715 unsigned RetOpc = RISCVISD::RET_GLUE;
18716 // Interrupt service routines use different return instructions.
18717 const Function &Func = DAG.getMachineFunction().getFunction();
18718 if (Func.hasFnAttribute("interrupt")) {
18719 if (!Func.getReturnType()->isVoidTy())
18720 report_fatal_error(
18721 "Functions with the interrupt attribute must have void return type!");
18723 MachineFunction &MF = DAG.getMachineFunction();
18724 StringRef Kind =
18725 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18727 if (Kind == "supervisor")
18728 RetOpc = RISCVISD::SRET_GLUE;
18729 else
18730 RetOpc = RISCVISD::MRET_GLUE;
18733 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
18736 void RISCVTargetLowering::validateCCReservedRegs(
18737 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
18738 MachineFunction &MF) const {
18739 const Function &F = MF.getFunction();
18740 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
18742 if (llvm::any_of(Regs, [&STI](auto Reg) {
18743 return STI.isRegisterReservedByUser(Reg.first);
18745 F.getContext().diagnose(DiagnosticInfoUnsupported{
18746 F, "Argument register required, but has been reserved."});
18749 // Check if the result of the node is only used as a return value, as
18750 // otherwise we can't perform a tail-call.
18751 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
18752 if (N->getNumValues() != 1)
18753 return false;
18754 if (!N->hasNUsesOfValue(1, 0))
18755 return false;
18757 SDNode *Copy = *N->use_begin();
18759 if (Copy->getOpcode() == ISD::BITCAST) {
18760 return isUsedByReturnOnly(Copy, Chain);
18763 // TODO: Handle additional opcodes in order to support tail-calling libcalls
18764 // with soft float ABIs.
18765 if (Copy->getOpcode() != ISD::CopyToReg) {
18766 return false;
18769 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
18770 // isn't safe to perform a tail call.
18771 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
18772 return false;
18774 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
18775 bool HasRet = false;
18776 for (SDNode *Node : Copy->uses()) {
18777 if (Node->getOpcode() != RISCVISD::RET_GLUE)
18778 return false;
18779 HasRet = true;
18781 if (!HasRet)
18782 return false;
18784 Chain = Copy->getOperand(0);
18785 return true;
18788 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18789 return CI->isTailCall();
18792 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
18793 #define NODE_NAME_CASE(NODE) \
18794 case RISCVISD::NODE: \
18795 return "RISCVISD::" #NODE;
18796 // clang-format off
18797 switch ((RISCVISD::NodeType)Opcode) {
18798 case RISCVISD::FIRST_NUMBER:
18799 break;
18800 NODE_NAME_CASE(RET_GLUE)
18801 NODE_NAME_CASE(SRET_GLUE)
18802 NODE_NAME_CASE(MRET_GLUE)
18803 NODE_NAME_CASE(CALL)
18804 NODE_NAME_CASE(SELECT_CC)
18805 NODE_NAME_CASE(BR_CC)
18806 NODE_NAME_CASE(BuildPairF64)
18807 NODE_NAME_CASE(SplitF64)
18808 NODE_NAME_CASE(TAIL)
18809 NODE_NAME_CASE(ADD_LO)
18810 NODE_NAME_CASE(HI)
18811 NODE_NAME_CASE(LLA)
18812 NODE_NAME_CASE(ADD_TPREL)
18813 NODE_NAME_CASE(MULHSU)
18814 NODE_NAME_CASE(SLLW)
18815 NODE_NAME_CASE(SRAW)
18816 NODE_NAME_CASE(SRLW)
18817 NODE_NAME_CASE(DIVW)
18818 NODE_NAME_CASE(DIVUW)
18819 NODE_NAME_CASE(REMUW)
18820 NODE_NAME_CASE(ROLW)
18821 NODE_NAME_CASE(RORW)
18822 NODE_NAME_CASE(CLZW)
18823 NODE_NAME_CASE(CTZW)
18824 NODE_NAME_CASE(ABSW)
18825 NODE_NAME_CASE(FMV_H_X)
18826 NODE_NAME_CASE(FMV_X_ANYEXTH)
18827 NODE_NAME_CASE(FMV_X_SIGNEXTH)
18828 NODE_NAME_CASE(FMV_W_X_RV64)
18829 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
18830 NODE_NAME_CASE(FCVT_X)
18831 NODE_NAME_CASE(FCVT_XU)
18832 NODE_NAME_CASE(FCVT_W_RV64)
18833 NODE_NAME_CASE(FCVT_WU_RV64)
18834 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
18835 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
18836 NODE_NAME_CASE(FP_ROUND_BF16)
18837 NODE_NAME_CASE(FP_EXTEND_BF16)
18838 NODE_NAME_CASE(FROUND)
18839 NODE_NAME_CASE(FCLASS)
18840 NODE_NAME_CASE(FMAX)
18841 NODE_NAME_CASE(FMIN)
18842 NODE_NAME_CASE(READ_CYCLE_WIDE)
18843 NODE_NAME_CASE(BREV8)
18844 NODE_NAME_CASE(ORC_B)
18845 NODE_NAME_CASE(ZIP)
18846 NODE_NAME_CASE(UNZIP)
18847 NODE_NAME_CASE(CLMUL)
18848 NODE_NAME_CASE(CLMULH)
18849 NODE_NAME_CASE(CLMULR)
18850 NODE_NAME_CASE(SHA256SIG0)
18851 NODE_NAME_CASE(SHA256SIG1)
18852 NODE_NAME_CASE(SHA256SUM0)
18853 NODE_NAME_CASE(SHA256SUM1)
18854 NODE_NAME_CASE(SM4KS)
18855 NODE_NAME_CASE(SM4ED)
18856 NODE_NAME_CASE(SM3P0)
18857 NODE_NAME_CASE(SM3P1)
18858 NODE_NAME_CASE(TH_LWD)
18859 NODE_NAME_CASE(TH_LWUD)
18860 NODE_NAME_CASE(TH_LDD)
18861 NODE_NAME_CASE(TH_SWD)
18862 NODE_NAME_CASE(TH_SDD)
18863 NODE_NAME_CASE(VMV_V_V_VL)
18864 NODE_NAME_CASE(VMV_V_X_VL)
18865 NODE_NAME_CASE(VFMV_V_F_VL)
18866 NODE_NAME_CASE(VMV_X_S)
18867 NODE_NAME_CASE(VMV_S_X_VL)
18868 NODE_NAME_CASE(VFMV_S_F_VL)
18869 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
18870 NODE_NAME_CASE(READ_VLENB)
18871 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
18872 NODE_NAME_CASE(VSLIDEUP_VL)
18873 NODE_NAME_CASE(VSLIDE1UP_VL)
18874 NODE_NAME_CASE(VSLIDEDOWN_VL)
18875 NODE_NAME_CASE(VSLIDE1DOWN_VL)
18876 NODE_NAME_CASE(VFSLIDE1UP_VL)
18877 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
18878 NODE_NAME_CASE(VID_VL)
18879 NODE_NAME_CASE(VFNCVT_ROD_VL)
18880 NODE_NAME_CASE(VECREDUCE_ADD_VL)
18881 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
18882 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
18883 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
18884 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
18885 NODE_NAME_CASE(VECREDUCE_AND_VL)
18886 NODE_NAME_CASE(VECREDUCE_OR_VL)
18887 NODE_NAME_CASE(VECREDUCE_XOR_VL)
18888 NODE_NAME_CASE(VECREDUCE_FADD_VL)
18889 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
18890 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
18891 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
18892 NODE_NAME_CASE(ADD_VL)
18893 NODE_NAME_CASE(AND_VL)
18894 NODE_NAME_CASE(MUL_VL)
18895 NODE_NAME_CASE(OR_VL)
18896 NODE_NAME_CASE(SDIV_VL)
18897 NODE_NAME_CASE(SHL_VL)
18898 NODE_NAME_CASE(SREM_VL)
18899 NODE_NAME_CASE(SRA_VL)
18900 NODE_NAME_CASE(SRL_VL)
18901 NODE_NAME_CASE(ROTL_VL)
18902 NODE_NAME_CASE(ROTR_VL)
18903 NODE_NAME_CASE(SUB_VL)
18904 NODE_NAME_CASE(UDIV_VL)
18905 NODE_NAME_CASE(UREM_VL)
18906 NODE_NAME_CASE(XOR_VL)
18907 NODE_NAME_CASE(AVGFLOORU_VL)
18908 NODE_NAME_CASE(AVGCEILU_VL)
18909 NODE_NAME_CASE(SADDSAT_VL)
18910 NODE_NAME_CASE(UADDSAT_VL)
18911 NODE_NAME_CASE(SSUBSAT_VL)
18912 NODE_NAME_CASE(USUBSAT_VL)
18913 NODE_NAME_CASE(FADD_VL)
18914 NODE_NAME_CASE(FSUB_VL)
18915 NODE_NAME_CASE(FMUL_VL)
18916 NODE_NAME_CASE(FDIV_VL)
18917 NODE_NAME_CASE(FNEG_VL)
18918 NODE_NAME_CASE(FABS_VL)
18919 NODE_NAME_CASE(FSQRT_VL)
18920 NODE_NAME_CASE(FCLASS_VL)
18921 NODE_NAME_CASE(VFMADD_VL)
18922 NODE_NAME_CASE(VFNMADD_VL)
18923 NODE_NAME_CASE(VFMSUB_VL)
18924 NODE_NAME_CASE(VFNMSUB_VL)
18925 NODE_NAME_CASE(VFWMADD_VL)
18926 NODE_NAME_CASE(VFWNMADD_VL)
18927 NODE_NAME_CASE(VFWMSUB_VL)
18928 NODE_NAME_CASE(VFWNMSUB_VL)
18929 NODE_NAME_CASE(FCOPYSIGN_VL)
18930 NODE_NAME_CASE(SMIN_VL)
18931 NODE_NAME_CASE(SMAX_VL)
18932 NODE_NAME_CASE(UMIN_VL)
18933 NODE_NAME_CASE(UMAX_VL)
18934 NODE_NAME_CASE(BITREVERSE_VL)
18935 NODE_NAME_CASE(BSWAP_VL)
18936 NODE_NAME_CASE(CTLZ_VL)
18937 NODE_NAME_CASE(CTTZ_VL)
18938 NODE_NAME_CASE(CTPOP_VL)
18939 NODE_NAME_CASE(VFMIN_VL)
18940 NODE_NAME_CASE(VFMAX_VL)
18941 NODE_NAME_CASE(MULHS_VL)
18942 NODE_NAME_CASE(MULHU_VL)
18943 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
18944 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
18945 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
18946 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
18947 NODE_NAME_CASE(VFCVT_X_F_VL)
18948 NODE_NAME_CASE(VFCVT_XU_F_VL)
18949 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
18950 NODE_NAME_CASE(SINT_TO_FP_VL)
18951 NODE_NAME_CASE(UINT_TO_FP_VL)
18952 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
18953 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
18954 NODE_NAME_CASE(FP_EXTEND_VL)
18955 NODE_NAME_CASE(FP_ROUND_VL)
18956 NODE_NAME_CASE(STRICT_FADD_VL)
18957 NODE_NAME_CASE(STRICT_FSUB_VL)
18958 NODE_NAME_CASE(STRICT_FMUL_VL)
18959 NODE_NAME_CASE(STRICT_FDIV_VL)
18960 NODE_NAME_CASE(STRICT_FSQRT_VL)
18961 NODE_NAME_CASE(STRICT_VFMADD_VL)
18962 NODE_NAME_CASE(STRICT_VFNMADD_VL)
18963 NODE_NAME_CASE(STRICT_VFMSUB_VL)
18964 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
18965 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
18966 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
18967 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
18968 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
18969 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
18970 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
18971 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
18972 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
18973 NODE_NAME_CASE(STRICT_FSETCC_VL)
18974 NODE_NAME_CASE(STRICT_FSETCCS_VL)
18975 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
18976 NODE_NAME_CASE(VWMUL_VL)
18977 NODE_NAME_CASE(VWMULU_VL)
18978 NODE_NAME_CASE(VWMULSU_VL)
18979 NODE_NAME_CASE(VWADD_VL)
18980 NODE_NAME_CASE(VWADDU_VL)
18981 NODE_NAME_CASE(VWSUB_VL)
18982 NODE_NAME_CASE(VWSUBU_VL)
18983 NODE_NAME_CASE(VWADD_W_VL)
18984 NODE_NAME_CASE(VWADDU_W_VL)
18985 NODE_NAME_CASE(VWSUB_W_VL)
18986 NODE_NAME_CASE(VWSUBU_W_VL)
18987 NODE_NAME_CASE(VWSLL_VL)
18988 NODE_NAME_CASE(VFWMUL_VL)
18989 NODE_NAME_CASE(VFWADD_VL)
18990 NODE_NAME_CASE(VFWSUB_VL)
18991 NODE_NAME_CASE(VFWADD_W_VL)
18992 NODE_NAME_CASE(VFWSUB_W_VL)
18993 NODE_NAME_CASE(VWMACC_VL)
18994 NODE_NAME_CASE(VWMACCU_VL)
18995 NODE_NAME_CASE(VWMACCSU_VL)
18996 NODE_NAME_CASE(VNSRL_VL)
18997 NODE_NAME_CASE(SETCC_VL)
18998 NODE_NAME_CASE(VMERGE_VL)
18999 NODE_NAME_CASE(VMAND_VL)
19000 NODE_NAME_CASE(VMOR_VL)
19001 NODE_NAME_CASE(VMXOR_VL)
19002 NODE_NAME_CASE(VMCLR_VL)
19003 NODE_NAME_CASE(VMSET_VL)
19004 NODE_NAME_CASE(VRGATHER_VX_VL)
19005 NODE_NAME_CASE(VRGATHER_VV_VL)
19006 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19007 NODE_NAME_CASE(VSEXT_VL)
19008 NODE_NAME_CASE(VZEXT_VL)
19009 NODE_NAME_CASE(VCPOP_VL)
19010 NODE_NAME_CASE(VFIRST_VL)
19011 NODE_NAME_CASE(READ_CSR)
19012 NODE_NAME_CASE(WRITE_CSR)
19013 NODE_NAME_CASE(SWAP_CSR)
19014 NODE_NAME_CASE(CZERO_EQZ)
19015 NODE_NAME_CASE(CZERO_NEZ)
19017 // clang-format on
19018 return nullptr;
19019 #undef NODE_NAME_CASE
19022 /// getConstraintType - Given a constraint letter, return the type of
19023 /// constraint it is for this target.
19024 RISCVTargetLowering::ConstraintType
19025 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
19026 if (Constraint.size() == 1) {
19027 switch (Constraint[0]) {
19028 default:
19029 break;
19030 case 'f':
19031 return C_RegisterClass;
19032 case 'I':
19033 case 'J':
19034 case 'K':
19035 return C_Immediate;
19036 case 'A':
19037 return C_Memory;
19038 case 'S': // A symbolic address
19039 return C_Other;
19041 } else {
19042 if (Constraint == "vr" || Constraint == "vm")
19043 return C_RegisterClass;
19045 return TargetLowering::getConstraintType(Constraint);
19048 std::pair<unsigned, const TargetRegisterClass *>
19049 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
19050 StringRef Constraint,
19051 MVT VT) const {
19052 // First, see if this is a constraint that directly corresponds to a RISC-V
19053 // register class.
19054 if (Constraint.size() == 1) {
19055 switch (Constraint[0]) {
19056 case 'r':
19057 // TODO: Support fixed vectors up to XLen for P extension?
19058 if (VT.isVector())
19059 break;
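// With the Zfinx family of extensions, FP values live in integer registers,
// so use the GPR-backed register classes.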
19060 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19061 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19062 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19063 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19064 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19065 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19066 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19067 case 'f':
19068 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19069 return std::make_pair(0U, &RISCV::FPR16RegClass);
19070 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19071 return std::make_pair(0U, &RISCV::FPR32RegClass);
19072 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19073 return std::make_pair(0U, &RISCV::FPR64RegClass);
19074 break;
19075 default:
19076 break;
19078 } else if (Constraint == "vr") {
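// Use the smallest vector register class (lowest LMUL) that can legally
// hold VT.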
19079 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19080 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19081 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19082 return std::make_pair(0U, RC);
19084 } else if (Constraint == "vm") {
19085 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19086 return std::make_pair(0U, &RISCV::VMV0RegClass);
19089 // Clang will correctly decode the usage of register name aliases into their
19090 // official names. However, other frontends like `rustc` do not. This allows
19091 // users of these frontends to use the ABI names for registers in LLVM-style
19092 // register constraints.
19093 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19094 .Case("{zero}", RISCV::X0)
19095 .Case("{ra}", RISCV::X1)
19096 .Case("{sp}", RISCV::X2)
19097 .Case("{gp}", RISCV::X3)
19098 .Case("{tp}", RISCV::X4)
19099 .Case("{t0}", RISCV::X5)
19100 .Case("{t1}", RISCV::X6)
19101 .Case("{t2}", RISCV::X7)
19102 .Cases("{s0}", "{fp}", RISCV::X8)
19103 .Case("{s1}", RISCV::X9)
19104 .Case("{a0}", RISCV::X10)
19105 .Case("{a1}", RISCV::X11)
19106 .Case("{a2}", RISCV::X12)
19107 .Case("{a3}", RISCV::X13)
19108 .Case("{a4}", RISCV::X14)
19109 .Case("{a5}", RISCV::X15)
19110 .Case("{a6}", RISCV::X16)
19111 .Case("{a7}", RISCV::X17)
19112 .Case("{s2}", RISCV::X18)
19113 .Case("{s3}", RISCV::X19)
19114 .Case("{s4}", RISCV::X20)
19115 .Case("{s5}", RISCV::X21)
19116 .Case("{s6}", RISCV::X22)
19117 .Case("{s7}", RISCV::X23)
19118 .Case("{s8}", RISCV::X24)
19119 .Case("{s9}", RISCV::X25)
19120 .Case("{s10}", RISCV::X26)
19121 .Case("{s11}", RISCV::X27)
19122 .Case("{t3}", RISCV::X28)
19123 .Case("{t4}", RISCV::X29)
19124 .Case("{t5}", RISCV::X30)
19125 .Case("{t6}", RISCV::X31)
19126 .Default(RISCV::NoRegister);
19127 if (XRegFromAlias != RISCV::NoRegister)
19128 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
19130 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
19131 // TableGen record rather than the AsmName to choose registers for InlineAsm
19132 // constraints, and because we want to match those names to the widest floating
19133 // point register type available, manually select floating point registers here.
19135 // The second case is the ABI name of the register, so that frontends can also
19136 // use the ABI names in register constraint lists.
19137 if (Subtarget.hasStdExtF()) {
19138 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
19139 .Cases("{f0}", "{ft0}", RISCV::F0_F)
19140 .Cases("{f1}", "{ft1}", RISCV::F1_F)
19141 .Cases("{f2}", "{ft2}", RISCV::F2_F)
19142 .Cases("{f3}", "{ft3}", RISCV::F3_F)
19143 .Cases("{f4}", "{ft4}", RISCV::F4_F)
19144 .Cases("{f5}", "{ft5}", RISCV::F5_F)
19145 .Cases("{f6}", "{ft6}", RISCV::F6_F)
19146 .Cases("{f7}", "{ft7}", RISCV::F7_F)
19147 .Cases("{f8}", "{fs0}", RISCV::F8_F)
19148 .Cases("{f9}", "{fs1}", RISCV::F9_F)
19149 .Cases("{f10}", "{fa0}", RISCV::F10_F)
19150 .Cases("{f11}", "{fa1}", RISCV::F11_F)
19151 .Cases("{f12}", "{fa2}", RISCV::F12_F)
19152 .Cases("{f13}", "{fa3}", RISCV::F13_F)
19153 .Cases("{f14}", "{fa4}", RISCV::F14_F)
19154 .Cases("{f15}", "{fa5}", RISCV::F15_F)
19155 .Cases("{f16}", "{fa6}", RISCV::F16_F)
19156 .Cases("{f17}", "{fa7}", RISCV::F17_F)
19157 .Cases("{f18}", "{fs2}", RISCV::F18_F)
19158 .Cases("{f19}", "{fs3}", RISCV::F19_F)
19159 .Cases("{f20}", "{fs4}", RISCV::F20_F)
19160 .Cases("{f21}", "{fs5}", RISCV::F21_F)
19161 .Cases("{f22}", "{fs6}", RISCV::F22_F)
19162 .Cases("{f23}", "{fs7}", RISCV::F23_F)
19163 .Cases("{f24}", "{fs8}", RISCV::F24_F)
19164 .Cases("{f25}", "{fs9}", RISCV::F25_F)
19165 .Cases("{f26}", "{fs10}", RISCV::F26_F)
19166 .Cases("{f27}", "{fs11}", RISCV::F27_F)
19167 .Cases("{f28}", "{ft8}", RISCV::F28_F)
19168 .Cases("{f29}", "{ft9}", RISCV::F29_F)
19169 .Cases("{f30}", "{ft10}", RISCV::F30_F)
19170 .Cases("{f31}", "{ft11}", RISCV::F31_F)
19171 .Default(RISCV::NoRegister);
19172 if (FReg != RISCV::NoRegister) {
19173 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
19174 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
19175 unsigned RegNo = FReg - RISCV::F0_F;
19176 unsigned DReg = RISCV::F0_D + RegNo;
19177 return std::make_pair(DReg, &RISCV::FPR64RegClass);
19179 if (VT == MVT::f32 || VT == MVT::Other)
19180 return std::make_pair(FReg, &RISCV::FPR32RegClass);
19181 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
19182 unsigned RegNo = FReg - RISCV::F0_F;
19183 unsigned HReg = RISCV::F0_H + RegNo;
19184 return std::make_pair(HReg, &RISCV::FPR16RegClass);
19189 if (Subtarget.hasVInstructions()) {
19190 Register VReg = StringSwitch<Register>(Constraint.lower())
19191 .Case("{v0}", RISCV::V0)
19192 .Case("{v1}", RISCV::V1)
19193 .Case("{v2}", RISCV::V2)
19194 .Case("{v3}", RISCV::V3)
19195 .Case("{v4}", RISCV::V4)
19196 .Case("{v5}", RISCV::V5)
19197 .Case("{v6}", RISCV::V6)
19198 .Case("{v7}", RISCV::V7)
19199 .Case("{v8}", RISCV::V8)
19200 .Case("{v9}", RISCV::V9)
19201 .Case("{v10}", RISCV::V10)
19202 .Case("{v11}", RISCV::V11)
19203 .Case("{v12}", RISCV::V12)
19204 .Case("{v13}", RISCV::V13)
19205 .Case("{v14}", RISCV::V14)
19206 .Case("{v15}", RISCV::V15)
19207 .Case("{v16}", RISCV::V16)
19208 .Case("{v17}", RISCV::V17)
19209 .Case("{v18}", RISCV::V18)
19210 .Case("{v19}", RISCV::V19)
19211 .Case("{v20}", RISCV::V20)
19212 .Case("{v21}", RISCV::V21)
19213 .Case("{v22}", RISCV::V22)
19214 .Case("{v23}", RISCV::V23)
19215 .Case("{v24}", RISCV::V24)
19216 .Case("{v25}", RISCV::V25)
19217 .Case("{v26}", RISCV::V26)
19218 .Case("{v27}", RISCV::V27)
19219 .Case("{v28}", RISCV::V28)
19220 .Case("{v29}", RISCV::V29)
19221 .Case("{v30}", RISCV::V30)
19222 .Case("{v31}", RISCV::V31)
19223 .Default(RISCV::NoRegister);
19224 if (VReg != RISCV::NoRegister) {
19225 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
19226 return std::make_pair(VReg, &RISCV::VMRegClass);
19227 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
19228 return std::make_pair(VReg, &RISCV::VRRegClass);
19229 for (const auto *RC :
19230 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19231 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
19232 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
19233 return std::make_pair(VReg, RC);
19239 std::pair<Register, const TargetRegisterClass *> Res =
19240 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
19242 // If we picked one of the Zfinx register classes, remap it to the GPR class.
19243 // FIXME: When Zfinx is supported in CodeGen this will need to take the
19244 // Subtarget into account.
19245 if (Res.second == &RISCV::GPRF16RegClass ||
19246 Res.second == &RISCV::GPRF32RegClass ||
19247 Res.second == &RISCV::GPRPairRegClass)
19248 return std::make_pair(Res.first, &RISCV::GPRRegClass);
19250 return Res;
19253 InlineAsm::ConstraintCode
19254 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
19255 // Currently only support length 1 constraints.
19256 if (ConstraintCode.size() == 1) {
19257 switch (ConstraintCode[0]) {
19258 case 'A':
19259 return InlineAsm::ConstraintCode::A;
19260 default:
19261 break;
19265 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
19268 void RISCVTargetLowering::LowerAsmOperandForConstraint(
19269 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
19270 SelectionDAG &DAG) const {
19271 // Currently only support length 1 constraints.
19272 if (Constraint.size() == 1) {
19273 switch (Constraint[0]) {
19274 case 'I':
19275 // Validate & create a 12-bit signed immediate operand.
19276 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
19277 uint64_t CVal = C->getSExtValue();
19278 if (isInt<12>(CVal))
19279 Ops.push_back(
19280 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
19282 return;
19283 case 'J':
19284 // Validate & create an integer zero operand.
19285 if (isNullConstant(Op))
19286 Ops.push_back(
19287 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
19288 return;
19289 case 'K':
19290 // Validate & create a 5-bit unsigned immediate operand.
19291 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
19292 uint64_t CVal = C->getZExtValue();
19293 if (isUInt<5>(CVal))
19294 Ops.push_back(
19295 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
19297 return;
19298 case 'S':
19299 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
19300 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
19301 GA->getValueType(0)));
19302 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
19303 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
19304 BA->getValueType(0)));
19306 return;
19307 default:
19308 break;
19311 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
19314 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
19315 Instruction *Inst,
19316 AtomicOrdering Ord) const {
19317 if (Subtarget.hasStdExtZtso()) {
19318 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19319 return Builder.CreateFence(Ord);
19320 return nullptr;
19323 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19324 return Builder.CreateFence(Ord);
19325 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
19326 return Builder.CreateFence(AtomicOrdering::Release);
19327 return nullptr;
19330 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
19331 Instruction *Inst,
19332 AtomicOrdering Ord) const {
19333 if (Subtarget.hasStdExtZtso()) {
19334 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19335 return Builder.CreateFence(Ord);
19336 return nullptr;
19339 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
19340 return Builder.CreateFence(AtomicOrdering::Acquire);
19341 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
19342 Ord == AtomicOrdering::SequentiallyConsistent)
19343 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
19344 return nullptr;
19347 TargetLowering::AtomicExpansionKind
19348 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19349 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
19350 // point operations can't be used in an lr/sc sequence without breaking the
19351 // forward-progress guarantee.
19352 if (AI->isFloatingPointOperation() ||
19353 AI->getOperation() == AtomicRMWInst::UIncWrap ||
19354 AI->getOperation() == AtomicRMWInst::UDecWrap)
19355 return AtomicExpansionKind::CmpXChg;
19357 // Don't expand forced atomics, we want to have __sync libcalls instead.
19358 if (Subtarget.hasForcedAtomics())
19359 return AtomicExpansionKind::None;
19361 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19362 if (Size == 8 || Size == 16)
19363 return AtomicExpansionKind::MaskedIntrinsic;
19364 return AtomicExpansionKind::None;
19367 static Intrinsic::ID
19368 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
19369 if (XLen == 32) {
19370 switch (BinOp) {
19371 default:
19372 llvm_unreachable("Unexpected AtomicRMW BinOp");
19373 case AtomicRMWInst::Xchg:
19374 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
19375 case AtomicRMWInst::Add:
19376 return Intrinsic::riscv_masked_atomicrmw_add_i32;
19377 case AtomicRMWInst::Sub:
19378 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
19379 case AtomicRMWInst::Nand:
19380 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
19381 case AtomicRMWInst::Max:
19382 return Intrinsic::riscv_masked_atomicrmw_max_i32;
19383 case AtomicRMWInst::Min:
19384 return Intrinsic::riscv_masked_atomicrmw_min_i32;
19385 case AtomicRMWInst::UMax:
19386 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
19387 case AtomicRMWInst::UMin:
19388 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
19392 if (XLen == 64) {
19393 switch (BinOp) {
19394 default:
19395 llvm_unreachable("Unexpected AtomicRMW BinOp");
19396 case AtomicRMWInst::Xchg:
19397 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
19398 case AtomicRMWInst::Add:
19399 return Intrinsic::riscv_masked_atomicrmw_add_i64;
19400 case AtomicRMWInst::Sub:
19401 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
19402 case AtomicRMWInst::Nand:
19403 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
19404 case AtomicRMWInst::Max:
19405 return Intrinsic::riscv_masked_atomicrmw_max_i64;
19406 case AtomicRMWInst::Min:
19407 return Intrinsic::riscv_masked_atomicrmw_min_i64;
19408 case AtomicRMWInst::UMax:
19409 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
19410 case AtomicRMWInst::UMin:
19411 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
19415 llvm_unreachable("Unexpected XLen\n");
19418 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
19419 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19420 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19421 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
19422 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
19423 // mask, as this produces better code than the LR/SC loop emitted by
19424 // int_riscv_masked_atomicrmw_xchg.
19425 if (AI->getOperation() == AtomicRMWInst::Xchg &&
19426 isa<ConstantInt>(AI->getValOperand())) {
19427 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
19428 if (CVal->isZero())
19429 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
19430 Builder.CreateNot(Mask, "Inv_Mask"),
19431 AI->getAlign(), Ord);
19432 if (CVal->isMinusOne())
19433 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
19434 AI->getAlign(), Ord);
19437 unsigned XLen = Subtarget.getXLen();
19438 Value *Ordering =
19439 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
19440 Type *Tys[] = {AlignedAddr->getType()};
19441 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
19442 AI->getModule(),
19443 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
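// The masked atomicrmw intrinsics take XLen-wide operands, so sign-extend the
// i32 inputs when targeting RV64.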
19445 if (XLen == 64) {
19446 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
19447 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
19448 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
19451 Value *Result;
19453 // Must pass the shift amount needed to sign extend the loaded value prior
19454 // to performing a signed comparison for min/max. ShiftAmt is the number of
19455 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
19456 // is the number of bits to left+right shift the value in order to
19457 // sign-extend.
19458 if (AI->getOperation() == AtomicRMWInst::Min ||
19459 AI->getOperation() == AtomicRMWInst::Max) {
19460 const DataLayout &DL = AI->getModule()->getDataLayout();
19461 unsigned ValWidth =
19462 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
19463 Value *SextShamt =
19464 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
19465 Result = Builder.CreateCall(LrwOpScwLoop,
19466 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
19467 } else {
19468 Result =
19469 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
19472 if (XLen == 64)
19473 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
19474 return Result;
19477 TargetLowering::AtomicExpansionKind
19478 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
19479 AtomicCmpXchgInst *CI) const {
19480 // Don't expand forced atomics, we want to have __sync libcalls instead.
19481 if (Subtarget.hasForcedAtomics())
19482 return AtomicExpansionKind::None;
19484 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
19485 if (Size == 8 || Size == 16)
19486 return AtomicExpansionKind::MaskedIntrinsic;
19487 return AtomicExpansionKind::None;
19490 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
19491 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19492 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19493 unsigned XLen = Subtarget.getXLen();
19494 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
19495 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
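// On RV64 the masked cmpxchg intrinsic operates on i64 values, so sign-extend
// the inputs and select the i64 variant.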
19496 if (XLen == 64) {
19497 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
19498 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
19499 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
19500 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
19502 Type *Tys[] = {AlignedAddr->getType()};
19503 Function *MaskedCmpXchg =
19504 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
19505 Value *Result = Builder.CreateCall(
19506 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
19507 if (XLen == 64)
19508 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
19509 return Result;
19512 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
19513 EVT DataVT) const {
19514 // We have indexed loads for all legal index types. Indices are always
19515 // zero-extended.
19516 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
19517 isTypeLegal(Extend.getValueType()) &&
19518 isTypeLegal(Extend.getOperand(0).getValueType());
19521 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
19522 EVT VT) const {
19523 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
19524 return false;
19526 switch (FPVT.getSimpleVT().SimpleTy) {
19527 case MVT::f16:
19528 return Subtarget.hasStdExtZfhmin();
19529 case MVT::f32:
19530 return Subtarget.hasStdExtF();
19531 case MVT::f64:
19532 return Subtarget.hasStdExtD();
19533 default:
19534 return false;
19538 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
19539 // If we are using the small code model, we can reduce the size of each jump
19540 // table entry to 4 bytes.
19541 if (Subtarget.is64Bit() && !isPositionIndependent() &&
19542 getTargetMachine().getCodeModel() == CodeModel::Small) {
19543 return MachineJumpTableInfo::EK_Custom32;
19545 return TargetLowering::getJumpTableEncoding();
19548 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
19549 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
19550 unsigned uid, MCContext &Ctx) const {
19551 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
19552 getTargetMachine().getCodeModel() == CodeModel::Small);
19553 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
19556 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
19557 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
19558 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
19559 // a power of two as well.
19560 // FIXME: This doesn't work for zve32, but that's already broken
19561 // elsewhere for the same reason.
19562 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
19563 static_assert(RISCV::RVVBitsPerBlock == 64,
19564 "RVVBitsPerBlock changed, audit needed");
19565 return true;
19568 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
19569 SDValue &Offset,
19570 ISD::MemIndexedMode &AM,
19571 SelectionDAG &DAG) const {
19572 // Target does not support indexed loads.
19573 if (!Subtarget.hasVendorXTHeadMemIdx())
19574 return false;
19576 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
19577 return false;
19579 Base = Op->getOperand(0);
19580 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
19581 int64_t RHSC = RHS->getSExtValue();
19582 if (Op->getOpcode() == ISD::SUB)
19583 RHSC = -(uint64_t)RHSC;
19585 // The constants that can be encoded in the THeadMemIdx instructions
19586 // are of the form (sign_extend(imm5) << imm2).
19587 bool isLegalIndexedOffset = false;
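// Try shift amounts 0..3 (imm2); the offset is legal if no bits are shifted
// out and the shifted value fits in a signed 5-bit immediate (imm5).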
19588 for (unsigned i = 0; i < 4; i++)
19589 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
19590 isLegalIndexedOffset = true;
19591 break;
19594 if (!isLegalIndexedOffset)
19595 return false;
19597 Offset = Op->getOperand(1);
19598 return true;
19601 return false;
19604 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19605 SDValue &Offset,
19606 ISD::MemIndexedMode &AM,
19607 SelectionDAG &DAG) const {
19608 EVT VT;
19609 SDValue Ptr;
19610 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19611 VT = LD->getMemoryVT();
19612 Ptr = LD->getBasePtr();
19613 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19614 VT = ST->getMemoryVT();
19615 Ptr = ST->getBasePtr();
19616 } else
19617 return false;
19619 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
19620 return false;
19622 AM = ISD::PRE_INC;
19623 return true;
19626 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19627 SDValue &Base,
19628 SDValue &Offset,
19629 ISD::MemIndexedMode &AM,
19630 SelectionDAG &DAG) const {
19631 EVT VT;
19632 SDValue Ptr;
19633 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19634 VT = LD->getMemoryVT();
19635 Ptr = LD->getBasePtr();
19636 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19637 VT = ST->getMemoryVT();
19638 Ptr = ST->getBasePtr();
19639 } else
19640 return false;
19642 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
19643 return false;
19644 // Post-indexing updates the base, so it's not a valid transform
19645 // if that's not the same as the load's pointer.
19646 if (Ptr != Base)
19647 return false;
19649 AM = ISD::POST_INC;
19650 return true;
19653 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19654 EVT VT) const {
19655 EVT SVT = VT.getScalarType();
19657 if (!SVT.isSimple())
19658 return false;
19660 switch (SVT.getSimpleVT().SimpleTy) {
19661 case MVT::f16:
19662 return VT.isVector() ? Subtarget.hasVInstructionsF16()
19663 : Subtarget.hasStdExtZfhOrZhinx();
19664 case MVT::f32:
19665 return Subtarget.hasStdExtFOrZfinx();
19666 case MVT::f64:
19667 return Subtarget.hasStdExtDOrZdinx();
19668 default:
19669 break;
19672 return false;
19675 ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
19676 // Zacas will use amocas.w which does not require extension.
19677 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
19680 Register RISCVTargetLowering::getExceptionPointerRegister(
19681 const Constant *PersonalityFn) const {
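// The exception pointer is passed in X10 (a0).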
19682 return RISCV::X10;
19685 Register RISCVTargetLowering::getExceptionSelectorRegister(
19686 const Constant *PersonalityFn) const {
19687 return RISCV::X11;
19690 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
19691 // Return false to suppress the unnecessary extensions if a LibCall
19692 // argument or the return value is a float narrower than XLEN on a soft FP ABI.
19693 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
19694 Type.getSizeInBits() < Subtarget.getXLen()))
19695 return false;
19697 return true;
19700 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
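// The RV64 calling convention requires i32 values to be sign-extended to
// 64 bits, regardless of signedness.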
19701 if (Subtarget.is64Bit() && Type == MVT::i32)
19702 return true;
19704 return IsSigned;
19707 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
19708 SDValue C) const {
19709 // Check integral scalar types.
19710 const bool HasExtMOrZmmul =
19711 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
19712 if (!VT.isScalarInteger())
19713 return false;
19715 // Omit the optimization if the subtarget has the M or Zmmul extension and the
19716 // data size exceeds XLen.
19717 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
19718 return false;
19720 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19721 // Break the MUL to a SLLI and an ADD/SUB.
19722 const APInt &Imm = ConstNode->getAPIntValue();
19723 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
19724 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
19725 return true;
19727 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
19728 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
19729 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
19730 (Imm - 8).isPowerOf2()))
19731 return true;
19733 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
19734 // a pair of LUI/ADDI.
19735 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
19736 ConstNode->hasOneUse()) {
19737 APInt ImmS = Imm.ashr(Imm.countr_zero());
19738 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
19739 (1 - ImmS).isPowerOf2())
19740 return true;
19744 return false;
19747 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19748 SDValue ConstNode) const {
19749 // Let the DAGCombiner decide for vectors.
19750 EVT VT = AddNode.getValueType();
19751 if (VT.isVector())
19752 return true;
19754 // Let the DAGCombiner decide for larger types.
19755 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
19756 return true;
19758 // It is worse if c1 is simm12 while c1*c2 is not.
19759 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19760 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
19761 const APInt &C1 = C1Node->getAPIntValue();
19762 const APInt &C2 = C2Node->getAPIntValue();
19763 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
19764 return false;
19766 // Default to true and let the DAGCombiner decide.
19767 return true;
19770 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
19771 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
19772 unsigned *Fast) const {
19773 if (!VT.isVector()) {
19774 if (Fast)
19775 *Fast = Subtarget.hasFastUnalignedAccess();
19776 return Subtarget.hasFastUnalignedAccess();
19779 // All vector implementations must support element alignment
19780 EVT ElemVT = VT.getVectorElementType();
19781 if (Alignment >= ElemVT.getStoreSize()) {
19782 if (Fast)
19783 *Fast = 1;
19784 return true;
19787 // Note: We lower an unmasked unaligned vector access to an equally sized
19788 // e8 element type access. Given this, we effectively support all unmasked
19789 // misaligned accesses. TODO: Work through the codegen implications of
19790 // allowing such accesses to be formed, and considered fast.
19791 if (Fast)
19792 *Fast = Subtarget.hasFastUnalignedAccess();
19793 return Subtarget.hasFastUnalignedAccess();
19797 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
19798 const AttributeList &FuncAttributes) const {
19799 if (!Subtarget.hasVInstructions())
19800 return MVT::Other;
19802 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
19803 return MVT::Other;
19805 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
19806 // has an expansion threshold, and we want the number of hardware memory
19807 // operations to correspond roughly to that threshold. LMUL>1 operations
19808 // are typically expanded linearly internally, and thus correspond to more
19809 // than one actual memory operation. Note that store merging and load
19810 // combining will typically form larger LMUL operations from the LMUL1
19811 // operations emitted here, and that's okay because combining isn't
19812 // introducing new memory operations; it's just merging existing ones.
19813 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
19814 if (Op.size() < MinVLenInBytes)
19815 // TODO: Figure out short memops. For the moment, do the default thing
19816 // which ends up using scalar sequences.
19817 return MVT::Other;
19819 // Prefer i8 for non-zero memset as it allows us to avoid materializing
19820 // a large scalar constant and instead use vmv.v.x/i to do the
19821 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
19822 // maximize the chance we can encode the size in the vsetvli.
19823 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
19824 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
19826 // Do we have sufficient alignment for our preferred VT? If not, revert
19827 // to largest size allowed by our alignment criteria.
19828 if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
19829 Align RequiredAlign(PreferredVT.getStoreSize());
19830 if (Op.isFixedDstAlign())
19831 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
19832 if (Op.isMemcpy())
19833 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
19834 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
19836 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
19839 bool RISCVTargetLowering::splitValueIntoRegisterParts(
19840 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19841 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19842 bool IsABIRegCopy = CC.has_value();
19843 EVT ValueVT = Val.getValueType();
19844 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
19845 PartVT == MVT::f32) {
19846 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
19847 // NaN, and cast to f32.
19848 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
19849 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
19850 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
19851 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
19852 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
19853 Parts[0] = Val;
19854 return true;
19857 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
19858 LLVMContext &Context = *DAG.getContext();
19859 EVT ValueEltVT = ValueVT.getVectorElementType();
19860 EVT PartEltVT = PartVT.getVectorElementType();
19861 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
19862 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
19863 if (PartVTBitSize % ValueVTBitSize == 0) {
19864 assert(PartVTBitSize >= ValueVTBitSize);
19865 // If the element types are different, bitcast to the same element type of
19866 // PartVT first.
19867 // For example, to copy a <vscale x 1 x i8> value into
19868 // <vscale x 4 x i16>, we first widen it to <vscale x 8 x i8> with an
19869 // insert_subvector and then bitcast the result to
19870 // <vscale x 4 x i16>.
19871 if (ValueEltVT != PartEltVT) {
19872 if (PartVTBitSize > ValueVTBitSize) {
19873 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
19874 assert(Count != 0 && "The number of elements should not be zero.");
19875 EVT SameEltTypeVT =
19876 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
19877 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
19878 DAG.getUNDEF(SameEltTypeVT), Val,
19879 DAG.getVectorIdxConstant(0, DL));
19881 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
19882 } else {
19883 Val =
19884 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
19885 Val, DAG.getVectorIdxConstant(0, DL));
19887 Parts[0] = Val;
19888 return true;
19891 return false;
19894 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
19895 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
19896 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
19897 bool IsABIRegCopy = CC.has_value();
19898 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
19899 PartVT == MVT::f32) {
19900 SDValue Val = Parts[0];
19902 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
19903 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
19904 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
19905 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
19906 return Val;
19909 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
19910 LLVMContext &Context = *DAG.getContext();
19911 SDValue Val = Parts[0];
19912 EVT ValueEltVT = ValueVT.getVectorElementType();
19913 EVT PartEltVT = PartVT.getVectorElementType();
19914 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
19915 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
19916 if (PartVTBitSize % ValueVTBitSize == 0) {
19917 assert(PartVTBitSize >= ValueVTBitSize);
19918 EVT SameEltTypeVT = ValueVT;
19919 // If the element types are different, convert it to the same element type
19920 // of PartVT.
19921 // For example, to copy a <vscale x 1 x i8> value out of
19922 // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
19923 // <vscale x 8 x i8>,
19924 // then extract the <vscale x 1 x i8> subvector from it.
19925 if (ValueEltVT != PartEltVT) {
19926 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
19927 assert(Count != 0 && "The number of elements should not be zero.");
19928 SameEltTypeVT =
19929 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
19930 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
19932 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
19933 DAG.getVectorIdxConstant(0, DL));
19934 return Val;
19937 return SDValue();
19940 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
19941 // When aggressively optimizing for code size, we prefer to use a div
19942 // instruction, as it is usually smaller than the alternative sequence.
19943 // TODO: Add vector division?
19944 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
19945 return OptSize && !VT.isVector();
19948 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
19949 // Scalarizing a splatted zero_extend or sign_extend can prevent it from being
19950 // matched to a widening instruction in some situations.
19951 unsigned Opc = N->getOpcode();
19952 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
19953 return false;
19954 return true;
19957 static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
19958 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
19959 Function *ThreadPointerFunc =
19960 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
19961 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
19962 IRB.CreateCall(ThreadPointerFunc), Offset);
19965 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
19966 // Fuchsia provides a fixed TLS slot for the stack cookie.
19967 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
19968 if (Subtarget.isTargetFuchsia())
19969 return useTpOffset(IRB, -0x10);
19971 return TargetLowering::getIRStackGuard(IRB);
19974 bool RISCVTargetLowering::isLegalInterleavedAccessType(
19975 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
19976 const DataLayout &DL) const {
19977 EVT VT = getValueType(DL, VTy);
19978 // Don't lower vlseg/vsseg for vector types that can't be split.
19979 if (!isTypeLegal(VT))
19980 return false;
19982 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
19983 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
19984 Alignment))
19985 return false;
19987 MVT ContainerVT = VT.getSimpleVT();
19989 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
19990 if (!Subtarget.useRVVForFixedLengthVectors())
19991 return false;
19992 // Sometimes the interleaved access pass picks up splats as interleaves of
19993 // one element. Don't lower these.
19994 if (FVTy->getNumElements() < 2)
19995 return false;
19997 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
20000 // Need to make sure that EMUL * NFIELDS ≤ 8
20001 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
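// A fractional LMUL is at most 1/2, so EMUL * NFIELDS cannot exceed 8 for
// the supported factors (at most 8 fields).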
20002 if (Fractional)
20003 return true;
20004 return Factor * LMUL <= 8;
20007 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20008 Align Alignment) const {
20009 if (!Subtarget.hasVInstructions())
20010 return false;
20012 // Only support fixed vectors if we know the minimum vector size.
20013 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20014 return false;
20016 EVT ScalarType = DataType.getScalarType();
20017 if (!isLegalElementTypeForRVV(ScalarType))
20018 return false;
20020 if (!Subtarget.hasFastUnalignedAccess() &&
20021 Alignment < ScalarType.getStoreSize())
20022 return false;
20024 return true;
20027 static const Intrinsic::ID FixedVlsegIntrIds[] = {
20028 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20029 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20030 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20031 Intrinsic::riscv_seg8_load};
20033 /// Lower an interleaved load into a vlsegN intrinsic.
20035 /// E.g. Lower an interleaved load (Factor = 2):
20036 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20037 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20038 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20040 /// Into:
20041 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20042 /// %ptr, i64 4)
20043 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20044 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20045 bool RISCVTargetLowering::lowerInterleavedLoad(
20046 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20047 ArrayRef<unsigned> Indices, unsigned Factor) const {
20048 IRBuilder<> Builder(LI);
20050 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
20051 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
20052 LI->getPointerAddressSpace(),
20053 LI->getModule()->getDataLayout()))
20054 return false;
20056 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20058 Function *VlsegNFunc =
20059 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20060 {VTy, LI->getPointerOperandType(), XLenTy});
20062 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20064 CallInst *VlsegN =
20065 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
20067 for (unsigned i = 0; i < Shuffles.size(); i++) {
20068 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
20069 Shuffles[i]->replaceAllUsesWith(SubVec);
20072 return true;
20075 static const Intrinsic::ID FixedVssegIntrIds[] = {
20076 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20077 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20078 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20079 Intrinsic::riscv_seg8_store};
20081 /// Lower an interleaved store into a vssegN intrinsic.
20083 /// E.g. Lower an interleaved store (Factor = 3):
20084 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20085 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20086 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
20088 /// Into:
20089 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20090 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20091 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20092 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20093 /// %ptr, i32 4)
20095 /// Note that the new shufflevectors will be removed and we'll only generate one
20096 /// vsseg3 instruction in CodeGen.
20097 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20098 ShuffleVectorInst *SVI,
20099 unsigned Factor) const {
20100 IRBuilder<> Builder(SI);
20101 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
20102 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20103 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
20104 ShuffleVTy->getNumElements() / Factor);
20105 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
20106 SI->getPointerAddressSpace(),
20107 SI->getModule()->getDataLayout()))
20108 return false;
20110 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20112 Function *VssegNFunc =
20113 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
20114 {VTy, SI->getPointerOperandType(), XLenTy});
20116 auto Mask = SVI->getShuffleMask();
20117 SmallVector<Value *, 10> Ops;
20119 for (unsigned i = 0; i < Factor; i++) {
20120 Value *Shuffle = Builder.CreateShuffleVector(
20121 SVI->getOperand(0), SVI->getOperand(1),
20122 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
20123 Ops.push_back(Shuffle);
20125 // This VL is valid (the access can be executed in one vsseg instruction,
20126 // potentially under a larger LMUL) because we checked that the fixed vector
20127 // type fits in isLegalInterleavedAccessType.
20128 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20129 Ops.append({SI->getPointerOperand(), VL});
20131 Builder.CreateCall(VssegNFunc, Ops);
20133 return true;
20136 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
20137 LoadInst *LI) const {
20138 assert(LI->isSimple());
20139 IRBuilder<> Builder(LI);
20141 // Only deinterleave2 supported at present.
20142 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
20143 return false;
20145 unsigned Factor = 2;
20147 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
20148 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
20150 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
20151 LI->getPointerAddressSpace(),
20152 LI->getModule()->getDataLayout()))
20153 return false;
20155 Function *VlsegNFunc;
20156 Value *VL;
20157 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20158 SmallVector<Value *, 10> Ops;
20160 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20161 VlsegNFunc = Intrinsic::getDeclaration(
20162 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20163 {ResVTy, LI->getPointerOperandType(), XLenTy});
20164 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
20165 } else {
20166 static const Intrinsic::ID IntrIds[] = {
20167 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
20168 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
20169 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
20170 Intrinsic::riscv_vlseg8};
20172 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
20173 {ResVTy, XLenTy});
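// For scalable vectors, use VLMAX (an all-ones AVL) and poison passthru
// operands.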
20174 VL = Constant::getAllOnesValue(XLenTy);
20175 Ops.append(Factor, PoisonValue::get(ResVTy));
20178 Ops.append({LI->getPointerOperand(), VL});
20180 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
20181 DI->replaceAllUsesWith(Vlseg);
20183 return true;
20186 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
20187 StoreInst *SI) const {
20188 assert(SI->isSimple());
20189 IRBuilder<> Builder(SI);
20191 // Only interleave2 supported at present.
20192 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
20193 return false;
20195 unsigned Factor = 2;
20197 VectorType *VTy = cast<VectorType>(II->getType());
20198 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
20200 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
20201 SI->getPointerAddressSpace(),
20202 SI->getModule()->getDataLayout()))
20203 return false;
20205 Function *VssegNFunc;
20206 Value *VL;
20207 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20209 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20210 VssegNFunc = Intrinsic::getDeclaration(
20211 SI->getModule(), FixedVssegIntrIds[Factor - 2],
20212 {InVTy, SI->getPointerOperandType(), XLenTy});
20213 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
20214 } else {
20215 static const Intrinsic::ID IntrIds[] = {
20216 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
20217 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
20218 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
20219 Intrinsic::riscv_vsseg8};
20221 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
20222 {InVTy, XLenTy});
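// For scalable vectors, use VLMAX (an all-ones AVL).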
20223 VL = Constant::getAllOnesValue(XLenTy);
20226 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
20227 SI->getPointerOperand(), VL});
20229 return true;
20232 MachineInstr *
20233 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
20234 MachineBasicBlock::instr_iterator &MBBI,
20235 const TargetInstrInfo *TII) const {
20236 assert(MBBI->isCall() && MBBI->getCFIType() &&
20237 "Invalid call instruction for a KCFI check");
20238 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
20239 MBBI->getOpcode()));
20241 MachineOperand &Target = MBBI->getOperand(0);
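// The KCFI_CHECK pseudo reads the same target register, so it must not be
// renamed.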
20242 Target.setIsRenamable(false);
20244 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
20245 .addReg(Target.getReg())
20246 .addImm(MBBI->getCFIType())
20247 .getInstr();
20250 #define GET_REGISTER_MATCHER
20251 #include "RISCVGenAsmMatcher.inc"
20253 Register
20254 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
20255 const MachineFunction &MF) const {
20256 Register Reg = MatchRegisterAltName(RegName);
20257 if (Reg == RISCV::NoRegister)
20258 Reg = MatchRegisterName(RegName);
20259 if (Reg == RISCV::NoRegister)
20260 report_fatal_error(
20261 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
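// Named-register access is only permitted for registers that are reserved,
// either by the target or explicitly by the user.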
20262 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
20263 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
20264 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
20265 StringRef(RegName) + "\"."));
20266 return Reg;
20269 MachineMemOperand::Flags
20270 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
20271 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
20273 if (NontemporalInfo == nullptr)
20274 return MachineMemOperand::MONone;
20276 // 1 is the default value and works as __RISCV_NTLH_ALL
20277 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
20278 // 3 -> __RISCV_NTLH_ALL_PRIVATE
20279 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
20280 // 5 -> __RISCV_NTLH_ALL
20281 int NontemporalLevel = 5;
20282 const MDNode *RISCVNontemporalInfo =
20283 I.getMetadata("riscv-nontemporal-domain");
20284 if (RISCVNontemporalInfo != nullptr)
20285 NontemporalLevel =
20286 cast<ConstantInt>(
20287 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
20288 ->getValue())
20289 ->getZExtValue();
20291 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
20292 "RISC-V target doesn't support this non-temporal domain.");
20294 NontemporalLevel -= 2;
20295 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
20296 if (NontemporalLevel & 0b1)
20297 Flags |= MONontemporalBit0;
20298 if (NontemporalLevel & 0b10)
20299 Flags |= MONontemporalBit1;
20301 return Flags;
20304 MachineMemOperand::Flags
20305 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
20307 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
20308 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
20309 TargetFlags |= (NodeFlags & MONontemporalBit0);
20310 TargetFlags |= (NodeFlags & MONontemporalBit1);
20312 return TargetFlags;
20315 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
20316 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
20317 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
20320 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
20321 if (VT.isScalableVector())
20322 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
20323 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
20324 return true;
20325 return Subtarget.hasStdExtZbb() &&
20326 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
20329 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
20330 ISD::CondCode Cond) const {
20331 return isCtpopFast(VT) ? 0 : 1;
20334 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
20336 // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and
20337 // G_XOR.
20338 unsigned Op = Inst.getOpcode();
20339 if (Op == Instruction::Add || Op == Instruction::Sub ||
20340 Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor)
20341 return false;
20343 if (Inst.getType()->isScalableTy())
20344 return true;
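// Also fall back if any operand has a scalable type, except for returns.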
20346 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
20347 if (Inst.getOperand(i)->getType()->isScalableTy() &&
20348 !isa<ReturnInst>(&Inst))
20349 return true;
20351 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
20352 if (AI->getAllocatedType()->isScalableTy())
20353 return true;
20356 return false;
20359 SDValue
20360 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
20361 SelectionDAG &DAG,
20362 SmallVectorImpl<SDNode *> &Created) const {
20363 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
20364 if (isIntDivCheap(N->getValueType(0), Attr))
20365 return SDValue(N, 0); // Lower SDIV as SDIV
20367 // Only perform this transform if short forward branch opt is supported.
20368 if (!Subtarget.hasShortForwardBranchOpt())
20369 return SDValue();
20370 EVT VT = N->getValueType(0);
20371 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
20372 return SDValue();
20374 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
20375 if (Divisor.sgt(2048) || Divisor.slt(-2048))
20376 return SDValue();
20377 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
20380 bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
20381 EVT VT, const APInt &AndMask) const {
20382 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
20383 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
20384 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
20387 unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
20388 return Subtarget.getMinimumJumpTableEntries();
20391 namespace llvm::RISCVVIntrinsicsTable {
20393 #define GET_RISCVVIntrinsicsTable_IMPL
20394 #include "RISCVGenSearchableTables.inc"
20396 } // namespace llvm::RISCVVIntrinsicsTable