1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
31 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/DiagnosticInfo.h"
34 #include "llvm/IR/DiagnosticPrinter.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/InstructionCost.h"
43 #include "llvm/Support/KnownBits.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include <optional>
48 using namespace llvm;
50 #define DEBUG_TYPE "riscv-lower"
52 STATISTIC(NumTailCalls, "Number of tail calls");
54 static cl::opt<unsigned> ExtensionMaxWebSize(
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
60 static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
66 static cl::opt<unsigned> NumRepeatedDivisors(
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
72 static cl::opt<int>
73 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
78 static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
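// For illustration (a sketch, not taken from this file): the options above are
// hidden command-line flags whose names are DEBUG_TYPE ("riscv-lower") plus
// the given suffix, except for the experimental RV64 flag, which is spelled
// out explicitly. An invocation overriding a couple of the defaults could look
// roughly like:
//
//   llc -mtriple=riscv64 -mattr=+v \
//       -riscv-lower-fpimm-cost=3 -riscv-lower-ext-max-web-size=24 input.ll
//
// The numeric values here are made up; only the flag names follow from the
// definitions above.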
82 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
86 if (Subtarget.isRVE())
87 report_fatal_error("Codegen not yet implemented for RVE");
89 RISCVABI::ABI ABI = Subtarget.getTargetABI();
90 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
92 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
93 !Subtarget.hasStdExtF()) {
94 errs() << "Hard-float 'f' ABI can't be used for a target that "
95 "doesn't support the F instruction set extension (ignoring "
96 "target-abi)\n";
97 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
98 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
99 !Subtarget.hasStdExtD()) {
100 errs() << "Hard-float 'd' ABI can't be used for a target that "
101 "doesn't support the D instruction set extension (ignoring "
102 "target-abi)\n";
103 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
106 switch (ABI) {
107 default:
108 report_fatal_error("Don't know how to lower this ABI");
109 case RISCVABI::ABI_ILP32:
110 case RISCVABI::ABI_ILP32F:
111 case RISCVABI::ABI_ILP32D:
112 case RISCVABI::ABI_LP64:
113 case RISCVABI::ABI_LP64F:
114 case RISCVABI::ABI_LP64D:
115 break;
118 MVT XLenVT = Subtarget.getXLenVT();
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
125 if (Subtarget.hasStdExtZfhOrZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxOrZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
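// A brief sketch of what the Zdinx register-class choices above mean (inferred
// from the class names, not stated in this file): f64 values live in the
// integer register file, so RV64 can use a single GPR, while RV32 uses the
// GPRPF64 class, which models a pair of GPRs holding the two halves of the
// double. Roughly, an instruction such as fadd.d a0, a0, a2 on RV32+Zdinx
// would then operate on the (a0,a1) and (a2,a3) pairs.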
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
185 addRegisterClass(VT, RC);
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
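// For illustration (assuming the upstream value RISCV::RVVBitsPerBlock == 64):
// addRegClassForRVV picks the class from the type's known-minimum size, which
// corresponds to LMUL<=1 -> VR, LMUL=2 -> VRM2, LMUL=4 -> VRM4, LMUL=8 -> VRM8.
// For example, MVT::nxv4i32 has a known-minimum size of 128 bits and lands in
// VRM2, while MVT::nxv1i8 (8 bits, a fractional LMUL) still fits in VR.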
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
230 // Compute derived properties from the register classes.
231 computeRegisterProperties(STI.getRegisterInfo());
233 setStackPointerRegisterToSaveRestore(RISCV::X2);
235 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
239 MVT::i1, Promote);
241 // TODO: add all necessary setOperationAction calls.
242 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
244 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
245 setOperationAction(ISD::BR_CC, XLenVT, Expand);
246 if (RV64LegalI32 && Subtarget.is64Bit())
247 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
248 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
249 setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
250 if (RV64LegalI32 && Subtarget.is64Bit())
251 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
253 setCondCodeAction(ISD::SETLE, XLenVT, Expand);
254 setCondCodeAction(ISD::SETGT, XLenVT, Custom);
255 setCondCodeAction(ISD::SETGE, XLenVT, Expand);
256 setCondCodeAction(ISD::SETULE, XLenVT, Expand);
257 setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
258 setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
260 if (RV64LegalI32 && Subtarget.is64Bit())
261 setOperationAction(ISD::SETCC, MVT::i32, Promote);
263 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
265 setOperationAction(ISD::VASTART, MVT::Other, Custom);
266 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
268 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
270 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
272 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
273 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
275 if (Subtarget.is64Bit()) {
276 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
278 if (!RV64LegalI32) {
279 setOperationAction(ISD::LOAD, MVT::i32, Custom);
280 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
281 MVT::i32, Custom);
282 setOperationAction(ISD::SADDO, MVT::i32, Custom);
283 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
284 MVT::i32, Custom);
286 } else {
287 setLibcallName(
288 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
289 nullptr);
290 setLibcallName(RTLIB::MULO_I64, nullptr);
293 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
294 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
295 if (RV64LegalI32 && Subtarget.is64Bit())
296 setOperationAction(ISD::MUL, MVT::i32, Promote);
297 } else if (Subtarget.is64Bit()) {
298 setOperationAction(ISD::MUL, MVT::i128, Custom);
299 if (!RV64LegalI32)
300 setOperationAction(ISD::MUL, MVT::i32, Custom);
301 } else {
302 setOperationAction(ISD::MUL, MVT::i64, Custom);
305 if (!Subtarget.hasStdExtM()) {
306 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
307 XLenVT, Expand);
308 if (RV64LegalI32 && Subtarget.is64Bit())
309 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
310 Promote);
311 } else if (Subtarget.is64Bit()) {
312 if (!RV64LegalI32)
313 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
314 {MVT::i8, MVT::i16, MVT::i32}, Custom);
317 if (RV64LegalI32 && Subtarget.is64Bit()) {
318 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
319 setOperationAction(
320 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
321 Expand);
324 setOperationAction(
325 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
326 Expand);
328 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
329 Custom);
331 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
332 if (!RV64LegalI32 && Subtarget.is64Bit())
333 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
334 } else if (Subtarget.hasVendorXTHeadBb()) {
335 if (Subtarget.is64Bit())
336 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
337 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
338 } else {
339 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
340 if (RV64LegalI32 && Subtarget.is64Bit())
341 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
344 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
345 // pattern match it directly in isel.
346 setOperationAction(ISD::BSWAP, XLenVT,
347 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
348 Subtarget.hasVendorXTHeadBb())
349 ? Legal
350 : Expand);
351 if (RV64LegalI32 && Subtarget.is64Bit())
352 setOperationAction(ISD::BSWAP, MVT::i32,
353 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
354 Subtarget.hasVendorXTHeadBb())
355 ? Promote
356 : Expand);
358 // Zbkb can use rev8+brev8 to implement bitreverse.
359 setOperationAction(ISD::BITREVERSE, XLenVT,
360 Subtarget.hasStdExtZbkb() ? Custom : Expand);
362 if (Subtarget.hasStdExtZbb()) {
363 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
364 Legal);
365 if (RV64LegalI32 && Subtarget.is64Bit())
366 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
367 Promote);
369 if (Subtarget.is64Bit()) {
370 if (RV64LegalI32)
371 setOperationAction(ISD::CTTZ, MVT::i32, Legal);
372 else
373 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
375 } else {
376 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
377 if (RV64LegalI32 && Subtarget.is64Bit())
378 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
381 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
382 // We need the custom lowering to make sure that the resulting sequence
383 // for the 32bit case is efficient on 64bit targets.
384 if (Subtarget.is64Bit()) {
385 if (RV64LegalI32) {
386 setOperationAction(ISD::CTLZ, MVT::i32,
387 Subtarget.hasStdExtZbb() ? Legal : Promote);
388 if (!Subtarget.hasStdExtZbb())
389 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
390 } else
391 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
393 } else {
394 setOperationAction(ISD::CTLZ, XLenVT, Expand);
395 if (RV64LegalI32 && Subtarget.is64Bit())
396 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
399 if (!RV64LegalI32 && Subtarget.is64Bit())
400 setOperationAction(ISD::ABS, MVT::i32, Custom);
402 if (!Subtarget.hasVendorXTHeadCondMov())
403 setOperationAction(ISD::SELECT, XLenVT, Custom);
405 if (RV64LegalI32 && Subtarget.is64Bit())
406 setOperationAction(ISD::SELECT, MVT::i32, Promote);
408 static const unsigned FPLegalNodeTypes[] = {
409 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
410 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
411 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
412 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
413 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
414 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
416 static const ISD::CondCode FPCCToExpand[] = {
417 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
418 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
419 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
421 static const unsigned FPOpToExpand[] = {
422 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
423 ISD::FREM};
425 static const unsigned FPRndMode[] = {
426 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
427 ISD::FROUNDEVEN};
429 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
430 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
432 static const unsigned ZfhminZfbfminPromoteOps[] = {
433 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
434 ISD::FSUB, ISD::FMUL, ISD::FMA,
435 ISD::FDIV, ISD::FSQRT, ISD::FABS,
436 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
437 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
438 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
439 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
440 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
441 ISD::FROUNDEVEN, ISD::SELECT};
443 if (Subtarget.hasStdExtZfbfmin()) {
444 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
445 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
446 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
447 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
448 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
449 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
450 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
451 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
452 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
453 setOperationAction(ISD::FREM, MVT::bf16, Promote);
454 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
455 // DAGCombiner::visitFP_ROUND probably needs improvements first.
456 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
459 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
460 if (Subtarget.hasStdExtZfhOrZhinx()) {
461 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
462 setOperationAction(FPRndMode, MVT::f16,
463 Subtarget.hasStdExtZfa() ? Legal : Custom);
464 setOperationAction(ISD::SELECT, MVT::f16, Custom);
465 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
466 } else {
467 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
468 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
469 ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
470 MVT::f16, Legal);
471 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
472 // DAGCombiner::visitFP_ROUND probably needs improvements first.
473 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
476 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
477 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
478 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
479 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
480 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
482 setOperationAction(ISD::FNEARBYINT, MVT::f16,
483 Subtarget.hasStdExtZfa() ? Legal : Promote);
484 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
485 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
486 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
487 ISD::FLOG10},
488 MVT::f16, Promote);
490 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
491 // complete support for all operations in LegalizeDAG.
492 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
493 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
494 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
495 ISD::STRICT_FTRUNC},
496 MVT::f16, Promote);
498 // We need to custom promote this.
499 if (Subtarget.is64Bit())
500 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
502 if (!Subtarget.hasStdExtZfa())
503 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
506 if (Subtarget.hasStdExtFOrZfinx()) {
507 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
508 setOperationAction(FPRndMode, MVT::f32,
509 Subtarget.hasStdExtZfa() ? Legal : Custom);
510 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
511 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
512 setOperationAction(ISD::SELECT, MVT::f32, Custom);
513 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
514 setOperationAction(FPOpToExpand, MVT::f32, Expand);
515 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
516 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
517 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
518 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
519 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
520 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
521 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
522 Subtarget.isSoftFPABI() ? LibCall : Custom);
523 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
524 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
526 if (Subtarget.hasStdExtZfa())
527 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
528 else
529 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
532 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
533 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
535 if (Subtarget.hasStdExtDOrZdinx()) {
536 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
538 if (Subtarget.hasStdExtZfa()) {
539 setOperationAction(FPRndMode, MVT::f64, Legal);
540 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
541 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
542 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
543 } else {
544 if (Subtarget.is64Bit())
545 setOperationAction(FPRndMode, MVT::f64, Custom);
547 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
550 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
551 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
552 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
553 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
554 setOperationAction(ISD::SELECT, MVT::f64, Custom);
555 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
556 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
557 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
558 setOperationAction(FPOpToExpand, MVT::f64, Expand);
559 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
560 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
561 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
562 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
563 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
564 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
565 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
566 Subtarget.isSoftFPABI() ? LibCall : Custom);
567 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
568 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
571 if (Subtarget.is64Bit()) {
572 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
573 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
574 MVT::i32, Custom);
575 setOperationAction(ISD::LROUND, MVT::i32, Custom);
578 if (Subtarget.hasStdExtFOrZfinx()) {
579 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
580 Custom);
582 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
583 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
584 XLenVT, Legal);
586 if (RV64LegalI32 && Subtarget.is64Bit())
587 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
588 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
589 MVT::i32, Legal);
591 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
592 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
595 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
596 ISD::JumpTable},
597 XLenVT, Custom);
599 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
601 if (Subtarget.is64Bit())
602 setOperationAction(ISD::Constant, MVT::i64, Custom);
604 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
605 // Unfortunately this can't be determined just from the ISA naming string.
606 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
607 Subtarget.is64Bit() ? Legal : Custom);
609 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
610 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
611 if (Subtarget.is64Bit())
612 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
614 if (Subtarget.hasStdExtZicbop()) {
615 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
618 if (Subtarget.hasStdExtA()) {
619 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
620 setMinCmpXchgSizeInBits(32);
621 } else if (Subtarget.hasForcedAtomics()) {
622 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
623 } else {
624 setMaxAtomicSizeInBitsSupported(0);
627 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
629 setBooleanContents(ZeroOrOneBooleanContent);
631 if (Subtarget.hasVInstructions()) {
632 setBooleanVectorContents(ZeroOrOneBooleanContent);
634 setOperationAction(ISD::VSCALE, XLenVT, Custom);
635 if (RV64LegalI32 && Subtarget.is64Bit())
636 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
638 // RVV intrinsics may have illegal operands.
639 // We also need to custom legalize vmv.x.s.
640 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
641 ISD::INTRINSIC_VOID},
642 {MVT::i8, MVT::i16}, Custom);
643 if (Subtarget.is64Bit())
644 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
645 MVT::i32, Custom);
646 else
647 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
648 MVT::i64, Custom);
650 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
651 MVT::Other, Custom);
653 static const unsigned IntegerVPOps[] = {
654 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
655 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
656 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
657 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
658 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
659 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
660 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
661 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
662 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
663 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
664 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
665 ISD::VP_ABS};
667 static const unsigned FloatingPointVPOps[] = {
668 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
669 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
670 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
671 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
672 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
673 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
674 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
675 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
676 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
677 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS};
679 static const unsigned IntegerVecReduceOps[] = {
680 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
681 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
682 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
684 static const unsigned FloatingPointVecReduceOps[] = {
685 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
686 ISD::VECREDUCE_FMAX};
688 if (!Subtarget.is64Bit()) {
689 // We must custom-lower certain vXi64 operations on RV32 due to the vector
690 // element type being illegal.
691 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
692 MVT::i64, Custom);
694 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
696 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
697 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
698 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
699 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
700 MVT::i64, Custom);
703 for (MVT VT : BoolVecVTs) {
704 if (!isTypeLegal(VT))
705 continue;
707 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
709 // Mask VTs are custom-expanded into a series of standard nodes
710 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
711 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
712 ISD::SCALAR_TO_VECTOR},
713 VT, Custom);
715 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
716 Custom);
718 setOperationAction(ISD::SELECT, VT, Custom);
719 setOperationAction(
720 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
721 Expand);
723 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
725 setOperationAction(
726 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
727 Custom);
729 setOperationAction(
730 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
731 Custom);
733 // RVV has native int->float & float->int conversions where the
734 // element type sizes are within one power-of-two of each other. Any
735 // wider distances between type sizes have to be lowered as sequences
736 // which progressively narrow the gap in stages.
737 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
738 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
739 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
740 ISD::STRICT_FP_TO_UINT},
741 VT, Custom);
742 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
743 Custom);
745 // Expand all extending loads to types larger than this, and truncating
746 // stores from types larger than this.
747 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
748 setTruncStoreAction(OtherVT, VT, Expand);
749 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
750 VT, Expand);
753 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
754 ISD::VP_TRUNCATE, ISD::VP_SETCC},
755 VT, Custom);
757 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
758 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
760 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
762 setOperationPromotedToType(
763 ISD::VECTOR_SPLICE, VT,
764 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
767 for (MVT VT : IntVecVTs) {
768 if (!isTypeLegal(VT))
769 continue;
771 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
772 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
774 // Vectors implement MULHS/MULHU.
775 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
777 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
778 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
779 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
781 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
782 Legal);
784 // Custom-lower extensions and truncations from/to mask types.
785 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
786 VT, Custom);
788 // RVV has native int->float & float->int conversions where the
789 // element type sizes are within one power-of-two of each other. Any
790 // wider distances between type sizes have to be lowered as sequences
791 // which progressively narrow the gap in stages.
792 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
793 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
794 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
795 ISD::STRICT_FP_TO_UINT},
796 VT, Custom);
797 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
798 Custom);
799 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
800 setOperationAction(
801 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
803 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
804 // nodes which truncate by one power of two at a time.
805 setOperationAction(ISD::TRUNCATE, VT, Custom);
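// As a concrete sketch of the comment above: a truncate such as
//   nxv4i32 -> nxv4i8
// is emitted as a chain of RISCVISD::TRUNCATE_VECTOR_VL nodes that each halve
// the element width, i.e. nxv4i32 -> nxv4i16 -> nxv4i8, rather than as a
// single step.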
807 // Custom-lower insert/extract operations to simplify patterns.
808 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
809 Custom);
811 // Custom-lower reduction operations to set up the corresponding custom
812 // nodes' operands.
813 setOperationAction(IntegerVecReduceOps, VT, Custom);
815 setOperationAction(IntegerVPOps, VT, Custom);
817 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
819 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
820 VT, Custom);
822 setOperationAction(
823 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
824 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
825 VT, Custom);
827 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
828 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
829 VT, Custom);
831 setOperationAction(ISD::SELECT, VT, Custom);
832 setOperationAction(ISD::SELECT_CC, VT, Expand);
834 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
836 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
837 setTruncStoreAction(VT, OtherVT, Expand);
838 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
839 VT, Expand);
842 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
843 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
845 // Splice
846 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
848 if (Subtarget.hasStdExtZvkb()) {
849 setOperationAction(ISD::BSWAP, VT, Legal);
850 setOperationAction(ISD::VP_BSWAP, VT, Custom);
851 } else {
852 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
853 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
856 if (Subtarget.hasStdExtZvbb()) {
857 setOperationAction(ISD::BITREVERSE, VT, Legal);
858 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
859 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
860 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
861 VT, Custom);
862 } else {
863 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
864 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
865 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
866 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
867 VT, Expand);
869 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
870 // the range of f32.
871 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
872 if (isTypeLegal(FloatVT)) {
873 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
874 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
875 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
876 VT, Custom);
881 // Expand various CCs to best match the RVV ISA, which natively supports UNE
882 // but no other unordered comparisons, and supports all ordered comparisons
883 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
884 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
885 // and we pattern-match those back to the "original", swapping operands once
886 // more. This way we catch both operations and both "vf" and "fv" forms with
887 // fewer patterns.
888 static const ISD::CondCode VFPCCToExpand[] = {
889 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
890 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
891 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
892 };
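// As a sketch of the strategy described above: SETOGT is listed even though
// RVV could express it, so (setogt x, y) is first rewritten to the swapped
// form (setolt y, x) and then matched back during instruction selection,
// letting one set of patterns cover both the vector-vector and the
// vector-scalar ("vf"/"fv") operand orders.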
894 // TODO: support more ops.
895 static const unsigned ZvfhminPromoteOps[] = {
896 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
897 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
898 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
899 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
900 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
901 ISD::FMINIMUM};
903 // TODO: support more vp ops.
904 static const unsigned ZvfhminPromoteVPOps[] = {
905 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
906 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
907 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
908 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
909 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
910 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
911 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
912 ISD::VP_FNEARBYINT, ISD::VP_SETCC};
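// Rough sketch of how the promotion lists above are used: on Zvfhmin-only
// configurations an f16 vector operation such as
//   %r = fadd <vscale x 2 x half> %a, %b
// is promoted to <vscale x 2 x float>, computed there, and truncated back to
// half. nxv32f16 is handled separately below because nxv32f32 is not a legal
// type and the operation must be split instead.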
914 // Sets common operation actions on RVV floating-point vector types.
915 const auto SetCommonVFPActions = [&](MVT VT) {
916 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
917 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
918 // sizes are within one power-of-two of each other. Therefore conversions
919 // between vXf16 and vXf64 must be lowered as sequences which convert via
920 // vXf32.
921 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
922 // Custom-lower insert/extract operations to simplify patterns.
923 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
924 Custom);
925 // Expand various condition codes (explained above).
926 setCondCodeAction(VFPCCToExpand, VT, Expand);
928 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
929 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
931 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
932 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
933 ISD::IS_FPCLASS},
934 VT, Custom);
936 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
938 // Expand FP operations that need libcalls.
939 setOperationAction(ISD::FREM, VT, Expand);
940 setOperationAction(ISD::FPOW, VT, Expand);
941 setOperationAction(ISD::FCOS, VT, Expand);
942 setOperationAction(ISD::FSIN, VT, Expand);
943 setOperationAction(ISD::FSINCOS, VT, Expand);
944 setOperationAction(ISD::FEXP, VT, Expand);
945 setOperationAction(ISD::FEXP2, VT, Expand);
946 setOperationAction(ISD::FEXP10, VT, Expand);
947 setOperationAction(ISD::FLOG, VT, Expand);
948 setOperationAction(ISD::FLOG2, VT, Expand);
949 setOperationAction(ISD::FLOG10, VT, Expand);
951 setOperationAction(ISD::FCOPYSIGN, VT, Legal);
953 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
955 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
956 VT, Custom);
958 setOperationAction(
959 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
960 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
961 VT, Custom);
963 setOperationAction(ISD::SELECT, VT, Custom);
964 setOperationAction(ISD::SELECT_CC, VT, Expand);
966 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
967 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
968 VT, Custom);
970 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
971 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
973 setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
975 setOperationAction(FloatingPointVPOps, VT, Custom);
977 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
978 Custom);
979 setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
980 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
981 VT, Legal);
982 setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
983 ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
984 ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
985 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
986 VT, Custom);
989 // Sets common extload/truncstore actions on RVV floating-point vector
990 // types.
991 const auto SetCommonVFPExtLoadTruncStoreActions =
992 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
993 for (auto SmallVT : SmallerVTs) {
994 setTruncStoreAction(VT, SmallVT, Expand);
995 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
999 if (Subtarget.hasVInstructionsF16()) {
1000 for (MVT VT : F16VecVTs) {
1001 if (!isTypeLegal(VT))
1002 continue;
1003 SetCommonVFPActions(VT);
1005 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1006 for (MVT VT : F16VecVTs) {
1007 if (!isTypeLegal(VT))
1008 continue;
1009 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1010 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1011 Custom);
1012 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1013 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1014 Custom);
1015 setOperationAction(ISD::SELECT_CC, VT, Expand);
1016 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1017 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1018 VT, Custom);
1019 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1020 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1021 VT, Custom);
1022 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1023 // load/store
1024 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1026 // Custom split nxv32f16 since nxv32f32 is not legal.
1027 if (VT == MVT::nxv32f16) {
1028 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1029 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1030 continue;
1032 // Add more promote ops.
1033 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1034 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1035 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1039 if (Subtarget.hasVInstructionsF32()) {
1040 for (MVT VT : F32VecVTs) {
1041 if (!isTypeLegal(VT))
1042 continue;
1043 SetCommonVFPActions(VT);
1044 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1048 if (Subtarget.hasVInstructionsF64()) {
1049 for (MVT VT : F64VecVTs) {
1050 if (!isTypeLegal(VT))
1051 continue;
1052 SetCommonVFPActions(VT);
1053 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1054 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1058 if (Subtarget.useRVVForFixedLengthVectors()) {
1059 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1060 if (!useRVVForFixedLengthVectorVT(VT))
1061 continue;
1063 // By default everything must be expanded.
1064 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1065 setOperationAction(Op, VT, Expand);
1066 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1067 setTruncStoreAction(VT, OtherVT, Expand);
1068 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
1069 OtherVT, VT, Expand);
1072 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1073 // expansion to a build_vector of 0s.
1074 setOperationAction(ISD::UNDEF, VT, Custom);
1076 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1077 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1078 Custom);
1080 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
1081 Custom);
1083 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1084 VT, Custom);
1086 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1088 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1090 setOperationAction(ISD::SETCC, VT, Custom);
1092 setOperationAction(ISD::SELECT, VT, Custom);
1094 setOperationAction(ISD::TRUNCATE, VT, Custom);
1096 setOperationAction(ISD::BITCAST, VT, Custom);
1098 setOperationAction(
1099 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1100 Custom);
1102 setOperationAction(
1103 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1104 Custom);
1106 setOperationAction(
1107 {
1108 ISD::SINT_TO_FP,
1109 ISD::UINT_TO_FP,
1110 ISD::FP_TO_SINT,
1111 ISD::FP_TO_UINT,
1112 ISD::STRICT_SINT_TO_FP,
1113 ISD::STRICT_UINT_TO_FP,
1114 ISD::STRICT_FP_TO_SINT,
1115 ISD::STRICT_FP_TO_UINT,
1116 },
1117 VT, Custom);
1118 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1119 Custom);
1121 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1123 // Operations below are different between masks and other vectors.
1124 if (VT.getVectorElementType() == MVT::i1) {
1125 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1126 ISD::OR, ISD::XOR},
1127 VT, Custom);
1129 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1130 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1131 VT, Custom);
1132 continue;
1135 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1136 // it before type legalization for i64 vectors on RV32. It will then be
1137 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1138 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1139 // improvements first.
1140 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1141 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1142 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1145 setOperationAction(
1146 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1148 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1149 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1150 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1151 ISD::VP_SCATTER},
1152 VT, Custom);
1154 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1155 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1156 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1157 VT, Custom);
1159 setOperationAction(
1160 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1162 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1163 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1164 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1166 setOperationAction(
1167 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
1168 Custom);
1170 setOperationAction(ISD::VSELECT, VT, Custom);
1171 setOperationAction(ISD::SELECT_CC, VT, Expand);
1173 setOperationAction(
1174 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1176 // Custom-lower reduction operations to set up the corresponding custom
1177 // nodes' operands.
1178 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1179 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1180 ISD::VECREDUCE_UMIN},
1181 VT, Custom);
1183 setOperationAction(IntegerVPOps, VT, Custom);
1185 if (Subtarget.hasStdExtZvkb())
1186 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1188 if (Subtarget.hasStdExtZvbb()) {
1189 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1190 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1191 VT, Custom);
1192 } else {
1193 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
1194 // the range of f32.
1195 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1196 if (isTypeLegal(FloatVT))
1197 setOperationAction(
1198 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1199 Custom);
1203 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1204 // There are no extending loads or truncating stores.
1205 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1206 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1207 setTruncStoreAction(VT, InnerVT, Expand);
1210 if (!useRVVForFixedLengthVectorVT(VT))
1211 continue;
1213 // By default everything must be expanded.
1214 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1215 setOperationAction(Op, VT, Expand);
1217 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1218 // expansion to a build_vector of 0s.
1219 setOperationAction(ISD::UNDEF, VT, Custom);
1221 if (VT.getVectorElementType() == MVT::f16 &&
1222 !Subtarget.hasVInstructionsF16()) {
1223 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1224 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1225 Custom);
1226 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1227 setOperationAction(
1228 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1229 Custom);
1230 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1231 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1232 VT, Custom);
1233 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1234 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1235 VT, Custom);
1236 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1237 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1238 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1239 // Don't promote f16 vector operations to f32 if f32 vector type is
1240 // not legal.
1241 // TODO: could split the f16 vector into two vectors and do promotion.
1242 if (!isTypeLegal(F32VecVT))
1243 continue;
1244 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1245 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1246 continue;
1249 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1250 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1251 Custom);
1253 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1254 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1255 ISD::EXTRACT_VECTOR_ELT},
1256 VT, Custom);
1258 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1259 ISD::MGATHER, ISD::MSCATTER},
1260 VT, Custom);
1262 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1263 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1264 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1265 ISD::VP_SCATTER},
1266 VT, Custom);
1268 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1269 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1270 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1271 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1272 VT, Custom);
1274 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1276 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1277 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1278 VT, Custom);
1280 setCondCodeAction(VFPCCToExpand, VT, Expand);
1282 setOperationAction(ISD::SETCC, VT, Custom);
1283 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1284 setOperationAction(ISD::SELECT_CC, VT, Expand);
1286 setOperationAction(ISD::BITCAST, VT, Custom);
1288 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1290 setOperationAction(FloatingPointVPOps, VT, Custom);
1292 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1293 Custom);
1294 setOperationAction(
1295 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1296 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1297 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1298 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1299 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1300 VT, Custom);
1303 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1304 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1305 Custom);
1306 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1307 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1308 if (Subtarget.hasStdExtFOrZfinx())
1309 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1310 if (Subtarget.hasStdExtDOrZdinx())
1311 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1315 if (Subtarget.hasStdExtA()) {
1316 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1317 if (RV64LegalI32 && Subtarget.is64Bit())
1318 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1321 if (Subtarget.hasForcedAtomics()) {
1322 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1323 setOperationAction(
1324 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1325 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1326 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1327 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1328 XLenVT, LibCall);
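// Illustrative example (assuming the usual __sync libcall names): with
// +forced-atomics an operation like
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
// becomes a call to __sync_fetch_and_add_4 rather than an LR/SC or AMO
// sequence, since the A extension is unavailable in that configuration.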
1331 if (Subtarget.hasVendorXTHeadMemIdx()) {
1332 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1333 ++im) {
1334 setIndexedLoadAction(im, MVT::i8, Legal);
1335 setIndexedStoreAction(im, MVT::i8, Legal);
1336 setIndexedLoadAction(im, MVT::i16, Legal);
1337 setIndexedStoreAction(im, MVT::i16, Legal);
1338 setIndexedLoadAction(im, MVT::i32, Legal);
1339 setIndexedStoreAction(im, MVT::i32, Legal);
1341 if (Subtarget.is64Bit()) {
1342 setIndexedLoadAction(im, MVT::i64, Legal);
1343 setIndexedStoreAction(im, MVT::i64, Legal);
1348 // Function alignments.
1349 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1350 setMinFunctionAlignment(FunctionAlignment);
1351 // Set preferred alignments.
1352 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1353 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1355 setMinimumJumpTableEntries(5);
1357 // Jumps are expensive, compared to logic
1358 setJumpIsExpensive();
1360 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1361 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
1362 ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1363 if (Subtarget.is64Bit())
1364 setTargetDAGCombine(ISD::SRA);
1366 if (Subtarget.hasStdExtFOrZfinx())
1367 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1369 if (Subtarget.hasStdExtZbb())
1370 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1372 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1373 setTargetDAGCombine(ISD::TRUNCATE);
1375 if (Subtarget.hasStdExtZbkb())
1376 setTargetDAGCombine(ISD::BITREVERSE);
1377 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1378 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1379 if (Subtarget.hasStdExtFOrZfinx())
1380 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1381 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1382 if (Subtarget.hasVInstructions())
1383 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1384 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1385 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1386 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
1387 if (Subtarget.hasVendorXTHeadMemPair())
1388 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1389 if (Subtarget.useRVVForFixedLengthVectors())
1390 setTargetDAGCombine(ISD::BITCAST);
1392 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1393 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1395 // Disable strict node mutation.
1396 IsStrictFPEnabled = true;
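// A minimal sketch (not upstream code) of how the tables built by this
// constructor are consumed later, assuming a SelectionDAG context "DAG":
//
//   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
//   if (TLI.getOperationAction(ISD::MUL, MVT::i64) == TargetLowering::Custom)
//     ; // e.g. RV32 with M/Zmmul routes i64 MUL through LowerOperation
//
// This only illustrates the effect of the setOperationAction calls above.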
1399 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1400 LLVMContext &Context,
1401 EVT VT) const {
1402 if (!VT.isVector())
1403 return getPointerTy(DL);
1404 if (Subtarget.hasVInstructions() &&
1405 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1406 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1407 return VT.changeVectorElementTypeToInteger();
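// For example: a scalar compare gets the pointer-sized integer as its result
// type (i64 on RV64), while with RVV enabled a compare of <vscale x 4 x i32>
// produces a <vscale x 4 x i1> mask type, per the logic above.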
1410 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1411 return Subtarget.getXLenVT();
1414 // Return false if we can lower get_vector_length to a vsetvli intrinsic.
1415 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1416 unsigned VF,
1417 bool IsScalable) const {
1418 if (!Subtarget.hasVInstructions())
1419 return true;
1421 if (!IsScalable)
1422 return true;
1424 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1425 return true;
1427 // Don't allow VF=1 if those types aren't legal.
1428 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1429 return true;
1431 // VLEN=32 support is incomplete.
1432 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1433 return true;
1435 // The maximum VF is for the smallest element width with LMUL=8.
1436 // VF must be a power of 2.
1437 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1438 return VF > MaxVF || !isPowerOf2_32(VF);
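// Worked example (assuming the upstream value RISCV::RVVBitsPerBlock == 64):
// MaxVF = (64 / 8) * 8 = 64. A scalable VF of 32 with an i32 or XLen trip
// count therefore returns false here (provided the earlier checks pass) and
// get_vector_length can be lowered to vsetvli, while VF = 128 or any
// non-power-of-2 VF returns true and takes the generic expansion.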
1441 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1442 const CallInst &I,
1443 MachineFunction &MF,
1444 unsigned Intrinsic) const {
1445 auto &DL = I.getModule()->getDataLayout();
1447 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1448 bool IsUnitStrided) {
1449 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1450 Info.ptrVal = I.getArgOperand(PtrOp);
1451 Type *MemTy;
1452 if (IsStore) {
1453 // Store value is the first operand.
1454 MemTy = I.getArgOperand(0)->getType();
1455 } else {
1456 // Use the return type. If it's a segment load, the return type is a struct.
1457 MemTy = I.getType();
1458 if (MemTy->isStructTy())
1459 MemTy = MemTy->getStructElementType(0);
1461 if (!IsUnitStrided)
1462 MemTy = MemTy->getScalarType();
1464 Info.memVT = getValueType(DL, MemTy);
1465 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1466 Info.size = MemoryLocation::UnknownSize;
1467 Info.flags |=
1468 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1469 return true;
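// Sketch of the distinction made above: for a unit-strided access such as
// riscv_vle the full vector type is recorded as memVT, whereas strided or
// indexed forms (IsUnitStrided == false) record only the scalar element type,
// because consecutive elements need not be contiguous in memory; the
// alignment is derived from the element size in both cases.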
1472 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1473 Info.flags |= MachineMemOperand::MONonTemporal;
1475 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1476 switch (Intrinsic) {
1477 default:
1478 return false;
1479 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1480 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1481 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1482 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1483 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1484 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1485 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1486 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1487 case Intrinsic::riscv_masked_cmpxchg_i32:
1488 Info.opc = ISD::INTRINSIC_W_CHAIN;
1489 Info.memVT = MVT::i32;
1490 Info.ptrVal = I.getArgOperand(0);
1491 Info.offset = 0;
1492 Info.align = Align(4);
1493 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1494 MachineMemOperand::MOVolatile;
1495 return true;
1496 case Intrinsic::riscv_masked_strided_load:
1497 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1498 /*IsUnitStrided*/ false);
1499 case Intrinsic::riscv_masked_strided_store:
1500 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1501 /*IsUnitStrided*/ false);
1502 case Intrinsic::riscv_seg2_load:
1503 case Intrinsic::riscv_seg3_load:
1504 case Intrinsic::riscv_seg4_load:
1505 case Intrinsic::riscv_seg5_load:
1506 case Intrinsic::riscv_seg6_load:
1507 case Intrinsic::riscv_seg7_load:
1508 case Intrinsic::riscv_seg8_load:
1509 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1510 /*IsUnitStrided*/ false);
1511 case Intrinsic::riscv_seg2_store:
1512 case Intrinsic::riscv_seg3_store:
1513 case Intrinsic::riscv_seg4_store:
1514 case Intrinsic::riscv_seg5_store:
1515 case Intrinsic::riscv_seg6_store:
1516 case Intrinsic::riscv_seg7_store:
1517 case Intrinsic::riscv_seg8_store:
1518 // Operands are (vec, ..., vec, ptr, vl)
1519 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1520 /*IsStore*/ true,
1521 /*IsUnitStrided*/ false);
1522 case Intrinsic::riscv_vle:
1523 case Intrinsic::riscv_vle_mask:
1524 case Intrinsic::riscv_vleff:
1525 case Intrinsic::riscv_vleff_mask:
1526 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1527 /*IsStore*/ false,
1528 /*IsUnitStrided*/ true);
1529 case Intrinsic::riscv_vse:
1530 case Intrinsic::riscv_vse_mask:
1531 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1532 /*IsStore*/ true,
1533 /*IsUnitStrided*/ true);
1534 case Intrinsic::riscv_vlse:
1535 case Intrinsic::riscv_vlse_mask:
1536 case Intrinsic::riscv_vloxei:
1537 case Intrinsic::riscv_vloxei_mask:
1538 case Intrinsic::riscv_vluxei:
1539 case Intrinsic::riscv_vluxei_mask:
1540 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1541 /*IsStore*/ false,
1542 /*IsUnitStrided*/ false);
1543 case Intrinsic::riscv_vsse:
1544 case Intrinsic::riscv_vsse_mask:
1545 case Intrinsic::riscv_vsoxei:
1546 case Intrinsic::riscv_vsoxei_mask:
1547 case Intrinsic::riscv_vsuxei:
1548 case Intrinsic::riscv_vsuxei_mask:
1549 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1550 /*IsStore*/ true,
1551 /*IsUnitStrided*/ false);
1552 case Intrinsic::riscv_vlseg2:
1553 case Intrinsic::riscv_vlseg3:
1554 case Intrinsic::riscv_vlseg4:
1555 case Intrinsic::riscv_vlseg5:
1556 case Intrinsic::riscv_vlseg6:
1557 case Intrinsic::riscv_vlseg7:
1558 case Intrinsic::riscv_vlseg8:
1559 case Intrinsic::riscv_vlseg2ff:
1560 case Intrinsic::riscv_vlseg3ff:
1561 case Intrinsic::riscv_vlseg4ff:
1562 case Intrinsic::riscv_vlseg5ff:
1563 case Intrinsic::riscv_vlseg6ff:
1564 case Intrinsic::riscv_vlseg7ff:
1565 case Intrinsic::riscv_vlseg8ff:
1566 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1567 /*IsStore*/ false,
1568 /*IsUnitStrided*/ false);
1569 case Intrinsic::riscv_vlseg2_mask:
1570 case Intrinsic::riscv_vlseg3_mask:
1571 case Intrinsic::riscv_vlseg4_mask:
1572 case Intrinsic::riscv_vlseg5_mask:
1573 case Intrinsic::riscv_vlseg6_mask:
1574 case Intrinsic::riscv_vlseg7_mask:
1575 case Intrinsic::riscv_vlseg8_mask:
1576 case Intrinsic::riscv_vlseg2ff_mask:
1577 case Intrinsic::riscv_vlseg3ff_mask:
1578 case Intrinsic::riscv_vlseg4ff_mask:
1579 case Intrinsic::riscv_vlseg5ff_mask:
1580 case Intrinsic::riscv_vlseg6ff_mask:
1581 case Intrinsic::riscv_vlseg7ff_mask:
1582 case Intrinsic::riscv_vlseg8ff_mask:
1583 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1584 /*IsStore*/ false,
1585 /*IsUnitStrided*/ false);
1586 case Intrinsic::riscv_vlsseg2:
1587 case Intrinsic::riscv_vlsseg3:
1588 case Intrinsic::riscv_vlsseg4:
1589 case Intrinsic::riscv_vlsseg5:
1590 case Intrinsic::riscv_vlsseg6:
1591 case Intrinsic::riscv_vlsseg7:
1592 case Intrinsic::riscv_vlsseg8:
1593 case Intrinsic::riscv_vloxseg2:
1594 case Intrinsic::riscv_vloxseg3:
1595 case Intrinsic::riscv_vloxseg4:
1596 case Intrinsic::riscv_vloxseg5:
1597 case Intrinsic::riscv_vloxseg6:
1598 case Intrinsic::riscv_vloxseg7:
1599 case Intrinsic::riscv_vloxseg8:
1600 case Intrinsic::riscv_vluxseg2:
1601 case Intrinsic::riscv_vluxseg3:
1602 case Intrinsic::riscv_vluxseg4:
1603 case Intrinsic::riscv_vluxseg5:
1604 case Intrinsic::riscv_vluxseg6:
1605 case Intrinsic::riscv_vluxseg7:
1606 case Intrinsic::riscv_vluxseg8:
1607 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1608 /*IsStore*/ false,
1609 /*IsUnitStrided*/ false);
1610 case Intrinsic::riscv_vlsseg2_mask:
1611 case Intrinsic::riscv_vlsseg3_mask:
1612 case Intrinsic::riscv_vlsseg4_mask:
1613 case Intrinsic::riscv_vlsseg5_mask:
1614 case Intrinsic::riscv_vlsseg6_mask:
1615 case Intrinsic::riscv_vlsseg7_mask:
1616 case Intrinsic::riscv_vlsseg8_mask:
1617 case Intrinsic::riscv_vloxseg2_mask:
1618 case Intrinsic::riscv_vloxseg3_mask:
1619 case Intrinsic::riscv_vloxseg4_mask:
1620 case Intrinsic::riscv_vloxseg5_mask:
1621 case Intrinsic::riscv_vloxseg6_mask:
1622 case Intrinsic::riscv_vloxseg7_mask:
1623 case Intrinsic::riscv_vloxseg8_mask:
1624 case Intrinsic::riscv_vluxseg2_mask:
1625 case Intrinsic::riscv_vluxseg3_mask:
1626 case Intrinsic::riscv_vluxseg4_mask:
1627 case Intrinsic::riscv_vluxseg5_mask:
1628 case Intrinsic::riscv_vluxseg6_mask:
1629 case Intrinsic::riscv_vluxseg7_mask:
1630 case Intrinsic::riscv_vluxseg8_mask:
1631 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1632 /*IsStore*/ false,
1633 /*IsUnitStrided*/ false);
1634 case Intrinsic::riscv_vsseg2:
1635 case Intrinsic::riscv_vsseg3:
1636 case Intrinsic::riscv_vsseg4:
1637 case Intrinsic::riscv_vsseg5:
1638 case Intrinsic::riscv_vsseg6:
1639 case Intrinsic::riscv_vsseg7:
1640 case Intrinsic::riscv_vsseg8:
1641 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1642 /*IsStore*/ true,
1643 /*IsUnitStrided*/ false);
1644 case Intrinsic::riscv_vsseg2_mask:
1645 case Intrinsic::riscv_vsseg3_mask:
1646 case Intrinsic::riscv_vsseg4_mask:
1647 case Intrinsic::riscv_vsseg5_mask:
1648 case Intrinsic::riscv_vsseg6_mask:
1649 case Intrinsic::riscv_vsseg7_mask:
1650 case Intrinsic::riscv_vsseg8_mask:
1651 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1652 /*IsStore*/ true,
1653 /*IsUnitStrided*/ false);
1654 case Intrinsic::riscv_vssseg2:
1655 case Intrinsic::riscv_vssseg3:
1656 case Intrinsic::riscv_vssseg4:
1657 case Intrinsic::riscv_vssseg5:
1658 case Intrinsic::riscv_vssseg6:
1659 case Intrinsic::riscv_vssseg7:
1660 case Intrinsic::riscv_vssseg8:
1661 case Intrinsic::riscv_vsoxseg2:
1662 case Intrinsic::riscv_vsoxseg3:
1663 case Intrinsic::riscv_vsoxseg4:
1664 case Intrinsic::riscv_vsoxseg5:
1665 case Intrinsic::riscv_vsoxseg6:
1666 case Intrinsic::riscv_vsoxseg7:
1667 case Intrinsic::riscv_vsoxseg8:
1668 case Intrinsic::riscv_vsuxseg2:
1669 case Intrinsic::riscv_vsuxseg3:
1670 case Intrinsic::riscv_vsuxseg4:
1671 case Intrinsic::riscv_vsuxseg5:
1672 case Intrinsic::riscv_vsuxseg6:
1673 case Intrinsic::riscv_vsuxseg7:
1674 case Intrinsic::riscv_vsuxseg8:
1675 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1676 /*IsStore*/ true,
1677 /*IsUnitStrided*/ false);
1678 case Intrinsic::riscv_vssseg2_mask:
1679 case Intrinsic::riscv_vssseg3_mask:
1680 case Intrinsic::riscv_vssseg4_mask:
1681 case Intrinsic::riscv_vssseg5_mask:
1682 case Intrinsic::riscv_vssseg6_mask:
1683 case Intrinsic::riscv_vssseg7_mask:
1684 case Intrinsic::riscv_vssseg8_mask:
1685 case Intrinsic::riscv_vsoxseg2_mask:
1686 case Intrinsic::riscv_vsoxseg3_mask:
1687 case Intrinsic::riscv_vsoxseg4_mask:
1688 case Intrinsic::riscv_vsoxseg5_mask:
1689 case Intrinsic::riscv_vsoxseg6_mask:
1690 case Intrinsic::riscv_vsoxseg7_mask:
1691 case Intrinsic::riscv_vsoxseg8_mask:
1692 case Intrinsic::riscv_vsuxseg2_mask:
1693 case Intrinsic::riscv_vsuxseg3_mask:
1694 case Intrinsic::riscv_vsuxseg4_mask:
1695 case Intrinsic::riscv_vsuxseg5_mask:
1696 case Intrinsic::riscv_vsuxseg6_mask:
1697 case Intrinsic::riscv_vsuxseg7_mask:
1698 case Intrinsic::riscv_vsuxseg8_mask:
1699 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1700 /*IsStore*/ true,
1701 /*IsUnitStrided*/ false);
1705 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1706 const AddrMode &AM, Type *Ty,
1707 unsigned AS,
1708 Instruction *I) const {
1709 // No global is ever allowed as a base.
1710 if (AM.BaseGV)
1711 return false;
1713 // RVV instructions only support register addressing.
1714 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1715 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1717 // Require a 12-bit signed offset.
1718 if (!isInt<12>(AM.BaseOffs))
1719 return false;
1721 switch (AM.Scale) {
1722 case 0: // "r+i" or just "i", depending on HasBaseReg.
1723 break;
1724 case 1:
1725 if (!AM.HasBaseReg) // allow "r+i".
1726 break;
1727 return false; // disallow "r+r" or "r+r+i".
1728 default:
1729 return false;
1732 return true;
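// Illustrative summary of the cases accepted above: a base register plus a
// 12-bit signed offset (e.g. "lw a0, 2044(a1)") and a lone register or
// immediate are legal; reg+reg and any scaled-index form are rejected, and
// RVV vector accesses additionally require a plain register base with no
// offset.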
1735 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1736 return isInt<12>(Imm);
1739 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1740 return isInt<12>(Imm);
1743 // On RV32, 64-bit integers are split into their high and low parts and held
1744 // in two different registers, so the trunc is free since the low register can
1745 // just be used.
1746 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1747 // isTruncateFree?
1748 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1749 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1750 return false;
1751 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1752 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1753 return (SrcBits == 64 && DestBits == 32);
1756 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1757 // We consider i64->i32 free on RV64 since we have good selection of W
1758 // instructions that make promoting operations back to i64 free in many cases.
1759 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1760 !DstVT.isInteger())
1761 return false;
1762 unsigned SrcBits = SrcVT.getSizeInBits();
1763 unsigned DestBits = DstVT.getSizeInBits();
1764 return (SrcBits == 64 && DestBits == 32);
1767 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1768 // Zexts are free if they can be combined with a load.
1769 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1770 // poorly with type legalization of compares preferring sext.
1771 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1772 EVT MemVT = LD->getMemoryVT();
1773 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1774 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1775 LD->getExtensionType() == ISD::ZEXTLOAD))
1776 return true;
1779 return TargetLowering::isZExtFree(Val, VT2);
1782 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1783 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1786 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1787 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1790 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1791 return Subtarget.hasStdExtZbb();
1794 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1795 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1798 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1799 const Instruction &AndI) const {
1800 // We expect to be able to match a bit extraction instruction if the Zbs
1801 // extension is supported and the mask is a power of two. However, we
1802 // conservatively return false if the mask would fit in an ANDI instruction,
1803 // on the basis that it's possible the sinking+duplication of the AND in
1804 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1805 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1806 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1807 return false;
1808 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1809 if (!Mask)
1810 return false;
1811 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
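// Example (illustrative): with Zbs, '(x & 0x800) == 0' benefits since 0x800
// is a power of two that does not fit an ANDI immediate, so it can become
// BEXTI+SEQZ; '(x & 0x400) == 0' is left alone because ANDI already handles
// that mask.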
1814 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1815 EVT VT = Y.getValueType();
1817 // FIXME: Support vectors once we have tests.
1818 if (VT.isVector())
1819 return false;
1821 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1822 !isa<ConstantSDNode>(Y);
1825 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1826 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1827 if (Subtarget.hasStdExtZbs())
1828 return X.getValueType().isScalarInteger();
1829 auto *C = dyn_cast<ConstantSDNode>(Y);
2830 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1831 if (Subtarget.hasVendorXTHeadBs())
1832 return C != nullptr;
1833 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1834 return C && C->getAPIntValue().ule(10);
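// Example (illustrative): testing bit 20 of a scalar is considered cheap with
// Zbs (BEXT/BEXTI plus SEQZ/SNEZ) or, for a constant bit index, with XTheadBs
// (TH.TST); otherwise only bit positions up to 10 qualify, since those masks
// still fit in an ANDI immediate.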
1837 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1838 EVT VT) const {
1839 // Only enable for rvv.
1840 if (!VT.isVector() || !Subtarget.hasVInstructions())
1841 return false;
1843 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1844 return false;
1846 return true;
1849 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1850 Type *Ty) const {
1851 assert(Ty->isIntegerTy());
1853 unsigned BitSize = Ty->getIntegerBitWidth();
1854 if (BitSize > Subtarget.getXLen())
1855 return false;
1857 // Fast path, assume 32-bit immediates are cheap.
1858 int64_t Val = Imm.getSExtValue();
1859 if (isInt<32>(Val))
1860 return true;
1862 // A constant pool entry may be more aligned than the load we're trying to
1863 // replace. If we don't support unaligned scalar mem, prefer the constant
1864 // pool.
1865 // TODO: Can the caller pass down the alignment?
1866 if (!Subtarget.enableUnalignedScalarMem())
1867 return true;
1869 // Prefer to keep the load if it would require many instructions.
1870 // This uses the same threshold we use for constant pools but doesn't
1871 // check useConstantPoolForLargeInts.
1872 // TODO: Should we keep the load only when we're definitely going to emit a
1873 // constant pool?
1875 RISCVMatInt::InstSeq Seq =
1876 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1877 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1880 bool RISCVTargetLowering::
1881 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1882 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1883 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1884 SelectionDAG &DAG) const {
1885 // One interesting pattern that we'd want to form is 'bit extract':
1886 // ((1 >> Y) & 1) ==/!= 0
1887 // But we also need to be careful not to try to reverse that fold.
1889 // Is this '((1 >> Y) & 1)'?
1890 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1891 return false; // Keep the 'bit extract' pattern.
1893 // Will this be '((1 >> Y) & 1)' after the transform?
1894 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1895 return true; // Do form the 'bit extract' pattern.
1897 // If 'X' is a constant, and we transform, then we will immediately
1898 // try to undo the fold, thus causing endless combine loop.
1899 // So only do the transform if X is not a constant. This matches the default
1900 // implementation of this function.
1901 return !XC;
1904 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1905 switch (Opcode) {
1906 case Instruction::Add:
1907 case Instruction::Sub:
1908 case Instruction::Mul:
1909 case Instruction::And:
1910 case Instruction::Or:
1911 case Instruction::Xor:
1912 case Instruction::FAdd:
1913 case Instruction::FSub:
1914 case Instruction::FMul:
1915 case Instruction::FDiv:
1916 case Instruction::ICmp:
1917 case Instruction::FCmp:
1918 return true;
1919 case Instruction::Shl:
1920 case Instruction::LShr:
1921 case Instruction::AShr:
1922 case Instruction::UDiv:
1923 case Instruction::SDiv:
1924 case Instruction::URem:
1925 case Instruction::SRem:
1926 return Operand == 1;
1927 default:
1928 return false;
1933 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1934 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1935 return false;
1937 if (canSplatOperand(I->getOpcode(), Operand))
1938 return true;
1940 auto *II = dyn_cast<IntrinsicInst>(I);
1941 if (!II)
1942 return false;
1944 switch (II->getIntrinsicID()) {
1945 case Intrinsic::fma:
1946 case Intrinsic::vp_fma:
1947 return Operand == 0 || Operand == 1;
1948 case Intrinsic::vp_shl:
1949 case Intrinsic::vp_lshr:
1950 case Intrinsic::vp_ashr:
1951 case Intrinsic::vp_udiv:
1952 case Intrinsic::vp_sdiv:
1953 case Intrinsic::vp_urem:
1954 case Intrinsic::vp_srem:
1955 return Operand == 1;
1956 // These intrinsics are commutative.
1957 case Intrinsic::vp_add:
1958 case Intrinsic::vp_mul:
1959 case Intrinsic::vp_and:
1960 case Intrinsic::vp_or:
1961 case Intrinsic::vp_xor:
1962 case Intrinsic::vp_fadd:
1963 case Intrinsic::vp_fmul:
1964 case Intrinsic::vp_icmp:
1965 case Intrinsic::vp_fcmp:
1966 // These intrinsics have 'vr' versions.
1967 case Intrinsic::vp_sub:
1968 case Intrinsic::vp_fsub:
1969 case Intrinsic::vp_fdiv:
1970 return Operand == 0 || Operand == 1;
1971 default:
1972 return false;
1976 /// Check if sinking \p I's operands to I's basic block is profitable, because
1977 /// the operands can be folded into a target instruction, e.g.
1978 /// splats of scalars can fold into vector instructions.
1979 bool RISCVTargetLowering::shouldSinkOperands(
1980 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1981 using namespace llvm::PatternMatch;
1983 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1984 return false;
1986 for (auto OpIdx : enumerate(I->operands())) {
1987 if (!canSplatOperand(I, OpIdx.index()))
1988 continue;
1990 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1991 // Make sure we are not already sinking this operand
1992 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1993 continue;
1995 // We are looking for a splat that can be sunk.
1996 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1997 m_Undef(), m_ZeroMask())))
1998 continue;
2000 // Don't sink i1 splats.
2001 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2002 continue;
2004 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2005 // and vector registers
2006 for (Use &U : Op->uses()) {
2007 Instruction *Insn = cast<Instruction>(U.getUser());
2008 if (!canSplatOperand(Insn, U.getOperandNo()))
2009 return false;
2012 Ops.push_back(&Op->getOperandUse(0));
2013 Ops.push_back(&OpIdx.value());
2015 return true;
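// Sketch of a sinkable operand (illustrative IR, names are made up): the
// splat feeding the add matches the shuffle-of-insertelement pattern above
// and is not an i1 splat, so the shuffle's use of the insertelement and the
// add's use of the shuffle are both recorded in Ops:
//   %head = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <vscale x 4 x i32> %head,
//                          <vscale x 4 x i32> poison,
//                          <vscale x 4 x i32> zeroinitializer
//   %r = add <vscale x 4 x i32> %v, %splat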
2018 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2019 unsigned Opc = VecOp.getOpcode();
2021 // Assume target opcodes can't be scalarized.
2022 // TODO - do we have any exceptions?
2023 if (Opc >= ISD::BUILTIN_OP_END)
2024 return false;
2026 // If the vector op is not supported, try to convert to scalar.
2027 EVT VecVT = VecOp.getValueType();
2028 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2029 return true;
2031 // If the vector op is supported, but the scalar op is not, the transform may
2032 // not be worthwhile.
2033 // Permit a vector binary operation to be converted to a scalar binary
2034 // operation that is custom lowered with an illegal type.
2035 EVT ScalarVT = VecVT.getScalarType();
2036 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2037 isOperationCustom(Opc, ScalarVT);
2040 bool RISCVTargetLowering::isOffsetFoldingLegal(
2041 const GlobalAddressSDNode *GA) const {
2042 // In order to maximise the opportunity for common subexpression elimination,
2043 // keep a separate ADD node for the global address offset instead of folding
2044 // it in the global address node. Later peephole optimisations may choose to
2045 // fold it back in when profitable.
2046 return false;
2049 // Return one of the following:
2050 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2051 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2052 // positive counterpart, which will be materialized from the first returned
2053 // element. The second returned element indicates that the result should be
2054 // followed by an FNEG.
2055 // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2056 std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2057 EVT VT) const {
2058 if (!Subtarget.hasStdExtZfa())
2059 return std::make_pair(-1, false);
2061 bool IsSupportedVT = false;
2062 if (VT == MVT::f16) {
2063 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2064 } else if (VT == MVT::f32) {
2065 IsSupportedVT = true;
2066 } else if (VT == MVT::f64) {
2067 assert(Subtarget.hasStdExtD() && "Expect D extension");
2068 IsSupportedVT = true;
2071 if (!IsSupportedVT)
2072 return std::make_pair(-1, false);
2074 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2075 if (Index < 0 && Imm.isNegative())
2076 // Try the combination of its positive counterpart + FNEG.
2077 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2078 else
2079 return std::make_pair(Index, false);
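// Example (illustrative, assuming 2.0 is FLI-encodable while -2.0 is not):
// for f32 with Zfa, Imm = 2.0 returns {index-of-2.0, false}; Imm = -2.0 has
// no direct encoding, so {index-of-2.0, true} is returned and the caller is
// expected to emit an FNEG after the FLI.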
2082 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2083 bool ForCodeSize) const {
2084 bool IsLegalVT = false;
2085 if (VT == MVT::f16)
2086 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
2087 else if (VT == MVT::f32)
2088 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2089 else if (VT == MVT::f64)
2090 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2091 else if (VT == MVT::bf16)
2092 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2094 if (!IsLegalVT)
2095 return false;
2097 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2098 return true;
2100 // Cannot create a 64 bit floating-point immediate value for rv32.
2101 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2102 // td can handle +0.0 or -0.0 already.
2103 // -0.0 can be created by fmv + fneg.
2104 return Imm.isZero();
2107 // Special case: fmv + fneg
2108 if (Imm.isNegZero())
2109 return true;
2111 // Building an integer and then converting requires a fmv at the end of
2112 // the integer sequence.
2113 const int Cost =
2114 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2115 Subtarget.getFeatureBits());
2116 return Cost <= FPImmCost;
2119 // TODO: This is very conservative.
2120 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2121 unsigned Index) const {
2122 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2123 return false;
2125 // Only support extracting a fixed vector from a fixed vector for now.
2126 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2127 return false;
2129 unsigned ResElts = ResVT.getVectorNumElements();
2130 unsigned SrcElts = SrcVT.getVectorNumElements();
2132 // Conservatively only handle extracting half of a vector.
2133 // TODO: Relax this.
2134 if ((ResElts * 2) != SrcElts)
2135 return false;
2137 // The smallest type we can slide is i8.
2138 // TODO: We can extract index 0 from a mask vector without a slide.
2139 if (ResVT.getVectorElementType() == MVT::i1)
2140 return false;
2142 // A slide can support an arbitrary index, but we only treat vslidedown.vi as
2143 // cheap.
2144 if (Index >= 32)
2145 return false;
2147 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2148 // the upper half of a vector until we have more test coverage.
2149 return Index == 0 || Index == ResElts;
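// Example (illustrative): extracting the upper half of a fixed v8i32 as
// v4i32 (Index == 4 == ResElts) is reported cheap since it is a single
// vslidedown.vi, while extracting elements 2..5 (Index == 2) is not, per the
// TODO above.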
2152 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2153 CallingConv::ID CC,
2154 EVT VT) const {
2155 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2156 // We might still end up using a GPR but that will be decided based on ABI.
2157 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2158 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
2159 return MVT::f32;
2161 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2163 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2164 return MVT::i64;
2166 return PartVT;
2169 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2170 CallingConv::ID CC,
2171 EVT VT) const {
2172 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2173 // We might still end up using a GPR but that will be decided based on ABI.
2174 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2175 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
2176 return 1;
2178 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2181 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2182 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2183 unsigned &NumIntermediates, MVT &RegisterVT) const {
2184 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2185 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2187 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2188 IntermediateVT = MVT::i64;
2190 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2191 RegisterVT = MVT::i64;
2193 return NumRegs;
2196 // Changes the condition code and swaps operands if necessary, so the SetCC
2197 // operation matches one of the comparisons supported directly by branches
2198 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2199 // with 1/-1.
2200 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2201 ISD::CondCode &CC, SelectionDAG &DAG) {
2202 // If this is a single bit test that can't be handled by ANDI, shift the
2203 // bit to be tested to the MSB and perform a signed compare with 0.
2204 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2205 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2206 isa<ConstantSDNode>(LHS.getOperand(1))) {
2207 uint64_t Mask = LHS.getConstantOperandVal(1);
2208 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2209 unsigned ShAmt = 0;
2210 if (isPowerOf2_64(Mask)) {
2211 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2212 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2213 } else {
2214 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2217 LHS = LHS.getOperand(0);
2218 if (ShAmt != 0)
2219 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2220 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2221 return;
2225 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2226 int64_t C = RHSC->getSExtValue();
2227 switch (CC) {
2228 default: break;
2229 case ISD::SETGT:
2230 // Convert X > -1 to X >= 0.
2231 if (C == -1) {
2232 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2233 CC = ISD::SETGE;
2234 return;
2236 break;
2237 case ISD::SETLT:
2238 // Convert X < 1 to 0 >= X.
2239 if (C == 1) {
2240 RHS = LHS;
2241 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2242 CC = ISD::SETGE;
2243 return;
2245 break;
2249 switch (CC) {
2250 default:
2251 break;
2252 case ISD::SETGT:
2253 case ISD::SETLE:
2254 case ISD::SETUGT:
2255 case ISD::SETULE:
2256 CC = ISD::getSetCCSwappedOperands(CC);
2257 std::swap(LHS, RHS);
2258 break;
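// Worked examples of the rewrites above (illustrative, assuming XLEN = 64):
//   (x & 0x800) == 0  ->  (x << 52) >= 0   ; bit 11 moved to the sign bit
//                                            because 0x800 does not fit ANDI
//   x > -1            ->  x >= 0
//   x > y             ->  y < x             ; SETGT swapped to SETLT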
2262 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2263 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2264 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2265 if (VT.getVectorElementType() == MVT::i1)
2266 KnownSize *= 8;
2268 switch (KnownSize) {
2269 default:
2270 llvm_unreachable("Invalid LMUL.");
2271 case 8:
2272 return RISCVII::VLMUL::LMUL_F8;
2273 case 16:
2274 return RISCVII::VLMUL::LMUL_F4;
2275 case 32:
2276 return RISCVII::VLMUL::LMUL_F2;
2277 case 64:
2278 return RISCVII::VLMUL::LMUL_1;
2279 case 128:
2280 return RISCVII::VLMUL::LMUL_2;
2281 case 256:
2282 return RISCVII::VLMUL::LMUL_4;
2283 case 512:
2284 return RISCVII::VLMUL::LMUL_8;
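// Example (illustrative): nxv4i32 has a known minimum size of 128 bits and
// maps to LMUL_2, while an nxv1i1 mask (1 bit, scaled by 8 above) maps to
// LMUL_F8.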
2288 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2289 switch (LMul) {
2290 default:
2291 llvm_unreachable("Invalid LMUL.");
2292 case RISCVII::VLMUL::LMUL_F8:
2293 case RISCVII::VLMUL::LMUL_F4:
2294 case RISCVII::VLMUL::LMUL_F2:
2295 case RISCVII::VLMUL::LMUL_1:
2296 return RISCV::VRRegClassID;
2297 case RISCVII::VLMUL::LMUL_2:
2298 return RISCV::VRM2RegClassID;
2299 case RISCVII::VLMUL::LMUL_4:
2300 return RISCV::VRM4RegClassID;
2301 case RISCVII::VLMUL::LMUL_8:
2302 return RISCV::VRM8RegClassID;
2306 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2307 RISCVII::VLMUL LMUL = getLMUL(VT);
2308 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2309 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2310 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2311 LMUL == RISCVII::VLMUL::LMUL_1) {
2312 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2313 "Unexpected subreg numbering");
2314 return RISCV::sub_vrm1_0 + Index;
2316 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2317 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2318 "Unexpected subreg numbering");
2319 return RISCV::sub_vrm2_0 + Index;
2321 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2322 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2323 "Unexpected subreg numbering");
2324 return RISCV::sub_vrm4_0 + Index;
2326 llvm_unreachable("Invalid vector type.");
2329 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2330 if (VT.getVectorElementType() == MVT::i1)
2331 return RISCV::VRRegClassID;
2332 return getRegClassIDForLMUL(getLMUL(VT));
2335 // Attempt to decompose a subvector insert/extract between VecVT and
2336 // SubVecVT via subregister indices. Returns the subregister index that
2337 // can perform the subvector insert/extract with the given element index, as
2338 // well as the index corresponding to any leftover subvectors that must be
2339 // further inserted/extracted within the register class for SubVecVT.
2340 std::pair<unsigned, unsigned>
2341 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2342 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2343 const RISCVRegisterInfo *TRI) {
2344 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2345 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2346 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2347 "Register classes not ordered");
2348 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2349 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2350 // Try to compose a subregister index that takes us from the incoming
2351 // LMUL>1 register class down to the outgoing one. At each step we halve
2352 // the LMUL:
2353 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2354 // Note that this is not guaranteed to find a subregister index, such as
2355 // when we are extracting from one VR type to another.
2356 unsigned SubRegIdx = RISCV::NoSubRegister;
2357 for (const unsigned RCID :
2358 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2359 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2360 VecVT = VecVT.getHalfNumVectorElementsVT();
2361 bool IsHi =
2362 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2363 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2364 getSubregIndexByMVT(VecVT, IsHi));
2365 if (IsHi)
2366 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2368 return {SubRegIdx, InsertExtractIdx};
2371 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2372 // stores for those types.
2373 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2374 return !Subtarget.useRVVForFixedLengthVectors() ||
2375 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2378 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2379 if (!ScalarTy.isSimple())
2380 return false;
2381 switch (ScalarTy.getSimpleVT().SimpleTy) {
2382 case MVT::iPTR:
2383 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2384 case MVT::i8:
2385 case MVT::i16:
2386 case MVT::i32:
2387 return true;
2388 case MVT::i64:
2389 return Subtarget.hasVInstructionsI64();
2390 case MVT::f16:
2391 return Subtarget.hasVInstructionsF16();
2392 case MVT::f32:
2393 return Subtarget.hasVInstructionsF32();
2394 case MVT::f64:
2395 return Subtarget.hasVInstructionsF64();
2396 default:
2397 return false;
2402 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2403 return NumRepeatedDivisors;
2406 static SDValue getVLOperand(SDValue Op) {
2407 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2408 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2409 "Unexpected opcode");
2410 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2411 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2412 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2413 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2414 if (!II)
2415 return SDValue();
2416 return Op.getOperand(II->VLOperand + 1 + HasChain);
2419 static bool useRVVForFixedLengthVectorVT(MVT VT,
2420 const RISCVSubtarget &Subtarget) {
2421 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2422 if (!Subtarget.useRVVForFixedLengthVectors())
2423 return false;
2425 // We only support a set of vector types with a consistent maximum fixed size
2426 // across all supported vector element types to avoid legalization issues.
2427 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2428 // fixed-length vector type we support is 1024 bytes.
2429 if (VT.getFixedSizeInBits() > 1024 * 8)
2430 return false;
2432 unsigned MinVLen = Subtarget.getRealMinVLen();
2434 MVT EltVT = VT.getVectorElementType();
2436 // Don't use RVV for vectors we cannot scalarize if required.
2437 switch (EltVT.SimpleTy) {
2438 // i1 is supported but has different rules.
2439 default:
2440 return false;
2441 case MVT::i1:
2442 // Masks can only use a single register.
2443 if (VT.getVectorNumElements() > MinVLen)
2444 return false;
2445 MinVLen /= 8;
2446 break;
2447 case MVT::i8:
2448 case MVT::i16:
2449 case MVT::i32:
2450 break;
2451 case MVT::i64:
2452 if (!Subtarget.hasVInstructionsI64())
2453 return false;
2454 break;
2455 case MVT::f16:
2456 if (!Subtarget.hasVInstructionsF16Minimal())
2457 return false;
2458 break;
2459 case MVT::f32:
2460 if (!Subtarget.hasVInstructionsF32())
2461 return false;
2462 break;
2463 case MVT::f64:
2464 if (!Subtarget.hasVInstructionsF64())
2465 return false;
2466 break;
2469 // Reject elements larger than ELEN.
2470 if (EltVT.getSizeInBits() > Subtarget.getELen())
2471 return false;
2473 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2474 // Don't use RVV for types that don't fit.
2475 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2476 return false;
2478 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2479 // the base fixed length RVV support in place.
2480 if (!VT.isPow2VectorType())
2481 return false;
2483 return true;
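// Example (illustrative): with a real minimum VLEN of 128, a fixed v8i32
// (256 bits) needs divideCeil(256, 128) = LMUL 2 and is accepted, whereas
// v3i32 is rejected because it is not a power-of-2 vector type.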
2486 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2487 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2490 // Return the largest legal scalable vector type that matches VT's element type.
2491 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2492 const RISCVSubtarget &Subtarget) {
2493 // This may be called before legal types are set up.
2494 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2495 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2496 "Expected legal fixed length vector!");
2498 unsigned MinVLen = Subtarget.getRealMinVLen();
2499 unsigned MaxELen = Subtarget.getELen();
2501 MVT EltVT = VT.getVectorElementType();
2502 switch (EltVT.SimpleTy) {
2503 default:
2504 llvm_unreachable("unexpected element type for RVV container");
2505 case MVT::i1:
2506 case MVT::i8:
2507 case MVT::i16:
2508 case MVT::i32:
2509 case MVT::i64:
2510 case MVT::f16:
2511 case MVT::f32:
2512 case MVT::f64: {
2513 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2514 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2515 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2516 unsigned NumElts =
2517 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2518 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2519 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2520 return MVT::getScalableVectorVT(EltVT, NumElts);
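// Example (illustrative): with MinVLen = 128 and ELEN = 64, a fixed v4i16
// gives NumElts = (4 * 64) / 128 = 2, clamped to at least 64 / 64 = 1, so
// the container is nxv2i16 (a fractional LMUL of 1/4 at VLEN = 128).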
2525 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2526 const RISCVSubtarget &Subtarget) {
2527 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2528 Subtarget);
2531 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2532 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2535 // Grow V to consume an entire RVV register.
2536 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2537 const RISCVSubtarget &Subtarget) {
2538 assert(VT.isScalableVector() &&
2539 "Expected to convert into a scalable vector!");
2540 assert(V.getValueType().isFixedLengthVector() &&
2541 "Expected a fixed length vector operand!");
2542 SDLoc DL(V);
2543 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2544 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2547 // Shrink V so it's just big enough to maintain a VT's worth of data.
2548 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2549 const RISCVSubtarget &Subtarget) {
2550 assert(VT.isFixedLengthVector() &&
2551 "Expected to convert into a fixed length vector!");
2552 assert(V.getValueType().isScalableVector() &&
2553 "Expected a scalable vector operand!");
2554 SDLoc DL(V);
2555 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2556 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2559 /// Return the mask type suitable for masking the provided vector type.
2560 /// This is simply an i1 element type vector of the same
2561 /// (possibly scalable) length.
2562 static MVT getMaskTypeFor(MVT VecVT) {
2563 assert(VecVT.isVector());
2564 ElementCount EC = VecVT.getVectorElementCount();
2565 return MVT::getVectorVT(MVT::i1, EC);
2568 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2569 /// vector length VL.
2570 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2571 SelectionDAG &DAG) {
2572 MVT MaskVT = getMaskTypeFor(VecVT);
2573 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2576 static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2577 const RISCVSubtarget &Subtarget) {
2578 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2581 static std::pair<SDValue, SDValue>
2582 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2583 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2584 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2585 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2586 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2587 return {Mask, VL};
2590 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2591 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2592 // the vector type that the fixed-length vector is contained in. Otherwise if
2593 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2594 static std::pair<SDValue, SDValue>
2595 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2596 const RISCVSubtarget &Subtarget) {
2597 if (VecVT.isFixedLengthVector())
2598 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2599 Subtarget);
2600 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2601 MVT XLenVT = Subtarget.getXLenVT();
2602 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2603 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2604 return {Mask, VL};
2607 // As above but assuming the given type is a scalable vector type.
2608 static std::pair<SDValue, SDValue>
2609 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2610 const RISCVSubtarget &Subtarget) {
2611 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2612 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2615 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2616 SelectionDAG &DAG) const {
2617 assert(VecVT.isScalableVector() && "Expected scalable vector");
2618 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2619 VecVT.getVectorElementCount());
2622 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2623 // of either is (currently) supported. This can get us into an infinite loop
2624 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2625 // as a ..., etc.
2626 // Until either (or both) of these can reliably lower any node, reporting that
2627 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2628 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2629 // which is not desirable.
2630 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2631 EVT VT, unsigned DefinedValues) const {
2632 return false;
2635 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2636 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2637 // implementation-defined.
2638 if (!VT.isVector())
2639 return InstructionCost::getInvalid();
2640 unsigned DLenFactor = Subtarget.getDLenFactor();
2641 unsigned Cost;
2642 if (VT.isScalableVector()) {
2643 unsigned LMul;
2644 bool Fractional;
2645 std::tie(LMul, Fractional) =
2646 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2647 if (Fractional)
2648 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2649 else
2650 Cost = (LMul * DLenFactor);
2651 } else {
2652 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2654 return Cost;
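// Example (illustrative, assuming DLEN == VLEN so DLenFactor == 1): an
// LMUL_4 type such as nxv8i32 costs 4, an LMUL_1 type costs 1, and any
// fractional-LMUL type is also treated as cost 1.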
2658 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2659 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2660 /// the operands (index and possibly mask) are handled separately.
2661 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2662 return getLMULCost(VT) * getLMULCost(VT);
2665 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2666 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2667 /// or may track the vrgather.vv cost. It is implementation-dependent.
2668 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2669 return getLMULCost(VT);
2672 /// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2673 /// for the type VT. (This does not cover the vslide1up or vslide1down
2674 /// variants.) Slides may be linear in the number of vregs implied by LMUL,
2675 /// or may track the vrgather.vv cost. It is implementation-dependent.
2676 InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
2677 return getLMULCost(VT);
2680 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2681 const RISCVSubtarget &Subtarget) {
2682 // RISC-V FP-to-int conversions saturate to the destination register size, but
2683 // don't produce 0 for nan. We can use a conversion instruction and fix the
2684 // nan case with a compare and a select.
2685 SDValue Src = Op.getOperand(0);
2687 MVT DstVT = Op.getSimpleValueType();
2688 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2690 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2692 if (!DstVT.isVector()) {
2693 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2694 // the result.
2695 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2696 Src.getValueType() == MVT::bf16) {
2697 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2700 unsigned Opc;
2701 if (SatVT == DstVT)
2702 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2703 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2704 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2705 else
2706 return SDValue();
2707 // FIXME: Support other SatVTs by clamping before or after the conversion.
2709 SDLoc DL(Op);
2710 SDValue FpToInt = DAG.getNode(
2711 Opc, DL, DstVT, Src,
2712 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2714 if (Opc == RISCVISD::FCVT_WU_RV64)
2715 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2717 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2718 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2719 ISD::CondCode::SETUO);
2722 // Vectors.
2724 MVT DstEltVT = DstVT.getVectorElementType();
2725 MVT SrcVT = Src.getSimpleValueType();
2726 MVT SrcEltVT = SrcVT.getVectorElementType();
2727 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2728 unsigned DstEltSize = DstEltVT.getSizeInBits();
2730 // Only handle saturating to the destination type.
2731 if (SatVT != DstEltVT)
2732 return SDValue();
2734 // FIXME: Don't support narrowing by more than 1 step for now.
2735 if (SrcEltSize > (2 * DstEltSize))
2736 return SDValue();
2738 MVT DstContainerVT = DstVT;
2739 MVT SrcContainerVT = SrcVT;
2740 if (DstVT.isFixedLengthVector()) {
2741 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2742 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2743 assert(DstContainerVT.getVectorElementCount() ==
2744 SrcContainerVT.getVectorElementCount() &&
2745 "Expected same element count");
2746 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2749 SDLoc DL(Op);
2751 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2753 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2754 {Src, Src, DAG.getCondCode(ISD::SETNE),
2755 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2757 // Need to widen by more than 1 step, promote the FP type, then do a widening
2758 // convert.
2759 if (DstEltSize > (2 * SrcEltSize)) {
2760 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2761 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2762 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2765 unsigned RVVOpc =
2766 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2767 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2769 SDValue SplatZero = DAG.getNode(
2770 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2771 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2772 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2773 Res, VL);
2775 if (DstVT.isFixedLengthVector())
2776 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2778 return Res;
2781 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2782 switch (Opc) {
2783 case ISD::FROUNDEVEN:
2784 case ISD::STRICT_FROUNDEVEN:
2785 case ISD::VP_FROUNDEVEN:
2786 return RISCVFPRndMode::RNE;
2787 case ISD::FTRUNC:
2788 case ISD::STRICT_FTRUNC:
2789 case ISD::VP_FROUNDTOZERO:
2790 return RISCVFPRndMode::RTZ;
2791 case ISD::FFLOOR:
2792 case ISD::STRICT_FFLOOR:
2793 case ISD::VP_FFLOOR:
2794 return RISCVFPRndMode::RDN;
2795 case ISD::FCEIL:
2796 case ISD::STRICT_FCEIL:
2797 case ISD::VP_FCEIL:
2798 return RISCVFPRndMode::RUP;
2799 case ISD::FROUND:
2800 case ISD::STRICT_FROUND:
2801 case ISD::VP_FROUND:
2802 return RISCVFPRndMode::RMM;
2803 case ISD::FRINT:
2804 return RISCVFPRndMode::DYN;
2807 return RISCVFPRndMode::Invalid;
2810 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2811 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2812 // the integer domain and back. Taking care to avoid converting values that are
2813 // nan or already correct.
2814 static SDValue
2815 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2816 const RISCVSubtarget &Subtarget) {
2817 MVT VT = Op.getSimpleValueType();
2818 assert(VT.isVector() && "Unexpected type");
2820 SDLoc DL(Op);
2822 SDValue Src = Op.getOperand(0);
2824 MVT ContainerVT = VT;
2825 if (VT.isFixedLengthVector()) {
2826 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2827 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2830 SDValue Mask, VL;
2831 if (Op->isVPOpcode()) {
2832 Mask = Op.getOperand(1);
2833 if (VT.isFixedLengthVector())
2834 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2835 Subtarget);
2836 VL = Op.getOperand(2);
2837 } else {
2838 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2841 // Freeze the source since we are increasing the number of uses.
2842 Src = DAG.getFreeze(Src);
2844 // We do the conversion on the absolute value and fix the sign at the end.
2845 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2847 // Determine the largest integer that can be represented exactly. This and
2848 // values larger than it don't have any fractional bits so don't need to
2849 // be converted.
2850 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2851 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2852 APFloat MaxVal = APFloat(FltSem);
2853 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2854 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2855 SDValue MaxValNode =
2856 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2857 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2858 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2860 // If abs(Src) was larger than MaxVal or nan, keep it.
2861 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2862 Mask =
2863 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2864 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2865 Mask, Mask, VL});
2867 // Truncate to integer and convert back to FP.
2868 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2869 MVT XLenVT = Subtarget.getXLenVT();
2870 SDValue Truncated;
2872 switch (Op.getOpcode()) {
2873 default:
2874 llvm_unreachable("Unexpected opcode");
2875 case ISD::FCEIL:
2876 case ISD::VP_FCEIL:
2877 case ISD::FFLOOR:
2878 case ISD::VP_FFLOOR:
2879 case ISD::FROUND:
2880 case ISD::FROUNDEVEN:
2881 case ISD::VP_FROUND:
2882 case ISD::VP_FROUNDEVEN:
2883 case ISD::VP_FROUNDTOZERO: {
2884 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2885 assert(FRM != RISCVFPRndMode::Invalid);
2886 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2887 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2888 break;
2890 case ISD::FTRUNC:
2891 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2892 Mask, VL);
2893 break;
2894 case ISD::FRINT:
2895 case ISD::VP_FRINT:
2896 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2897 break;
2898 case ISD::FNEARBYINT:
2899 case ISD::VP_FNEARBYINT:
2900 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2901 Mask, VL);
2902 break;
2905 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2906 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2907 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2908 Mask, VL);
2910 // Restore the original sign so that -0.0 is preserved.
2911 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2912 Src, Src, Mask, VL);
2914 if (!VT.isFixedLengthVector())
2915 return Truncated;
2917 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
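// Rough shape of the sequence emitted above for a floor on nxv2f32
// (illustrative sketch; the exact instructions depend on selection):
//   vfabs.v      ; |src|
//   vmflt.vf     ; mask of lanes with |src| < 2^23, i.e. lanes that still
//                ; have fractional bits
//   vfcvt.x.f.v  ; to integer with frm = rdn, masked
//   vfcvt.f.x.v  ; back to FP, masked
//   vfsgnj.vv    ; restore the original sign so -0.0 is preserved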
2920 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2921 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
2922 // qNaNs and converting the new source to integer and back to FP.
2923 static SDValue
2924 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2925 const RISCVSubtarget &Subtarget) {
2926 SDLoc DL(Op);
2927 MVT VT = Op.getSimpleValueType();
2928 SDValue Chain = Op.getOperand(0);
2929 SDValue Src = Op.getOperand(1);
2931 MVT ContainerVT = VT;
2932 if (VT.isFixedLengthVector()) {
2933 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2934 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2937 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2939 // Freeze the source since we are increasing the number of uses.
2940 Src = DAG.getFreeze(Src);
2942 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
2943 MVT MaskVT = Mask.getSimpleValueType();
2944 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2945 DAG.getVTList(MaskVT, MVT::Other),
2946 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2947 DAG.getUNDEF(MaskVT), Mask, VL});
2948 Chain = Unorder.getValue(1);
2949 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
2950 DAG.getVTList(ContainerVT, MVT::Other),
2951 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2952 Chain = Src.getValue(1);
2954 // We do the conversion on the absolute value and fix the sign at the end.
2955 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2957 // Determine the largest integer that can be represented exactly. This and
2958 // values larger than it don't have any fractional bits so don't need to
2959 // be converted.
2960 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2961 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2962 APFloat MaxVal = APFloat(FltSem);
2963 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2964 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2965 SDValue MaxValNode =
2966 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2967 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2968 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2970 // If abs(Src) was larger than MaxVal or nan, keep it.
2971 Mask = DAG.getNode(
2972 RISCVISD::SETCC_VL, DL, MaskVT,
2973 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2975 // Truncate to integer and convert back to FP.
2976 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2977 MVT XLenVT = Subtarget.getXLenVT();
2978 SDValue Truncated;
2980 switch (Op.getOpcode()) {
2981 default:
2982 llvm_unreachable("Unexpected opcode");
2983 case ISD::STRICT_FCEIL:
2984 case ISD::STRICT_FFLOOR:
2985 case ISD::STRICT_FROUND:
2986 case ISD::STRICT_FROUNDEVEN: {
2987 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2988 assert(FRM != RISCVFPRndMode::Invalid);
2989 Truncated = DAG.getNode(
2990 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2991 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
2992 break;
2994 case ISD::STRICT_FTRUNC:
2995 Truncated =
2996 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
2997 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
2998 break;
2999 case ISD::STRICT_FNEARBYINT:
3000 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3001 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3002 Mask, VL);
3003 break;
3005 Chain = Truncated.getValue(1);
3007 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3008 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3009 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3010 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3011 Truncated, Mask, VL);
3012 Chain = Truncated.getValue(1);
3015 // Restore the original sign so that -0.0 is preserved.
3016 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3017 Src, Src, Mask, VL);
3019 if (VT.isFixedLengthVector())
3020 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3021 return DAG.getMergeValues({Truncated, Chain}, DL);
3024 static SDValue
3025 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3026 const RISCVSubtarget &Subtarget) {
3027 MVT VT = Op.getSimpleValueType();
3028 if (VT.isVector())
3029 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3031 if (DAG.shouldOptForSize())
3032 return SDValue();
3034 SDLoc DL(Op);
3035 SDValue Src = Op.getOperand(0);
3037 // Create an integer the size of the mantissa with the MSB set. This and all
3038 // values larger than it don't have any fractional bits so don't need to be
3039 // converted.
3040 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3041 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3042 APFloat MaxVal = APFloat(FltSem);
3043 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3044 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3045 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3047 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3048 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3049 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3050 }
3052 // Expand vector LRINT and LLRINT by converting to the integer domain.
3053 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3054 const RISCVSubtarget &Subtarget) {
3055 MVT VT = Op.getSimpleValueType();
3056 assert(VT.isVector() && "Unexpected type");
3058 SDLoc DL(Op);
3059 SDValue Src = Op.getOperand(0);
3060 MVT ContainerVT = VT;
3062 if (VT.isFixedLengthVector()) {
3063 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3064 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3065 }
3067 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3068 SDValue Truncated =
3069 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3071 if (!VT.isFixedLengthVector())
3072 return Truncated;
3074 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3075 }
3077 static SDValue
3078 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3079 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3080 SDValue Offset, SDValue Mask, SDValue VL,
3081 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3082 if (Merge.isUndef())
3083 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3084 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3085 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3086 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3087 }
3089 static SDValue
3090 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3091 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3092 SDValue VL,
3093 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3094 if (Merge.isUndef())
3095 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3096 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3097 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3098 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3099 }
3101 struct VIDSequence {
3102 int64_t StepNumerator;
3103 unsigned StepDenominator;
3104 int64_t Addend;
3105 };
3107 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3108 uint32_t BitWidth) {
3109 APSInt ValInt(BitWidth, !APF.isNegative());
3110 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3111 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3112 // the rounding mode changes the output value, then it is not an exact
3113 // integer.
3114 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3115 bool IsExact;
3116 // If it is out of signed integer range, it will return an invalid operation.
3117 // If it is not an exact integer, IsExact is false.
3118 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3119 APFloatBase::opInvalidOp) ||
3120 !IsExact)
3121 return std::nullopt;
3122 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3123 }
3125 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3126 // to the (non-zero) step S and start value X. This can then be lowered as the
3127 // RVV sequence (VID * S) + X, for example.
3128 // The step S is represented as an integer numerator divided by a positive
3129 // denominator. Note that the implementation currently only identifies
3130 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3131 // cannot detect 2/3, for example.
3132 // Note that this method will also match potentially unappealing index
3133 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3134 // determine whether this is worth generating code for.
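// For example, <i64 1, i64 3, i64 5, i64 7> is matched as (VID * 2) + 1, i.e.
// {StepNumerator = 2, StepDenominator = 1, Addend = 1}, and <i32 0, i32 0,
// i32 1, i32 1> as (VID / 2), i.e. {StepNumerator = 1, StepDenominator = 2,
// Addend = 0}.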
3135 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
3136 unsigned NumElts = Op.getNumOperands();
3137 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3138 bool IsInteger = Op.getValueType().isInteger();
3140 std::optional<unsigned> SeqStepDenom;
3141 std::optional<int64_t> SeqStepNum, SeqAddend;
3142 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3143 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
3144 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3145 // Assume undef elements match the sequence; we just have to be careful
3146 // when interpolating across them.
3147 if (Op.getOperand(Idx).isUndef())
3148 continue;
3150 uint64_t Val;
3151 if (IsInteger) {
3152 // The BUILD_VECTOR must be all constants.
3153 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
3154 return std::nullopt;
3155 Val = Op.getConstantOperandVal(Idx) &
3156 maskTrailingOnes<uint64_t>(EltSizeInBits);
3157 } else {
3158 // The BUILD_VECTOR must be all constants.
3159 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
3160 return std::nullopt;
3161 if (auto ExactInteger = getExactInteger(
3162 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3163 EltSizeInBits))
3164 Val = *ExactInteger;
3165 else
3166 return std::nullopt;
3167 }
3169 if (PrevElt) {
3170 // Calculate the step since the last non-undef element, and ensure
3171 // it's consistent across the entire sequence.
3172 unsigned IdxDiff = Idx - PrevElt->second;
3173 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
3175 // A zero value difference means that we're somewhere in the middle
3176 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3177 // step change before evaluating the sequence.
3178 if (ValDiff == 0)
3179 continue;
3181 int64_t Remainder = ValDiff % IdxDiff;
3182 // Normalize the step if it's greater than 1.
3183 if (Remainder != ValDiff) {
3184 // The difference must cleanly divide the element span.
3185 if (Remainder != 0)
3186 return std::nullopt;
3187 ValDiff /= IdxDiff;
3188 IdxDiff = 1;
3189 }
3191 if (!SeqStepNum)
3192 SeqStepNum = ValDiff;
3193 else if (ValDiff != SeqStepNum)
3194 return std::nullopt;
3196 if (!SeqStepDenom)
3197 SeqStepDenom = IdxDiff;
3198 else if (IdxDiff != *SeqStepDenom)
3199 return std::nullopt;
3200 }
3202 // Record this non-undef element for later.
3203 if (!PrevElt || PrevElt->first != Val)
3204 PrevElt = std::make_pair(Val, Idx);
3205 }
3207 // We need to have logged a step for this to count as a legal index sequence.
3208 if (!SeqStepNum || !SeqStepDenom)
3209 return std::nullopt;
3211 // Loop back through the sequence and validate elements we might have skipped
3212 // while waiting for a valid step. While doing this, log any sequence addend.
3213 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3214 if (Op.getOperand(Idx).isUndef())
3215 continue;
3216 uint64_t Val;
3217 if (IsInteger) {
3218 Val = Op.getConstantOperandVal(Idx) &
3219 maskTrailingOnes<uint64_t>(EltSizeInBits);
3220 } else {
3221 Val = *getExactInteger(
3222 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3223 EltSizeInBits);
3224 }
3225 uint64_t ExpectedVal =
3226 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3227 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
3228 if (!SeqAddend)
3229 SeqAddend = Addend;
3230 else if (Addend != SeqAddend)
3231 return std::nullopt;
3232 }
3234 assert(SeqAddend && "Must have an addend if we have a step");
3236 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3237 }
3239 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3240 // and lower it as a VRGATHER_VX_VL from the source vector.
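// For example, a splat of (extractelt V, 2) for a v4i32 V becomes a vrgather
// of V with scalar index 2, which avoids extracting the element to a scalar
// register and splatting it back.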
3241 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3242 SelectionDAG &DAG,
3243 const RISCVSubtarget &Subtarget) {
3244 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3245 return SDValue();
3246 SDValue Vec = SplatVal.getOperand(0);
3247 // Only perform this optimization on vectors of the same size for simplicity.
3248 // Don't perform this optimization for i1 vectors.
3249 // FIXME: Support i1 vectors, maybe by promoting to i8?
3250 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3251 return SDValue();
3252 SDValue Idx = SplatVal.getOperand(1);
3253 // The index must be a legal type.
3254 if (Idx.getValueType() != Subtarget.getXLenVT())
3255 return SDValue();
3257 MVT ContainerVT = VT;
3258 if (VT.isFixedLengthVector()) {
3259 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3260 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3261 }
3263 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3265 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3266 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3268 if (!VT.isFixedLengthVector())
3269 return Gather;
3271 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3272 }
3275 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3276 /// which constitute a large proportion of the elements. In such cases we can
3277 /// splat a vector with the dominant element and make up the shortfall with
3278 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3279 /// Note that this includes vectors of 2 elements by association. The
3280 /// upper-most element is the "dominant" one, allowing us to use a splat to
3281 /// "insert" the upper element, and an insert of the lower element at position
3282 /// 0, which improves codegen.
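/// For example, <i32 5, i32 5, i32 7, i32 5> can be lowered as a splat of 5
/// followed by a single INSERT_VECTOR_ELT of 7 at index 2.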
3283 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3284 const RISCVSubtarget &Subtarget) {
3285 MVT VT = Op.getSimpleValueType();
3286 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3288 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3290 SDLoc DL(Op);
3291 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3293 MVT XLenVT = Subtarget.getXLenVT();
3294 unsigned NumElts = Op.getNumOperands();
3296 SDValue DominantValue;
3297 unsigned MostCommonCount = 0;
3298 DenseMap<SDValue, unsigned> ValueCounts;
3299 unsigned NumUndefElts =
3300 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3302 // Track the number of scalar loads we know we'd be inserting, estimated as
3303 // any non-zero floating-point constant. Other kinds of element are either
3304 // already in registers or are materialized on demand. The threshold at which
3305 // a vector load is more desirable than several scalar materialization and
3306 // vector-insertion instructions is not known.
3307 unsigned NumScalarLoads = 0;
3309 for (SDValue V : Op->op_values()) {
3310 if (V.isUndef())
3311 continue;
3313 ValueCounts.insert(std::make_pair(V, 0));
3314 unsigned &Count = ValueCounts[V];
3315 if (0 == Count)
3316 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3317 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3319 // Is this value dominant? In case of a tie, prefer the highest element as
3320 // it's cheaper to insert near the beginning of a vector than it is at the
3321 // end.
3322 if (++Count >= MostCommonCount) {
3323 DominantValue = V;
3324 MostCommonCount = Count;
3325 }
3326 }
3328 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3329 unsigned NumDefElts = NumElts - NumUndefElts;
3330 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3332 // Don't perform this optimization when optimizing for size, since
3333 // materializing elements and inserting them tends to cause code bloat.
3334 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3335 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3336 ((MostCommonCount > DominantValueCountThreshold) ||
3337 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3338 // Start by splatting the most common element.
3339 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3341 DenseSet<SDValue> Processed{DominantValue};
3343 // We can handle an insert into the last element (of a splat) via
3344 // v(f)slide1down. This is slightly better than the vslideup insert
3345 // lowering as it avoids the need for a vector group temporary. It
3346 // is also better than using vmerge.vx as it avoids the need to
3347 // materialize the mask in a vector register.
3348 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3349 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3350 LastOp != DominantValue) {
3351 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3352 auto OpCode =
3353 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3354 if (!VT.isFloatingPoint())
3355 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3356 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3357 LastOp, Mask, VL);
3358 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3359 Processed.insert(LastOp);
3360 }
3362 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3363 for (const auto &OpIdx : enumerate(Op->ops())) {
3364 const SDValue &V = OpIdx.value();
3365 if (V.isUndef() || !Processed.insert(V).second)
3366 continue;
3367 if (ValueCounts[V] == 1) {
3368 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3369 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3370 } else {
3371 // Blend in all instances of this value using a VSELECT, using a
3372 // mask where each bit signals whether that element is the one
3373 // we're after.
3374 SmallVector<SDValue> Ops;
3375 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3376 return DAG.getConstant(V == V1, DL, XLenVT);
3377 });
3378 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3379 DAG.getBuildVector(SelMaskTy, DL, Ops),
3380 DAG.getSplatBuildVector(VT, DL, V), Vec);
3381 }
3382 }
3384 return Vec;
3385 }
3387 return SDValue();
3388 }
3390 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3391 const RISCVSubtarget &Subtarget) {
3392 MVT VT = Op.getSimpleValueType();
3393 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3395 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 SDLoc DL(Op);
3398 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3400 MVT XLenVT = Subtarget.getXLenVT();
3401 unsigned NumElts = Op.getNumOperands();
3403 if (VT.getVectorElementType() == MVT::i1) {
3404 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3405 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3406 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3407 }
3409 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3410 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3411 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3412 }
3414 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3415 // scalar integer chunks whose bit-width depends on the number of mask
3416 // bits and XLEN.
3417 // First, determine the most appropriate scalar integer type to use. This
3418 // is at most XLenVT, but may be shrunk to a smaller vector element type
3419 // according to the size of the final vector - use i8 chunks rather than
3420 // XLenVT if we're producing a v8i1. This results in more consistent
3421 // codegen across RV32 and RV64.
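// For example, on RV64 with ELEN=64 a v64i1 constant mask is built as a
// single i64 chunk (a v1i64 build_vector) and then bitcast back to v64i1.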
3422 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3423 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3424 // If we have to use more than one INSERT_VECTOR_ELT then this
3425 // optimization is likely to increase code size; avoid performing it in
3426 // such a case. We can use a load from a constant pool in this case.
3427 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3428 return SDValue();
3429 // Now we can create our integer vector type. Note that it may be larger
3430 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3431 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3432 MVT IntegerViaVecVT =
3433 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3434 IntegerViaVecElts);
3436 uint64_t Bits = 0;
3437 unsigned BitPos = 0, IntegerEltIdx = 0;
3438 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3440 for (unsigned I = 0; I < NumElts;) {
3441 SDValue V = Op.getOperand(I);
3442 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3443 Bits |= ((uint64_t)BitValue << BitPos);
3444 ++BitPos;
3445 ++I;
3447 // Once we accumulate enough bits to fill our scalar type or process the
3448 // last element, insert into our vector and clear our accumulated data.
3449 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3450 if (NumViaIntegerBits <= 32)
3451 Bits = SignExtend64<32>(Bits);
3452 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3453 Elts[IntegerEltIdx] = Elt;
3454 Bits = 0;
3455 BitPos = 0;
3456 IntegerEltIdx++;
3457 }
3458 }
3460 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3462 if (NumElts < NumViaIntegerBits) {
3463 // If we're producing a smaller vector than our minimum legal integer
3464 // type, bitcast to the equivalent (known-legal) mask type, and extract
3465 // our final mask.
3466 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3467 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3468 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3469 DAG.getConstant(0, DL, XLenVT));
3470 } else {
3471 // Else we must have produced an integer type with the same size as the
3472 // mask type; bitcast for the final result.
3473 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3474 Vec = DAG.getBitcast(VT, Vec);
3475 }
3477 return Vec;
3478 }
3480 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3481 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3482 : RISCVISD::VMV_V_X_VL;
3483 if (!VT.isFloatingPoint())
3484 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3485 Splat =
3486 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3487 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3488 }
3490 // Try and match index sequences, which we can lower to the vid instruction
3491 // with optional modifications. An all-undef vector is matched by
3492 // getSplatValue, above.
3493 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3494 int64_t StepNumerator = SimpleVID->StepNumerator;
3495 unsigned StepDenominator = SimpleVID->StepDenominator;
3496 int64_t Addend = SimpleVID->Addend;
3498 assert(StepNumerator != 0 && "Invalid step");
3499 bool Negate = false;
3500 int64_t SplatStepVal = StepNumerator;
3501 unsigned StepOpcode = ISD::MUL;
3502 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3503 // anyway as the shift of 63 won't fit in uimm5.
3504 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3505 isPowerOf2_64(std::abs(StepNumerator))) {
3506 Negate = StepNumerator < 0;
3507 StepOpcode = ISD::SHL;
3508 SplatStepVal = Log2_64(std::abs(StepNumerator));
3509 }
3511 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3512 // threshold since it's the immediate value many RVV instructions accept.
3513 // There is no vmul.vi instruction so ensure the multiply constant can fit in
3514 // a single addi instruction.
3515 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3516 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3517 isPowerOf2_32(StepDenominator) &&
3518 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3519 MVT VIDVT =
3520 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3521 MVT VIDContainerVT =
3522 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3523 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3524 // Convert right out of the scalable type so we can use standard ISD
3525 // nodes for the rest of the computation. If we used scalable types with
3526 // these, we'd lose the fixed-length vector info and generate worse
3527 // vsetvli code.
3528 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3529 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3530 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3531 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3532 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3533 }
3534 if (StepDenominator != 1) {
3535 SDValue SplatStep =
3536 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3537 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3538 }
3539 if (Addend != 0 || Negate) {
3540 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3541 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3542 VID);
3543 }
3544 if (VT.isFloatingPoint()) {
3545 // TODO: Use vfwcvt to reduce register pressure.
3546 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3547 }
3548 return VID;
3549 }
3550 }
3552 // For very small build_vectors, use a single scalar insert of a constant.
3553 // TODO: Base this on constant rematerialization cost, not size.
3554 const unsigned EltBitSize = VT.getScalarSizeInBits();
3555 if (VT.getSizeInBits() <= 32 &&
3556 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3557 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3558 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3559 "Unexpected sequence type");
3560 // If we can use the original VL with the modified element type, this
3561 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3562 // be moved into InsertVSETVLI?
3563 unsigned ViaVecLen =
3564 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3565 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3567 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3568 uint64_t SplatValue = 0;
3569 // Construct the amalgamated value at this larger vector type.
3570 for (const auto &OpIdx : enumerate(Op->op_values())) {
3571 const auto &SeqV = OpIdx.value();
3572 if (!SeqV.isUndef())
3573 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3574 << (OpIdx.index() * EltBitSize));
3575 }
3577 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3578 // achieve better constant materialization.
3579 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3580 SplatValue = SignExtend64<32>(SplatValue);
3582 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3583 DAG.getUNDEF(ViaVecVT),
3584 DAG.getConstant(SplatValue, DL, XLenVT),
3585 DAG.getConstant(0, DL, XLenVT));
3586 if (ViaVecLen != 1)
3587 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3588 MVT::getVectorVT(ViaIntVT, 1), Vec,
3589 DAG.getConstant(0, DL, XLenVT));
3590 return DAG.getBitcast(VT, Vec);
3591 }
3594 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3595 // when re-interpreted as a vector with a larger element type. For example,
3596 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3597 // could be instead splat as
3598 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3599 // TODO: This optimization could also work on non-constant splats, but it
3600 // would require bit-manipulation instructions to construct the splat value.
3601 SmallVector<SDValue> Sequence;
3602 const auto *BV = cast<BuildVectorSDNode>(Op);
3603 if (VT.isInteger() && EltBitSize < 64 &&
3604 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3605 BV->getRepeatedSequence(Sequence) &&
3606 (Sequence.size() * EltBitSize) <= 64) {
3607 unsigned SeqLen = Sequence.size();
3608 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3609 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3610 ViaIntVT == MVT::i64) &&
3611 "Unexpected sequence type");
3613 // If we can use the original VL with the modified element type, this
3614 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3615 // be moved into InsertVSETVLI?
3616 const unsigned RequiredVL = NumElts / SeqLen;
3617 const unsigned ViaVecLen =
3618 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3619 NumElts : RequiredVL;
3620 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3622 unsigned EltIdx = 0;
3623 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3624 uint64_t SplatValue = 0;
3625 // Construct the amalgamated value which can be splatted as this larger
3626 // vector type.
3627 for (const auto &SeqV : Sequence) {
3628 if (!SeqV.isUndef())
3629 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3630 << (EltIdx * EltBitSize));
3631 EltIdx++;
3632 }
3634 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3635 // achieve better constant materialization.
3636 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3637 SplatValue = SignExtend64<32>(SplatValue);
3639 // Since we can't introduce illegal i64 types at this stage, we can only
3640 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3641 // way we can use RVV instructions to splat.
3642 assert((ViaIntVT.bitsLE(XLenVT) ||
3643 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3644 "Unexpected bitcast sequence");
3645 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3646 SDValue ViaVL =
3647 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3648 MVT ViaContainerVT =
3649 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3650 SDValue Splat =
3651 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3652 DAG.getUNDEF(ViaContainerVT),
3653 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3654 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3655 if (ViaVecLen != RequiredVL)
3656 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3657 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3658 DAG.getConstant(0, DL, XLenVT));
3659 return DAG.getBitcast(VT, Splat);
3660 }
3661 }
3663 // If the number of signbits allows, see if we can lower as a <N x i8>.
3664 // Our main goal here is to reduce LMUL (and thus work) required to
3665 // build the constant, but we will also narrow if the resulting
3666 // narrow vector is known to materialize cheaply.
3667 // TODO: We really should be costing the smaller vector. There are
3668 // profitable cases this misses.
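// For example, a v4i32 constant such as <3, -1, 0, -8> has at least 25 sign
// bits in every element, so it can be built as a v4i8 constant and
// sign-extended back to v4i32.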
3669 if (EltBitSize > 8 && VT.isInteger() &&
3670 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3671 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3672 if (EltBitSize - SignBits < 8) {
3673 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3674 DL, Op->ops());
3675 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3676 Source, DAG, Subtarget);
3677 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3678 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3679 }
3680 }
3682 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3683 return Res;
3685 // For constant vectors, use generic constant pool lowering. Otherwise,
3686 // we'd have to materialize constants in GPRs just to move them into the
3687 // vector.
3688 return SDValue();
3689 }
3691 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3692 const RISCVSubtarget &Subtarget) {
3693 MVT VT = Op.getSimpleValueType();
3694 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3696 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3697 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3698 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3700 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3702 SDLoc DL(Op);
3703 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3705 MVT XLenVT = Subtarget.getXLenVT();
3707 if (VT.getVectorElementType() == MVT::i1) {
3708 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3709 // vector type, we have a legal equivalently-sized i8 type, so we can use
3710 // that.
3711 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3712 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3714 SDValue WideVec;
3715 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3716 // For a splat, perform a scalar truncate before creating the wider
3717 // vector.
3718 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3719 DAG.getConstant(1, DL, Splat.getValueType()));
3720 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3721 } else {
3722 SmallVector<SDValue, 8> Ops(Op->op_values());
3723 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3724 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3725 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3726 }
3728 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3729 }
3731 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3732 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3733 return Gather;
3734 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3735 : RISCVISD::VMV_V_X_VL;
3736 if (!VT.isFloatingPoint())
3737 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3738 Splat =
3739 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3740 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3741 }
3743 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3744 return Res;
3746 // Cap the cost at a value linear in the number of elements in the vector.
3747 // The default lowering is to use the stack: the vector store + scalar loads
3748 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
3749 // being (at least) linear in LMUL. As a result, using the slide-based
3750 // lowering for every element ends up costing VL*LMUL.
3751 // TODO: Should we be directly costing the stack alternative? Doing so might
3752 // give us a more accurate upper bound.
3753 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3755 // TODO: unify with TTI getSlideCost.
3756 InstructionCost PerSlideCost = 1;
3757 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3758 default: break;
3759 case RISCVII::VLMUL::LMUL_2:
3760 PerSlideCost = 2;
3761 break;
3762 case RISCVII::VLMUL::LMUL_4:
3763 PerSlideCost = 4;
3764 break;
3765 case RISCVII::VLMUL::LMUL_8:
3766 PerSlideCost = 8;
3767 break;
3768 }
3770 // TODO: Should we be using the build instseq then cost + evaluate scheme
3771 // we use for integer constants here?
3772 unsigned UndefCount = 0;
3773 for (const SDValue &V : Op->ops()) {
3774 if (V.isUndef()) {
3775 UndefCount++;
3776 continue;
3777 }
3778 if (UndefCount) {
3779 LinearBudget -= PerSlideCost;
3780 UndefCount = 0;
3781 }
3782 LinearBudget -= PerSlideCost;
3783 }
3784 if (UndefCount) {
3785 LinearBudget -= PerSlideCost;
3786 }
3788 if (LinearBudget < 0)
3789 return SDValue();
3791 assert((!VT.isFloatingPoint() ||
3792 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3793 "Illegal type which will result in reserved encoding");
3795 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3797 SDValue Vec = DAG.getUNDEF(ContainerVT);
3798 UndefCount = 0;
3799 for (SDValue V : Op->ops()) {
3800 if (V.isUndef()) {
3801 UndefCount++;
3802 continue;
3803 }
3804 if (UndefCount) {
3805 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3806 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3807 Vec, Offset, Mask, VL, Policy);
3808 UndefCount = 0;
3809 }
3810 auto OpCode =
3811 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3812 if (!VT.isFloatingPoint())
3813 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
3814 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3815 V, Mask, VL);
3816 }
3817 if (UndefCount) {
3818 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3819 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3820 Vec, Offset, Mask, VL, Policy);
3821 }
3822 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3823 }
3825 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3826 SDValue Lo, SDValue Hi, SDValue VL,
3827 SelectionDAG &DAG) {
3828 if (!Passthru)
3829 Passthru = DAG.getUNDEF(VT);
3830 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3831 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3832 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3833 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3834 // node in order to try and match RVV vector/scalar instructions.
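// For example, Lo == Hi == 0xFFFFFFFF (an i64 splat of -1) takes this path:
// vmv.v.x sign-extends the 32-bit scalar -1 into each 64-bit element.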
3835 if ((LoC >> 31) == HiC)
3836 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3838 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
3839 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
3840 // vlmax vsetvli or vsetivli to change the VL.
3841 // FIXME: Support larger constants?
3842 // FIXME: Support non-constant VLs by saturating?
3843 if (LoC == HiC) {
3844 SDValue NewVL;
3845 if (isAllOnesConstant(VL) ||
3846 (isa<RegisterSDNode>(VL) &&
3847 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
3848 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
3849 else if (isa<ConstantSDNode>(VL) &&
3850 isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
3851 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
3853 if (NewVL) {
3854 MVT InterVT =
3855 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3856 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
3857 DAG.getUNDEF(InterVT), Lo,
3858 DAG.getRegister(RISCV::X0, MVT::i32));
3859 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3860 }
3861 }
3862 }
3864 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3865 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3866 isa<ConstantSDNode>(Hi.getOperand(1)) &&
3867 Hi.getConstantOperandVal(1) == 31)
3868 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3870 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
3871 // even if it might be sign extended.
3872 if (Hi.isUndef())
3873 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3875 // Fall back to a stack store and stride x0 vector load.
3876 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3877 Hi, VL);
3878 }
3880 // Called by type legalization to handle splat of i64 on RV32.
3881 // FIXME: We can optimize this when the type has sign or zero bits in one
3882 // of the halves.
3883 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3884 SDValue Scalar, SDValue VL,
3885 SelectionDAG &DAG) {
3886 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3887 SDValue Lo, Hi;
3888 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3889 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3890 }
3892 // This function lowers a splat of a scalar operand Splat with the vector
3893 // length VL. It ensures the final sequence is type legal, which is useful when
3894 // lowering a splat after type legalization.
3895 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3896 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3897 const RISCVSubtarget &Subtarget) {
3898 bool HasPassthru = Passthru && !Passthru.isUndef();
3899 if (!HasPassthru && !Passthru)
3900 Passthru = DAG.getUNDEF(VT);
3901 if (VT.isFloatingPoint())
3902 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3904 MVT XLenVT = Subtarget.getXLenVT();
3906 // Simplest case is that the operand needs to be promoted to XLenVT.
3907 if (Scalar.getValueType().bitsLE(XLenVT)) {
3908 // If the operand is a constant, sign extend to increase our chances
3909 // of being able to use a .vi instruction. ANY_EXTEND would become a
3910 // zero extend and the simm5 check in isel would fail.
3911 // FIXME: Should we ignore the upper bits in isel instead?
3912 unsigned ExtOpc =
3913 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3914 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3915 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3916 }
3918 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3919 "Unexpected scalar for splat lowering!");
3921 if (isOneConstant(VL) && isNullConstant(Scalar))
3922 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3923 DAG.getConstant(0, DL, XLenVT), VL);
3925 // Otherwise use the more complicated splatting algorithm.
3926 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3927 }
3929 static MVT getLMUL1VT(MVT VT) {
3930 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3931 "Unexpected vector MVT");
3932 return MVT::getScalableVectorVT(
3933 VT.getVectorElementType(),
3934 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3935 }
3937 // This function lowers an insert of a scalar operand Scalar into lane
3938 // 0 of the vector regardless of the value of VL. The contents of the
3939 // remaining lanes of the result vector are unspecified. VL is assumed
3940 // to be non-zero.
3941 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3942 const SDLoc &DL, SelectionDAG &DAG,
3943 const RISCVSubtarget &Subtarget) {
3944 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
3946 const MVT XLenVT = Subtarget.getXLenVT();
3947 SDValue Passthru = DAG.getUNDEF(VT);
3949 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3950 isNullConstant(Scalar.getOperand(1))) {
3951 SDValue ExtractedVal = Scalar.getOperand(0);
3952 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
3953 MVT ExtractedContainerVT = ExtractedVT;
3954 if (ExtractedContainerVT.isFixedLengthVector()) {
3955 ExtractedContainerVT = getContainerForFixedLengthVector(
3956 DAG, ExtractedContainerVT, Subtarget);
3957 ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
3958 DAG, Subtarget);
3959 }
3960 if (ExtractedContainerVT.bitsLE(VT))
3961 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
3962 DAG.getConstant(0, DL, XLenVT));
3963 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
3964 DAG.getConstant(0, DL, XLenVT));
3965 }
3968 if (VT.isFloatingPoint())
3969 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
3970 DAG.getUNDEF(VT), Scalar, VL);
3972 // Avoid the tricky legalization cases by falling back to using the
3973 // splat code which already handles it gracefully.
3974 if (!Scalar.getValueType().bitsLE(XLenVT))
3975 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3976 DAG.getConstant(1, DL, XLenVT),
3977 VT, DL, DAG, Subtarget);
3979 // If the operand is a constant, sign extend to increase our chances
3980 // of being able to use a .vi instruction. ANY_EXTEND would become a
3981 // zero extend and the simm5 check in isel would fail.
3982 // FIXME: Should we ignore the upper bits in isel instead?
3983 unsigned ExtOpc =
3984 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3985 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3986 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
3987 DAG.getUNDEF(VT), Scalar, VL);
3988 }
3990 // Is this a shuffle that extracts either the even or the odd elements of a vector?
3991 // That is, specifically, either (a) or (b) below.
3992 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
3993 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
3994 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3995 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3996 // Returns {Src Vector, Even Elements} on success.
3997 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3998 SDValue V2, ArrayRef<int> Mask,
3999 const RISCVSubtarget &Subtarget) {
4000 // Need to be able to widen the vector.
4001 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4002 return false;
4004 // Both inputs must be extracts.
4005 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4006 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4007 return false;
4009 // Extracting from the same source.
4010 SDValue Src = V1.getOperand(0);
4011 if (Src != V2.getOperand(0))
4012 return false;
4014 // Src needs to have twice the number of elements.
4015 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4016 return false;
4018 // The extracts must extract the two halves of the source.
4019 if (V1.getConstantOperandVal(1) != 0 ||
4020 V2.getConstantOperandVal(1) != Mask.size())
4021 return false;
4023 // First index must be the first even or odd element from V1.
4024 if (Mask[0] != 0 && Mask[0] != 1)
4025 return false;
4027 // The others must increase by 2 each time.
4028 // TODO: Support undef elements?
4029 for (unsigned i = 1; i != Mask.size(); ++i)
4030 if (Mask[i] != Mask[i - 1] + 2)
4031 return false;
4033 return true;
4034 }
4036 /// Is this shuffle interleaving contiguous elements from one vector into the
4037 /// even elements and contiguous elements from another vector into the odd
4038 /// elements. \p EvenSrc will contain the element that should be in the first
4039 /// even element. \p OddSrc will contain the element that should be in the first
4040 /// odd element. These can be the first element in a source or the element half
4041 /// way through the source.
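/// For example, with two v8i8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the two low halves, giving EvenSrc == 0 and OddSrc == 8.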
4042 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4043 int &OddSrc, const RISCVSubtarget &Subtarget) {
4044 // We need to be able to widen elements to the next larger integer type.
4045 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4046 return false;
4048 int Size = Mask.size();
4049 int NumElts = VT.getVectorNumElements();
4050 assert(Size == (int)NumElts && "Unexpected mask size");
4052 SmallVector<unsigned, 2> StartIndexes;
4053 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4054 return false;
4056 EvenSrc = StartIndexes[0];
4057 OddSrc = StartIndexes[1];
4059 // One source should be low half of first vector.
4060 if (EvenSrc != 0 && OddSrc != 0)
4061 return false;
4063 // Subvectors will be extracted from either the start of the two input
4064 // vectors, or from the start and middle of the first vector if it's a unary
4065 // interleave.
4066 // In both cases, HalfNumElts will be extracted.
4067 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4068 // we'll create an illegal extract_subvector.
4069 // FIXME: We could support other values using a slidedown first.
4070 int HalfNumElts = NumElts / 2;
4071 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4072 }
4074 /// Match shuffles that concatenate two vectors, rotate the concatenation,
4075 /// and then extract the original number of elements from the rotated result.
4076 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4077 /// returned rotation amount is for a rotate right, where elements move from
4078 /// higher elements to lower elements. \p LoSrc indicates the first source
4079 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4080 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4081 /// 0 or 1 if a rotation is found.
4083 /// NOTE: We talk about rotate to the right which matches how bit shift and
4084 /// rotate instructions are described where LSBs are on the right, but LLVM IR
4085 /// and the table below write vectors with the lowest elements on the left.
4086 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4087 int Size = Mask.size();
4089 // We need to detect various ways of spelling a rotation:
4090 // [11, 12, 13, 14, 15, 0, 1, 2]
4091 // [-1, 12, 13, 14, -1, -1, 1, -1]
4092 // [-1, -1, -1, -1, -1, -1, 1, 2]
4093 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4094 // [-1, 4, 5, 6, -1, -1, 9, -1]
4095 // [-1, 4, 5, 6, -1, -1, -1, -1]
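// For Size == 8, all six of these masks are recognized as a rotation of 3;
// they differ only in which source supplies each piece and in which lanes
// are undef.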
4096 int Rotation = 0;
4097 LoSrc = -1;
4098 HiSrc = -1;
4099 for (int i = 0; i != Size; ++i) {
4100 int M = Mask[i];
4101 if (M < 0)
4102 continue;
4104 // Determine where a rotate vector would have started.
4105 int StartIdx = i - (M % Size);
4106 // The identity rotation isn't interesting, stop.
4107 if (StartIdx == 0)
4108 return -1;
4110 // If we found the tail of a vector the rotation must be the missing
4111 // front. If we found the head of a vector, it must be how much of the
4112 // head.
4113 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4115 if (Rotation == 0)
4116 Rotation = CandidateRotation;
4117 else if (Rotation != CandidateRotation)
4118 // The rotations don't match, so we can't match this mask.
4119 return -1;
4121 // Compute which value this mask is pointing at.
4122 int MaskSrc = M < Size ? 0 : 1;
4124 // Compute which of the two target values this index should be assigned to.
4125 // This reflects whether the high elements are remaining or the low elements
4126 // are remaining.
4127 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4129 // Either set up this value if we've not encountered it before, or check
4130 // that it remains consistent.
4131 if (TargetSrc < 0)
4132 TargetSrc = MaskSrc;
4133 else if (TargetSrc != MaskSrc)
4134 // This may be a rotation, but it pulls from the inputs in some
4135 // unsupported interleaving.
4136 return -1;
4137 }
4139 // Check that we successfully analyzed the mask, and normalize the results.
4140 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4141 assert((LoSrc >= 0 || HiSrc >= 0) &&
4142 "Failed to find a rotated input vector!");
4144 return Rotation;
4145 }
4147 // Lower a deinterleave shuffle to vnsrl.
4148 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4149 // -> [p, q, r, s] (EvenElts == false)
4150 // VT is the type of the vector to return, <[vscale x ]n x ty>
4151 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4152 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4153 bool EvenElts,
4154 const RISCVSubtarget &Subtarget,
4155 SelectionDAG &DAG) {
4156 // The result is a vector of type <m x n x ty>
4157 MVT ContainerVT = VT;
4158 // Convert fixed vectors to scalable if needed
4159 if (ContainerVT.isFixedLengthVector()) {
4160 assert(Src.getSimpleValueType().isFixedLengthVector());
4161 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4163 // The source is a vector of type <m x n*2 x ty>
4164 MVT SrcContainerVT =
4165 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4166 ContainerVT.getVectorElementCount() * 2);
4167 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4168 }
4170 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4172 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4173 // This also converts FP to int.
4174 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4175 MVT WideSrcContainerVT = MVT::getVectorVT(
4176 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4177 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4179 // The integer version of the container type.
4180 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4182 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4183 // the original element size.
4184 unsigned Shift = EvenElts ? 0 : EltBits;
4185 SDValue SplatShift = DAG.getNode(
4186 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4187 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4188 SDValue Res =
4189 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4190 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4191 // Cast back to FP if needed.
4192 Res = DAG.getBitcast(ContainerVT, Res);
4194 if (VT.isFixedLengthVector())
4195 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4196 return Res;
4197 }
4199 // Lower the following shuffle to vslidedown.
4200 // a)
4201 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
4202 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
4203 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4204 // b)
4205 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
4206 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
4207 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
4208 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
4209 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
4210 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4211 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4212 SDValue V1, SDValue V2,
4213 ArrayRef<int> Mask,
4214 const RISCVSubtarget &Subtarget,
4215 SelectionDAG &DAG) {
4216 auto findNonEXTRACT_SUBVECTORParent =
4217 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4218 uint64_t Offset = 0;
4219 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4220 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4221 // a scalable vector. But we don't want to match that case.
4222 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4223 Offset += Parent.getConstantOperandVal(1);
4224 Parent = Parent.getOperand(0);
4226 return std::make_pair(Parent, Offset);
4227 };
4229 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4230 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4232 // Extracting from the same source.
4233 SDValue Src = V1Src;
4234 if (Src != V2Src)
4235 return SDValue();
4237 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4238 SmallVector<int, 16> NewMask(Mask);
4239 for (size_t i = 0; i != NewMask.size(); ++i) {
4240 if (NewMask[i] == -1)
4241 continue;
4243 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4244 NewMask[i] = NewMask[i] + V1IndexOffset;
4245 } else {
4246 // Subtracting NewMask.size() is needed. Otherwise, case b) above would be
4247 // <5,6,7,12> instead of <5,6,7,8>.
4248 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4249 }
4250 }
4252 // First index must be known and non-zero. It will be used as the slidedown
4253 // amount.
4254 if (NewMask[0] <= 0)
4255 return SDValue();
4257 // NewMask must also be contiguous.
4258 for (unsigned i = 1; i != NewMask.size(); ++i)
4259 if (NewMask[i - 1] + 1 != NewMask[i])
4260 return SDValue();
4262 MVT XLenVT = Subtarget.getXLenVT();
4263 MVT SrcVT = Src.getSimpleValueType();
4264 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4265 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4266 SDValue Slidedown =
4267 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4268 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4269 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4270 return DAG.getNode(
4271 ISD::EXTRACT_SUBVECTOR, DL, VT,
4272 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4273 DAG.getConstant(0, DL, XLenVT));
4274 }
4276 // Because vslideup leaves the destination elements at the start intact, we can
4277 // use it to perform shuffles that insert subvectors:
4279 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4280 // ->
4281 // vsetvli zero, 8, e8, mf2, ta, ma
4282 // vslideup.vi v8, v9, 4
4284 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4285 // ->
4286 // vsetvli zero, 5, e8, mf2, tu, ma
4287 // vslideup.vi v8, v9, 2
4288 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4289 SDValue V1, SDValue V2,
4290 ArrayRef<int> Mask,
4291 const RISCVSubtarget &Subtarget,
4292 SelectionDAG &DAG) {
4293 unsigned NumElts = VT.getVectorNumElements();
4294 int NumSubElts, Index;
4295 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4296 Index))
4297 return SDValue();
4299 bool OpsSwapped = Mask[Index] < (int)NumElts;
4300 SDValue InPlace = OpsSwapped ? V2 : V1;
4301 SDValue ToInsert = OpsSwapped ? V1 : V2;
4303 MVT XLenVT = Subtarget.getXLenVT();
4304 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4305 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4306 // We slide up by the index that the subvector is being inserted at, and set
4307 // VL to the index + the number of elements being inserted.
4308 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
4309 // If we're adding a suffix to the in-place vector, i.e. inserting right
4310 // up to the very end of it, then we don't actually care about the tail.
4311 if (NumSubElts + Index >= (int)NumElts)
4312 Policy |= RISCVII::TAIL_AGNOSTIC;
4314 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4315 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4316 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4318 SDValue Res;
4319 // If we're inserting into the lowest elements, use a tail undisturbed
4320 // vmv.v.v.
4321 if (Index == 0)
4322 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4323 VL);
4324 else
4325 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4326 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4327 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4328 }
4330 /// Match v(f)slide1up/down idioms. These operations involve sliding
4331 /// N-1 elements to make room for an inserted scalar at one end.
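/// For example, for v4i8 the mask <0, 4, 5, 6> applied to (splat of X, V2) is
/// a vslide1up of V2 inserting X at the front, and <5, 6, 7, 0> is a
/// vslide1down of V2 inserting X at the back.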
4332 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4333 SDValue V1, SDValue V2,
4334 ArrayRef<int> Mask,
4335 const RISCVSubtarget &Subtarget,
4336 SelectionDAG &DAG) {
4337 bool OpsSwapped = false;
4338 if (!isa<BuildVectorSDNode>(V1)) {
4339 if (!isa<BuildVectorSDNode>(V2))
4340 return SDValue();
4341 std::swap(V1, V2);
4342 OpsSwapped = true;
4343 }
4344 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4345 if (!Splat)
4346 return SDValue();
4348 // Return true if the mask could describe a slide of Mask.size() - 1
4349 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4350 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4351 const unsigned S = (Offset > 0) ? 0 : -Offset;
4352 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4353 for (unsigned i = S; i != E; ++i)
4354 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4355 return false;
4356 return true;
4357 };
4359 const unsigned NumElts = VT.getVectorNumElements();
4360 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4361 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4362 return SDValue();
4364 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4365 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4366 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4367 return SDValue();
4369 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4370 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4371 auto OpCode = IsVSlidedown ?
4372 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4373 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4374 if (!VT.isFloatingPoint())
4375 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4376 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4377 DAG.getUNDEF(ContainerVT),
4378 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4379 Splat, TrueMask, VL);
4380 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4381 }
4383 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4384 // to create an interleaved vector of <[vscale x] n*2 x ty>.
4385 // This requires that the size of ty is less than the subtarget's maximum ELEN.
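// For example, with i8 elements, interleaving even value a with odd value b
// yields the i16 value a + 256*b: vwaddu.vv computes a + b and vwmaccu.vx
// then adds b * 255 (a + b + 255*b == a + (b << 8)).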
4386 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4387 const SDLoc &DL, SelectionDAG &DAG,
4388 const RISCVSubtarget &Subtarget) {
4389 MVT VecVT = EvenV.getSimpleValueType();
4390 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4391 // Convert fixed vectors to scalable if needed
4392 if (VecContainerVT.isFixedLengthVector()) {
4393 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4394 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4395 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4396 }
4398 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4400 // We're working with a vector of the same size as the resulting
4401 // interleaved vector, but with half the number of elements and
4402 // twice the SEW (Hence the restriction on not using the maximum
4403 // ELEN)
4404 MVT WideVT =
4405 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4406 VecVT.getVectorElementCount());
4407 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4408 if (WideContainerVT.isFixedLengthVector())
4409 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4411 // Bitcast the input vectors to integers in case they are FP
4412 VecContainerVT = VecContainerVT.changeTypeToInteger();
4413 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4414 OddV = DAG.getBitcast(VecContainerVT, OddV);
4416 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4417 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4419 SDValue Interleaved;
4420 if (Subtarget.hasStdExtZvbb()) {
4421 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4422 SDValue OffsetVec =
4423 DAG.getSplatVector(VecContainerVT, DL,
4424 DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
4425 Subtarget.getXLenVT()));
4426 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4427 OffsetVec, Passthru, Mask, VL);
4428 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4429 Interleaved, EvenV, Passthru, Mask, VL);
4430 } else {
4431 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4432 // vwaddu.vv
4433 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4434 OddV, Passthru, Mask, VL);
4436 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
4437 SDValue AllOnesVec = DAG.getSplatVector(
4438 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4439 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4440 OddV, AllOnesVec, Passthru, Mask, VL);
4442 // Add the two together so we get
4443 // (OddV * 0xff...ff) + (OddV + EvenV)
4444 // = (OddV * 0x100...00) + EvenV
4445 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4446 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4447 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4448 Interleaved, OddsMul, Passthru, Mask, VL);
4451 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4452 MVT ResultContainerVT = MVT::getVectorVT(
4453 VecVT.getVectorElementType(), // Make sure to use original type
4454 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4455 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4457 // Convert back to a fixed vector if needed
4458 MVT ResultVT =
4459 MVT::getVectorVT(VecVT.getVectorElementType(),
4460 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4461 if (ResultVT.isFixedLengthVector())
4462 Interleaved =
4463 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4465 return Interleaved;
4466 }
4468 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
4469 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4470 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4471 SelectionDAG &DAG,
4472 const RISCVSubtarget &Subtarget) {
4473 SDLoc DL(SVN);
4474 MVT VT = SVN->getSimpleValueType(0);
4475 SDValue V = SVN->getOperand(0);
4476 unsigned NumElts = VT.getVectorNumElements();
4478 assert(VT.getVectorElementType() == MVT::i1);
4480 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4481 SVN->getMask().size()) ||
4482 !SVN->getOperand(1).isUndef())
4483 return SDValue();
4485 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4486 EVT ViaVT = EVT::getVectorVT(
4487 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4488 EVT ViaBitVT =
4489 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4491 // If we don't have zvbb or the larger element type > ELEN, the operation will
4492 // be illegal.
4493 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4494 ViaVT) ||
4495 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4496 return SDValue();
4498 // If the bit vector doesn't fit exactly into the larger element type, we need
4499 // to insert it into the larger vector and then shift the reversed bits down
4500 // afterwards to get rid of the gap introduced.
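// Illustrative example: a v4i1 reverse is done with a v1i8 BITREVERSE; the
// four reversed bits end up in bits 7..4, so a logical shift right by
// 8 - 4 = 4 moves them back to bits 3..0 before the subvector extract.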
4501 if (ViaEltSize > NumElts)
4502 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4503 V, DAG.getVectorIdxConstant(0, DL));
4505 SDValue Res =
4506 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4508 // Shift the reversed bits down (logical shift right) if the vector didn't
4509 // exactly fit into the larger element type.
4510 if (ViaEltSize > NumElts)
4511 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4512 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4514 Res = DAG.getBitcast(ViaBitVT, Res);
4516 if (ViaEltSize > NumElts)
4517 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4518 DAG.getVectorIdxConstant(0, DL));
4519 return Res;
4520 }
4522 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4523 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4524 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
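// Illustrative example: the v8i8 mask <1, 0, 3, 2, 5, 4, 7, 6> swaps adjacent
// bytes, i.e. a v4i16 rotate by 8, which is canonicalized below to a BSWAP so
// it can be selected as vrev8.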
4525 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4526 SelectionDAG &DAG,
4527 const RISCVSubtarget &Subtarget) {
4528 SDLoc DL(SVN);
4530 EVT VT = SVN->getValueType(0);
4531 unsigned NumElts = VT.getVectorNumElements();
4532 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4533 unsigned NumSubElts, RotateAmt;
4534 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4535 NumElts, NumSubElts, RotateAmt))
4536 return SDValue();
4537 MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4538 NumElts / NumSubElts);
4540 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4541 if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
4542 return SDValue();
4544 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4546 SDValue Rotate;
4547 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4548 // so canonicalize to vrev8.
4549 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4550 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4551 else
4552 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4553 DAG.getConstant(RotateAmt, DL, RotateVT));
4555 return DAG.getBitcast(VT, Rotate);
4556 }
4558 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4559 const RISCVSubtarget &Subtarget) {
4560 SDValue V1 = Op.getOperand(0);
4561 SDValue V2 = Op.getOperand(1);
4562 SDLoc DL(Op);
4563 MVT XLenVT = Subtarget.getXLenVT();
4564 MVT VT = Op.getSimpleValueType();
4565 unsigned NumElts = VT.getVectorNumElements();
4566 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4568 if (VT.getVectorElementType() == MVT::i1) {
4569 // Lower to a vror.vi of a larger element type if possible before we promote
4570 // i1s to i8s.
4571 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4572 return V;
4573 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4574 return V;
4576 // Promote i1 shuffle to i8 shuffle.
4577 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4578 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4579 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4580 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4581 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4582 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4583 ISD::SETNE);
4586 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4588 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4590 if (SVN->isSplat()) {
4591 const int Lane = SVN->getSplatIndex();
4592 if (Lane >= 0) {
4593 MVT SVT = VT.getVectorElementType();
4595 // Turn splatted vector load into a strided load with an X0 stride.
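// Illustrative example: splatting lane 2 of a loaded v4i32 re-points the
// address at base+8 and loads just that element; for SEW=64 on RV32 the splat
// is instead expressed as a vlse with rs2=x0 so every lane reads that address.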
4596 SDValue V = V1;
4597 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4598 // with undef.
4599 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4600 int Offset = Lane;
4601 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4602 int OpElements =
4603 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4604 V = V.getOperand(Offset / OpElements);
4605 Offset %= OpElements;
4608 // We need to ensure the load isn't atomic or volatile.
4609 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4610 auto *Ld = cast<LoadSDNode>(V);
4611 Offset *= SVT.getStoreSize();
4612 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
4613 TypeSize::Fixed(Offset), DL);
4615 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4616 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4617 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4618 SDValue IntID =
4619 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4620 SDValue Ops[] = {Ld->getChain(),
4621 IntID,
4622 DAG.getUNDEF(ContainerVT),
4623 NewAddr,
4624 DAG.getRegister(RISCV::X0, XLenVT),
4625 VL};
4626 SDValue NewLoad = DAG.getMemIntrinsicNode(
4627 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4628 DAG.getMachineFunction().getMachineMemOperand(
4629 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4630 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4631 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4634 // Otherwise use a scalar load and splat. This will give the best
4635 // opportunity to fold a splat into the operation. ISel can turn it into
4636 // the x0 strided load if we aren't able to fold away the select.
4637 if (SVT.isFloatingPoint())
4638 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4639 Ld->getPointerInfo().getWithOffset(Offset),
4640 Ld->getOriginalAlign(),
4641 Ld->getMemOperand()->getFlags());
4642 else
4643 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4644 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4645 Ld->getOriginalAlign(),
4646 Ld->getMemOperand()->getFlags());
4647 DAG.makeEquivalentMemoryOrdering(Ld, V);
4649 unsigned Opc =
4650 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4651 SDValue Splat =
4652 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4653 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4656 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4657 assert(Lane < (int)NumElts && "Unexpected lane!");
4658 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4659 V1, DAG.getConstant(Lane, DL, XLenVT),
4660 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4661 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4665 ArrayRef<int> Mask = SVN->getMask();
4667 if (SDValue V =
4668 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4669 return V;
4671 if (SDValue V =
4672 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4673 return V;
4675 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
4676 // available.
4677 if (Subtarget.hasStdExtZvkb())
4678 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4679 return V;
4681 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4682 // be undef which can be handled with a single SLIDEDOWN/UP.
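// Illustrative example: for the single-source mask <2, 3, 0, 1> (NumElts = 4,
// Rotation = 2), HiV is slid down by 2 to fill lanes 0-1 and LoV is slid up
// by NumElts - Rotation = 2 to fill lanes 2-3.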
4683 int LoSrc, HiSrc;
4684 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4685 if (Rotation > 0) {
4686 SDValue LoV, HiV;
4687 if (LoSrc >= 0) {
4688 LoV = LoSrc == 0 ? V1 : V2;
4689 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4691 if (HiSrc >= 0) {
4692 HiV = HiSrc == 0 ? V1 : V2;
4693 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4696 // We found a rotation. We need to slide HiV down by Rotation. Then we need
4697 // to slide LoV up by (NumElts - Rotation).
4698 unsigned InvRotate = NumElts - Rotation;
4700 SDValue Res = DAG.getUNDEF(ContainerVT);
4701 if (HiV) {
4702 // Even though we could use a smaller VL, we don't, so as to avoid a
4703 // vsetivli toggle.
4704 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4705 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4707 if (LoV)
4708 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4709 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4710 RISCVII::TAIL_AGNOSTIC);
4712 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4715 // If this is a deinterleave and we can widen the vector, then we can use
4716 // vnsrl to deinterleave.
4717 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4718 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4719 Subtarget, DAG);
4722 if (SDValue V =
4723 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4724 return V;
4726 // Detect an interleave shuffle and lower to
4727 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
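// e.g. with two v4i8 sources the mask <0, 4, 1, 5> is such an interleave:
// EvenV is the low half of V1 and OddV is the low half of V2.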
4728 int EvenSrc, OddSrc;
4729 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4730 // Extract the halves of the vectors.
4731 MVT HalfVT = VT.getHalfNumVectorElementsVT();
4733 int Size = Mask.size();
4734 SDValue EvenV, OddV;
4735 assert(EvenSrc >= 0 && "Undef source?");
4736 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4737 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4738 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4740 assert(OddSrc >= 0 && "Undef source?");
4741 OddV = (OddSrc / Size) == 0 ? V1 : V2;
4742 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4743 DAG.getConstant(OddSrc % Size, DL, XLenVT));
4745 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4748 // Detect shuffles which can be re-expressed as vector selects; these are
4749 // shuffles in which each element in the destination is taken from an element
4750 // at the corresponding index in either source vectors.
4751 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4752 int MaskIndex = MaskIdx.value();
4753 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4756 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4758 SmallVector<SDValue> MaskVals;
4759 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4760 // merged with a second vrgather.
4761 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4763 // By default we preserve the original operand order, and use a mask to
4764 // select LHS as true and RHS as false. However, since RVV vector selects may
4765 // feature splats but only on the LHS, we may choose to invert our mask and
4766 // instead select between RHS and LHS.
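// Illustrative example: for NumElts = 4 the mask <0, 5, 2, 7> takes every
// lane from the same position in one of the sources, so (with no operand
// swap) it lowers to a vselect with mask <1, 0, 1, 0>, choosing V1 in lanes
// 0/2 and V2 in lanes 1/3.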
4767 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4768 bool InvertMask = IsSelect == SwapOps;
4770 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
4771 // half.
4772 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4774 // Now construct the mask that will be used by the vselect or blended
4775 // vrgather operation. For vrgathers, construct the appropriate indices into
4776 // each vector.
4777 for (int MaskIndex : Mask) {
4778 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
4779 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4780 if (!IsSelect) {
4781 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4782 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4783 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4784 : DAG.getUNDEF(XLenVT));
4785 GatherIndicesRHS.push_back(
4786 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4787 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4788 if (IsLHSOrUndefIndex && MaskIndex >= 0)
4789 ++LHSIndexCounts[MaskIndex];
4790 if (!IsLHSOrUndefIndex)
4791 ++RHSIndexCounts[MaskIndex - NumElts];
4795 if (SwapOps) {
4796 std::swap(V1, V2);
4797 std::swap(GatherIndicesLHS, GatherIndicesRHS);
4800 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4801 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4802 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4804 if (IsSelect)
4805 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4807 // We might be able to express the shuffle as a bitrotate. But even if we
4808 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
4809 // shifts and a vor will have a higher throughput than a vrgather.
4810 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4811 return V;
4813 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4814 // On such a large vector we're unable to use i8 as the index type.
4815 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4816 // may involve vector splitting if we're already at LMUL=8, or our
4817 // user-supplied maximum fixed-length LMUL.
4818 return SDValue();
4821 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
4822 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
4823 MVT IndexVT = VT.changeTypeToInteger();
4824 // Since we can't introduce illegal index types at this stage, use i16 and
4825 // vrgatherei16 if the corresponding index type for plain vrgather is greater
4826 // than XLenVT.
4827 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
4828 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4829 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4832 // If the mask allows, we can do all the index computation in 16 bits. This
4833 // requires less work and less register pressure at high LMUL, and creates
4834 // smaller constants which may be cheaper to materialize.
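// Illustrative example (assuming VLEN=128): a v32i32 shuffle would need a
// v32i32 index operand occupying an LMUL=8 register group, whereas v32i16
// indices fit in LMUL=4, halving the register pressure of the indices.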
4835 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
4836 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
4837 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4838 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4841 MVT IndexContainerVT =
4842 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
4844 SDValue Gather;
4845 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
4846 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
4847 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
4848 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
4849 Subtarget);
4850 } else {
4851 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4852 // If only one index is used, we can use a "splat" vrgather.
4853 // TODO: We can splat the most-common index and fix-up any stragglers, if
4854 // that's beneficial.
4855 if (LHSIndexCounts.size() == 1) {
4856 int SplatIndex = LHSIndexCounts.begin()->getFirst();
4857 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
4858 DAG.getConstant(SplatIndex, DL, XLenVT),
4859 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4860 } else {
4861 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
4862 LHSIndices =
4863 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
4865 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
4866 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4870 // If a second vector operand is used by this shuffle, blend it in with an
4871 // additional vrgather.
4872 if (!V2.isUndef()) {
4873 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4875 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4876 SelectMask =
4877 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
4879 // If only one index is used, we can use a "splat" vrgather.
4880 // TODO: We can splat the most-common index and fix-up any stragglers, if
4881 // that's beneficial.
4882 if (RHSIndexCounts.size() == 1) {
4883 int SplatIndex = RHSIndexCounts.begin()->getFirst();
4884 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
4885 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
4886 SelectMask, VL);
4887 } else {
4888 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
4889 RHSIndices =
4890 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
4891 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
4892 SelectMask, VL);
4896 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4897 }
4899 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
4900 // Support splats for any type. These should type legalize well.
4901 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
4902 return true;
4904 // Only support legal VTs for other shuffles for now.
4905 if (!isTypeLegal(VT))
4906 return false;
4908 MVT SVT = VT.getSimpleVT();
4910 // Not for i1 vectors.
4911 if (SVT.getScalarType() == MVT::i1)
4912 return false;
4914 int Dummy1, Dummy2;
4915 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
4916 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
4917 }
4919 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
4920 // the exponent.
4921 SDValue
4922 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
4923 SelectionDAG &DAG) const {
4924 MVT VT = Op.getSimpleValueType();
4925 unsigned EltSize = VT.getScalarSizeInBits();
4926 SDValue Src = Op.getOperand(0);
4927 SDLoc DL(Op);
4928 MVT ContainerVT = VT;
4930 SDValue Mask, VL;
4931 if (Op->isVPOpcode()) {
4932 Mask = Op.getOperand(1);
4933 if (VT.isFixedLengthVector())
4934 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
4935 Subtarget);
4936 VL = Op.getOperand(2);
4939 // We choose an FP type that can represent the value exactly when possible.
4940 // Otherwise we use a round-towards-zero conversion so the result still has the correct exponent.
4941 // TODO: Use f16 for i8 when possible?
4942 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
4943 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
4944 FloatEltVT = MVT::f32;
4945 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
4947 // Legal types should have been checked in the RISCVTargetLowering
4948 // constructor.
4949 // TODO: Splitting may make sense in some cases.
4950 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
4951 "Expected legal float type!");
4953 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
4954 // The trailing zero count is equal to log2 of this single bit value.
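// Illustrative example (i16 elements, via f32): for Src = 0x0070, X & -X
// isolates bit 4 (0x0010); 16.0f has a biased exponent of 127 + 4, so
// subtracting the bias below recovers cttz = 4.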
4955 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
4956 SDValue Neg = DAG.getNegative(Src, DL, VT);
4957 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
4958 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
4959 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
4960 Src, Mask, VL);
4961 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
4964 // We have a legal FP type, convert to it.
4965 SDValue FloatVal;
4966 if (FloatVT.bitsGT(VT)) {
4967 if (Op->isVPOpcode())
4968 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
4969 else
4970 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
4971 } else {
4972 // Use RTZ to avoid rounding influencing exponent of FloatVal.
4973 if (VT.isFixedLengthVector()) {
4974 ContainerVT = getContainerForFixedLengthVector(VT);
4975 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4977 if (!Op->isVPOpcode())
4978 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4979 SDValue RTZRM =
4980 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
4981 MVT ContainerFloatVT =
4982 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
4983 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
4984 Src, Mask, RTZRM, VL);
4985 if (VT.isFixedLengthVector())
4986 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
4988 // Bitcast to integer and shift the exponent to the LSB.
4989 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
4990 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
4991 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
4993 SDValue Exp;
4994 // Restore to the original type. The truncation after SRL lets it be selected as vnsrl.
4995 if (Op->isVPOpcode()) {
4996 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
4997 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
4998 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
4999 } else {
5000 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5001 DAG.getConstant(ShiftAmt, DL, IntVT));
5002 if (IntVT.bitsLT(VT))
5003 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5004 else if (IntVT.bitsGT(VT))
5005 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5008 // The exponent contains log2 of the value in biased form.
5009 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5010 // For trailing zeros, we just need to subtract the bias.
5011 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5012 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5013 DAG.getConstant(ExponentBias, DL, VT));
5014 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5015 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5016 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5018 // For leading zeros, we need to remove the bias and convert from log2 to
5019 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
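// Illustrative example (i16 elements, via f32): 0x0070 converts to 112.0f
// with biased exponent 127 + 6, and (127 + 15) - (127 + 6) = 9, which matches
// the 9 leading zeros of 0x0070 in 16 bits.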
5020 unsigned Adjust = ExponentBias + (EltSize - 1);
5021 SDValue Res;
5022 if (Op->isVPOpcode())
5023 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5024 Mask, VL);
5025 else
5026 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5028 // With a zero input the result above equals Adjust, which is greater than
5029 // EltSize, so we can clamp it with min(Res, EltSize) for CTLZ.
5030 if (Op.getOpcode() == ISD::CTLZ)
5031 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5032 else if (Op.getOpcode() == ISD::VP_CTLZ)
5033 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5034 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5035 return Res;
5036 }
5038 // While RVV has alignment restrictions, we should always be able to load as a
5039 // legal equivalently-sized byte-typed vector instead. This method is
5040 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5041 // the load is already correctly-aligned, it returns SDValue().
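// Illustrative example: an under-aligned load of <vscale x 4 x i32> is
// re-expressed as a <vscale x 16 x i8> load of the same size in bytes and the
// result is bitcast back, since byte loads carry no alignment requirement.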
5042 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5043 SelectionDAG &DAG) const {
5044 auto *Load = cast<LoadSDNode>(Op);
5045 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5047 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5048 Load->getMemoryVT(),
5049 *Load->getMemOperand()))
5050 return SDValue();
5052 SDLoc DL(Op);
5053 MVT VT = Op.getSimpleValueType();
5054 unsigned EltSizeBits = VT.getScalarSizeInBits();
5055 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5056 "Unexpected unaligned RVV load type");
5057 MVT NewVT =
5058 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5059 assert(NewVT.isValid() &&
5060 "Expecting equally-sized RVV vector types to be legal");
5061 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5062 Load->getPointerInfo(), Load->getOriginalAlign(),
5063 Load->getMemOperand()->getFlags());
5064 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5065 }
5067 // While RVV has alignment restrictions, we should always be able to store as a
5068 // legal equivalently-sized byte-typed vector instead. This method is
5069 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5070 // returns SDValue() if the store is already correctly aligned.
5071 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5072 SelectionDAG &DAG) const {
5073 auto *Store = cast<StoreSDNode>(Op);
5074 assert(Store && Store->getValue().getValueType().isVector() &&
5075 "Expected vector store");
5077 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5078 Store->getMemoryVT(),
5079 *Store->getMemOperand()))
5080 return SDValue();
5082 SDLoc DL(Op);
5083 SDValue StoredVal = Store->getValue();
5084 MVT VT = StoredVal.getSimpleValueType();
5085 unsigned EltSizeBits = VT.getScalarSizeInBits();
5086 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5087 "Unexpected unaligned RVV store type");
5088 MVT NewVT =
5089 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5090 assert(NewVT.isValid() &&
5091 "Expecting equally-sized RVV vector types to be legal");
5092 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5093 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5094 Store->getPointerInfo(), Store->getOriginalAlign(),
5095 Store->getMemOperand()->getFlags());
5096 }
5098 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5099 const RISCVSubtarget &Subtarget) {
5100 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5102 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5104 // All simm32 constants should be handled by isel.
5105 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5106 // this check redundant, but small immediates are common so this check
5107 // should have better compile time.
5108 if (isInt<32>(Imm))
5109 return Op;
5111 // We only need to cost the immediate, if constant pool lowering is enabled.
5112 if (!Subtarget.useConstantPoolForLargeInts())
5113 return Op;
5115 RISCVMatInt::InstSeq Seq =
5116 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
5117 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5118 return Op;
5120 // Optimizations below are disabled for opt size. If we're optimizing for
5121 // size, use a constant pool.
5122 if (DAG.shouldOptForSize())
5123 return SDValue();
5125 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5126 // do that if it will avoid a constant pool.
5127 // It will require an extra temporary register though.
5128 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5129 // the low and high 32 bits are the same and bits 31 and 63 are set.
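// Illustrative example: 0x0000100000001000 can be built as X = 0x1000
// followed by (ADD (SLLI X, 32), X), trading a constant-pool load for one
// extra temporary register.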
5130 unsigned ShiftAmt, AddOpc;
5131 RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq(
5132 Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc);
5133 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5134 return Op;
5136 return SDValue();
5137 }
5139 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5140 const RISCVSubtarget &Subtarget) {
5141 SDLoc dl(Op);
5142 AtomicOrdering FenceOrdering =
5143 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5144 SyncScope::ID FenceSSID =
5145 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5147 if (Subtarget.hasStdExtZtso()) {
5148 // The only fence that needs an instruction is a sequentially-consistent
5149 // cross-thread fence.
5150 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5151 FenceSSID == SyncScope::System)
5152 return Op;
5154 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5155 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5158 // singlethread fences only synchronize with signal handlers on the same
5159 // thread and thus only need to preserve instruction order, not actually
5160 // enforce memory ordering.
5161 if (FenceSSID == SyncScope::SingleThread)
5162 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5163 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5165 return Op;
5166 }
5168 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5169 SelectionDAG &DAG) const {
5170 SDLoc DL(Op);
5171 MVT VT = Op.getSimpleValueType();
5172 MVT XLenVT = Subtarget.getXLenVT();
5173 unsigned Check = Op.getConstantOperandVal(1);
5174 unsigned TDCMask = 0;
5175 if (Check & fcSNan)
5176 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5177 if (Check & fcQNan)
5178 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5179 if (Check & fcPosInf)
5180 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5181 if (Check & fcNegInf)
5182 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5183 if (Check & fcPosNormal)
5184 TDCMask |= RISCV::FPMASK_Positive_Normal;
5185 if (Check & fcNegNormal)
5186 TDCMask |= RISCV::FPMASK_Negative_Normal;
5187 if (Check & fcPosSubnormal)
5188 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5189 if (Check & fcNegSubnormal)
5190 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5191 if (Check & fcPosZero)
5192 TDCMask |= RISCV::FPMASK_Positive_Zero;
5193 if (Check & fcNegZero)
5194 TDCMask |= RISCV::FPMASK_Negative_Zero;
5196 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5198 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5200 if (VT.isVector()) {
5201 SDValue Op0 = Op.getOperand(0);
5202 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5204 if (VT.isScalableVector()) {
5205 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5206 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5207 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5208 Mask = Op.getOperand(2);
5209 VL = Op.getOperand(3);
5211 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5212 VL, Op->getFlags());
5213 if (IsOneBitMask)
5214 return DAG.getSetCC(DL, VT, FPCLASS,
5215 DAG.getConstant(TDCMask, DL, DstVT),
5216 ISD::CondCode::SETEQ);
5217 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5218 DAG.getConstant(TDCMask, DL, DstVT));
5219 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5220 ISD::SETNE);
5223 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5224 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5225 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5226 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5227 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5228 Mask = Op.getOperand(2);
5229 MVT MaskContainerVT =
5230 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5231 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5232 VL = Op.getOperand(3);
5234 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5236 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5237 Mask, VL, Op->getFlags());
5239 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5240 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5241 if (IsOneBitMask) {
5242 SDValue VMSEQ =
5243 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5244 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5245 DAG.getUNDEF(ContainerVT), Mask, VL});
5246 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5248 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5249 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5251 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5252 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5253 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5255 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5256 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5257 DAG.getUNDEF(ContainerVT), Mask, VL});
5258 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5261 SDValue FPCLASS =
5262 DAG.getNode(RISCVISD::FPCLASS, DL, XLenVT, Op.getOperand(0));
5263 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FPCLASS, TDCMaskV);
5264 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5265 ISD::CondCode::SETNE);
5266 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5267 }
5269 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5270 // operations propagate nans.
5271 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5272 const RISCVSubtarget &Subtarget) {
5273 SDLoc DL(Op);
5274 MVT VT = Op.getSimpleValueType();
5276 SDValue X = Op.getOperand(0);
5277 SDValue Y = Op.getOperand(1);
5279 if (!VT.isVector()) {
5280 MVT XLenVT = Subtarget.getXLenVT();
5282 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5283 // ensures that when one input is a nan, the other will also be a nan
5284 // allowing the nan to propagate. If both inputs are nan, this will swap the
5285 // inputs which is harmless.
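// Illustrative example: for fmaximum(NaN, 1.0), X fails the SETOEQ self
// comparison, so Y is replaced by X and fmax sees (NaN, NaN), returning NaN
// as the NaN-propagating maximum operation requires.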
5287 SDValue NewY = Y;
5288 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5289 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5290 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5293 SDValue NewX = X;
5294 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5295 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5296 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5299 unsigned Opc =
5300 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5301 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5304 // Check for no-NaN inputs before converting the fixed-length vectors to scalable.
5305 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5306 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5308 MVT ContainerVT = VT;
5309 if (VT.isFixedLengthVector()) {
5310 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5311 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5312 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5315 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5317 SDValue NewY = Y;
5318 if (!XIsNeverNan) {
5319 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5320 {X, X, DAG.getCondCode(ISD::SETOEQ),
5321 DAG.getUNDEF(ContainerVT), Mask, VL});
5322 NewY =
5323 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
5326 SDValue NewX = X;
5327 if (!YIsNeverNan) {
5328 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5329 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5330 DAG.getUNDEF(ContainerVT), Mask, VL});
5331 NewX =
5332 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
5335 unsigned Opc =
5336 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
5337 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5338 DAG.getUNDEF(ContainerVT), Mask, VL);
5339 if (VT.isFixedLengthVector())
5340 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5341 return Res;
5342 }
5344 /// Get the RISC-V target-specific VL op for a given SDNode.
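/// For example, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL, while
/// the i1-vector forms of AND/OR/XOR map to VMAND_VL/VMOR_VL/VMXOR_VL.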
5345 static unsigned getRISCVVLOp(SDValue Op) {
5346 #define OP_CASE(NODE) \
5347 case ISD::NODE: \
5348 return RISCVISD::NODE##_VL;
5349 #define VP_CASE(NODE) \
5350 case ISD::VP_##NODE: \
5351 return RISCVISD::NODE##_VL;
5352 // clang-format off
5353 switch (Op.getOpcode()) {
5354 default:
5355 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5356 OP_CASE(ADD)
5357 OP_CASE(SUB)
5358 OP_CASE(MUL)
5359 OP_CASE(MULHS)
5360 OP_CASE(MULHU)
5361 OP_CASE(SDIV)
5362 OP_CASE(SREM)
5363 OP_CASE(UDIV)
5364 OP_CASE(UREM)
5365 OP_CASE(SHL)
5366 OP_CASE(SRA)
5367 OP_CASE(SRL)
5368 OP_CASE(ROTL)
5369 OP_CASE(ROTR)
5370 OP_CASE(BSWAP)
5371 OP_CASE(CTTZ)
5372 OP_CASE(CTLZ)
5373 OP_CASE(CTPOP)
5374 OP_CASE(BITREVERSE)
5375 OP_CASE(SADDSAT)
5376 OP_CASE(UADDSAT)
5377 OP_CASE(SSUBSAT)
5378 OP_CASE(USUBSAT)
5379 OP_CASE(FADD)
5380 OP_CASE(FSUB)
5381 OP_CASE(FMUL)
5382 OP_CASE(FDIV)
5383 OP_CASE(FNEG)
5384 OP_CASE(FABS)
5385 OP_CASE(FSQRT)
5386 OP_CASE(SMIN)
5387 OP_CASE(SMAX)
5388 OP_CASE(UMIN)
5389 OP_CASE(UMAX)
5390 OP_CASE(STRICT_FADD)
5391 OP_CASE(STRICT_FSUB)
5392 OP_CASE(STRICT_FMUL)
5393 OP_CASE(STRICT_FDIV)
5394 OP_CASE(STRICT_FSQRT)
5395 VP_CASE(ADD) // VP_ADD
5396 VP_CASE(SUB) // VP_SUB
5397 VP_CASE(MUL) // VP_MUL
5398 VP_CASE(SDIV) // VP_SDIV
5399 VP_CASE(SREM) // VP_SREM
5400 VP_CASE(UDIV) // VP_UDIV
5401 VP_CASE(UREM) // VP_UREM
5402 VP_CASE(SHL) // VP_SHL
5403 VP_CASE(FADD) // VP_FADD
5404 VP_CASE(FSUB) // VP_FSUB
5405 VP_CASE(FMUL) // VP_FMUL
5406 VP_CASE(FDIV) // VP_FDIV
5407 VP_CASE(FNEG) // VP_FNEG
5408 VP_CASE(FABS) // VP_FABS
5409 VP_CASE(SMIN) // VP_SMIN
5410 VP_CASE(SMAX) // VP_SMAX
5411 VP_CASE(UMIN) // VP_UMIN
5412 VP_CASE(UMAX) // VP_UMAX
5413 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5414 VP_CASE(SETCC) // VP_SETCC
5415 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5416 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5417 VP_CASE(BITREVERSE) // VP_BITREVERSE
5418 VP_CASE(BSWAP) // VP_BSWAP
5419 VP_CASE(CTLZ) // VP_CTLZ
5420 VP_CASE(CTTZ) // VP_CTTZ
5421 VP_CASE(CTPOP) // VP_CTPOP
5422 case ISD::CTLZ_ZERO_UNDEF:
5423 case ISD::VP_CTLZ_ZERO_UNDEF:
5424 return RISCVISD::CTLZ_VL;
5425 case ISD::CTTZ_ZERO_UNDEF:
5426 case ISD::VP_CTTZ_ZERO_UNDEF:
5427 return RISCVISD::CTTZ_VL;
5428 case ISD::FMA:
5429 case ISD::VP_FMA:
5430 return RISCVISD::VFMADD_VL;
5431 case ISD::STRICT_FMA:
5432 return RISCVISD::STRICT_VFMADD_VL;
5433 case ISD::AND:
5434 case ISD::VP_AND:
5435 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5436 return RISCVISD::VMAND_VL;
5437 return RISCVISD::AND_VL;
5438 case ISD::OR:
5439 case ISD::VP_OR:
5440 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5441 return RISCVISD::VMOR_VL;
5442 return RISCVISD::OR_VL;
5443 case ISD::XOR:
5444 case ISD::VP_XOR:
5445 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5446 return RISCVISD::VMXOR_VL;
5447 return RISCVISD::XOR_VL;
5448 case ISD::VP_SELECT:
5449 return RISCVISD::VSELECT_VL;
5450 case ISD::VP_MERGE:
5451 return RISCVISD::VP_MERGE_VL;
5452 case ISD::VP_ASHR:
5453 return RISCVISD::SRA_VL;
5454 case ISD::VP_LSHR:
5455 return RISCVISD::SRL_VL;
5456 case ISD::VP_SQRT:
5457 return RISCVISD::FSQRT_VL;
5458 case ISD::VP_SIGN_EXTEND:
5459 return RISCVISD::VSEXT_VL;
5460 case ISD::VP_ZERO_EXTEND:
5461 return RISCVISD::VZEXT_VL;
5462 case ISD::VP_FP_TO_SINT:
5463 return RISCVISD::VFCVT_RTZ_X_F_VL;
5464 case ISD::VP_FP_TO_UINT:
5465 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5466 case ISD::FMINNUM:
5467 case ISD::VP_FMINNUM:
5468 return RISCVISD::VFMIN_VL;
5469 case ISD::FMAXNUM:
5470 case ISD::VP_FMAXNUM:
5471 return RISCVISD::VFMAX_VL;
5473 // clang-format on
5474 #undef OP_CASE
5475 #undef VP_CASE
5476 }
5478 /// Return true if a RISC-V target-specific op has a merge operand.
5479 static bool hasMergeOp(unsigned Opcode) {
5480 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5481 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5482 "not a RISC-V target specific op");
5483 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5484 125 &&
5485 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5486 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5487 21 &&
5488 "adding target specific op should update this function");
5489 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5490 return true;
5491 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5492 return true;
5493 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5494 return true;
5495 if (Opcode == RISCVISD::SETCC_VL)
5496 return true;
5497 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5498 return true;
5499 return false;
5500 }
5502 /// Return true if a RISC-V target-specific op has a mask operand.
5503 static bool hasMaskOp(unsigned Opcode) {
5504 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5505 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5506 "not a RISC-V target specific op");
5507 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5508 125 &&
5509 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5510 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5511 21 &&
5512 "adding target specific op should update this function");
5513 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5514 return true;
5515 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5516 return true;
5517 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5518 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5519 return true;
5520 return false;
5521 }
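// Split an operation on an illegally large vector type into two operations on
// the extracted low and high halves, reusing any non-vector operands for both
// halves, and concatenate the results.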
5523 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5524 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5525 SDLoc DL(Op);
5527 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5528 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5530 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5531 if (!Op.getOperand(j).getValueType().isVector()) {
5532 LoOperands[j] = Op.getOperand(j);
5533 HiOperands[j] = Op.getOperand(j);
5534 continue;
5536 std::tie(LoOperands[j], HiOperands[j]) =
5537 DAG.SplitVector(Op.getOperand(j), DL);
5540 SDValue LoRes =
5541 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5542 SDValue HiRes =
5543 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5545 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5546 }
5548 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5549 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5550 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5551 SDLoc DL(Op);
5553 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5554 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5556 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5557 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5558 std::tie(LoOperands[j], HiOperands[j]) =
5559 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5560 continue;
5562 if (!Op.getOperand(j).getValueType().isVector()) {
5563 LoOperands[j] = Op.getOperand(j);
5564 HiOperands[j] = Op.getOperand(j);
5565 continue;
5567 std::tie(LoOperands[j], HiOperands[j]) =
5568 DAG.SplitVector(Op.getOperand(j), DL);
5571 SDValue LoRes =
5572 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5573 SDValue HiRes =
5574 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5576 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5577 }
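// Split a VP reduction over an illegally large vector: the low half is
// reduced first and its result is fed in as the start value of the high-half
// reduction, chaining the two partial reductions.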
5579 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5580 SDLoc DL(Op);
5582 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5583 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5584 auto [EVLLo, EVLHi] =
5585 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5587 SDValue ResLo =
5588 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5589 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5590 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5591 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5592 }
5594 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
5595 SelectionDAG &DAG) const {
5596 switch (Op.getOpcode()) {
5597 default:
5598 report_fatal_error("unimplemented operand");
5599 case ISD::ATOMIC_FENCE:
5600 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
5601 case ISD::GlobalAddress:
5602 return lowerGlobalAddress(Op, DAG);
5603 case ISD::BlockAddress:
5604 return lowerBlockAddress(Op, DAG);
5605 case ISD::ConstantPool:
5606 return lowerConstantPool(Op, DAG);
5607 case ISD::JumpTable:
5608 return lowerJumpTable(Op, DAG);
5609 case ISD::GlobalTLSAddress:
5610 return lowerGlobalTLSAddress(Op, DAG);
5611 case ISD::Constant:
5612 return lowerConstant(Op, DAG, Subtarget);
5613 case ISD::SELECT:
5614 return lowerSELECT(Op, DAG);
5615 case ISD::BRCOND:
5616 return lowerBRCOND(Op, DAG);
5617 case ISD::VASTART:
5618 return lowerVASTART(Op, DAG);
5619 case ISD::FRAMEADDR:
5620 return lowerFRAMEADDR(Op, DAG);
5621 case ISD::RETURNADDR:
5622 return lowerRETURNADDR(Op, DAG);
5623 case ISD::SHL_PARTS:
5624 return lowerShiftLeftParts(Op, DAG);
5625 case ISD::SRA_PARTS:
5626 return lowerShiftRightParts(Op, DAG, true);
5627 case ISD::SRL_PARTS:
5628 return lowerShiftRightParts(Op, DAG, false);
5629 case ISD::ROTL:
5630 case ISD::ROTR:
5631 if (Op.getValueType().isFixedLengthVector()) {
5632 assert(Subtarget.hasStdExtZvkb());
5633 return lowerToScalableOp(Op, DAG);
5635 assert(Subtarget.hasVendorXTHeadBb() &&
5636 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
5637 "Unexpected custom legalization");
5638 // XTHeadBb only supports rotate by constant.
5639 if (!isa<ConstantSDNode>(Op.getOperand(1)))
5640 return SDValue();
5641 return Op;
5642 case ISD::BITCAST: {
5643 SDLoc DL(Op);
5644 EVT VT = Op.getValueType();
5645 SDValue Op0 = Op.getOperand(0);
5646 EVT Op0VT = Op0.getValueType();
5647 MVT XLenVT = Subtarget.getXLenVT();
5648 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
5649 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
5650 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5651 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
5652 return FPConv;
5654 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
5655 Subtarget.hasStdExtZfbfmin()) {
5656 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5657 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
5658 return FPConv;
5660 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
5661 Subtarget.hasStdExtFOrZfinx()) {
5662 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5663 SDValue FPConv =
5664 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
5665 return FPConv;
5667 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
5668 Subtarget.hasStdExtZfa()) {
5669 SDValue Lo, Hi;
5670 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
5671 SDValue RetReg =
5672 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5673 return RetReg;
5676 // Consider other scalar<->scalar casts as legal if the types are legal.
5677 // Otherwise expand them.
5678 if (!VT.isVector() && !Op0VT.isVector()) {
5679 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
5680 return Op;
5681 return SDValue();
5684 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
5685 "Unexpected types");
5687 if (VT.isFixedLengthVector()) {
5688 // We can handle fixed length vector bitcasts with a simple replacement
5689 // in isel.
5690 if (Op0VT.isFixedLengthVector())
5691 return Op;
5692 // When bitcasting from scalar to fixed-length vector, insert the scalar
5693 // into a one-element vector of the result type, and perform a vector
5694 // bitcast.
5695 if (!Op0VT.isVector()) {
5696 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
5697 if (!isTypeLegal(BVT))
5698 return SDValue();
5699 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
5700 DAG.getUNDEF(BVT), Op0,
5701 DAG.getConstant(0, DL, XLenVT)));
5703 return SDValue();
5705 // Custom-legalize bitcasts from fixed-length vector types to scalar types
5706 // thus: bitcast the vector to a one-element vector type whose element type
5707 // is the same as the result type, and extract the first element.
5708 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
5709 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
5710 if (!isTypeLegal(BVT))
5711 return SDValue();
5712 SDValue BVec = DAG.getBitcast(BVT, Op0);
5713 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
5714 DAG.getConstant(0, DL, XLenVT));
5716 return SDValue();
5718 case ISD::INTRINSIC_WO_CHAIN:
5719 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5720 case ISD::INTRINSIC_W_CHAIN:
5721 return LowerINTRINSIC_W_CHAIN(Op, DAG);
5722 case ISD::INTRINSIC_VOID:
5723 return LowerINTRINSIC_VOID(Op, DAG);
5724 case ISD::IS_FPCLASS:
5725 return LowerIS_FPCLASS(Op, DAG);
5726 case ISD::BITREVERSE: {
5727 MVT VT = Op.getSimpleValueType();
5728 if (VT.isFixedLengthVector()) {
5729 assert(Subtarget.hasStdExtZvbb());
5730 return lowerToScalableOp(Op, DAG);
5732 SDLoc DL(Op);
5733 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
5734 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
5735 // Expand bitreverse to a bswap(rev8) followed by brev8.
5736 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
5737 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
5739 case ISD::TRUNCATE:
5740 // Only custom-lower vector truncates
5741 if (!Op.getSimpleValueType().isVector())
5742 return Op;
5743 return lowerVectorTruncLike(Op, DAG);
5744 case ISD::ANY_EXTEND:
5745 case ISD::ZERO_EXTEND:
5746 if (Op.getOperand(0).getValueType().isVector() &&
5747 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5748 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
5749 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
5750 case ISD::SIGN_EXTEND:
5751 if (Op.getOperand(0).getValueType().isVector() &&
5752 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5753 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
5754 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
5755 case ISD::SPLAT_VECTOR_PARTS:
5756 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
5757 case ISD::INSERT_VECTOR_ELT:
5758 return lowerINSERT_VECTOR_ELT(Op, DAG);
5759 case ISD::EXTRACT_VECTOR_ELT:
5760 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5761 case ISD::SCALAR_TO_VECTOR: {
5762 MVT VT = Op.getSimpleValueType();
5763 SDLoc DL(Op);
5764 SDValue Scalar = Op.getOperand(0);
5765 if (VT.getVectorElementType() == MVT::i1) {
5766 MVT WideVT = VT.changeVectorElementType(MVT::i8);
5767 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
5768 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
5770 MVT ContainerVT = VT;
5771 if (VT.isFixedLengthVector())
5772 ContainerVT = getContainerForFixedLengthVector(VT);
5773 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5774 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
5775 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
5776 DAG.getUNDEF(ContainerVT), Scalar, VL);
5777 if (VT.isFixedLengthVector())
5778 V = convertFromScalableVector(VT, V, DAG, Subtarget);
5779 return V;
5781 case ISD::VSCALE: {
5782 MVT XLenVT = Subtarget.getXLenVT();
5783 MVT VT = Op.getSimpleValueType();
5784 SDLoc DL(Op);
5785 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
5786 // We define our scalable vector types for lmul=1 to use a 64 bit known
5787 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
5788 // vscale as VLENB / 8.
5789 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
5790 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
5791 report_fatal_error("Support for VLEN==32 is incomplete.");
5792 // We assume VLENB is a multiple of 8. We manually choose the best shift
5793 // here because SimplifyDemandedBits isn't always able to simplify it.
5794 uint64_t Val = Op.getConstantOperandVal(0);
5795 if (isPowerOf2_64(Val)) {
5796 uint64_t Log2 = Log2_64(Val);
5797 if (Log2 < 3)
5798 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
5799 DAG.getConstant(3 - Log2, DL, VT));
5800 else if (Log2 > 3)
5801 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
5802 DAG.getConstant(Log2 - 3, DL, XLenVT));
5803 } else if ((Val % 8) == 0) {
5804 // If the multiplier is a multiple of 8, scale it down to avoid needing
5805 // to shift the VLENB value.
5806 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
5807 DAG.getConstant(Val / 8, DL, XLenVT));
5808 } else {
5809 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
5810 DAG.getConstant(3, DL, XLenVT));
5811 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
5812 DAG.getConstant(Val, DL, XLenVT));
5814 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5816 case ISD::FPOWI: {
5817 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
5818 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
5819 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
5820 Op.getOperand(1).getValueType() == MVT::i32) {
5821 SDLoc DL(Op);
5822 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
5823 SDValue Powi =
5824 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
5825 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
5826 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5828 return SDValue();
5830 case ISD::FMAXIMUM:
5831 case ISD::FMINIMUM:
5832 if (Op.getValueType() == MVT::nxv32f16 &&
5833 (Subtarget.hasVInstructionsF16Minimal() &&
5834 !Subtarget.hasVInstructionsF16()))
5835 return SplitVectorOp(Op, DAG);
5836 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
5837 case ISD::FP_EXTEND: {
5838 SDLoc DL(Op);
5839 EVT VT = Op.getValueType();
5840 SDValue Op0 = Op.getOperand(0);
5841 EVT Op0VT = Op0.getValueType();
5842 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
5843 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5844 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
5845 SDValue FloatVal =
5846 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5847 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
5850 if (!Op.getValueType().isVector())
5851 return Op;
5852 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5854 case ISD::FP_ROUND: {
5855 SDLoc DL(Op);
5856 EVT VT = Op.getValueType();
5857 SDValue Op0 = Op.getOperand(0);
5858 EVT Op0VT = Op0.getValueType();
5859 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
5860 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
5861 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
5862 Subtarget.hasStdExtDOrZdinx()) {
5863 SDValue FloatVal =
5864 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
5865 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5866 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
5869 if (!Op.getValueType().isVector())
5870 return Op;
5871 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5873 case ISD::STRICT_FP_ROUND:
5874 case ISD::STRICT_FP_EXTEND:
5875 return lowerStrictFPExtendOrRoundLike(Op, DAG);
5876 case ISD::SINT_TO_FP:
5877 case ISD::UINT_TO_FP:
5878 if (Op.getValueType().isVector() &&
5879 Op.getValueType().getScalarType() == MVT::f16 &&
5880 (Subtarget.hasVInstructionsF16Minimal() &&
5881 !Subtarget.hasVInstructionsF16())) {
5882 if (Op.getValueType() == MVT::nxv32f16)
5883 return SplitVectorOp(Op, DAG);
5884 // int -> f32
5885 SDLoc DL(Op);
5886 MVT NVT =
5887 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
5888 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
5889 // f32 -> f16
5890 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
5891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5893 [[fallthrough]];
5894 case ISD::FP_TO_SINT:
5895 case ISD::FP_TO_UINT:
5896 if (SDValue Op1 = Op.getOperand(0);
5897 Op1.getValueType().isVector() &&
5898 Op1.getValueType().getScalarType() == MVT::f16 &&
5899 (Subtarget.hasVInstructionsF16Minimal() &&
5900 !Subtarget.hasVInstructionsF16())) {
5901 if (Op1.getValueType() == MVT::nxv32f16)
5902 return SplitVectorOp(Op, DAG);
5903 // f16 -> f32
5904 SDLoc DL(Op);
5905 MVT NVT = MVT::getVectorVT(MVT::f32,
5906 Op1.getValueType().getVectorElementCount());
5907 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
5908 // f32 -> int
5909 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
5911 [[fallthrough]];
5912 case ISD::STRICT_FP_TO_SINT:
5913 case ISD::STRICT_FP_TO_UINT:
5914 case ISD::STRICT_SINT_TO_FP:
5915 case ISD::STRICT_UINT_TO_FP: {
5916 // RVV can only do fp<->int conversions to types half or double the size
5917 // of the source. We custom-lower any conversions that do two hops into
5918 // sequences.
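// For example, nxv2i8 -> nxv2f64 is lowered as a sign/zero extend to
// nxv2i32 followed by a single nxv2i32 -> nxv2f64 conversion.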
5919 MVT VT = Op.getSimpleValueType();
5920 if (!VT.isVector())
5921 return Op;
5922 SDLoc DL(Op);
5923 bool IsStrict = Op->isStrictFPOpcode();
5924 SDValue Src = Op.getOperand(0 + IsStrict);
5925 MVT EltVT = VT.getVectorElementType();
5926 MVT SrcVT = Src.getSimpleValueType();
5927 MVT SrcEltVT = SrcVT.getVectorElementType();
5928 unsigned EltSize = EltVT.getSizeInBits();
5929 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
5930 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
5931 "Unexpected vector element types");
5933 bool IsInt2FP = SrcEltVT.isInteger();
5934 // Widening conversions
5935 if (EltSize > (2 * SrcEltSize)) {
5936 if (IsInt2FP) {
5937 // Do a regular integer sign/zero extension then convert to float.
5938 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
5939 VT.getVectorElementCount());
5940 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
5941 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
5942 ? ISD::ZERO_EXTEND
5943 : ISD::SIGN_EXTEND;
5944 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
5945 if (IsStrict)
5946 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
5947 Op.getOperand(0), Ext);
5948 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
5950 // FP2Int
5951 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
5952 // Do one doubling fp_extend then complete the operation by converting
5953 // to int.
5954 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5955 if (IsStrict) {
5956 auto [FExt, Chain] =
5957 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
5958 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
5960 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
5961 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
5964 // Narrowing conversions
5965 if (SrcEltSize > (2 * EltSize)) {
5966 if (IsInt2FP) {
5967 // One narrowing int_to_fp, then an fp_round.
5968 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
5969 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5970 if (IsStrict) {
5971 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
5972 DAG.getVTList(InterimFVT, MVT::Other),
5973 Op.getOperand(0), Src);
5974 SDValue Chain = Int2FP.getValue(1);
5975 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
5977 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
5978 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
5980 // FP2Int
5981 // One narrowing fp_to_int, then truncate the integer. If the float isn't
5982 // representable by the integer, the result is poison.
5983 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
5984 VT.getVectorElementCount());
5985 if (IsStrict) {
5986 SDValue FP2Int =
5987 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
5988 Op.getOperand(0), Src);
5989 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5990 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
5992 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
5993 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5996 // Scalable vectors can exit here. Patterns will handle equally-sized
5997 // conversions as well as the halving/doubling ones.
5998 if (!VT.isFixedLengthVector())
5999 return Op;
6001 // For fixed-length vectors we lower to a custom "VL" node.
6002 unsigned RVVOpc = 0;
6003 switch (Op.getOpcode()) {
6004 default:
6005 llvm_unreachable("Impossible opcode");
6006 case ISD::FP_TO_SINT:
6007 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6008 break;
6009 case ISD::FP_TO_UINT:
6010 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6011 break;
6012 case ISD::SINT_TO_FP:
6013 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6014 break;
6015 case ISD::UINT_TO_FP:
6016 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6017 break;
6018 case ISD::STRICT_FP_TO_SINT:
6019 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6020 break;
6021 case ISD::STRICT_FP_TO_UINT:
6022 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6023 break;
6024 case ISD::STRICT_SINT_TO_FP:
6025 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6026 break;
6027 case ISD::STRICT_UINT_TO_FP:
6028 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6029 break;
6032 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6033 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6034 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6035 "Expected same element count");
6037 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6039 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6040 if (IsStrict) {
6041 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6042 Op.getOperand(0), Src, Mask, VL);
6043 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6044 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6046 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6047 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6049 case ISD::FP_TO_SINT_SAT:
6050 case ISD::FP_TO_UINT_SAT:
6051 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6052 case ISD::FP_TO_BF16: {
6053 // Custom lower to ensure the libcall return is passed in an FPR on hard
6054 // float ABIs.
6055 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6056 SDLoc DL(Op);
6057 MakeLibCallOptions CallOptions;
6058 RTLIB::Libcall LC =
6059 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6060 SDValue Res =
6061 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6062 if (Subtarget.is64Bit() && !RV64LegalI32)
6063 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6064 return DAG.getBitcast(MVT::i32, Res);
6066 case ISD::BF16_TO_FP: {
6067 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6068 MVT VT = Op.getSimpleValueType();
6069 SDLoc DL(Op);
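// bf16 has the same sign/exponent layout as the upper half of f32, so
// shifting the bf16 bits into the top 16 bits of the integer operand and
// reinterpreting them as f32 gives an exact extension.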
6070 Op = DAG.getNode(
6071 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6072 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6073 SDValue Res = Subtarget.is64Bit()
6074 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6075 : DAG.getBitcast(MVT::f32, Op);
6076 // fp_extend if the target VT is bigger than f32.
6077 if (VT != MVT::f32)
6078 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6079 return Res;
6081 case ISD::FP_TO_FP16: {
6082 // Custom lower to ensure the libcall return is passed in an FPR on hard
6083 // float ABIs.
6084 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6085 SDLoc DL(Op);
6086 MakeLibCallOptions CallOptions;
6087 RTLIB::Libcall LC =
6088 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6089 SDValue Res =
6090 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6091 if (Subtarget.is64Bit() && !RV64LegalI32)
6092 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6093 return DAG.getBitcast(MVT::i32, Res);
6095 case ISD::FP16_TO_FP: {
6096 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6097 // float ABIs.
6098 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6099 SDLoc DL(Op);
6100 MakeLibCallOptions CallOptions;
6101 SDValue Arg = Subtarget.is64Bit()
6102 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6103 Op.getOperand(0))
6104 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6105 SDValue Res =
6106 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6107 .first;
6108 return Res;
6110 case ISD::FTRUNC:
6111 case ISD::FCEIL:
6112 case ISD::FFLOOR:
6113 case ISD::FNEARBYINT:
6114 case ISD::FRINT:
6115 case ISD::FROUND:
6116 case ISD::FROUNDEVEN:
6117 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6118 case ISD::LRINT:
6119 case ISD::LLRINT:
6120 return lowerVectorXRINT(Op, DAG, Subtarget);
6121 case ISD::VECREDUCE_ADD:
6122 case ISD::VECREDUCE_UMAX:
6123 case ISD::VECREDUCE_SMAX:
6124 case ISD::VECREDUCE_UMIN:
6125 case ISD::VECREDUCE_SMIN:
6126 return lowerVECREDUCE(Op, DAG);
6127 case ISD::VECREDUCE_AND:
6128 case ISD::VECREDUCE_OR:
6129 case ISD::VECREDUCE_XOR:
6130 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6131 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6132 return lowerVECREDUCE(Op, DAG);
6133 case ISD::VECREDUCE_FADD:
6134 case ISD::VECREDUCE_SEQ_FADD:
6135 case ISD::VECREDUCE_FMIN:
6136 case ISD::VECREDUCE_FMAX:
6137 return lowerFPVECREDUCE(Op, DAG);
6138 case ISD::VP_REDUCE_ADD:
6139 case ISD::VP_REDUCE_UMAX:
6140 case ISD::VP_REDUCE_SMAX:
6141 case ISD::VP_REDUCE_UMIN:
6142 case ISD::VP_REDUCE_SMIN:
6143 case ISD::VP_REDUCE_FADD:
6144 case ISD::VP_REDUCE_SEQ_FADD:
6145 case ISD::VP_REDUCE_FMIN:
6146 case ISD::VP_REDUCE_FMAX:
6147 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6148 (Subtarget.hasVInstructionsF16Minimal() &&
6149 !Subtarget.hasVInstructionsF16()))
6150 return SplitVectorReductionOp(Op, DAG);
6151 return lowerVPREDUCE(Op, DAG);
6152 case ISD::VP_REDUCE_AND:
6153 case ISD::VP_REDUCE_OR:
6154 case ISD::VP_REDUCE_XOR:
6155 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6156 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6157 return lowerVPREDUCE(Op, DAG);
6158 case ISD::UNDEF: {
6159 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6160 return convertFromScalableVector(Op.getSimpleValueType(),
6161 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6163 case ISD::INSERT_SUBVECTOR:
6164 return lowerINSERT_SUBVECTOR(Op, DAG);
6165 case ISD::EXTRACT_SUBVECTOR:
6166 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6167 case ISD::VECTOR_DEINTERLEAVE:
6168 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6169 case ISD::VECTOR_INTERLEAVE:
6170 return lowerVECTOR_INTERLEAVE(Op, DAG);
6171 case ISD::STEP_VECTOR:
6172 return lowerSTEP_VECTOR(Op, DAG);
6173 case ISD::VECTOR_REVERSE:
6174 return lowerVECTOR_REVERSE(Op, DAG);
6175 case ISD::VECTOR_SPLICE:
6176 return lowerVECTOR_SPLICE(Op, DAG);
6177 case ISD::BUILD_VECTOR:
6178 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6179 case ISD::SPLAT_VECTOR:
6180 if (Op.getValueType().getScalarType() == MVT::f16 &&
6181 (Subtarget.hasVInstructionsF16Minimal() &&
6182 !Subtarget.hasVInstructionsF16())) {
6183 if (Op.getValueType() == MVT::nxv32f16)
6184 return SplitVectorOp(Op, DAG);
6185 SDLoc DL(Op);
6186 SDValue NewScalar =
6187 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6188 SDValue NewSplat = DAG.getNode(
6189 ISD::SPLAT_VECTOR, DL,
6190 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6191 NewScalar);
6192 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6193 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6195 if (Op.getValueType().getVectorElementType() == MVT::i1)
6196 return lowerVectorMaskSplat(Op, DAG);
6197 return SDValue();
6198 case ISD::VECTOR_SHUFFLE:
6199 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6200 case ISD::CONCAT_VECTORS: {
6201 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6202 // better than going through the stack, as the default expansion does.
6203 SDLoc DL(Op);
6204 MVT VT = Op.getSimpleValueType();
6205 unsigned NumOpElts =
6206 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6207 SDValue Vec = DAG.getUNDEF(VT);
6208 for (const auto &OpIdx : enumerate(Op->ops())) {
6209 SDValue SubVec = OpIdx.value();
6210 // Don't insert undef subvectors.
6211 if (SubVec.isUndef())
6212 continue;
6213 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6214 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
6216 return Vec;
6218 case ISD::LOAD:
6219 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6220 return V;
6221 if (Op.getValueType().isFixedLengthVector())
6222 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6223 return Op;
6224 case ISD::STORE:
6225 if (auto V = expandUnalignedRVVStore(Op, DAG))
6226 return V;
6227 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6228 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6229 return Op;
6230 case ISD::MLOAD:
6231 case ISD::VP_LOAD:
6232 return lowerMaskedLoad(Op, DAG);
6233 case ISD::MSTORE:
6234 case ISD::VP_STORE:
6235 return lowerMaskedStore(Op, DAG);
6236 case ISD::SELECT_CC: {
6237 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6238 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6239 // into separate SETCC+SELECT just like LegalizeDAG.
6240 SDValue Tmp1 = Op.getOperand(0);
6241 SDValue Tmp2 = Op.getOperand(1);
6242 SDValue True = Op.getOperand(2);
6243 SDValue False = Op.getOperand(3);
6244 EVT VT = Op.getValueType();
6245 SDValue CC = Op.getOperand(4);
6246 EVT CmpVT = Tmp1.getValueType();
6247 EVT CCVT =
6248 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6249 SDLoc DL(Op);
6250 SDValue Cond =
6251 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6252 return DAG.getSelect(DL, VT, Cond, True, False);
6254 case ISD::SETCC: {
6255 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6256 if (OpVT.isScalarInteger()) {
6257 MVT VT = Op.getSimpleValueType();
6258 SDValue LHS = Op.getOperand(0);
6259 SDValue RHS = Op.getOperand(1);
6260 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6261 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6262 "Unexpected CondCode");
6264 SDLoc DL(Op);
6266 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6267 // convert this to the equivalent of (set(u)ge X, C+1) by using
6268 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6269 // in a register.
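// For example, (setgt X, 5) becomes (xori (slti X, 6), 1), since
// X > 5 is equivalent to !(X < 6).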
6270 if (isa<ConstantSDNode>(RHS)) {
6271 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6272 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6273 // If this is an unsigned compare and the constant is -1, incrementing
6274 // the constant would change behavior. The result should be false.
6275 if (CCVal == ISD::SETUGT && Imm == -1)
6276 return DAG.getConstant(0, DL, VT);
6277 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6278 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6279 SDValue SetCC = DAG.getSetCC(
6280 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6281 return DAG.getLogicalNOT(DL, SetCC, VT);
6285 // Not a constant we could handle, swap the operands and condition code to
6286 // SETLT/SETULT.
6287 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6288 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6291 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6292 (Subtarget.hasVInstructionsF16Minimal() &&
6293 !Subtarget.hasVInstructionsF16()))
6294 return SplitVectorOp(Op, DAG);
6296 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6298 case ISD::ADD:
6299 case ISD::SUB:
6300 case ISD::MUL:
6301 case ISD::MULHS:
6302 case ISD::MULHU:
6303 case ISD::AND:
6304 case ISD::OR:
6305 case ISD::XOR:
6306 case ISD::SDIV:
6307 case ISD::SREM:
6308 case ISD::UDIV:
6309 case ISD::UREM:
6310 case ISD::BSWAP:
6311 case ISD::CTPOP:
6312 return lowerToScalableOp(Op, DAG);
6313 case ISD::SHL:
6314 case ISD::SRA:
6315 case ISD::SRL:
6316 if (Op.getSimpleValueType().isFixedLengthVector())
6317 return lowerToScalableOp(Op, DAG);
6318 // This can be called for an i32 shift amount that needs to be promoted.
6319 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6320 "Unexpected custom legalisation");
6321 return SDValue();
6322 case ISD::FADD:
6323 case ISD::FSUB:
6324 case ISD::FMUL:
6325 case ISD::FDIV:
6326 case ISD::FNEG:
6327 case ISD::FABS:
6328 case ISD::FSQRT:
6329 case ISD::FMA:
6330 case ISD::FMINNUM:
6331 case ISD::FMAXNUM:
6332 if (Op.getValueType() == MVT::nxv32f16 &&
6333 (Subtarget.hasVInstructionsF16Minimal() &&
6334 !Subtarget.hasVInstructionsF16()))
6335 return SplitVectorOp(Op, DAG);
6336 [[fallthrough]];
6337 case ISD::SADDSAT:
6338 case ISD::UADDSAT:
6339 case ISD::SSUBSAT:
6340 case ISD::USUBSAT:
6341 case ISD::SMIN:
6342 case ISD::SMAX:
6343 case ISD::UMIN:
6344 case ISD::UMAX:
6345 return lowerToScalableOp(Op, DAG);
6346 case ISD::ABS:
6347 case ISD::VP_ABS:
6348 return lowerABS(Op, DAG);
6349 case ISD::CTLZ:
6350 case ISD::CTLZ_ZERO_UNDEF:
6351 case ISD::CTTZ:
6352 case ISD::CTTZ_ZERO_UNDEF:
6353 if (Subtarget.hasStdExtZvbb())
6354 return lowerToScalableOp(Op, DAG);
6355 assert(Op.getOpcode() != ISD::CTTZ);
6356 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6357 case ISD::VSELECT:
6358 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6359 case ISD::FCOPYSIGN:
6360 if (Op.getValueType() == MVT::nxv32f16 &&
6361 (Subtarget.hasVInstructionsF16Minimal() &&
6362 !Subtarget.hasVInstructionsF16()))
6363 return SplitVectorOp(Op, DAG);
6364 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6365 case ISD::STRICT_FADD:
6366 case ISD::STRICT_FSUB:
6367 case ISD::STRICT_FMUL:
6368 case ISD::STRICT_FDIV:
6369 case ISD::STRICT_FSQRT:
6370 case ISD::STRICT_FMA:
6371 return lowerToScalableOp(Op, DAG);
6372 case ISD::STRICT_FSETCC:
6373 case ISD::STRICT_FSETCCS:
6374 return lowerVectorStrictFSetcc(Op, DAG);
6375 case ISD::STRICT_FCEIL:
6376 case ISD::STRICT_FRINT:
6377 case ISD::STRICT_FFLOOR:
6378 case ISD::STRICT_FTRUNC:
6379 case ISD::STRICT_FNEARBYINT:
6380 case ISD::STRICT_FROUND:
6381 case ISD::STRICT_FROUNDEVEN:
6382 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6383 case ISD::MGATHER:
6384 case ISD::VP_GATHER:
6385 return lowerMaskedGather(Op, DAG);
6386 case ISD::MSCATTER:
6387 case ISD::VP_SCATTER:
6388 return lowerMaskedScatter(Op, DAG);
6389 case ISD::GET_ROUNDING:
6390 return lowerGET_ROUNDING(Op, DAG);
6391 case ISD::SET_ROUNDING:
6392 return lowerSET_ROUNDING(Op, DAG);
6393 case ISD::EH_DWARF_CFA:
6394 return lowerEH_DWARF_CFA(Op, DAG);
6395 case ISD::VP_SELECT:
6396 case ISD::VP_MERGE:
6397 case ISD::VP_ADD:
6398 case ISD::VP_SUB:
6399 case ISD::VP_MUL:
6400 case ISD::VP_SDIV:
6401 case ISD::VP_UDIV:
6402 case ISD::VP_SREM:
6403 case ISD::VP_UREM:
6404 return lowerVPOp(Op, DAG);
6405 case ISD::VP_AND:
6406 case ISD::VP_OR:
6407 case ISD::VP_XOR:
6408 return lowerLogicVPOp(Op, DAG);
6409 case ISD::VP_FADD:
6410 case ISD::VP_FSUB:
6411 case ISD::VP_FMUL:
6412 case ISD::VP_FDIV:
6413 case ISD::VP_FNEG:
6414 case ISD::VP_FABS:
6415 case ISD::VP_SQRT:
6416 case ISD::VP_FMA:
6417 case ISD::VP_FMINNUM:
6418 case ISD::VP_FMAXNUM:
6419 case ISD::VP_FCOPYSIGN:
6420 if (Op.getValueType() == MVT::nxv32f16 &&
6421 (Subtarget.hasVInstructionsF16Minimal() &&
6422 !Subtarget.hasVInstructionsF16()))
6423 return SplitVPOp(Op, DAG);
6424 [[fallthrough]];
6425 case ISD::VP_ASHR:
6426 case ISD::VP_LSHR:
6427 case ISD::VP_SHL:
6428 return lowerVPOp(Op, DAG);
6429 case ISD::VP_IS_FPCLASS:
6430 return LowerIS_FPCLASS(Op, DAG);
6431 case ISD::VP_SIGN_EXTEND:
6432 case ISD::VP_ZERO_EXTEND:
6433 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6434 return lowerVPExtMaskOp(Op, DAG);
6435 return lowerVPOp(Op, DAG);
6436 case ISD::VP_TRUNCATE:
6437 return lowerVectorTruncLike(Op, DAG);
6438 case ISD::VP_FP_EXTEND:
6439 case ISD::VP_FP_ROUND:
6440 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6441 case ISD::VP_SINT_TO_FP:
6442 case ISD::VP_UINT_TO_FP:
6443 if (Op.getValueType().isVector() &&
6444 Op.getValueType().getScalarType() == MVT::f16 &&
6445 (Subtarget.hasVInstructionsF16Minimal() &&
6446 !Subtarget.hasVInstructionsF16())) {
6447 if (Op.getValueType() == MVT::nxv32f16)
6448 return SplitVPOp(Op, DAG);
6449 // int -> f32
6450 SDLoc DL(Op);
6451 MVT NVT =
6452 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6453 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6454 // f32 -> f16
6455 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6456 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6458 [[fallthrough]];
6459 case ISD::VP_FP_TO_SINT:
6460 case ISD::VP_FP_TO_UINT:
6461 if (SDValue Op1 = Op.getOperand(0);
6462 Op1.getValueType().isVector() &&
6463 Op1.getValueType().getScalarType() == MVT::f16 &&
6464 (Subtarget.hasVInstructionsF16Minimal() &&
6465 !Subtarget.hasVInstructionsF16())) {
6466 if (Op1.getValueType() == MVT::nxv32f16)
6467 return SplitVPOp(Op, DAG);
6468 // f16 -> f32
6469 SDLoc DL(Op);
6470 MVT NVT = MVT::getVectorVT(MVT::f32,
6471 Op1.getValueType().getVectorElementCount());
6472 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6473 // f32 -> int
6474 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6475 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6477 return lowerVPFPIntConvOp(Op, DAG);
6478 case ISD::VP_SETCC:
6479 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6480 (Subtarget.hasVInstructionsF16Minimal() &&
6481 !Subtarget.hasVInstructionsF16()))
6482 return SplitVPOp(Op, DAG);
6483 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6484 return lowerVPSetCCMaskOp(Op, DAG);
6485 [[fallthrough]];
6486 case ISD::VP_SMIN:
6487 case ISD::VP_SMAX:
6488 case ISD::VP_UMIN:
6489 case ISD::VP_UMAX:
6490 case ISD::VP_BITREVERSE:
6491 case ISD::VP_BSWAP:
6492 return lowerVPOp(Op, DAG);
6493 case ISD::VP_CTLZ:
6494 case ISD::VP_CTLZ_ZERO_UNDEF:
6495 if (Subtarget.hasStdExtZvbb())
6496 return lowerVPOp(Op, DAG);
6497 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6498 case ISD::VP_CTTZ:
6499 case ISD::VP_CTTZ_ZERO_UNDEF:
6500 if (Subtarget.hasStdExtZvbb())
6501 return lowerVPOp(Op, DAG);
6502 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6503 case ISD::VP_CTPOP:
6504 return lowerVPOp(Op, DAG);
6505 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
6506 return lowerVPStridedLoad(Op, DAG);
6507 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
6508 return lowerVPStridedStore(Op, DAG);
6509 case ISD::VP_FCEIL:
6510 case ISD::VP_FFLOOR:
6511 case ISD::VP_FRINT:
6512 case ISD::VP_FNEARBYINT:
6513 case ISD::VP_FROUND:
6514 case ISD::VP_FROUNDEVEN:
6515 case ISD::VP_FROUNDTOZERO:
6516 if (Op.getValueType() == MVT::nxv32f16 &&
6517 (Subtarget.hasVInstructionsF16Minimal() &&
6518 !Subtarget.hasVInstructionsF16()))
6519 return SplitVPOp(Op, DAG);
6520 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6524 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
6525 SelectionDAG &DAG, unsigned Flags) {
6526 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
6529 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
6530 SelectionDAG &DAG, unsigned Flags) {
6531 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
6532 Flags);
6535 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
6536 SelectionDAG &DAG, unsigned Flags) {
6537 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6538 N->getOffset(), Flags);
6541 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
6542 SelectionDAG &DAG, unsigned Flags) {
6543 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
6546 template <class NodeTy>
6547 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6548 bool IsLocal, bool IsExternWeak) const {
6549 SDLoc DL(N);
6550 EVT Ty = getPointerTy(DAG.getDataLayout());
6552 // When HWASAN is used and tagging of global variables is enabled,
6553 // they should be accessed via the GOT, since the tagged address of a global
6554 // is incompatible with existing code models. This also applies to non-pic
6555 // mode.
6556 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
6557 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6558 if (IsLocal && !Subtarget.allowTaggedGlobals())
6559 // Use PC-relative addressing to access the symbol. This generates the
6560 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
6561 // %pcrel_lo(auipc)).
6562 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6564 // Use PC-relative addressing to access the GOT for this symbol, then load
6565 // the address from the GOT. This generates the pattern (PseudoLGA sym),
6566 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6567 SDValue Load =
6568 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6569 MachineFunction &MF = DAG.getMachineFunction();
6570 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6571 MachinePointerInfo::getGOT(MF),
6572 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6573 MachineMemOperand::MOInvariant,
6574 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6575 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6576 return Load;
6579 switch (getTargetMachine().getCodeModel()) {
6580 default:
6581 report_fatal_error("Unsupported code model for lowering");
6582 case CodeModel::Small: {
6583 // Generate a sequence for accessing addresses within the first 2 GiB of
6584 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
6585 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
6586 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
6587 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6588 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
6590 case CodeModel::Medium: {
6591 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6592 if (IsExternWeak) {
6593 // An extern weak symbol may be undefined, i.e. have value 0, which may
6594 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
6595 // symbol. This generates the pattern (PseudoLGA sym), which expands to
6596 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6597 SDValue Load =
6598 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6599 MachineFunction &MF = DAG.getMachineFunction();
6600 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6601 MachinePointerInfo::getGOT(MF),
6602 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6603 MachineMemOperand::MOInvariant,
6604 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6605 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6606 return Load;
6609 // Generate a sequence for accessing addresses within any 2GiB range within
6610 // the address space. This generates the pattern (PseudoLLA sym), which
6611 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
6612 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6617 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
6618 SelectionDAG &DAG) const {
6619 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6620 assert(N->getOffset() == 0 && "unexpected offset in global node");
6621 const GlobalValue *GV = N->getGlobal();
6622 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
6625 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
6626 SelectionDAG &DAG) const {
6627 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
6629 return getAddr(N, DAG);
6632 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
6633 SelectionDAG &DAG) const {
6634 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
6636 return getAddr(N, DAG);
6639 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
6640 SelectionDAG &DAG) const {
6641 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
6643 return getAddr(N, DAG);
6646 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
6647 SelectionDAG &DAG,
6648 bool UseGOT) const {
6649 SDLoc DL(N);
6650 EVT Ty = getPointerTy(DAG.getDataLayout());
6651 const GlobalValue *GV = N->getGlobal();
6652 MVT XLenVT = Subtarget.getXLenVT();
6654 if (UseGOT) {
6655 // Use PC-relative addressing to access the GOT for this TLS symbol, then
6656 // load the address from the GOT and add the thread pointer. This generates
6657 // the pattern (PseudoLA_TLS_IE sym), which expands to
6658 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
6659 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6660 SDValue Load =
6661 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
6662 MachineFunction &MF = DAG.getMachineFunction();
6663 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6664 MachinePointerInfo::getGOT(MF),
6665 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6666 MachineMemOperand::MOInvariant,
6667 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6668 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6670 // Add the thread pointer.
6671 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6672 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
6675 // Generate a sequence for accessing the address relative to the thread
6676 // pointer, with the appropriate adjustment for the thread pointer offset.
6677 // This generates the pattern
6678 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
6679 SDValue AddrHi =
6680 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
6681 SDValue AddrAdd =
6682 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
6683 SDValue AddrLo =
6684 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
6686 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6687 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6688 SDValue MNAdd =
6689 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
6690 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
6693 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
6694 SelectionDAG &DAG) const {
6695 SDLoc DL(N);
6696 EVT Ty = getPointerTy(DAG.getDataLayout());
6697 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
6698 const GlobalValue *GV = N->getGlobal();
6700 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6701 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
6702 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
6703 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6704 SDValue Load =
6705 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
6707 // Prepare argument list to generate call.
6708 ArgListTy Args;
6709 ArgListEntry Entry;
6710 Entry.Node = Load;
6711 Entry.Ty = CallTy;
6712 Args.push_back(Entry);
6714 // Setup call to __tls_get_addr.
6715 TargetLowering::CallLoweringInfo CLI(DAG);
6716 CLI.setDebugLoc(DL)
6717 .setChain(DAG.getEntryNode())
6718 .setLibCallee(CallingConv::C, CallTy,
6719 DAG.getExternalSymbol("__tls_get_addr", Ty),
6720 std::move(Args));
6722 return LowerCallTo(CLI).first;
6725 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
6726 SelectionDAG &DAG) const {
6727 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6728 assert(N->getOffset() == 0 && "unexpected offset in global node");
6730 if (DAG.getTarget().useEmulatedTLS())
6731 return LowerToTLSEmulatedModel(N, DAG);
6733 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
6735 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
6736 CallingConv::GHC)
6737 report_fatal_error("In GHC calling convention TLS is not supported");
6739 SDValue Addr;
6740 switch (Model) {
6741 case TLSModel::LocalExec:
6742 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
6743 break;
6744 case TLSModel::InitialExec:
6745 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
6746 break;
6747 case TLSModel::LocalDynamic:
6748 case TLSModel::GeneralDynamic:
6749 Addr = getDynamicTLSAddr(N, DAG);
6750 break;
6753 return Addr;
6756 // Return true if Val is equal to (setcc LHS, RHS, CC).
6757 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
6758 // Otherwise, return std::nullopt.
6759 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
6760 ISD::CondCode CC, SDValue Val) {
6761 assert(Val->getOpcode() == ISD::SETCC);
6762 SDValue LHS2 = Val.getOperand(0);
6763 SDValue RHS2 = Val.getOperand(1);
6764 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
6766 if (LHS == LHS2 && RHS == RHS2) {
6767 if (CC == CC2)
6768 return true;
6769 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
6770 return false;
6771 } else if (LHS == RHS2 && RHS == LHS2) {
6772 CC2 = ISD::getSetCCSwappedOperands(CC2);
6773 if (CC == CC2)
6774 return true;
6775 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
6776 return false;
6779 return std::nullopt;
6782 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
6783 const RISCVSubtarget &Subtarget) {
6784 SDValue CondV = N->getOperand(0);
6785 SDValue TrueV = N->getOperand(1);
6786 SDValue FalseV = N->getOperand(2);
6787 MVT VT = N->getSimpleValueType(0);
6788 SDLoc DL(N);
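// These folds rely on the condition being a 0/1 boolean value: -c is then
// either 0 or all-ones, and (c-1) is either all-ones or 0, so the select
// collapses into a mask and a bitwise op.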
6790 if (!Subtarget.hasShortForwardBranchOpt()) {
6791 // (select c, -1, y) -> -c | y
6792 if (isAllOnesConstant(TrueV)) {
6793 SDValue Neg = DAG.getNegative(CondV, DL, VT);
6794 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
6796 // (select c, y, -1) -> (c-1) | y
6797 if (isAllOnesConstant(FalseV)) {
6798 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
6799 DAG.getAllOnesConstant(DL, VT));
6800 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
6803 // (select c, 0, y) -> (c-1) & y
6804 if (isNullConstant(TrueV)) {
6805 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
6806 DAG.getAllOnesConstant(DL, VT));
6807 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
6809 // (select c, y, 0) -> -c & y
6810 if (isNullConstant(FalseV)) {
6811 SDValue Neg = DAG.getNegative(CondV, DL, VT);
6812 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
6816 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
6817 // when both truev and falsev are also setcc.
6818 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
6819 FalseV.getOpcode() == ISD::SETCC) {
6820 SDValue LHS = CondV.getOperand(0);
6821 SDValue RHS = CondV.getOperand(1);
6822 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6824 // (select x, x, y) -> x | y
6825 // (select !x, x, y) -> x & y
6826 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
6827 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
6828 FalseV);
6830 // (select x, y, x) -> x & y
6831 // (select !x, y, x) -> x | y
6832 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
6833 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
6834 FalseV);
6838 return SDValue();
6841 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
6842 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
6843 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
6844 // being `0` or `-1`. In such cases we can replace `select` with `and`.
6845 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
6846 // than `c0`?
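// For example, (add (select cond, x, -1), 1) is rewritten to
// (select cond, (add x, 1), 0), which combineSelectToBinOp can then turn
// into a masked AND.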
6847 static SDValue
6848 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
6849 const RISCVSubtarget &Subtarget) {
6850 if (Subtarget.hasShortForwardBranchOpt())
6851 return SDValue();
6853 unsigned SelOpNo = 0;
6854 SDValue Sel = BO->getOperand(0);
6855 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
6856 SelOpNo = 1;
6857 Sel = BO->getOperand(1);
6860 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
6861 return SDValue();
6863 unsigned ConstSelOpNo = 1;
6864 unsigned OtherSelOpNo = 2;
6865 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
6866 ConstSelOpNo = 2;
6867 OtherSelOpNo = 1;
6869 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
6870 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
6871 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
6872 return SDValue();
6874 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
6875 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
6876 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
6877 return SDValue();
6879 SDLoc DL(Sel);
6880 EVT VT = BO->getValueType(0);
6882 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
6883 if (SelOpNo == 1)
6884 std::swap(NewConstOps[0], NewConstOps[1]);
6886 SDValue NewConstOp =
6887 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
6888 if (!NewConstOp)
6889 return SDValue();
6891 const APInt &NewConstAPInt =
6892 cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
6893 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
6894 return SDValue();
6896 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
6897 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
6898 if (SelOpNo == 1)
6899 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
6900 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
6902 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
6903 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
6904 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
6907 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
6908 SDValue CondV = Op.getOperand(0);
6909 SDValue TrueV = Op.getOperand(1);
6910 SDValue FalseV = Op.getOperand(2);
6911 SDLoc DL(Op);
6912 MVT VT = Op.getSimpleValueType();
6913 MVT XLenVT = Subtarget.getXLenVT();
6915 // Lower vector SELECTs to VSELECTs by splatting the condition.
6916 if (VT.isVector()) {
6917 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
6918 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
6919 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
6922 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
6923 // nodes to implement the SELECT. Performing the lowering here allows for
6924 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
6925 // sequence or RISCVISD::SELECT_CC node (branch-based select).
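// Recall the Zicond/XVentanaCondOps semantics:
//   czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
//   czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1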
6926 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
6927 VT.isScalarInteger()) {
6928 // (select c, t, 0) -> (czero_eqz t, c)
6929 if (isNullConstant(FalseV))
6930 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
6931 // (select c, 0, f) -> (czero_nez f, c)
6932 if (isNullConstant(TrueV))
6933 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
6935 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
6936 if (TrueV.getOpcode() == ISD::AND &&
6937 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
6938 return DAG.getNode(
6939 ISD::OR, DL, VT, TrueV,
6940 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6941 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
6942 if (FalseV.getOpcode() == ISD::AND &&
6943 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
6944 return DAG.getNode(
6945 ISD::OR, DL, VT, FalseV,
6946 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
6948 // Try some other optimizations before falling back to generic lowering.
6949 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6950 return V;
6952 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
6953 // Unless we have the short forward branch optimization.
6954 if (!Subtarget.hasShortForwardBranchOpt())
6955 return DAG.getNode(
6956 ISD::OR, DL, VT,
6957 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
6958 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6961 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6962 return V;
6964 if (Op.hasOneUse()) {
6965 unsigned UseOpc = Op->use_begin()->getOpcode();
6966 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
6967 SDNode *BinOp = *Op->use_begin();
6968 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BinOp,
6969 DAG, Subtarget)) {
6970 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
6971 return lowerSELECT(NewSel, DAG);
6976 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
6977 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
6978 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
6979 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
6980 if (FPTV && FPFV) {
6981 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
6982 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
6983 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
6984 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
6985 DAG.getConstant(1, DL, XLenVT));
6986 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
6990 // If the condition is not an integer SETCC which operates on XLenVT, we need
6991 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
6992 // (select condv, truev, falsev)
6993 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
6994 if (CondV.getOpcode() != ISD::SETCC ||
6995 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
6996 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6997 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
6999 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7001 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7004 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7005 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7006 // advantage of the integer compare+branch instructions. i.e.:
7007 // (select (setcc lhs, rhs, cc), truev, falsev)
7008 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7009 SDValue LHS = CondV.getOperand(0);
7010 SDValue RHS = CondV.getOperand(1);
7011 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7013 // Special case for a select of 2 constants that have a difference of 1.
7014 // Normally this is done by DAGCombine, but if the select is introduced by
7015 // type legalization or op legalization, we miss it. Restricting to SETLT
7016 // case for now because that is what signed saturating add/sub need.
7017 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7018 // but we would probably want to swap the true/false values if the condition
7019 // is SETGE/SETLE to avoid an XORI.
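// For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4),
// and (select (setlt a, b), 4, 5) becomes (sub 5, (setlt a, b)).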
7020 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7021 CCVal == ISD::SETLT) {
7022 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
7023 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
7024 if (TrueVal - 1 == FalseVal)
7025 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7026 if (TrueVal + 1 == FalseVal)
7027 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7030 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7031 // 1 < x ? x : 1 -> 0 < x ? x : 1
7032 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7033 RHS == TrueV && LHS == FalseV) {
7034 LHS = DAG.getConstant(0, DL, VT);
7035 // 0 <u x is the same as x != 0.
7036 if (CCVal == ISD::SETULT) {
7037 std::swap(LHS, RHS);
7038 CCVal = ISD::SETNE;
7042 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7043 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7044 RHS == FalseV) {
7045 RHS = DAG.getConstant(0, DL, VT);
7048 SDValue TargetCC = DAG.getCondCode(CCVal);
7050 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7051 // (select (setcc lhs, rhs, CC), constant, falsev)
7052 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7053 std::swap(TrueV, FalseV);
7054 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7057 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7058 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7061 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7062 SDValue CondV = Op.getOperand(1);
7063 SDLoc DL(Op);
7064 MVT XLenVT = Subtarget.getXLenVT();
7066 if (CondV.getOpcode() == ISD::SETCC &&
7067 CondV.getOperand(0).getValueType() == XLenVT) {
7068 SDValue LHS = CondV.getOperand(0);
7069 SDValue RHS = CondV.getOperand(1);
7070 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7072 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7074 SDValue TargetCC = DAG.getCondCode(CCVal);
7075 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7076 LHS, RHS, TargetCC, Op.getOperand(2));
7079 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7080 CondV, DAG.getConstant(0, DL, XLenVT),
7081 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7084 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7085 MachineFunction &MF = DAG.getMachineFunction();
7086 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7088 SDLoc DL(Op);
7089 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7090 getPointerTy(MF.getDataLayout()));
7092 // vastart just stores the address of the VarArgsFrameIndex slot into the
7093 // memory location argument.
7094 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7095 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7096 MachinePointerInfo(SV));
7099 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7100 SelectionDAG &DAG) const {
7101 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7102 MachineFunction &MF = DAG.getMachineFunction();
7103 MachineFrameInfo &MFI = MF.getFrameInfo();
7104 MFI.setFrameAddressIsTaken(true);
7105 Register FrameReg = RI.getFrameRegister(MF);
7106 int XLenInBytes = Subtarget.getXLen() / 8;
7108 EVT VT = Op.getValueType();
7109 SDLoc DL(Op);
7110 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7111 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
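// With a frame pointer, the caller's frame pointer is spilled at
// fp - 2*XLenInBytes and the return address at fp - XLenInBytes (see
// lowerRETURNADDR below), so walk the chain by loading from that slot.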
7112 while (Depth--) {
7113 int Offset = -(XLenInBytes * 2);
7114 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7115 DAG.getIntPtrConstant(Offset, DL));
7116 FrameAddr =
7117 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7119 return FrameAddr;
7122 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7123 SelectionDAG &DAG) const {
7124 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7125 MachineFunction &MF = DAG.getMachineFunction();
7126 MachineFrameInfo &MFI = MF.getFrameInfo();
7127 MFI.setReturnAddressIsTaken(true);
7128 MVT XLenVT = Subtarget.getXLenVT();
7129 int XLenInBytes = Subtarget.getXLen() / 8;
7131 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7132 return SDValue();
7134 EVT VT = Op.getValueType();
7135 SDLoc DL(Op);
7136 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7137 if (Depth) {
7138 int Off = -XLenInBytes;
7139 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7140 SDValue Offset = DAG.getConstant(Off, DL, VT);
7141 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7142 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7143 MachinePointerInfo());
7146 // Return the value of the return address register, marking it an implicit
7147 // live-in.
7148 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7149 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7152 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7153 SelectionDAG &DAG) const {
7154 SDLoc DL(Op);
7155 SDValue Lo = Op.getOperand(0);
7156 SDValue Hi = Op.getOperand(1);
7157 SDValue Shamt = Op.getOperand(2);
7158 EVT VT = Lo.getValueType();
7160 // if Shamt-XLEN < 0: // Shamt < XLEN
7161 // Lo = Lo << Shamt
7162 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
7163 // else:
7164 // Lo = 0
7165 // Hi = Lo << (Shamt-XLEN)
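// Note that for Shamt < XLEN, XLEN-1 - Shamt equals XLEN-1 ^ Shamt because
// XLEN-1 has all of the relevant low bits set, so the SUB below matches the
// XOR written in the pseudocode above.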
7167 SDValue Zero = DAG.getConstant(0, DL, VT);
7168 SDValue One = DAG.getConstant(1, DL, VT);
7169 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7170 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7171 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7172 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7174 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7175 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7176 SDValue ShiftRightLo =
7177 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7178 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7179 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7180 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7182 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7184 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7185 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7187 SDValue Parts[2] = {Lo, Hi};
7188 return DAG.getMergeValues(Parts, DL);
7191 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7192 bool IsSRA) const {
7193 SDLoc DL(Op);
7194 SDValue Lo = Op.getOperand(0);
7195 SDValue Hi = Op.getOperand(1);
7196 SDValue Shamt = Op.getOperand(2);
7197 EVT VT = Lo.getValueType();
7199 // SRA expansion:
7200 // if Shamt-XLEN < 0: // Shamt < XLEN
7201 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ XLEN-1))
7202 // Hi = Hi >>s Shamt
7203 // else:
7204 // Lo = Hi >>s (Shamt-XLEN);
7205 // Hi = Hi >>s (XLEN-1)
7207 // SRL expansion:
7208 // if Shamt-XLEN < 0: // Shamt < XLEN
7209 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ XLEN-1))
7210 // Hi = Hi >>u Shamt
7211 // else:
7212 // Lo = Hi >>u (Shamt-XLEN);
7213 // Hi = 0;
7215 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7217 SDValue Zero = DAG.getConstant(0, DL, VT);
7218 SDValue One = DAG.getConstant(1, DL, VT);
7219 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7220 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7221 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7222 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7224 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7225 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7226 SDValue ShiftLeftHi =
7227 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7228 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7229 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7230 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7231 SDValue HiFalse =
7232 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7234 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7236 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7237 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7239 SDValue Parts[2] = {Lo, Hi};
7240 return DAG.getMergeValues(Parts, DL);
7243 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
7244 // legal equivalently-sized i8 type, so we can use that as a go-between.
7245 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7246 SelectionDAG &DAG) const {
7247 SDLoc DL(Op);
7248 MVT VT = Op.getSimpleValueType();
7249 SDValue SplatVal = Op.getOperand(0);
7250 // All-zeros or all-ones splats are handled specially.
7251 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7252 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7253 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7255 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7256 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7257 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
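// For a non-constant scalar, only bit 0 is meaningful for an i1 splat, so
// mask it to a single bit and compare the widened i8 splat against zero.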
7259 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7260 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7261 DAG.getConstant(1, DL, SplatVal.getValueType()));
7262 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7263 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7264 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7267 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7268 // illegal (currently only vXi64 RV32).
7269 // FIXME: We could also catch non-constant sign-extended i32 values and lower
7270 // them to VMV_V_X_VL.
7271 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7272 SelectionDAG &DAG) const {
7273 SDLoc DL(Op);
7274 MVT VecVT = Op.getSimpleValueType();
7275 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7276 "Unexpected SPLAT_VECTOR_PARTS lowering");
7278 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7279 SDValue Lo = Op.getOperand(0);
7280 SDValue Hi = Op.getOperand(1);
7282 MVT ContainerVT = VecVT;
7283 if (VecVT.isFixedLengthVector())
7284 ContainerVT = getContainerForFixedLengthVector(VecVT);
7286 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7288 SDValue Res =
7289 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7291 if (VecVT.isFixedLengthVector())
7292 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7294 return Res;
7297 // Custom-lower extensions from mask vectors by using a vselect either with 1
7298 // for zero/any-extension or -1 for sign-extension:
7299 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7300 // Note that any-extension is lowered identically to zero-extension.
7301 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7302 int64_t ExtTrueVal) const {
7303 SDLoc DL(Op);
7304 MVT VecVT = Op.getSimpleValueType();
7305 SDValue Src = Op.getOperand(0);
7306 // Only custom-lower extensions from mask types
7307 assert(Src.getValueType().isVector() &&
7308 Src.getValueType().getVectorElementType() == MVT::i1);
7310 if (VecVT.isScalableVector()) {
7311 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7312 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7313 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7316 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7317 MVT I1ContainerVT =
7318 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7320 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7322 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7324 MVT XLenVT = Subtarget.getXLenVT();
7325 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7326 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7328 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7329 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7330 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7331 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7332 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
7333 SplatTrueVal, SplatZero, VL);
7335 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7338 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7339 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7340 MVT ExtVT = Op.getSimpleValueType();
7341 // Only custom-lower extensions from fixed-length vector types.
7342 if (!ExtVT.isFixedLengthVector())
7343 return Op;
7344 MVT VT = Op.getOperand(0).getSimpleValueType();
7345 // Grab the canonical container type for the extended type. Infer the smaller
7346 // type from that to ensure the same number of vector elements, as we know
7347 // the LMUL will be sufficient to hold the smaller type.
7348 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7349 // Derive the source container type from it manually to ensure the same
7350 // number of vector elements between source and dest.
7351 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7352 ContainerExtVT.getVectorElementCount());
7354 SDValue Op1 =
7355 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7357 SDLoc DL(Op);
7358 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7360 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7362 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7365 // Custom-lower truncations from vectors to mask vectors by using a mask and a
7366 // setcc operation:
7367 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
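// Illustrative example (typical selection): truncating v8i8 to v8i1 keeps
// only bit 0 of each element and compares against zero, i.e. roughly
// vand.vi vd, vs, 1 followed by vmsne.vi v0, vd, 0.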
7368 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7369 SelectionDAG &DAG) const {
7370 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7371 SDLoc DL(Op);
7372 EVT MaskVT = Op.getValueType();
7373 // Only expect to custom-lower truncations to mask types
7374 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7375 "Unexpected type for vector mask lowering");
7376 SDValue Src = Op.getOperand(0);
7377 MVT VecVT = Src.getSimpleValueType();
7378 SDValue Mask, VL;
7379 if (IsVPTrunc) {
7380 Mask = Op.getOperand(1);
7381 VL = Op.getOperand(2);
7383 // If this is a fixed vector, we need to convert it to a scalable vector.
7384 MVT ContainerVT = VecVT;
7386 if (VecVT.isFixedLengthVector()) {
7387 ContainerVT = getContainerForFixedLengthVector(VecVT);
7388 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7389 if (IsVPTrunc) {
7390 MVT MaskContainerVT =
7391 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7392 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7396 if (!IsVPTrunc) {
7397 std::tie(Mask, VL) =
7398 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7401 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7402 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7404 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7405 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7406 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7407 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7409 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7410 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7411 DAG.getUNDEF(ContainerVT), Mask, VL);
7412 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7413 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7414 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7415 if (MaskVT.isFixedLengthVector())
7416 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7417 return Trunc;
7420 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7421 SelectionDAG &DAG) const {
7422 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7423 SDLoc DL(Op);
7425 MVT VT = Op.getSimpleValueType();
7426 // Only custom-lower vector truncates
7427 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7429 // Truncates to mask types are handled differently
7430 if (VT.getVectorElementType() == MVT::i1)
7431 return lowerVectorMaskTruncLike(Op, DAG);
7433 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7434 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7435 // truncate by one power of two at a time.
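// For example, an i64 -> i8 truncate is emitted as three such nodes
// (i64 -> i32 -> i16 -> i8), each of which typically selects to a narrowing
// shift (vnsrl.wi with a zero shift amount).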
7436 MVT DstEltVT = VT.getVectorElementType();
7438 SDValue Src = Op.getOperand(0);
7439 MVT SrcVT = Src.getSimpleValueType();
7440 MVT SrcEltVT = SrcVT.getVectorElementType();
7442 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7443 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7444 "Unexpected vector truncate lowering");
7446 MVT ContainerVT = SrcVT;
7447 SDValue Mask, VL;
7448 if (IsVPTrunc) {
7449 Mask = Op.getOperand(1);
7450 VL = Op.getOperand(2);
7452 if (SrcVT.isFixedLengthVector()) {
7453 ContainerVT = getContainerForFixedLengthVector(SrcVT);
7454 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7455 if (IsVPTrunc) {
7456 MVT MaskVT = getMaskTypeFor(ContainerVT);
7457 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7461 SDValue Result = Src;
7462 if (!IsVPTrunc) {
7463 std::tie(Mask, VL) =
7464 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7467 LLVMContext &Context = *DAG.getContext();
7468 const ElementCount Count = ContainerVT.getVectorElementCount();
7469 do {
7470 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
7471 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
7472 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
7473 Mask, VL);
7474 } while (SrcEltVT != DstEltVT);
7476 if (SrcVT.isFixedLengthVector())
7477 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
7479 return Result;
7482 SDValue
7483 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
7484 SelectionDAG &DAG) const {
7485 SDLoc DL(Op);
7486 SDValue Chain = Op.getOperand(0);
7487 SDValue Src = Op.getOperand(1);
7488 MVT VT = Op.getSimpleValueType();
7489 MVT SrcVT = Src.getSimpleValueType();
7490 MVT ContainerVT = VT;
7491 if (VT.isFixedLengthVector()) {
7492 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7493 ContainerVT =
7494 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7495 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7498 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7500 // RVV can only widen/truncate fp to types double/half the size of the source.
7501 if ((VT.getVectorElementType() == MVT::f64 &&
7502 SrcVT.getVectorElementType() == MVT::f16) ||
7503 (VT.getVectorElementType() == MVT::f16 &&
7504 SrcVT.getVectorElementType() == MVT::f64)) {
7505 // For double rounding, the intermediate rounding should be round-to-odd.
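// (Rounding to nearest twice can differ from rounding once; round-to-odd in
// the intermediate f32 step preserves enough information for the final
// f32 -> f16 rounding to match a single correctly-rounded conversion.)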
7506 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7507 ? RISCVISD::STRICT_FP_EXTEND_VL
7508 : RISCVISD::STRICT_VFNCVT_ROD_VL;
7509 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7510 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
7511 Chain, Src, Mask, VL);
7512 Chain = Src.getValue(1);
7515 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7516 ? RISCVISD::STRICT_FP_EXTEND_VL
7517 : RISCVISD::STRICT_FP_ROUND_VL;
7518 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7519 Chain, Src, Mask, VL);
7520 if (VT.isFixedLengthVector()) {
7521 // StrictFP operations have two result values. Their lowered result should
7522 // have the same number of results.
7523 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
7524 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
7526 return Res;
7529 SDValue
7530 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
7531 SelectionDAG &DAG) const {
7532 bool IsVP =
7533 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
7534 bool IsExtend =
7535 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
7536 // RVV can only truncate fp to types half the size of the source. We
7537 // custom-lower f64->f16 rounds via RVV's round-to-odd float
7538 // conversion instruction.
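// Illustrative sequence for an f64 -> f16 round: first narrow f64 -> f32
// with the round-to-odd vfncvt.rod.f.f.w, then f32 -> f16 with a regular
// vfncvt.f.f.w; the direct (single-step) cases skip the intermediate
// conversion.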
7539 SDLoc DL(Op);
7540 MVT VT = Op.getSimpleValueType();
7542 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7544 SDValue Src = Op.getOperand(0);
7545 MVT SrcVT = Src.getSimpleValueType();
7547 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
7548 SrcVT.getVectorElementType() != MVT::f16);
7549 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
7550 SrcVT.getVectorElementType() != MVT::f64);
7552 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
7554 // Prepare any fixed-length vector operands.
7555 MVT ContainerVT = VT;
7556 SDValue Mask, VL;
7557 if (IsVP) {
7558 Mask = Op.getOperand(1);
7559 VL = Op.getOperand(2);
7561 if (VT.isFixedLengthVector()) {
7562 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7563 ContainerVT =
7564 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7565 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7566 if (IsVP) {
7567 MVT MaskVT = getMaskTypeFor(ContainerVT);
7568 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7572 if (!IsVP)
7573 std::tie(Mask, VL) =
7574 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7576 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
7578 if (IsDirectConv) {
7579 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
7580 if (VT.isFixedLengthVector())
7581 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
7582 return Src;
7585 unsigned InterConvOpc =
7586 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
7588 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7589 SDValue IntermediateConv =
7590 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
7591 SDValue Result =
7592 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
7593 if (VT.isFixedLengthVector())
7594 return convertFromScalableVector(VT, Result, DAG, Subtarget);
7595 return Result;
7598 // Given a scalable vector type and an index into it, returns the type for the
7599 // smallest subvector that the index fits in. This can be used to reduce LMUL
7600 // for operations like vslidedown.
7602 // E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
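// Worked example for the code below (assuming Zvl128b and i32 elements):
// MinVLMAX = 128/32 = 4, so MaxIdx = 3 fits in the LMUL1 type nxv2i32, while
// MaxIdx = 5 in an nxv8i32 only shrinks as far as the LMUL2 type nxv4i32.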
7603 static std::optional<MVT>
7604 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
7605 const RISCVSubtarget &Subtarget) {
7606 assert(VecVT.isScalableVector());
7607 const unsigned EltSize = VecVT.getScalarSizeInBits();
7608 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
7609 const unsigned MinVLMAX = VectorBitsMin / EltSize;
7610 MVT SmallerVT;
7611 if (MaxIdx < MinVLMAX)
7612 SmallerVT = getLMUL1VT(VecVT);
7613 else if (MaxIdx < MinVLMAX * 2)
7614 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
7615 else if (MaxIdx < MinVLMAX * 4)
7616 SmallerVT = getLMUL1VT(VecVT)
7617 .getDoubleNumVectorElementsVT()
7618 .getDoubleNumVectorElementsVT();
7619 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
7620 return std::nullopt;
7621 return SmallerVT;
7624 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
7625 // first position of a vector, and that vector is slid up to the insert index.
7626 // By limiting the active vector length to index+1 and merging with the
7627 // original vector (with an undisturbed tail policy for elements >= VL), we
7628 // achieve the desired result of leaving all elements untouched except the one
7629 // at VL-1, which is replaced with the desired value.
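// Illustrative sequence for inserting a GPR-sized scalar at index 2 of a
// v4i32: vmv.s.x writes the value to element 0 of a temporary, then a
// vslideup by 2 with VL = 3 and a tail-undisturbed policy merges it into the
// original vector.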
7630 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
7631 SelectionDAG &DAG) const {
7632 SDLoc DL(Op);
7633 MVT VecVT = Op.getSimpleValueType();
7634 SDValue Vec = Op.getOperand(0);
7635 SDValue Val = Op.getOperand(1);
7636 SDValue Idx = Op.getOperand(2);
7638 if (VecVT.getVectorElementType() == MVT::i1) {
7639 // FIXME: For now we just promote to an i8 vector and insert into that,
7640 // but this is probably not optimal.
7641 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7642 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7643 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
7644 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
7647 MVT ContainerVT = VecVT;
7648 // If the operand is a fixed-length vector, convert to a scalable one.
7649 if (VecVT.isFixedLengthVector()) {
7650 ContainerVT = getContainerForFixedLengthVector(VecVT);
7651 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7654 MVT OrigContainerVT = ContainerVT;
7655 SDValue OrigVec = Vec;
7656 // If we know the index we're going to insert at, we can shrink Vec so that
7657 // we're performing the scalar inserts and slideup on a smaller LMUL.
7658 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
7659 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
7660 DL, DAG, Subtarget)) {
7661 ContainerVT = *ShrunkVT;
7662 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
7663 DAG.getVectorIdxConstant(0, DL));
7667 MVT XLenVT = Subtarget.getXLenVT();
7669 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
7670 // Even i64-element vectors on RV32 can be lowered without scalar
7671 // legalization if the most-significant 32 bits of the value are not affected
7672 // by the sign-extension of the lower 32 bits.
7673 // TODO: We could also catch sign extensions of a 32-bit value.
7674 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
7675 const auto *CVal = cast<ConstantSDNode>(Val);
7676 if (isInt<32>(CVal->getSExtValue())) {
7677 IsLegalInsert = true;
7678 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
7682 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7684 SDValue ValInVec;
7686 if (IsLegalInsert) {
7687 unsigned Opc =
7688 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
7689 if (isNullConstant(Idx)) {
7690 if (!VecVT.isFloatingPoint())
7691 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
7692 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
7694 if (ContainerVT != OrigContainerVT)
7695 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7696 Vec, DAG.getVectorIdxConstant(0, DL));
7697 if (!VecVT.isFixedLengthVector())
7698 return Vec;
7699 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
7701 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
7702 } else {
7703 // On RV32, i64-element vectors must be specially handled to place the
7704 // value at element 0, by using two vslide1down instructions in sequence on
7705 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
7706 // this.
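// Illustrative sequence (RV32, VL limited to 2 on the SEW=32 view):
// vslide1down.vx vd, vs, lo followed by vslide1down.vx vd, vd, hi leaves the
// i64 value (hi:lo) in element 0 after the bitcast back.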
7707 SDValue ValLo, ValHi;
7708 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
7709 MVT I32ContainerVT =
7710 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
7711 SDValue I32Mask =
7712 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
7713 // Limit the active VL to two.
7714 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
7715 // If the Idx is 0 we can insert directly into the vector.
7716 if (isNullConstant(Idx)) {
7717 // First slide in the lo value, then the hi value above it. We use slide1down
7718 // to avoid the register group overlap constraint of vslide1up.
7719 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7720 Vec, Vec, ValLo, I32Mask, InsertI64VL);
7721 // If the source vector is undef don't pass along the tail elements from
7722 // the previous slide1down.
7723 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
7724 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7725 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
7726 // Bitcast back to the right container type.
7727 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
7729 if (ContainerVT != OrigContainerVT)
7730 ValInVec =
7731 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7732 ValInVec, DAG.getVectorIdxConstant(0, DL));
7733 if (!VecVT.isFixedLengthVector())
7734 return ValInVec;
7735 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
7738 // First slide in the lo value, then the hi value above it. We use slide1down
7739 // to avoid the register group overlap constraint of vslide1up.
7740 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7741 DAG.getUNDEF(I32ContainerVT),
7742 DAG.getUNDEF(I32ContainerVT), ValLo,
7743 I32Mask, InsertI64VL);
7744 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7745 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
7746 I32Mask, InsertI64VL);
7747 // Bitcast back to the right container type.
7748 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
7751 // Now that the value is in a vector, slide it into position.
7752 SDValue InsertVL =
7753 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
7755 // Use tail agnostic policy if Idx is the last index of Vec.
7756 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
7757 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
7758 cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
7759 VecVT.getVectorNumElements())
7760 Policy = RISCVII::TAIL_AGNOSTIC;
7761 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
7762 Idx, Mask, InsertVL, Policy);
7764 if (ContainerVT != OrigContainerVT)
7765 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7766 Slideup, DAG.getVectorIdxConstant(0, DL));
7767 if (!VecVT.isFixedLengthVector())
7768 return Slideup;
7769 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
7772 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
7773 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
7774 // types this is done using VMV_X_S to allow us to glean information about the
7775 // sign bits of the result.
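// Illustrative sequence for extracting element 5 of an integer vector:
// something like vslidedown.vi vd, vs, 5 with VL = 1 followed by
// vmv.x.s rd, vd.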
7776 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
7777 SelectionDAG &DAG) const {
7778 SDLoc DL(Op);
7779 SDValue Idx = Op.getOperand(1);
7780 SDValue Vec = Op.getOperand(0);
7781 EVT EltVT = Op.getValueType();
7782 MVT VecVT = Vec.getSimpleValueType();
7783 MVT XLenVT = Subtarget.getXLenVT();
7785 if (VecVT.getVectorElementType() == MVT::i1) {
7786 // Use vfirst.m to extract the first bit.
7787 if (isNullConstant(Idx)) {
7788 MVT ContainerVT = VecVT;
7789 if (VecVT.isFixedLengthVector()) {
7790 ContainerVT = getContainerForFixedLengthVector(VecVT);
7791 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7793 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7794 SDValue Vfirst =
7795 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
7796 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
7797 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
7798 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
7800 if (VecVT.isFixedLengthVector()) {
7801 unsigned NumElts = VecVT.getVectorNumElements();
7802 if (NumElts >= 8) {
7803 MVT WideEltVT;
7804 unsigned WidenVecLen;
7805 SDValue ExtractElementIdx;
7806 SDValue ExtractBitIdx;
7807 unsigned MaxEEW = Subtarget.getELen();
7808 MVT LargestEltVT = MVT::getIntegerVT(
7809 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
7810 if (NumElts <= LargestEltVT.getSizeInBits()) {
7811 assert(isPowerOf2_32(NumElts) &&
7812 "the number of elements should be power of 2");
7813 WideEltVT = MVT::getIntegerVT(NumElts);
7814 WidenVecLen = 1;
7815 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
7816 ExtractBitIdx = Idx;
7817 } else {
7818 WideEltVT = LargestEltVT;
7819 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
7820 // extract element index = index / element width
7821 ExtractElementIdx = DAG.getNode(
7822 ISD::SRL, DL, XLenVT, Idx,
7823 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
7824 // mask bit index = index % element width
7825 ExtractBitIdx = DAG.getNode(
7826 ISD::AND, DL, XLenVT, Idx,
7827 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
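// Worked example (illustrative, RV64 with ELEN=64): a v128i1 source is
// viewed as v2i64, so bit Idx lives in word Idx >> 6 at bit position
// Idx & 63, which the extract, shift and mask below recover.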
7829 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
7830 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
7831 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
7832 Vec, ExtractElementIdx);
7833 // Extract the bit from GPR.
7834 SDValue ShiftRight =
7835 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
7836 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
7837 DAG.getConstant(1, DL, XLenVT));
7838 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
7841 // Otherwise, promote to an i8 vector and extract from that.
7842 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7843 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7844 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
7847 // If this is a fixed vector, we need to convert it to a scalable vector.
7848 MVT ContainerVT = VecVT;
7849 if (VecVT.isFixedLengthVector()) {
7850 ContainerVT = getContainerForFixedLengthVector(VecVT);
7851 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7854 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
7855 // contains our index.
7856 std::optional<uint64_t> MaxIdx;
7857 if (VecVT.isFixedLengthVector())
7858 MaxIdx = VecVT.getVectorNumElements() - 1;
7859 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
7860 MaxIdx = IdxC->getZExtValue();
7861 if (MaxIdx) {
7862 if (auto SmallerVT =
7863 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
7864 ContainerVT = *SmallerVT;
7865 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
7866 DAG.getConstant(0, DL, XLenVT));
7870 // If after narrowing, the required slide is still greater than LMUL2,
7871 // fall back to generic expansion and go through the stack. This is done
7872 // for a subtle reason: extracting *all* elements out of a vector is
7873 // widely expected to be linear in vector size, but because vslidedown
7874 // is linear in LMUL, performing N extracts using vslidedown becomes
7875 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
7876 // seems to have the same problem (the store is linear in LMUL), but the
7877 // generic expansion *memoizes* the store, and thus for many extracts of
7878 // the same vector we end up with one store and a bunch of loads.
7879 // TODO: We don't have the same code for insert_vector_elt because we
7880 // have BUILD_VECTOR and handle the degenerate case there. Should we
7881 // consider adding an inverse BUILD_VECTOR node?
7882 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
7883 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
7884 return SDValue();
7886 // If the index is 0, the vector is already in the right position.
7887 if (!isNullConstant(Idx)) {
7888 // Use a VL of 1 to avoid processing more elements than we need.
7889 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
7890 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
7891 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
7894 if (!EltVT.isInteger()) {
7895 // Floating-point extracts are handled in TableGen.
7896 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
7897 DAG.getConstant(0, DL, XLenVT));
7900 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
7901 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
7904 // Some RVV intrinsics may claim that they want an integer operand to be
7905 // promoted or expanded.
7906 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
7907 const RISCVSubtarget &Subtarget) {
7908 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
7909 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
7910 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
7911 "Unexpected opcode");
7913 if (!Subtarget.hasVInstructions())
7914 return SDValue();
7916 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
7917 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
7918 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
7920 SDLoc DL(Op);
7922 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
7923 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
7924 if (!II || !II->hasScalarOperand())
7925 return SDValue();
7927 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
7928 assert(SplatOp < Op.getNumOperands());
7930 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
7931 SDValue &ScalarOp = Operands[SplatOp];
7932 MVT OpVT = ScalarOp.getSimpleValueType();
7933 MVT XLenVT = Subtarget.getXLenVT();
7935 // If this isn't a scalar, or its type is XLenVT we're done.
7936 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
7937 return SDValue();
7939 // Simplest case is that the operand needs to be promoted to XLenVT.
7940 if (OpVT.bitsLT(XLenVT)) {
7941 // If the operand is a constant, sign extend to increase our chances
7942 // of being able to use a .vi instruction. ANY_EXTEND would become a
7943 // zero extend and the simm5 check in isel would fail.
7944 // FIXME: Should we ignore the upper bits in isel instead?
7945 unsigned ExtOpc =
7946 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
7947 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
7948 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7951 // Use the previous operand to get the vXi64 VT. The result might be a mask
7952 // VT for compares. Using the previous operand assumes that the previous
7953 // operand will never have a smaller element size than a scalar operand and
7954 // that a widening operation never uses SEW=64.
7955 // NOTE: If this fails the below assert, we can probably just find the
7956 // element count from any operand or result and use it to construct the VT.
7957 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
7958 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
7960 // The more complex case is when the scalar is larger than XLenVT.
7961 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
7962 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
7964 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
7965 // instruction to sign-extend since SEW>XLEN.
7966 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
7967 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
7968 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7971 switch (IntNo) {
7972 case Intrinsic::riscv_vslide1up:
7973 case Intrinsic::riscv_vslide1down:
7974 case Intrinsic::riscv_vslide1up_mask:
7975 case Intrinsic::riscv_vslide1down_mask: {
7976 // We need to special case these when the scalar is larger than XLen.
7977 unsigned NumOps = Op.getNumOperands();
7978 bool IsMasked = NumOps == 7;
7980 // Convert the vector source to the equivalent nxvXi32 vector.
7981 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
7982 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
7983 SDValue ScalarLo, ScalarHi;
7984 std::tie(ScalarLo, ScalarHi) =
7985 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
7987 // Double the VL since we halved SEW.
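// E.g. (illustrative, VLEN >= 128): a vslide1down on nxv2i64 with a
// constant AVL of 4 is performed on the nxv4i32 view with an i32 VL of 8,
// since each i64 element corresponds to two i32 elements.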
7988 SDValue AVL = getVLOperand(Op);
7989 SDValue I32VL;
7991 // Optimize for constant AVL
7992 if (isa<ConstantSDNode>(AVL)) {
7993 unsigned EltSize = VT.getScalarSizeInBits();
7994 unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
7996 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
7997 unsigned MaxVLMAX =
7998 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
8000 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8001 unsigned MinVLMAX =
8002 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
8004 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
8005 if (AVLInt <= MinVLMAX) {
8006 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8007 } else if (AVLInt >= 2 * MaxVLMAX) {
8008 // Just set vl to VLMAX in this situation
8009 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8010 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8011 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8012 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8013 SDValue SETVLMAX = DAG.getTargetConstant(
8014 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8015 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8016 LMUL);
8017 } else {
8018 // For AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working VL depends on
8019 // the hardware implementation, so leave I32VL unset and let the
8020 // vsetvli-based code below handle it.
8023 if (!I32VL) {
8024 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8025 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8026 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8027 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8028 SDValue SETVL =
8029 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8030 // Use a vsetvli instruction to get the VL actually used, which depends on
8031 // the hardware implementation.
8032 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8033 SEW, LMUL);
8034 I32VL =
8035 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8038 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8040 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8041 // instructions.
8042 SDValue Passthru;
8043 if (IsMasked)
8044 Passthru = DAG.getUNDEF(I32VT);
8045 else
8046 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8048 if (IntNo == Intrinsic::riscv_vslide1up ||
8049 IntNo == Intrinsic::riscv_vslide1up_mask) {
8050 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8051 ScalarHi, I32Mask, I32VL);
8052 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8053 ScalarLo, I32Mask, I32VL);
8054 } else {
8055 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8056 ScalarLo, I32Mask, I32VL);
8057 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8058 ScalarHi, I32Mask, I32VL);
8061 // Convert back to nxvXi64.
8062 Vec = DAG.getBitcast(VT, Vec);
8064 if (!IsMasked)
8065 return Vec;
8066 // Apply mask after the operation.
8067 SDValue Mask = Operands[NumOps - 3];
8068 SDValue MaskedOff = Operands[1];
8069 // Assume Policy operand is the last operand.
8070 uint64_t Policy =
8071 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
8072 // We don't need to select maskedoff if it's undef.
8073 if (MaskedOff.isUndef())
8074 return Vec;
8075 // TAMU (tail agnostic, mask undisturbed)
8076 if (Policy == RISCVII::TAIL_AGNOSTIC)
8077 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
8078 AVL);
8079 // TUMA or TUMU: Currently we always emit a TUMU policy regardless of TUMA.
8080 // That's fine because vmerge does not care about the mask policy.
8081 return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8082 AVL);
8086 // We need to convert the scalar to a splat vector.
8087 SDValue VL = getVLOperand(Op);
8088 assert(VL.getValueType() == XLenVT);
8089 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8090 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8093 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8094 // scalable vector llvm.get.vector.length for now.
8096 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
8097 // (vscale * VF). The vscale and VF are independent of element width. We use
8098 // SEW=8 for the vsetvli because it is the only element width that supports all
8099 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8100 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8101 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8102 // SEW and LMUL are better for the surrounding vector instructions.
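// Worked example (illustrative): for VF = 4, LMul1VF = 64/8 = 8, so the
// request is fractional with LMUL = mf2; a vsetvli with e8, mf2 has
// VLMAX = VLEN/16 = vscale * 4, matching the requested vscale * VF.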
8103 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8104 const RISCVSubtarget &Subtarget) {
8105 MVT XLenVT = Subtarget.getXLenVT();
8107 // The smallest LMUL is only valid for the smallest element width.
8108 const unsigned ElementWidth = 8;
8110 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8111 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8112 // We don't support VF==1 with ELEN==32.
8113 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
8115 unsigned VF = N->getConstantOperandVal(2);
8116 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8117 "Unexpected VF");
8118 (void)MinVF;
8120 bool Fractional = VF < LMul1VF;
8121 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8122 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8123 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8125 SDLoc DL(N);
8127 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8128 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8130 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8132 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8133 SDValue Res =
8134 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8135 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8138 // LMUL * VLEN should be greater than or equal to EGS * SEW
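// E.g. (illustrative): a Zvkned instruction with EGS = 4 and SEW = 32 needs
// EGW = 128 bits, so an LMUL=1 operand requires VLEN >= 128 while an LMUL=2
// operand is accepted with VLEN >= 64.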
8139 static inline bool isValidEGW(int EGS, EVT VT,
8140 const RISCVSubtarget &Subtarget) {
8141 return (Subtarget.getRealMinVLen() *
8142 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8143 EGS * VT.getScalarSizeInBits();
8146 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8147 SelectionDAG &DAG) const {
8148 unsigned IntNo = Op.getConstantOperandVal(0);
8149 SDLoc DL(Op);
8150 MVT XLenVT = Subtarget.getXLenVT();
8152 switch (IntNo) {
8153 default:
8154 break; // Don't custom lower most intrinsics.
8155 case Intrinsic::thread_pointer: {
8156 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8157 return DAG.getRegister(RISCV::X4, PtrVT);
8159 case Intrinsic::riscv_orc_b:
8160 case Intrinsic::riscv_brev8:
8161 case Intrinsic::riscv_sha256sig0:
8162 case Intrinsic::riscv_sha256sig1:
8163 case Intrinsic::riscv_sha256sum0:
8164 case Intrinsic::riscv_sha256sum1:
8165 case Intrinsic::riscv_sm3p0:
8166 case Intrinsic::riscv_sm3p1: {
8167 unsigned Opc;
8168 switch (IntNo) {
8169 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8170 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8171 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8172 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8173 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8174 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8175 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8176 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8179 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8180 SDValue NewOp =
8181 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8182 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8183 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8186 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8188 case Intrinsic::riscv_sm4ks:
8189 case Intrinsic::riscv_sm4ed: {
8190 unsigned Opc =
8191 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8193 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8194 SDValue NewOp0 =
8195 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8196 SDValue NewOp1 =
8197 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8198 SDValue Res =
8199 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8200 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8203 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8204 Op.getOperand(3));
8206 case Intrinsic::riscv_zip:
8207 case Intrinsic::riscv_unzip: {
8208 unsigned Opc =
8209 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8210 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8212 case Intrinsic::riscv_clmul:
8213 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8214 SDValue NewOp0 =
8215 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8216 SDValue NewOp1 =
8217 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8218 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8219 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8221 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8222 Op.getOperand(2));
8223 case Intrinsic::riscv_clmulh:
8224 case Intrinsic::riscv_clmulr: {
8225 unsigned Opc =
8226 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8227 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8228 SDValue NewOp0 =
8229 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8230 SDValue NewOp1 =
8231 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8232 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8233 DAG.getConstant(32, DL, MVT::i64));
8234 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8235 DAG.getConstant(32, DL, MVT::i64));
8236 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8237 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8238 DAG.getConstant(32, DL, MVT::i64));
8239 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8242 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8244 case Intrinsic::experimental_get_vector_length:
8245 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8246 case Intrinsic::riscv_vmv_x_s: {
8247 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8248 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8250 case Intrinsic::riscv_vfmv_f_s:
8251 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8252 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
8253 case Intrinsic::riscv_vmv_v_x:
8254 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8255 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8256 Subtarget);
8257 case Intrinsic::riscv_vfmv_v_f:
8258 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8259 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8260 case Intrinsic::riscv_vmv_s_x: {
8261 SDValue Scalar = Op.getOperand(2);
8263 if (Scalar.getValueType().bitsLE(XLenVT)) {
8264 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8265 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8266 Op.getOperand(1), Scalar, Op.getOperand(3));
8269 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8271 // This is an i64 value that lives in two scalar registers. We have to
8272 // insert this in a convoluted way. First we build a vXi64 splat containing
8273 // the two values that we assemble using some bit math. Next we'll use
8274 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8275 // to merge element 0 from our splat into the source vector.
8276 // FIXME: This is probably not the best way to do this, but it is
8277 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8278 // point.
8279 // sw lo, (a0)
8280 // sw hi, 4(a0)
8281 // vlse vX, (a0)
8283 // vid.v vVid
8284 // vmseq.vx mMask, vVid, 0
8285 // vmerge.vvm vDest, vSrc, vVal, mMask
8286 MVT VT = Op.getSimpleValueType();
8287 SDValue Vec = Op.getOperand(1);
8288 SDValue VL = getVLOperand(Op);
8290 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8291 if (Op.getOperand(1).isUndef())
8292 return SplattedVal;
8293 SDValue SplattedIdx =
8294 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8295 DAG.getConstant(0, DL, MVT::i32), VL);
8297 MVT MaskVT = getMaskTypeFor(VT);
8298 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
8299 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8300 SDValue SelectCond =
8301 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8302 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
8303 DAG.getUNDEF(MaskVT), Mask, VL});
8304 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
8305 Vec, VL);
8307 // EGS * EEW >= 128 bits
8308 case Intrinsic::riscv_vaesdf_vv:
8309 case Intrinsic::riscv_vaesdf_vs:
8310 case Intrinsic::riscv_vaesdm_vv:
8311 case Intrinsic::riscv_vaesdm_vs:
8312 case Intrinsic::riscv_vaesef_vv:
8313 case Intrinsic::riscv_vaesef_vs:
8314 case Intrinsic::riscv_vaesem_vv:
8315 case Intrinsic::riscv_vaesem_vs:
8316 case Intrinsic::riscv_vaeskf1:
8317 case Intrinsic::riscv_vaeskf2:
8318 case Intrinsic::riscv_vaesz_vs:
8319 case Intrinsic::riscv_vsm4k:
8320 case Intrinsic::riscv_vsm4r_vv:
8321 case Intrinsic::riscv_vsm4r_vs: {
8322 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8323 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8324 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8325 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8326 return Op;
8328 // EGS * EEW >= 256 bits
8329 case Intrinsic::riscv_vsm3c:
8330 case Intrinsic::riscv_vsm3me: {
8331 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
8332 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
8333 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
8334 return Op;
8336 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
8337 case Intrinsic::riscv_vsha2ch:
8338 case Intrinsic::riscv_vsha2cl:
8339 case Intrinsic::riscv_vsha2ms: {
8340 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
8341 !Subtarget.hasStdExtZvknhb())
8342 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
8343 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8344 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8345 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8346 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8347 return Op;
8349 case Intrinsic::riscv_sf_vc_v_x:
8350 case Intrinsic::riscv_sf_vc_v_i:
8351 case Intrinsic::riscv_sf_vc_v_xv:
8352 case Intrinsic::riscv_sf_vc_v_iv:
8353 case Intrinsic::riscv_sf_vc_v_vv:
8354 case Intrinsic::riscv_sf_vc_v_fv:
8355 case Intrinsic::riscv_sf_vc_v_xvv:
8356 case Intrinsic::riscv_sf_vc_v_ivv:
8357 case Intrinsic::riscv_sf_vc_v_vvv:
8358 case Intrinsic::riscv_sf_vc_v_fvv:
8359 case Intrinsic::riscv_sf_vc_v_xvw:
8360 case Intrinsic::riscv_sf_vc_v_ivw:
8361 case Intrinsic::riscv_sf_vc_v_vvw:
8362 case Intrinsic::riscv_sf_vc_v_fvw: {
8363 MVT VT = Op.getSimpleValueType();
8365 if (!VT.isFixedLengthVector())
8366 break;
8368 SmallVector<SDValue, 6> Ops;
8369 for (const SDValue &V : Op->op_values()) {
8370 // Skip non-fixed vector operands.
8371 if (!V.getValueType().isFixedLengthVector()) {
8372 Ops.push_back(V);
8373 continue;
8376 MVT OpContainerVT =
8377 getContainerForFixedLengthVector(V.getSimpleValueType());
8378 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8381 MVT RetContainerVT = getContainerForFixedLengthVector(VT);
8382 SDValue Scalable =
8383 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetContainerVT, Ops);
8384 return convertFromScalableVector(VT, Scalable, DAG, Subtarget);
8388 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8391 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
8392 SelectionDAG &DAG) const {
8393 unsigned IntNo = Op.getConstantOperandVal(1);
8394 switch (IntNo) {
8395 default:
8396 break;
8397 case Intrinsic::riscv_masked_strided_load: {
8398 SDLoc DL(Op);
8399 MVT XLenVT = Subtarget.getXLenVT();
8401 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8402 // the selection of the masked intrinsics doesn't do this for us.
8403 SDValue Mask = Op.getOperand(5);
8404 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8406 MVT VT = Op->getSimpleValueType(0);
8407 MVT ContainerVT = VT;
8408 if (VT.isFixedLengthVector())
8409 ContainerVT = getContainerForFixedLengthVector(VT);
8411 SDValue PassThru = Op.getOperand(2);
8412 if (!IsUnmasked) {
8413 MVT MaskVT = getMaskTypeFor(ContainerVT);
8414 if (VT.isFixedLengthVector()) {
8415 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8416 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8420 auto *Load = cast<MemIntrinsicSDNode>(Op);
8421 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8422 SDValue Ptr = Op.getOperand(3);
8423 SDValue Stride = Op.getOperand(4);
8424 SDValue Result, Chain;
8426 // TODO: We restrict this to unmasked loads currently in consideration of
8427 // the complexity of handling all-false masks.
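// A zero stride means every lane reads the same address, so a single scalar
// load plus a splat (lowerScalarSplat below) produces the same result
// without any vector memory traffic.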
8428 if (IsUnmasked && isNullConstant(Stride)) {
8429 MVT ScalarVT = ContainerVT.getVectorElementType();
8430 SDValue ScalarLoad =
8431 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
8432 ScalarVT, Load->getMemOperand());
8433 Chain = ScalarLoad.getValue(1);
8434 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
8435 Subtarget);
8436 } else {
8437 SDValue IntID = DAG.getTargetConstant(
8438 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
8439 XLenVT);
8441 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
8442 if (IsUnmasked)
8443 Ops.push_back(DAG.getUNDEF(ContainerVT));
8444 else
8445 Ops.push_back(PassThru);
8446 Ops.push_back(Ptr);
8447 Ops.push_back(Stride);
8448 if (!IsUnmasked)
8449 Ops.push_back(Mask);
8450 Ops.push_back(VL);
8451 if (!IsUnmasked) {
8452 SDValue Policy =
8453 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
8454 Ops.push_back(Policy);
8457 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8458 Result =
8459 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8460 Load->getMemoryVT(), Load->getMemOperand());
8461 Chain = Result.getValue(1);
8463 if (VT.isFixedLengthVector())
8464 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8465 return DAG.getMergeValues({Result, Chain}, DL);
8467 case Intrinsic::riscv_seg2_load:
8468 case Intrinsic::riscv_seg3_load:
8469 case Intrinsic::riscv_seg4_load:
8470 case Intrinsic::riscv_seg5_load:
8471 case Intrinsic::riscv_seg6_load:
8472 case Intrinsic::riscv_seg7_load:
8473 case Intrinsic::riscv_seg8_load: {
8474 SDLoc DL(Op);
8475 static const Intrinsic::ID VlsegInts[7] = {
8476 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
8477 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
8478 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
8479 Intrinsic::riscv_vlseg8};
8480 unsigned NF = Op->getNumValues() - 1;
8481 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8482 MVT XLenVT = Subtarget.getXLenVT();
8483 MVT VT = Op->getSimpleValueType(0);
8484 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8486 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8487 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
8488 auto *Load = cast<MemIntrinsicSDNode>(Op);
8489 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
8490 ContainerVTs.push_back(MVT::Other);
8491 SDVTList VTs = DAG.getVTList(ContainerVTs);
8492 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
8493 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
8494 Ops.push_back(Op.getOperand(2));
8495 Ops.push_back(VL);
8496 SDValue Result =
8497 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8498 Load->getMemoryVT(), Load->getMemOperand());
8499 SmallVector<SDValue, 9> Results;
8500 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
8501 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
8502 DAG, Subtarget));
8503 Results.push_back(Result.getValue(NF));
8504 return DAG.getMergeValues(Results, DL);
8506 case Intrinsic::riscv_sf_vc_v_x_se:
8507 case Intrinsic::riscv_sf_vc_v_i_se:
8508 case Intrinsic::riscv_sf_vc_v_xv_se:
8509 case Intrinsic::riscv_sf_vc_v_iv_se:
8510 case Intrinsic::riscv_sf_vc_v_vv_se:
8511 case Intrinsic::riscv_sf_vc_v_fv_se:
8512 case Intrinsic::riscv_sf_vc_v_xvv_se:
8513 case Intrinsic::riscv_sf_vc_v_ivv_se:
8514 case Intrinsic::riscv_sf_vc_v_vvv_se:
8515 case Intrinsic::riscv_sf_vc_v_fvv_se:
8516 case Intrinsic::riscv_sf_vc_v_xvw_se:
8517 case Intrinsic::riscv_sf_vc_v_ivw_se:
8518 case Intrinsic::riscv_sf_vc_v_vvw_se:
8519 case Intrinsic::riscv_sf_vc_v_fvw_se: {
8520 MVT VT = Op.getSimpleValueType();
8522 if (!VT.isFixedLengthVector())
8523 break;
8525 SmallVector<SDValue, 6> Ops;
8526 for (const SDValue &V : Op->op_values()) {
8527 // Skip non-fixed vector operands.
8528 if (!V.getValueType().isFixedLengthVector()) {
8529 Ops.push_back(V);
8530 continue;
8533 MVT OpContainerVT =
8534 getContainerForFixedLengthVector(V.getSimpleValueType());
8535 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8538 SDLoc DL(Op);
8539 MVT RetContainerVT = getContainerForFixedLengthVector(VT);
8540 SDVTList VTs = DAG.getVTList({RetContainerVT, MVT::Other});
8541 SDValue ScalableVector = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
8542 SDValue FixedVector =
8543 convertFromScalableVector(VT, ScalableVector, DAG, Subtarget);
8544 return DAG.getMergeValues({FixedVector, ScalableVector.getValue(1)}, DL);
8548 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8551 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8552 SelectionDAG &DAG) const {
8553 unsigned IntNo = Op.getConstantOperandVal(1);
8554 switch (IntNo) {
8555 default:
8556 break;
8557 case Intrinsic::riscv_masked_strided_store: {
8558 SDLoc DL(Op);
8559 MVT XLenVT = Subtarget.getXLenVT();
8561 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8562 // the selection of the masked intrinsics doesn't do this for us.
8563 SDValue Mask = Op.getOperand(5);
8564 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8566 SDValue Val = Op.getOperand(2);
8567 MVT VT = Val.getSimpleValueType();
8568 MVT ContainerVT = VT;
8569 if (VT.isFixedLengthVector()) {
8570 ContainerVT = getContainerForFixedLengthVector(VT);
8571 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
8573 if (!IsUnmasked) {
8574 MVT MaskVT = getMaskTypeFor(ContainerVT);
8575 if (VT.isFixedLengthVector())
8576 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8579 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8581 SDValue IntID = DAG.getTargetConstant(
8582 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
8583 XLenVT);
8585 auto *Store = cast<MemIntrinsicSDNode>(Op);
8586 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
8587 Ops.push_back(Val);
8588 Ops.push_back(Op.getOperand(3)); // Ptr
8589 Ops.push_back(Op.getOperand(4)); // Stride
8590 if (!IsUnmasked)
8591 Ops.push_back(Mask);
8592 Ops.push_back(VL);
8594 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
8595 Ops, Store->getMemoryVT(),
8596 Store->getMemOperand());
8598 case Intrinsic::riscv_seg2_store:
8599 case Intrinsic::riscv_seg3_store:
8600 case Intrinsic::riscv_seg4_store:
8601 case Intrinsic::riscv_seg5_store:
8602 case Intrinsic::riscv_seg6_store:
8603 case Intrinsic::riscv_seg7_store:
8604 case Intrinsic::riscv_seg8_store: {
8605 SDLoc DL(Op);
8606 static const Intrinsic::ID VssegInts[] = {
8607 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
8608 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
8609 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
8610 Intrinsic::riscv_vsseg8};
8611 // Operands are (chain, int_id, vec*, ptr, vl)
8612 unsigned NF = Op->getNumOperands() - 4;
8613 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8614 MVT XLenVT = Subtarget.getXLenVT();
8615 MVT VT = Op->getOperand(2).getSimpleValueType();
8616 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8618 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8619 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
8620 SDValue Ptr = Op->getOperand(NF + 2);
8622 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
8623 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
8624 for (unsigned i = 0; i < NF; i++)
8625 Ops.push_back(convertToScalableVector(
8626 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
8627 Ops.append({Ptr, VL});
8629 return DAG.getMemIntrinsicNode(
8630 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
8631 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
8633 case Intrinsic::riscv_sf_vc_x_se_e8mf8:
8634 case Intrinsic::riscv_sf_vc_x_se_e8mf4:
8635 case Intrinsic::riscv_sf_vc_x_se_e8mf2:
8636 case Intrinsic::riscv_sf_vc_x_se_e8m1:
8637 case Intrinsic::riscv_sf_vc_x_se_e8m2:
8638 case Intrinsic::riscv_sf_vc_x_se_e8m4:
8639 case Intrinsic::riscv_sf_vc_x_se_e8m8:
8640 case Intrinsic::riscv_sf_vc_x_se_e16mf4:
8641 case Intrinsic::riscv_sf_vc_x_se_e16mf2:
8642 case Intrinsic::riscv_sf_vc_x_se_e16m1:
8643 case Intrinsic::riscv_sf_vc_x_se_e16m2:
8644 case Intrinsic::riscv_sf_vc_x_se_e16m4:
8645 case Intrinsic::riscv_sf_vc_x_se_e16m8:
8646 case Intrinsic::riscv_sf_vc_x_se_e32mf2:
8647 case Intrinsic::riscv_sf_vc_x_se_e32m1:
8648 case Intrinsic::riscv_sf_vc_x_se_e32m2:
8649 case Intrinsic::riscv_sf_vc_x_se_e32m4:
8650 case Intrinsic::riscv_sf_vc_x_se_e32m8:
8651 case Intrinsic::riscv_sf_vc_x_se_e64m1:
8652 case Intrinsic::riscv_sf_vc_x_se_e64m2:
8653 case Intrinsic::riscv_sf_vc_x_se_e64m4:
8654 case Intrinsic::riscv_sf_vc_x_se_e64m8:
8655 case Intrinsic::riscv_sf_vc_i_se_e8mf8:
8656 case Intrinsic::riscv_sf_vc_i_se_e8mf4:
8657 case Intrinsic::riscv_sf_vc_i_se_e8mf2:
8658 case Intrinsic::riscv_sf_vc_i_se_e8m1:
8659 case Intrinsic::riscv_sf_vc_i_se_e8m2:
8660 case Intrinsic::riscv_sf_vc_i_se_e8m4:
8661 case Intrinsic::riscv_sf_vc_i_se_e8m8:
8662 case Intrinsic::riscv_sf_vc_i_se_e16mf4:
8663 case Intrinsic::riscv_sf_vc_i_se_e16mf2:
8664 case Intrinsic::riscv_sf_vc_i_se_e16m1:
8665 case Intrinsic::riscv_sf_vc_i_se_e16m2:
8666 case Intrinsic::riscv_sf_vc_i_se_e16m4:
8667 case Intrinsic::riscv_sf_vc_i_se_e16m8:
8668 case Intrinsic::riscv_sf_vc_i_se_e32mf2:
8669 case Intrinsic::riscv_sf_vc_i_se_e32m1:
8670 case Intrinsic::riscv_sf_vc_i_se_e32m2:
8671 case Intrinsic::riscv_sf_vc_i_se_e32m4:
8672 case Intrinsic::riscv_sf_vc_i_se_e32m8:
8673 case Intrinsic::riscv_sf_vc_i_se_e64m1:
8674 case Intrinsic::riscv_sf_vc_i_se_e64m2:
8675 case Intrinsic::riscv_sf_vc_i_se_e64m4:
8676 case Intrinsic::riscv_sf_vc_i_se_e64m8:
8677 case Intrinsic::riscv_sf_vc_xv_se:
8678 case Intrinsic::riscv_sf_vc_iv_se:
8679 case Intrinsic::riscv_sf_vc_vv_se:
8680 case Intrinsic::riscv_sf_vc_fv_se:
8681 case Intrinsic::riscv_sf_vc_xvv_se:
8682 case Intrinsic::riscv_sf_vc_ivv_se:
8683 case Intrinsic::riscv_sf_vc_vvv_se:
8684 case Intrinsic::riscv_sf_vc_fvv_se:
8685 case Intrinsic::riscv_sf_vc_xvw_se:
8686 case Intrinsic::riscv_sf_vc_ivw_se:
8687 case Intrinsic::riscv_sf_vc_vvw_se:
8688 case Intrinsic::riscv_sf_vc_fvw_se: {
8689 if (!llvm::any_of(Op->op_values(), [&](const SDValue &V) {
8690 return V.getValueType().isFixedLengthVector();
8692 break;
8694 SmallVector<SDValue, 6> Ops;
8695 for (const SDValue &V : Op->op_values()) {
8696 // Skip non-fixed vector operands.
8697 if (!V.getValueType().isFixedLengthVector()) {
8698 Ops.push_back(V);
8699 continue;
8702 MVT OpContainerVT =
8703 getContainerForFixedLengthVector(V.getSimpleValueType());
8704 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8707 return DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
8711 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8714 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
8715 switch (ISDOpcode) {
8716 default:
8717 llvm_unreachable("Unhandled reduction");
8718 case ISD::VP_REDUCE_ADD:
8719 case ISD::VECREDUCE_ADD:
8720 return RISCVISD::VECREDUCE_ADD_VL;
8721 case ISD::VP_REDUCE_UMAX:
8722 case ISD::VECREDUCE_UMAX:
8723 return RISCVISD::VECREDUCE_UMAX_VL;
8724 case ISD::VP_REDUCE_SMAX:
8725 case ISD::VECREDUCE_SMAX:
8726 return RISCVISD::VECREDUCE_SMAX_VL;
8727 case ISD::VP_REDUCE_UMIN:
8728 case ISD::VECREDUCE_UMIN:
8729 return RISCVISD::VECREDUCE_UMIN_VL;
8730 case ISD::VP_REDUCE_SMIN:
8731 case ISD::VECREDUCE_SMIN:
8732 return RISCVISD::VECREDUCE_SMIN_VL;
8733 case ISD::VP_REDUCE_AND:
8734 case ISD::VECREDUCE_AND:
8735 return RISCVISD::VECREDUCE_AND_VL;
8736 case ISD::VP_REDUCE_OR:
8737 case ISD::VECREDUCE_OR:
8738 return RISCVISD::VECREDUCE_OR_VL;
8739 case ISD::VP_REDUCE_XOR:
8740 case ISD::VECREDUCE_XOR:
8741 return RISCVISD::VECREDUCE_XOR_VL;
8742 case ISD::VP_REDUCE_FADD:
8743 return RISCVISD::VECREDUCE_FADD_VL;
8744 case ISD::VP_REDUCE_SEQ_FADD:
8745 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
8746 case ISD::VP_REDUCE_FMAX:
8747 return RISCVISD::VECREDUCE_FMAX_VL;
8748 case ISD::VP_REDUCE_FMIN:
8749 return RISCVISD::VECREDUCE_FMIN_VL;
8754 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
8755 SelectionDAG &DAG,
8756 bool IsVP) const {
8757 SDLoc DL(Op);
8758 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
8759 MVT VecVT = Vec.getSimpleValueType();
8760 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
8761 Op.getOpcode() == ISD::VECREDUCE_OR ||
8762 Op.getOpcode() == ISD::VECREDUCE_XOR ||
8763 Op.getOpcode() == ISD::VP_REDUCE_AND ||
8764 Op.getOpcode() == ISD::VP_REDUCE_OR ||
8765 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
8766 "Unexpected reduction lowering");
8768 MVT XLenVT = Subtarget.getXLenVT();
8770 MVT ContainerVT = VecVT;
8771 if (VecVT.isFixedLengthVector()) {
8772 ContainerVT = getContainerForFixedLengthVector(VecVT);
8773 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8776 SDValue Mask, VL;
8777 if (IsVP) {
8778 Mask = Op.getOperand(2);
8779 VL = Op.getOperand(3);
8780 } else {
8781 std::tie(Mask, VL) =
8782 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8785 unsigned BaseOpc;
8786 ISD::CondCode CC;
8787 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8789 switch (Op.getOpcode()) {
8790 default:
8791 llvm_unreachable("Unhandled reduction");
8792 case ISD::VECREDUCE_AND:
8793 case ISD::VP_REDUCE_AND: {
8794 // vcpop ~x == 0
8795 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
8796 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
8797 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8798 CC = ISD::SETEQ;
8799 BaseOpc = ISD::AND;
8800 break;
8802 case ISD::VECREDUCE_OR:
8803 case ISD::VP_REDUCE_OR:
8804 // vcpop x != 0
8805 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8806 CC = ISD::SETNE;
8807 BaseOpc = ISD::OR;
8808 break;
8809 case ISD::VECREDUCE_XOR:
8810 case ISD::VP_REDUCE_XOR: {
8811 // ((vcpop x) & 1) != 0
8812 SDValue One = DAG.getConstant(1, DL, XLenVT);
8813 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8814 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
8815 CC = ISD::SETNE;
8816 BaseOpc = ISD::XOR;
8817 break;
8821 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
8822 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
8824 if (!IsVP)
8825 return SetCC;
8827 // Now include the start value in the operation.
8828 // Note that we must return the start value when no elements are operated
8829 // upon. The vcpop instructions we've emitted in each case above will return
8830 // 0 for an inactive vector, and so we've already received the neutral value:
8831 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
8832 // can simply include the start value.
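// For example, with an EVL of zero a VP_REDUCE_AND sees vcpop == 0, so the
// setcc (0 == 0) produces 1, and ANDing that with the start value returns the
// start value unchanged, as required.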
8833 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
8836 static bool isNonZeroAVL(SDValue AVL) {
8837 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
8838 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
8839 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
8840 (ImmAVL && ImmAVL->getZExtValue() >= 1);
8843 /// Helper to lower a reduction sequence of the form:
8844 /// scalar = reduce_op vec, scalar_start
8845 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
8846 SDValue StartValue, SDValue Vec, SDValue Mask,
8847 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
8848 const RISCVSubtarget &Subtarget) {
8849 const MVT VecVT = Vec.getSimpleValueType();
8850 const MVT M1VT = getLMUL1VT(VecVT);
8851 const MVT XLenVT = Subtarget.getXLenVT();
8852 const bool NonZeroAVL = isNonZeroAVL(VL);
8854 // The reduction needs an LMUL1 input; do the splat at either LMUL1
8855 // or the original VT if fractional.
8856 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
8857 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
8858 // prove it is non-zero. For the AVL=0 case, we need the scalar to
8859 // be the result of the reduction operation.
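// Concretely: when we cannot prove the AVL non-zero, the start value is
// splatted with VL=1 and also used as the passthru, so a reduction that
// updates no elements still leaves the start value in element 0 of the result.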
8860 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
8861 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
8862 DAG, Subtarget);
8863 if (M1VT != InnerVT)
8864 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
8865 DAG.getUNDEF(M1VT),
8866 InitialValue, DAG.getConstant(0, DL, XLenVT));
8867 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
8868 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
8869 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
8870 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
8871 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
8872 DAG.getConstant(0, DL, XLenVT));
8875 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
8876 SelectionDAG &DAG) const {
8877 SDLoc DL(Op);
8878 SDValue Vec = Op.getOperand(0);
8879 EVT VecEVT = Vec.getValueType();
8881 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
8883 // Due to ordering in legalize types we may have a vector type that needs to
8884 // be split. Do that manually so we can get down to a legal type.
8885 while (getTypeAction(*DAG.getContext(), VecEVT) ==
8886 TargetLowering::TypeSplitVector) {
8887 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
8888 VecEVT = Lo.getValueType();
8889 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
8892 // TODO: The type may need to be widened rather than split. Or widened before
8893 // it can be split.
8894 if (!isTypeLegal(VecEVT))
8895 return SDValue();
8897 MVT VecVT = VecEVT.getSimpleVT();
8898 MVT VecEltVT = VecVT.getVectorElementType();
8899 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
8901 MVT ContainerVT = VecVT;
8902 if (VecVT.isFixedLengthVector()) {
8903 ContainerVT = getContainerForFixedLengthVector(VecVT);
8904 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8907 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8909 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
8910 switch (BaseOpc) {
8911 case ISD::AND:
8912 case ISD::OR:
8913 case ISD::UMAX:
8914 case ISD::UMIN:
8915 case ISD::SMAX:
8916 case ISD::SMIN:
8917 MVT XLenVT = Subtarget.getXLenVT();
8918 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
8919 DAG.getConstant(0, DL, XLenVT));
8921 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
8922 Mask, VL, DL, DAG, Subtarget);
8925 // Given a reduction op, this function returns the matching reduction opcode,
8926 // the vector SDValue and the scalar SDValue required to lower this to a
8927 // RISCVISD node.
8928 static std::tuple<unsigned, SDValue, SDValue>
8929 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
8930 const RISCVSubtarget &Subtarget) {
8931 SDLoc DL(Op);
8932 auto Flags = Op->getFlags();
8933 unsigned Opcode = Op.getOpcode();
8934 switch (Opcode) {
8935 default:
8936 llvm_unreachable("Unhandled reduction");
8937 case ISD::VECREDUCE_FADD: {
8938 // Use positive zero if we can. It is cheaper to materialize.
8939 SDValue Zero =
8940 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
8941 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
8943 case ISD::VECREDUCE_SEQ_FADD:
8944 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
8945 Op.getOperand(0));
8946 case ISD::VECREDUCE_FMIN:
8947 case ISD::VECREDUCE_FMAX: {
8948 MVT XLenVT = Subtarget.getXLenVT();
8949 SDValue Front =
8950 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
8951 DAG.getConstant(0, DL, XLenVT));
8952 unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
8953 ? RISCVISD::VECREDUCE_FMIN_VL
8954 : RISCVISD::VECREDUCE_FMAX_VL;
8955 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
8960 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
8961 SelectionDAG &DAG) const {
8962 SDLoc DL(Op);
8963 MVT VecEltVT = Op.getSimpleValueType();
8965 unsigned RVVOpcode;
8966 SDValue VectorVal, ScalarVal;
8967 std::tie(RVVOpcode, VectorVal, ScalarVal) =
8968 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
8969 MVT VecVT = VectorVal.getSimpleValueType();
8971 MVT ContainerVT = VecVT;
8972 if (VecVT.isFixedLengthVector()) {
8973 ContainerVT = getContainerForFixedLengthVector(VecVT);
8974 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
8977 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8978 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
8979 VectorVal, Mask, VL, DL, DAG, Subtarget);
8982 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
8983 SelectionDAG &DAG) const {
8984 SDLoc DL(Op);
8985 SDValue Vec = Op.getOperand(1);
8986 EVT VecEVT = Vec.getValueType();
8988 // TODO: The type may need to be widened rather than split. Or widened before
8989 // it can be split.
8990 if (!isTypeLegal(VecEVT))
8991 return SDValue();
8993 MVT VecVT = VecEVT.getSimpleVT();
8994 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
8996 if (VecVT.isFixedLengthVector()) {
8997 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
8998 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9001 SDValue VL = Op.getOperand(3);
9002 SDValue Mask = Op.getOperand(2);
9003 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9004 Vec, Mask, VL, DL, DAG, Subtarget);
9007 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9008 SelectionDAG &DAG) const {
9009 SDValue Vec = Op.getOperand(0);
9010 SDValue SubVec = Op.getOperand(1);
9011 MVT VecVT = Vec.getSimpleValueType();
9012 MVT SubVecVT = SubVec.getSimpleValueType();
9014 SDLoc DL(Op);
9015 MVT XLenVT = Subtarget.getXLenVT();
9016 unsigned OrigIdx = Op.getConstantOperandVal(2);
9017 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9019 // We don't have the ability to slide mask vectors up indexed by their i1
9020 // elements; the smallest we can do is i8. Often we are able to bitcast to
9021 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9022 // into a scalable one, we might not necessarily have enough scalable
9023 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
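// For example, inserting v16i1 into nxv8i1 at index 8 can instead be done as
// inserting v2i8 into nxv1i8 at index 1.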
9024 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9025 (OrigIdx != 0 || !Vec.isUndef())) {
9026 if (VecVT.getVectorMinNumElements() >= 8 &&
9027 SubVecVT.getVectorMinNumElements() >= 8) {
9028 assert(OrigIdx % 8 == 0 && "Invalid index");
9029 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9030 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9031 "Unexpected mask vector lowering");
9032 OrigIdx /= 8;
9033 SubVecVT =
9034 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9035 SubVecVT.isScalableVector());
9036 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9037 VecVT.isScalableVector());
9038 Vec = DAG.getBitcast(VecVT, Vec);
9039 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9040 } else {
9041 // We can't slide this mask vector up indexed by its i1 elements.
9042 // This poses a problem when we wish to insert a scalable vector which
9043 // can't be re-expressed as a larger type. Just choose the slow path and
9044 // extend to a larger type, then truncate back down.
9045 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9046 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9047 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9048 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9049 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9050 Op.getOperand(2));
9051 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9052 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9056 // If the subvector is a fixed-length type, we cannot use subregister
9057 // manipulation to simplify the codegen; we don't know which register of an
9058 // LMUL group contains the specific subvector as we only know the minimum
9059 // register size. Therefore we must slide the vector group up the full
9060 // amount.
9061 if (SubVecVT.isFixedLengthVector()) {
9062 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9063 return Op;
9064 MVT ContainerVT = VecVT;
9065 if (VecVT.isFixedLengthVector()) {
9066 ContainerVT = getContainerForFixedLengthVector(VecVT);
9067 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9070 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9071 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9072 DAG.getUNDEF(ContainerVT), SubVec,
9073 DAG.getConstant(0, DL, XLenVT));
9074 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9075 return DAG.getBitcast(Op.getValueType(), SubVec);
9078 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9079 DAG.getUNDEF(ContainerVT), SubVec,
9080 DAG.getConstant(0, DL, XLenVT));
9081 SDValue Mask =
9082 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9083 // Set the vector length to only the number of elements we care about. Note
9084 // that for slideup this includes the offset.
9085 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9086 SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
9088 // Use tail agnostic policy if we're inserting over Vec's tail.
9089 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9090 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9091 Policy = RISCVII::TAIL_AGNOSTIC;
9093 // If we're inserting into the lowest elements, use a tail undisturbed
9094 // vmv.v.v.
9095 if (OrigIdx == 0) {
9096 SubVec =
9097 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9098 } else {
9099 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9100 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9101 SlideupAmt, Mask, VL, Policy);
9104 if (VecVT.isFixedLengthVector())
9105 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9106 return DAG.getBitcast(Op.getValueType(), SubVec);
9109 unsigned SubRegIdx, RemIdx;
9110 std::tie(SubRegIdx, RemIdx) =
9111 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9112 VecVT, SubVecVT, OrigIdx, TRI);
9114 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9115 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9116 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9117 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9119 // 1. If the Idx has been completely eliminated and this subvector's size is
9120 // that of a vector register or a multiple thereof, or the surrounding elements are
9121 // undef, then this is a subvector insert which naturally aligns to a vector
9122 // register. These can easily be handled using subregister manipulation.
9123 // 2. If the subvector is smaller than a vector register, then the insertion
9124 // must preserve the undisturbed elements of the register. We do this by
9125 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9126 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9127 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9128 // LMUL=1 type back into the larger vector (resolving to another subregister
9129 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9130 // to avoid allocating a large register group to hold our subvector.
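// For example, inserting nxv2i32 into nxv8i32 at index 2 lands exactly on a
// vector register boundary and reduces to case 1, whereas inserting a
// fractional type such as nxv1i32 at index 1 leaves RemIdx != 0 and takes the
// slide-based path of case 2.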
9131 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9132 return Op;
9134 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9135 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
9136 // (in our case undisturbed). This means we can set up a subvector insertion
9137 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9138 // size of the subvector.
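// For example, a vslideup with OFFSET = 3 and VL = 5 leaves elements 0..2
// untouched, writes the source's elements 0..1 into elements 3..4, and leaves
// elements from 5 upwards to the (undisturbed) tail policy.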
9139 MVT InterSubVT = VecVT;
9140 SDValue AlignedExtract = Vec;
9141 unsigned AlignedIdx = OrigIdx - RemIdx;
9142 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9143 InterSubVT = getLMUL1VT(VecVT);
9144 // Extract a subvector equal to the nearest full vector register type. This
9145 // should resolve to an EXTRACT_SUBREG instruction.
9146 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9147 DAG.getConstant(AlignedIdx, DL, XLenVT));
9150 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9151 DAG.getUNDEF(InterSubVT), SubVec,
9152 DAG.getConstant(0, DL, XLenVT));
9154 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9156 VL = computeVLMax(SubVecVT, DL, DAG);
9158 // If we're inserting into the lowest elements, use a tail undisturbed
9159 // vmv.v.v.
9160 if (RemIdx == 0) {
9161 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9162 SubVec, VL);
9163 } else {
9164 SDValue SlideupAmt =
9165 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9167 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9168 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9170 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9171 SlideupAmt, Mask, VL);
9174 // If required, insert this subvector back into the correct vector register.
9175 // This should resolve to an INSERT_SUBREG instruction.
9176 if (VecVT.bitsGT(InterSubVT))
9177 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9178 DAG.getConstant(AlignedIdx, DL, XLenVT));
9180 // We might have bitcast from a mask type: cast back to the original type if
9181 // required.
9182 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9185 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9186 SelectionDAG &DAG) const {
9187 SDValue Vec = Op.getOperand(0);
9188 MVT SubVecVT = Op.getSimpleValueType();
9189 MVT VecVT = Vec.getSimpleValueType();
9191 SDLoc DL(Op);
9192 MVT XLenVT = Subtarget.getXLenVT();
9193 unsigned OrigIdx = Op.getConstantOperandVal(1);
9194 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9196 // We don't have the ability to slide mask vectors down indexed by their i1
9197 // elements; the smallest we can do is i8. Often we are able to bitcast to
9198 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9199 // from a scalable one, we might not necessarily have enough scalable
9200 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
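// For example, extracting v8i1 from nxv64i1 at index 8 can instead be done as
// extracting v1i8 from nxv8i8 at index 1.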
9201 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9202 if (VecVT.getVectorMinNumElements() >= 8 &&
9203 SubVecVT.getVectorMinNumElements() >= 8) {
9204 assert(OrigIdx % 8 == 0 && "Invalid index");
9205 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9206 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9207 "Unexpected mask vector lowering");
9208 OrigIdx /= 8;
9209 SubVecVT =
9210 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9211 SubVecVT.isScalableVector());
9212 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9213 VecVT.isScalableVector());
9214 Vec = DAG.getBitcast(VecVT, Vec);
9215 } else {
9216 // We can't slide this mask vector down, indexed by its i1 elements.
9217 // This poses a problem when we wish to extract a scalable vector which
9218 // can't be re-expressed as a larger type. Just choose the slow path and
9219 // extend to a larger type, then truncate back down.
9220 // TODO: We could probably improve this when extracting certain fixed-length
9221 // vectors from fixed-length vectors, where we can extract as i8 and shift the
9222 // correct element right to reach the desired subvector.
9223 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9224 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9225 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9226 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9227 Op.getOperand(1));
9228 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9229 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9233 // With an index of 0 this is a cast-like subvector extract, which can be
9234 // performed with subregister operations.
9235 if (OrigIdx == 0)
9236 return Op;
9238 // If the subvector is a fixed-length type, we cannot use subregister
9239 // manipulation to simplify the codegen; we don't know which register of an
9240 // LMUL group contains the specific subvector as we only know the minimum
9241 // register size. Therefore we must slide the vector group down the full
9242 // amount.
9243 if (SubVecVT.isFixedLengthVector()) {
9244 MVT ContainerVT = VecVT;
9245 if (VecVT.isFixedLengthVector()) {
9246 ContainerVT = getContainerForFixedLengthVector(VecVT);
9247 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9250 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9251 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9252 if (auto ShrunkVT =
9253 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9254 ContainerVT = *ShrunkVT;
9255 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9256 DAG.getVectorIdxConstant(0, DL));
9259 SDValue Mask =
9260 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9261 // Set the vector length to only the number of elements we care about. This
9262 // avoids sliding down elements we're going to discard straight away.
9263 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
9264 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9265 SDValue Slidedown =
9266 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9267 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9268 // Now we can use a cast-like subvector extract to get the result.
9269 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9270 DAG.getConstant(0, DL, XLenVT));
9271 return DAG.getBitcast(Op.getValueType(), Slidedown);
9274 unsigned SubRegIdx, RemIdx;
9275 std::tie(SubRegIdx, RemIdx) =
9276 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9277 VecVT, SubVecVT, OrigIdx, TRI);
9279 // If the Idx has been completely eliminated then this is a subvector extract
9280 // which naturally aligns to a vector register. These can easily be handled
9281 // using subregister manipulation.
9282 if (RemIdx == 0)
9283 return Op;
9285 // Else SubVecVT is a fractional LMUL and may need to be slid down.
9286 assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);
9288 // If the vector type is an LMUL-group type, extract a subvector equal to the
9289 // nearest full vector register type.
9290 MVT InterSubVT = VecVT;
9291 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9292 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
9293 // we should have successfully decomposed the extract into a subregister.
9294 assert(SubRegIdx != RISCV::NoSubRegister);
9295 InterSubVT = getLMUL1VT(VecVT);
9296 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
9299 // Slide this vector register down by the desired number of elements in order
9300 // to place the desired subvector starting at element 0.
9301 SDValue SlidedownAmt =
9302 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9304 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
9305 SDValue Slidedown =
9306 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
9307 Vec, SlidedownAmt, Mask, VL);
9309 // Now the vector is in the right position, extract our final subvector. This
9310 // should resolve to a COPY.
9311 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9312 DAG.getConstant(0, DL, XLenVT));
9314 // We might have bitcast from a mask type: cast back to the original type if
9315 // required.
9316 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
9319 // Widen a vector's operands to i8, then truncate its results back to the
9320 // original type, typically i1. All operand and result types must be the same.
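// Results are narrowed back to i1 with a setcc against zero, so any non-zero
// lane in the widened result maps to true.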
9321 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
9322 SelectionDAG &DAG) {
9323 MVT VT = N.getSimpleValueType();
9324 MVT WideVT = VT.changeVectorElementType(MVT::i8);
9325 SmallVector<SDValue, 4> WideOps;
9326 for (SDValue Op : N->ops()) {
9327 assert(Op.getSimpleValueType() == VT &&
9328 "Operands and result must be same type");
9329 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
9332 unsigned NumVals = N->getNumValues();
9334 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
9335 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
9336 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
9337 SmallVector<SDValue, 4> TruncVals;
9338 for (unsigned I = 0; I < NumVals; I++) {
9339 TruncVals.push_back(
9340 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
9341 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
9344 if (TruncVals.size() > 1)
9345 return DAG.getMergeValues(TruncVals, DL);
9346 return TruncVals.front();
9349 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
9350 SelectionDAG &DAG) const {
9351 SDLoc DL(Op);
9352 MVT VecVT = Op.getSimpleValueType();
9353 MVT XLenVT = Subtarget.getXLenVT();
9355 assert(VecVT.isScalableVector() &&
9356 "vector_interleave on non-scalable vector!");
9358 // 1-bit element vectors need to be widened to e8
9359 if (VecVT.getVectorElementType() == MVT::i1)
9360 return widenVectorOpsToi8(Op, DL, DAG);
9362 // If the VT is LMUL=8, we need to split and reassemble.
9363 if (VecVT.getSizeInBits().getKnownMinValue() ==
9364 (8 * RISCV::RVVBitsPerBlock)) {
9365 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9366 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9367 EVT SplitVT = Op0Lo.getValueType();
9369 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9370 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
9371 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9372 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
9374 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9375 ResLo.getValue(0), ResHi.getValue(0));
9376 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
9377 ResHi.getValue(1));
9378 return DAG.getMergeValues({Even, Odd}, DL);
9381 // Concatenate the two vectors as one vector to deinterleave
9382 MVT ConcatVT =
9383 MVT::getVectorVT(VecVT.getVectorElementType(),
9384 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9385 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9386 Op.getOperand(0), Op.getOperand(1));
9388 // We want to operate on all lanes, so get the mask and VL for it
9389 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
9390 SDValue Passthru = DAG.getUNDEF(ConcatVT);
9392 // We can deinterleave through vnsrl.wi if the element type is smaller than
9393 // ELEN
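// Viewing the concatenated vector as elements of width 2*SEW, a narrowing
// shift-right of 0 keeps the low (even) halves and a shift of SEW keeps the
// high (odd) halves, which recovers the even and odd element sequences.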
9394 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9395 SDValue Even =
9396 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
9397 SDValue Odd =
9398 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
9399 return DAG.getMergeValues({Even, Odd}, DL);
9402 // For the indices, use the same SEW to avoid an extra vsetvli
9403 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
9404 // Create a vector of even indices {0, 2, 4, ...}
9405 SDValue EvenIdx =
9406 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
9407 // Create a vector of odd indices {1, 3, 5, ... }
9408 SDValue OddIdx =
9409 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
9411 // Gather the even and odd elements into two separate vectors
9412 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9413 Concat, EvenIdx, Passthru, Mask, VL);
9414 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9415 Concat, OddIdx, Passthru, Mask, VL);
9417 // Extract the result half of the gather for even and odd
9418 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
9419 DAG.getConstant(0, DL, XLenVT));
9420 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
9421 DAG.getConstant(0, DL, XLenVT));
9423 return DAG.getMergeValues({Even, Odd}, DL);
9426 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
9427 SelectionDAG &DAG) const {
9428 SDLoc DL(Op);
9429 MVT VecVT = Op.getSimpleValueType();
9431 assert(VecVT.isScalableVector() &&
9432 "vector_interleave on non-scalable vector!");
9434 // i1 vectors need to be widened to i8
9435 if (VecVT.getVectorElementType() == MVT::i1)
9436 return widenVectorOpsToi8(Op, DL, DAG);
9438 MVT XLenVT = Subtarget.getXLenVT();
9439 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
9441 // If the VT is LMUL=8, we need to split and reassemble.
9442 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
9443 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9444 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9445 EVT SplitVT = Op0Lo.getValueType();
9447 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9448 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
9449 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9450 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
9452 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9453 ResLo.getValue(0), ResLo.getValue(1));
9454 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9455 ResHi.getValue(0), ResHi.getValue(1));
9456 return DAG.getMergeValues({Lo, Hi}, DL);
9459 SDValue Interleaved;
9461 // If the element type is smaller than ELEN, then we can interleave with
9462 // vwaddu.vv and vwmaccu.vx
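// Roughly: vwaddu.vv computes zext(a) + zext(b), and vwmaccu.vx with the
// scalar (2^SEW - 1) then adds (2^SEW - 1) * zext(b), giving
// zext(a) + 2^SEW * zext(b), i.e. each widened element holds a in its low
// half and b in its high half, which is the interleaved layout.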
9463 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9464 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
9465 DAG, Subtarget);
9466 } else {
9467 // Otherwise, fall back to using vrgatherei16.vv
9468 MVT ConcatVT =
9469 MVT::getVectorVT(VecVT.getVectorElementType(),
9470 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9471 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9472 Op.getOperand(0), Op.getOperand(1));
9474 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
9476 // 0 1 2 3 4 5 6 7 ...
9477 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
9479 // 1 1 1 1 1 1 1 1 ...
9480 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
9482 // 1 0 1 0 1 0 1 0 ...
9483 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
9484 OddMask = DAG.getSetCC(
9485 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
9486 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
9487 ISD::CondCode::SETNE);
9489 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
9491 // Build up the index vector for interleaving the concatenated vector
9492 // 0 0 1 1 2 2 3 3 ...
9493 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
9494 // 0 n 1 n+1 2 n+2 3 n+3 ...
9495 Idx =
9496 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
9498 // Then perform the interleave
9499 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
9500 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
9501 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
9502 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
9505 // Extract the two halves from the interleaved result
9506 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9507 DAG.getVectorIdxConstant(0, DL));
9508 SDValue Hi = DAG.getNode(
9509 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9510 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
9512 return DAG.getMergeValues({Lo, Hi}, DL);
9515 // Lower step_vector to the vid instruction. Any step value other than 1 must
9516 // be accounted for by manual expansion.
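// For example, a step of 4 becomes vid.v followed by a shift left by 2, while
// a step of 3 multiplies the vid.v result by a splat of 3.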
9517 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
9518 SelectionDAG &DAG) const {
9519 SDLoc DL(Op);
9520 MVT VT = Op.getSimpleValueType();
9521 assert(VT.isScalableVector() && "Expected scalable vector");
9522 MVT XLenVT = Subtarget.getXLenVT();
9523 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
9524 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9525 uint64_t StepValImm = Op.getConstantOperandVal(0);
9526 if (StepValImm != 1) {
9527 if (isPowerOf2_64(StepValImm)) {
9528 SDValue StepVal =
9529 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9530 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
9531 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
9532 } else {
9533 SDValue StepVal = lowerScalarSplat(
9534 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
9535 VL, VT, DL, DAG, Subtarget);
9536 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
9539 return StepVec;
9542 // Implement vector_reverse using vrgather.vv with indices determined by
9543 // subtracting the id of each element from (VLMAX-1). This will convert
9544 // the indices like so:
9545 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
9546 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
9547 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
9548 SelectionDAG &DAG) const {
9549 SDLoc DL(Op);
9550 MVT VecVT = Op.getSimpleValueType();
9551 if (VecVT.getVectorElementType() == MVT::i1) {
9552 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9553 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
9554 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
9555 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
9557 unsigned EltSize = VecVT.getScalarSizeInBits();
9558 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
9559 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
9560 unsigned MaxVLMAX =
9561 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
9563 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
9564 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
9566 // If this is SEW=8 and VLMAX is potentially more than 256, we need
9567 // to use vrgatherei16.vv.
9568 // TODO: It's also possible to use vrgatherei16.vv for other types to
9569 // decrease register width for the index calculation.
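// For example, with a maximum VLEN of 512 at LMUL=8 and SEW=8, VLMAX is 512,
// which cannot be encoded in 8-bit index elements, hence vrgatherei16.vv.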
9570 if (MaxVLMAX > 256 && EltSize == 8) {
9571 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
9572 // Reverse each half, then reassemble them in reverse order.
9573 // NOTE: It's also possible that after splitting, VLMAX no longer requires
9574 // vrgatherei16.vv.
9575 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
9576 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9577 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
9578 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
9579 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
9580 // Reassemble the low and high pieces reversed.
9581 // FIXME: This is a CONCAT_VECTORS.
9582 SDValue Res =
9583 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
9584 DAG.getIntPtrConstant(0, DL));
9585 return DAG.getNode(
9586 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
9587 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
9590 // Just promote the int type to i16 which will double the LMUL.
9591 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
9592 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
9595 MVT XLenVT = Subtarget.getXLenVT();
9596 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9598 // Calculate VLMAX-1 for the desired SEW.
9599 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
9600 computeVLMax(VecVT, DL, DAG),
9601 DAG.getConstant(1, DL, XLenVT));
9603 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
9604 bool IsRV32E64 =
9605 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
9606 SDValue SplatVL;
9607 if (!IsRV32E64)
9608 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
9609 else
9610 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
9611 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
9613 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
9614 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
9615 DAG.getUNDEF(IntVT), Mask, VL);
9617 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
9618 DAG.getUNDEF(VecVT), Mask, VL);
9621 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
9622 SelectionDAG &DAG) const {
9623 SDLoc DL(Op);
9624 SDValue V1 = Op.getOperand(0);
9625 SDValue V2 = Op.getOperand(1);
9626 MVT XLenVT = Subtarget.getXLenVT();
9627 MVT VecVT = Op.getSimpleValueType();
9629 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
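// The splice is built by sliding V1 down by the splice offset (bringing its
// trailing elements to the front) and then sliding V2 up by VLMAX - offset on
// top of that result.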
9631 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
9632 SDValue DownOffset, UpOffset;
9633 if (ImmValue >= 0) {
9634 // The operand is a TargetConstant; we need to rebuild it as a regular
9635 // constant.
9636 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
9637 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
9638 } else {
9639 // The operand is a TargetConstant; we need to rebuild it as a regular
9640 // constant rather than negating the original operand.
9641 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
9642 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
9645 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
9647 SDValue SlideDown =
9648 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
9649 DownOffset, TrueMask, UpOffset);
9650 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
9651 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
9652 RISCVII::TAIL_AGNOSTIC);
9655 SDValue
9656 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
9657 SelectionDAG &DAG) const {
9658 SDLoc DL(Op);
9659 auto *Load = cast<LoadSDNode>(Op);
9661 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9662 Load->getMemoryVT(),
9663 *Load->getMemOperand()) &&
9664 "Expecting a correctly-aligned load");
9666 MVT VT = Op.getSimpleValueType();
9667 MVT XLenVT = Subtarget.getXLenVT();
9668 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9670 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
9672 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
9673 SDValue IntID = DAG.getTargetConstant(
9674 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
9675 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
9676 if (!IsMaskOp)
9677 Ops.push_back(DAG.getUNDEF(ContainerVT));
9678 Ops.push_back(Load->getBasePtr());
9679 Ops.push_back(VL);
9680 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9681 SDValue NewLoad =
9682 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9683 Load->getMemoryVT(), Load->getMemOperand());
9685 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
9686 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
9689 SDValue
9690 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
9691 SelectionDAG &DAG) const {
9692 SDLoc DL(Op);
9693 auto *Store = cast<StoreSDNode>(Op);
9695 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9696 Store->getMemoryVT(),
9697 *Store->getMemOperand()) &&
9698 "Expecting a correctly-aligned store");
9700 SDValue StoreVal = Store->getValue();
9701 MVT VT = StoreVal.getSimpleValueType();
9702 MVT XLenVT = Subtarget.getXLenVT();
9704 // If the size is less than a byte, we need to pad with zeros to make a byte.
9705 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
9706 VT = MVT::v8i1;
9707 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
9708 DAG.getConstant(0, DL, VT), StoreVal,
9709 DAG.getIntPtrConstant(0, DL));
9712 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9714 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
9716 SDValue NewValue =
9717 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
9719 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
9720 SDValue IntID = DAG.getTargetConstant(
9721 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
9722 return DAG.getMemIntrinsicNode(
9723 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
9724 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
9725 Store->getMemoryVT(), Store->getMemOperand());
9728 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
9729 SelectionDAG &DAG) const {
9730 SDLoc DL(Op);
9731 MVT VT = Op.getSimpleValueType();
9733 const auto *MemSD = cast<MemSDNode>(Op);
9734 EVT MemVT = MemSD->getMemoryVT();
9735 MachineMemOperand *MMO = MemSD->getMemOperand();
9736 SDValue Chain = MemSD->getChain();
9737 SDValue BasePtr = MemSD->getBasePtr();
9739 SDValue Mask, PassThru, VL;
9740 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
9741 Mask = VPLoad->getMask();
9742 PassThru = DAG.getUNDEF(VT);
9743 VL = VPLoad->getVectorLength();
9744 } else {
9745 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
9746 Mask = MLoad->getMask();
9747 PassThru = MLoad->getPassThru();
9750 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9752 MVT XLenVT = Subtarget.getXLenVT();
9754 MVT ContainerVT = VT;
9755 if (VT.isFixedLengthVector()) {
9756 ContainerVT = getContainerForFixedLengthVector(VT);
9757 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9758 if (!IsUnmasked) {
9759 MVT MaskVT = getMaskTypeFor(ContainerVT);
9760 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9764 if (!VL)
9765 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9767 unsigned IntID =
9768 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
9769 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9770 if (IsUnmasked)
9771 Ops.push_back(DAG.getUNDEF(ContainerVT));
9772 else
9773 Ops.push_back(PassThru);
9774 Ops.push_back(BasePtr);
9775 if (!IsUnmasked)
9776 Ops.push_back(Mask);
9777 Ops.push_back(VL);
9778 if (!IsUnmasked)
9779 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
9781 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9783 SDValue Result =
9784 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
9785 Chain = Result.getValue(1);
9787 if (VT.isFixedLengthVector())
9788 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9790 return DAG.getMergeValues({Result, Chain}, DL);
9793 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
9794 SelectionDAG &DAG) const {
9795 SDLoc DL(Op);
9797 const auto *MemSD = cast<MemSDNode>(Op);
9798 EVT MemVT = MemSD->getMemoryVT();
9799 MachineMemOperand *MMO = MemSD->getMemOperand();
9800 SDValue Chain = MemSD->getChain();
9801 SDValue BasePtr = MemSD->getBasePtr();
9802 SDValue Val, Mask, VL;
9804 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
9805 Val = VPStore->getValue();
9806 Mask = VPStore->getMask();
9807 VL = VPStore->getVectorLength();
9808 } else {
9809 const auto *MStore = cast<MaskedStoreSDNode>(Op);
9810 Val = MStore->getValue();
9811 Mask = MStore->getMask();
9814 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9816 MVT VT = Val.getSimpleValueType();
9817 MVT XLenVT = Subtarget.getXLenVT();
9819 MVT ContainerVT = VT;
9820 if (VT.isFixedLengthVector()) {
9821 ContainerVT = getContainerForFixedLengthVector(VT);
9823 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9824 if (!IsUnmasked) {
9825 MVT MaskVT = getMaskTypeFor(ContainerVT);
9826 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9830 if (!VL)
9831 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9833 unsigned IntID =
9834 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
9835 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9836 Ops.push_back(Val);
9837 Ops.push_back(BasePtr);
9838 if (!IsUnmasked)
9839 Ops.push_back(Mask);
9840 Ops.push_back(VL);
9842 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
9843 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
9846 SDValue
9847 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
9848 SelectionDAG &DAG) const {
9849 MVT InVT = Op.getOperand(0).getSimpleValueType();
9850 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
9852 MVT VT = Op.getSimpleValueType();
9854 SDValue Op1 =
9855 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
9856 SDValue Op2 =
9857 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
9859 SDLoc DL(Op);
9860 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
9861 DAG, Subtarget);
9862 MVT MaskVT = getMaskTypeFor(ContainerVT);
9864 SDValue Cmp =
9865 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9866 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
9868 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
9871 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
9872 SelectionDAG &DAG) const {
9873 unsigned Opc = Op.getOpcode();
9874 SDLoc DL(Op);
9875 SDValue Chain = Op.getOperand(0);
9876 SDValue Op1 = Op.getOperand(1);
9877 SDValue Op2 = Op.getOperand(2);
9878 SDValue CC = Op.getOperand(3);
9879 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
9880 MVT VT = Op.getSimpleValueType();
9881 MVT InVT = Op1.getSimpleValueType();
9883 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
9884 // condition codes.
9885 if (Opc == ISD::STRICT_FSETCCS) {
9886 // Expand strict_fsetccs(x, y, oeq) to
9887 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
9888 SDVTList VTList = Op->getVTList();
9889 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
9890 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
9891 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
9892 Op2, OLECCVal);
9893 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
9894 Op1, OLECCVal);
9895 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
9896 Tmp1.getValue(1), Tmp2.getValue(1));
9897 // Tmp1 and Tmp2 might be the same node.
9898 if (Tmp1 != Tmp2)
9899 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
9900 return DAG.getMergeValues({Tmp1, OutChain}, DL);
9903 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
9904 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
9905 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
9906 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
9907 Op2, OEQCCVal);
9908 SDValue Res = DAG.getNOT(DL, OEQ, VT);
9909 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
9913 MVT ContainerInVT = InVT;
9914 if (InVT.isFixedLengthVector()) {
9915 ContainerInVT = getContainerForFixedLengthVector(InVT);
9916 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
9917 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
9919 MVT MaskVT = getMaskTypeFor(ContainerInVT);
9921 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
9923 SDValue Res;
9924 if (Opc == ISD::STRICT_FSETCC &&
9925 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
9926 CCVal == ISD::SETOLE)) {
9927 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
9928 // is only active when both input elements are ordered.
9929 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
9930 SDValue OrderMask1 = DAG.getNode(
9931 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
9932 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
9933 True, VL});
9934 SDValue OrderMask2 = DAG.getNode(
9935 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
9936 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
9937 True, VL});
9938 Mask =
9939 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
9940 // Use Mask as the merge operand to let the result be 0 if either of the
9941 // inputs is unordered.
9942 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
9943 DAG.getVTList(MaskVT, MVT::Other),
9944 {Chain, Op1, Op2, CC, Mask, Mask, VL});
9945 } else {
9946 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
9947 : RISCVISD::STRICT_FSETCCS_VL;
9948 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
9949 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
9952 if (VT.isFixedLengthVector()) {
9953 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
9954 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
9956 return Res;
9959 // Lower vector ABS to smax(X, sub(0, X)).
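// This typically selects down to a vrsub with immediate 0 followed by a vmax.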
9960 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
9961 SDLoc DL(Op);
9962 MVT VT = Op.getSimpleValueType();
9963 SDValue X = Op.getOperand(0);
9965 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
9966 "Unexpected type for ISD::ABS");
9968 MVT ContainerVT = VT;
9969 if (VT.isFixedLengthVector()) {
9970 ContainerVT = getContainerForFixedLengthVector(VT);
9971 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
9974 SDValue Mask, VL;
9975 if (Op->getOpcode() == ISD::VP_ABS) {
9976 Mask = Op->getOperand(1);
9977 if (VT.isFixedLengthVector())
9978 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
9979 Subtarget);
9980 VL = Op->getOperand(2);
9981 } else
9982 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
9984 SDValue SplatZero = DAG.getNode(
9985 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
9986 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
9987 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
9988 DAG.getUNDEF(ContainerVT), Mask, VL);
9989 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
9990 DAG.getUNDEF(ContainerVT), Mask, VL);
9992 if (VT.isFixedLengthVector())
9993 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
9994 return Max;
9997 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
9998 SDValue Op, SelectionDAG &DAG) const {
9999 SDLoc DL(Op);
10000 MVT VT = Op.getSimpleValueType();
10001 SDValue Mag = Op.getOperand(0);
10002 SDValue Sign = Op.getOperand(1);
10003 assert(Mag.getValueType() == Sign.getValueType() &&
10004 "Can only handle COPYSIGN with matching types.");
10006 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10007 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10008 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10010 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10012 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10013 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10015 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10018 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10019 SDValue Op, SelectionDAG &DAG) const {
10020 MVT VT = Op.getSimpleValueType();
10021 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10023 MVT I1ContainerVT =
10024 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10026 SDValue CC =
10027 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10028 SDValue Op1 =
10029 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10030 SDValue Op2 =
10031 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10033 SDLoc DL(Op);
10034 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10036 SDValue Select =
10037 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
10039 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10042 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10043 SelectionDAG &DAG) const {
10044 unsigned NewOpc = getRISCVVLOp(Op);
10045 bool HasMergeOp = hasMergeOp(NewOpc);
10046 bool HasMask = hasMaskOp(NewOpc);
10048 MVT VT = Op.getSimpleValueType();
10049 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10051 // Create list of operands by converting existing ones to scalable types.
10052 SmallVector<SDValue, 6> Ops;
10053 for (const SDValue &V : Op->op_values()) {
10054 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10056 // Pass through non-vector operands.
10057 if (!V.getValueType().isVector()) {
10058 Ops.push_back(V);
10059 continue;
10062 // "cast" fixed length vector to a scalable vector.
10063 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10064 "Only fixed length vectors are supported!");
10065 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10068 SDLoc DL(Op);
10069 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10070 if (HasMergeOp)
10071 Ops.push_back(DAG.getUNDEF(ContainerVT));
10072 if (HasMask)
10073 Ops.push_back(Mask);
10074 Ops.push_back(VL);
10076 // StrictFP operations have two result values. Their lowered results should
10077 // have the same result count.
10078 if (Op->isStrictFPOpcode()) {
10079 SDValue ScalableRes =
10080 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10081 Op->getFlags());
10082 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10083 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10086 SDValue ScalableRes =
10087 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10088 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10091 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10092 // * Operands of each node are assumed to be in the same order.
10093 // * The EVL operand is promoted from i32 to i64 on RV64.
10094 // * Fixed-length vectors are converted to their scalable-vector container
10095 // types.
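// For example, a VP_ADD on fixed-length operands has each vector operand
// converted to its container type, keeps the mask and EVL operands in place
// (with a dummy merge value inserted before the mask when the VL node takes
// one), and is emitted as RISCVISD::ADD_VL before the result is converted
// back to the fixed-length type.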
10096 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10097 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10098 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10100 SDLoc DL(Op);
10101 MVT VT = Op.getSimpleValueType();
10102 SmallVector<SDValue, 4> Ops;
10104 MVT ContainerVT = VT;
10105 if (VT.isFixedLengthVector())
10106 ContainerVT = getContainerForFixedLengthVector(VT);
10108 for (const auto &OpIdx : enumerate(Op->ops())) {
10109 SDValue V = OpIdx.value();
10110 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10111 // Add dummy merge value before the mask.
10112 if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
10113 Ops.push_back(DAG.getUNDEF(ContainerVT));
10114 // Pass through operands which aren't fixed-length vectors.
10115 if (!V.getValueType().isFixedLengthVector()) {
10116 Ops.push_back(V);
10117 continue;
10119 // "cast" fixed length vector to a scalable vector.
10120 MVT OpVT = V.getSimpleValueType();
10121 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10122 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10123 "Only fixed length vectors are supported!");
10124 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10127 if (!VT.isFixedLengthVector())
10128 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10130 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10132 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10135 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10136 SelectionDAG &DAG) const {
10137 SDLoc DL(Op);
10138 MVT VT = Op.getSimpleValueType();
10140 SDValue Src = Op.getOperand(0);
10141 // NOTE: Mask is dropped.
10142 SDValue VL = Op.getOperand(2);
10144 MVT ContainerVT = VT;
10145 if (VT.isFixedLengthVector()) {
10146 ContainerVT = getContainerForFixedLengthVector(VT);
10147 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10148 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10151 MVT XLenVT = Subtarget.getXLenVT();
10152 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10153 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10154 DAG.getUNDEF(ContainerVT), Zero, VL);
10156 SDValue SplatValue = DAG.getConstant(
10157 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10158 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10159 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10161 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
10162 Splat, ZeroSplat, VL);
10163 if (!VT.isFixedLengthVector())
10164 return Result;
10165 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10168 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10169 SelectionDAG &DAG) const {
10170 SDLoc DL(Op);
10171 MVT VT = Op.getSimpleValueType();
10173 SDValue Op1 = Op.getOperand(0);
10174 SDValue Op2 = Op.getOperand(1);
10175 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10176 // NOTE: Mask is dropped.
10177 SDValue VL = Op.getOperand(4);
10179 MVT ContainerVT = VT;
10180 if (VT.isFixedLengthVector()) {
10181 ContainerVT = getContainerForFixedLengthVector(VT);
10182 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10183 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10186 SDValue Result;
10187 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10189 switch (Condition) {
10190 default:
10191 break;
10192 // X != Y --> (X^Y)
10193 case ISD::SETNE:
10194 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10195 break;
10196 // X == Y --> ~(X^Y)
10197 case ISD::SETEQ: {
10198 SDValue Temp =
10199 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10200 Result =
10201 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
10202 break;
10204 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
10205 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
10206 case ISD::SETGT:
10207 case ISD::SETULT: {
10208 SDValue Temp =
10209 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10210 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
10211 break;
10213 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
10214 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
10215 case ISD::SETLT:
10216 case ISD::SETUGT: {
10217 SDValue Temp =
10218 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10219 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
10220 break;
10222 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
10223 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
10224 case ISD::SETGE:
10225 case ISD::SETULE: {
10226 SDValue Temp =
10227 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10228 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
10229 break;
10231 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
10232 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
10233 case ISD::SETLE:
10234 case ISD::SETUGE: {
10235 SDValue Temp =
10236 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10237 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
10238 break;
10242 if (!VT.isFixedLengthVector())
10243 return Result;
10244 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10247 // Lower Floating-Point/Integer Type-Convert VP SDNodes
10248 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
10249 SelectionDAG &DAG) const {
10250 SDLoc DL(Op);
10252 SDValue Src = Op.getOperand(0);
10253 SDValue Mask = Op.getOperand(1);
10254 SDValue VL = Op.getOperand(2);
10255 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10257 MVT DstVT = Op.getSimpleValueType();
10258 MVT SrcVT = Src.getSimpleValueType();
10259 if (DstVT.isFixedLengthVector()) {
10260 DstVT = getContainerForFixedLengthVector(DstVT);
10261 SrcVT = getContainerForFixedLengthVector(SrcVT);
10262 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10263 MVT MaskVT = getMaskTypeFor(DstVT);
10264 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10267 unsigned DstEltSize = DstVT.getScalarSizeInBits();
10268 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
10270 SDValue Result;
10271 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
10272 if (SrcVT.isInteger()) {
10273 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10275 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
10276 ? RISCVISD::VSEXT_VL
10277 : RISCVISD::VZEXT_VL;
10279 // Do we need to do any pre-widening before converting?
10280 if (SrcEltSize == 1) {
10281 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10284 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10285 DAG.getUNDEF(IntVT), Zero, VL);
10286 SDValue One = DAG.getConstant(
10287 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
10288 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10289 DAG.getUNDEF(IntVT), One, VL);
10290 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
10291 ZeroSplat, VL);
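// For example, a vp.sitofp from an i1 vector first rewrites the mask as the
// integers 0/-1 in IntVT, so the FP conversion below sees proper signed values.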
10292 } else if (DstEltSize > (2 * SrcEltSize)) {
10293 // Widen before converting.
10294 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
10295 DstVT.getVectorElementCount());
10296 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
10299 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10300 } else {
10301 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10302 "Wrong input/output vector types");
10304 // Convert f16 to f32 then convert f32 to i64.
10305 if (DstEltSize > (2 * SrcEltSize)) {
10306 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10307 MVT InterimFVT =
10308 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10309 Src =
10310 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
10313 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10315 } else { // Narrowing + Conversion
10316 if (SrcVT.isInteger()) {
10317 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10318 // First do a narrowing convert to an FP type half the size, then round
10319 // the FP type to a small FP type if needed.
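// For example, an i64 -> f16 conversion is emitted as i64 -> f32 (narrowing
// convert) followed by an f32 -> f16 fp_round.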
10321 MVT InterimFVT = DstVT;
10322 if (SrcEltSize > (2 * DstEltSize)) {
10323 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
10324 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10325 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10328 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
10330 if (InterimFVT != DstVT) {
10331 Src = Result;
10332 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
10334 } else {
10335 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10336 "Wrong input/output vector types");
10337 // First do a narrowing conversion to an integer half the size, then
10338 // truncate if needed.
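// For example, an f64 -> i8 conversion is emitted as f64 -> i32 followed by
// vector truncates i32 -> i16 -> i8.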
10340 if (DstEltSize == 1) {
10341 // First convert to the same size integer, then convert to mask using
10342 // setcc.
10343 assert(SrcEltSize >= 16 && "Unexpected FP type!");
10344 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
10345 DstVT.getVectorElementCount());
10346 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10348 // Compare the integer result to 0. The integer should be 0 or 1/-1,
10349 // otherwise the conversion was undefined.
10350 MVT XLenVT = Subtarget.getXLenVT();
10351 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10352 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
10353 DAG.getUNDEF(InterimIVT), SplatZero, VL);
10354 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
10355 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
10356 DAG.getUNDEF(DstVT), Mask, VL});
10357 } else {
10358 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10359 DstVT.getVectorElementCount());
10361 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10363 while (InterimIVT != DstVT) {
10364 SrcEltSize /= 2;
10365 Src = Result;
10366 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10367 DstVT.getVectorElementCount());
10368 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
10369 Src, Mask, VL);
10375 MVT VT = Op.getSimpleValueType();
10376 if (!VT.isFixedLengthVector())
10377 return Result;
10378 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10381 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 MVT VT = Op.getSimpleValueType();
10384 if (VT.getVectorElementType() != MVT::i1)
10385 return lowerVPOp(Op, DAG);
10387 // It is safe to drop the mask parameter as masked-off elements are undef.
10388 SDValue Op1 = Op->getOperand(0);
10389 SDValue Op2 = Op->getOperand(1);
10390 SDValue VL = Op->getOperand(3);
10392 MVT ContainerVT = VT;
10393 const bool IsFixed = VT.isFixedLengthVector();
10394 if (IsFixed) {
10395 ContainerVT = getContainerForFixedLengthVector(VT);
10396 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10397 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10400 SDLoc DL(Op);
10401 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
10402 if (!IsFixed)
10403 return Val;
10404 return convertFromScalableVector(VT, Val, DAG, Subtarget);
10407 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
10408 SelectionDAG &DAG) const {
10409 SDLoc DL(Op);
10410 MVT XLenVT = Subtarget.getXLenVT();
10411 MVT VT = Op.getSimpleValueType();
10412 MVT ContainerVT = VT;
10413 if (VT.isFixedLengthVector())
10414 ContainerVT = getContainerForFixedLengthVector(VT);
10416 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10418 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
10419 // Check if the mask is known to be all ones
10420 SDValue Mask = VPNode->getMask();
10421 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10423 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
10424 : Intrinsic::riscv_vlse_mask,
10425 DL, XLenVT);
10426 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
10427 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
10428 VPNode->getStride()};
10429 if (!IsUnmasked) {
10430 if (VT.isFixedLengthVector()) {
10431 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
10432 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10434 Ops.push_back(Mask);
10436 Ops.push_back(VPNode->getVectorLength());
10437 if (!IsUnmasked) {
10438 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10439 Ops.push_back(Policy);
10442 SDValue Result =
10443 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10444 VPNode->getMemoryVT(), VPNode->getMemOperand());
10445 SDValue Chain = Result.getValue(1);
10447 if (VT.isFixedLengthVector())
10448 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10450 return DAG.getMergeValues({Result, Chain}, DL);
10453 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
10454 SelectionDAG &DAG) const {
10455 SDLoc DL(Op);
10456 MVT XLenVT = Subtarget.getXLenVT();
10458 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
10459 SDValue StoreVal = VPNode->getValue();
10460 MVT VT = StoreVal.getSimpleValueType();
10461 MVT ContainerVT = VT;
10462 if (VT.isFixedLengthVector()) {
10463 ContainerVT = getContainerForFixedLengthVector(VT);
10464 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10467 // Check if the mask is known to be all ones
10468 SDValue Mask = VPNode->getMask();
10469 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10471 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
10472 : Intrinsic::riscv_vsse_mask,
10473 DL, XLenVT);
10474 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
10475 VPNode->getBasePtr(), VPNode->getStride()};
10476 if (!IsUnmasked) {
10477 if (VT.isFixedLengthVector()) {
10478 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
10479 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10481 Ops.push_back(Mask);
10483 Ops.push_back(VPNode->getVectorLength());
10485 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
10486 Ops, VPNode->getMemoryVT(),
10487 VPNode->getMemOperand());
10490 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
10491 // matched to a RVV indexed load. The RVV indexed load instructions only
10492 // support the "unsigned unscaled" addressing mode; indices are implicitly
10493 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
10494 // signed or scaled indexing is extended to the XLEN value type and scaled
10495 // accordingly.
10496 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
10497 SelectionDAG &DAG) const {
10498 SDLoc DL(Op);
10499 MVT VT = Op.getSimpleValueType();
10501 const auto *MemSD = cast<MemSDNode>(Op.getNode());
10502 EVT MemVT = MemSD->getMemoryVT();
10503 MachineMemOperand *MMO = MemSD->getMemOperand();
10504 SDValue Chain = MemSD->getChain();
10505 SDValue BasePtr = MemSD->getBasePtr();
10507 ISD::LoadExtType LoadExtType;
10508 SDValue Index, Mask, PassThru, VL;
10510 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
10511 Index = VPGN->getIndex();
10512 Mask = VPGN->getMask();
10513 PassThru = DAG.getUNDEF(VT);
10514 VL = VPGN->getVectorLength();
10515 // VP doesn't support extending loads.
10516 LoadExtType = ISD::NON_EXTLOAD;
10517 } else {
10518 // Else it must be a MGATHER.
10519 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
10520 Index = MGN->getIndex();
10521 Mask = MGN->getMask();
10522 PassThru = MGN->getPassThru();
10523 LoadExtType = MGN->getExtensionType();
10526 MVT IndexVT = Index.getSimpleValueType();
10527 MVT XLenVT = Subtarget.getXLenVT();
10529 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
10530 "Unexpected VTs!");
10531 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
10532 // Targets have to explicitly opt-in for extending vector loads.
10533 assert(LoadExtType == ISD::NON_EXTLOAD &&
10534 "Unexpected extending MGATHER/VP_GATHER");
10535 (void)LoadExtType;
10537 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
10538 // the selection of the masked intrinsics doesn't do this for us.
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10541 MVT ContainerVT = VT;
10542 if (VT.isFixedLengthVector()) {
10543 ContainerVT = getContainerForFixedLengthVector(VT);
10544 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
10545 ContainerVT.getVectorElementCount());
10547 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
10549 if (!IsUnmasked) {
10550 MVT MaskVT = getMaskTypeFor(ContainerVT);
10551 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10552 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10556 if (!VL)
10557 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10559 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
10560 IndexVT = IndexVT.changeVectorElementType(XLenVT);
10561 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
10564 unsigned IntID =
10565 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
10566 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10567 if (IsUnmasked)
10568 Ops.push_back(DAG.getUNDEF(ContainerVT));
10569 else
10570 Ops.push_back(PassThru);
10571 Ops.push_back(BasePtr);
10572 Ops.push_back(Index);
10573 if (!IsUnmasked)
10574 Ops.push_back(Mask);
10575 Ops.push_back(VL);
10576 if (!IsUnmasked)
10577 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10579 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10580 SDValue Result =
10581 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10582 Chain = Result.getValue(1);
10584 if (VT.isFixedLengthVector())
10585 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10587 return DAG.getMergeValues({Result, Chain}, DL);
10590 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
10591 // matched to a RVV indexed store. The RVV indexed store instructions only
10592 // support the "unsigned unscaled" addressing mode; indices are implicitly
10593 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
10594 // signed or scaled indexing is extended to the XLEN value type and scaled
10595 // accordingly.
10596 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
10597 SelectionDAG &DAG) const {
10598 SDLoc DL(Op);
10599 const auto *MemSD = cast<MemSDNode>(Op.getNode());
10600 EVT MemVT = MemSD->getMemoryVT();
10601 MachineMemOperand *MMO = MemSD->getMemOperand();
10602 SDValue Chain = MemSD->getChain();
10603 SDValue BasePtr = MemSD->getBasePtr();
10605 bool IsTruncatingStore = false;
10606 SDValue Index, Mask, Val, VL;
10608 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
10609 Index = VPSN->getIndex();
10610 Mask = VPSN->getMask();
10611 Val = VPSN->getValue();
10612 VL = VPSN->getVectorLength();
10613 // VP doesn't support truncating stores.
10614 IsTruncatingStore = false;
10615 } else {
10616 // Else it must be a MSCATTER.
10617 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
10618 Index = MSN->getIndex();
10619 Mask = MSN->getMask();
10620 Val = MSN->getValue();
10621 IsTruncatingStore = MSN->isTruncatingStore();
10624 MVT VT = Val.getSimpleValueType();
10625 MVT IndexVT = Index.getSimpleValueType();
10626 MVT XLenVT = Subtarget.getXLenVT();
10628 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
10629 "Unexpected VTs!");
10630 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
10631 // Targets have to explicitly opt-in for extending vector loads and
10632 // truncating vector stores.
10633 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
10634 (void)IsTruncatingStore;
10636 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
10637 // the selection of the masked intrinsics doesn't do this for us.
10638 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10640 MVT ContainerVT = VT;
10641 if (VT.isFixedLengthVector()) {
10642 ContainerVT = getContainerForFixedLengthVector(VT);
10643 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
10644 ContainerVT.getVectorElementCount());
10646 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
10647 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10649 if (!IsUnmasked) {
10650 MVT MaskVT = getMaskTypeFor(ContainerVT);
10651 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10655 if (!VL)
10656 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10658 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
10659 IndexVT = IndexVT.changeVectorElementType(XLenVT);
10660 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
10663 unsigned IntID =
10664 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
10665 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10666 Ops.push_back(Val);
10667 Ops.push_back(BasePtr);
10668 Ops.push_back(Index);
10669 if (!IsUnmasked)
10670 Ops.push_back(Mask);
10671 Ops.push_back(VL);
10673 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10674 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10677 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
10678 SelectionDAG &DAG) const {
10679 const MVT XLenVT = Subtarget.getXLenVT();
10680 SDLoc DL(Op);
10681 SDValue Chain = Op->getOperand(0);
10682 SDValue SysRegNo = DAG.getTargetConstant(
10683 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
10684 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
10685 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
10687 // The encoding used for the rounding mode in RISC-V differs from that used by
10688 // FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as an
10689 // index into a table consisting of a sequence of 4-bit fields, each holding the
10690 // corresponding FLT_ROUNDS mode.
10691 static const int Table =
10692 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
10693 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
10694 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
10695 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
10696 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
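// Each 4-bit field is extracted as (Table >> (FRM * 4)) & 7. For example, if
// FRM holds RTZ, the lookup below yields int(RoundingMode::TowardZero), the
// FLT_ROUNDS encoding for round-toward-zero.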
10698 SDValue Shift =
10699 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
10700 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
10701 DAG.getConstant(Table, DL, XLenVT), Shift);
10702 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
10703 DAG.getConstant(7, DL, XLenVT));
10705 return DAG.getMergeValues({Masked, Chain}, DL);
10708 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
10709 SelectionDAG &DAG) const {
10710 const MVT XLenVT = Subtarget.getXLenVT();
10711 SDLoc DL(Op);
10712 SDValue Chain = Op->getOperand(0);
10713 SDValue RMValue = Op->getOperand(1);
10714 SDValue SysRegNo = DAG.getTargetConstant(
10715 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
10717 // The encoding used for the rounding mode in RISC-V differs from that used by
10718 // FLT_ROUNDS. To convert between them, the C rounding mode is used as an index
10719 // into a table consisting of a sequence of 4-bit fields, each holding the
10720 // corresponding RISC-V mode.
10721 static const unsigned Table =
10722 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
10723 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
10724 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
10725 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
10726 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
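// For example, an incoming FLT_ROUNDS value of NearestTiesToEven selects the
// field holding RISCVFPRndMode::RNE, which is what gets written to FRM below.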
10728 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
10730 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
10731 DAG.getConstant(2, DL, XLenVT));
10732 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
10733 DAG.getConstant(Table, DL, XLenVT), Shift);
10734 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
10735 DAG.getConstant(0x7, DL, XLenVT));
10736 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
10737 RMValue);
10740 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
10741 SelectionDAG &DAG) const {
10742 MachineFunction &MF = DAG.getMachineFunction();
10744 bool isRISCV64 = Subtarget.is64Bit();
10745 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10747 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
10748 return DAG.getFrameIndex(FI, PtrVT);
10751 // Returns the opcode of the target-specific SDNode that implements the 32-bit
10752 // form of the given Opcode.
10753 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
10754 switch (Opcode) {
10755 default:
10756 llvm_unreachable("Unexpected opcode");
10757 case ISD::SHL:
10758 return RISCVISD::SLLW;
10759 case ISD::SRA:
10760 return RISCVISD::SRAW;
10761 case ISD::SRL:
10762 return RISCVISD::SRLW;
10763 case ISD::SDIV:
10764 return RISCVISD::DIVW;
10765 case ISD::UDIV:
10766 return RISCVISD::DIVUW;
10767 case ISD::UREM:
10768 return RISCVISD::REMUW;
10769 case ISD::ROTL:
10770 return RISCVISD::ROLW;
10771 case ISD::ROTR:
10772 return RISCVISD::RORW;
10776 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
10777 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
10778 // otherwise be promoted to i64, making it difficult to select the
10779 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
10780 // type i8/i16/i32 is lost.
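// For example, an i32 SDIV on RV64 becomes
// (trunc i32 (DIVW (any_extend i64 LHS), (any_extend i64 RHS))).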
10781 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
10782 unsigned ExtOpc = ISD::ANY_EXTEND) {
10783 SDLoc DL(N);
10784 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
10785 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
10786 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
10787 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
10788 // ReplaceNodeResults requires we maintain the same type for the return value.
10789 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
10792 // Converts the given 32-bit operation to an i64 operation with sign extension
10793 // semantics to reduce the number of sign extension instructions.
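// For example, an i32 add becomes
// (trunc i32 (sext_inreg (add (any_extend X), (any_extend Y)), i32)),
// which records that the upper 32 bits are a sign extension of the result.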
10794 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
10795 SDLoc DL(N);
10796 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
10797 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10798 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
10799 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
10800 DAG.getValueType(MVT::i32));
10801 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
10804 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
10805 SmallVectorImpl<SDValue> &Results,
10806 SelectionDAG &DAG) const {
10807 SDLoc DL(N);
10808 switch (N->getOpcode()) {
10809 default:
10810 llvm_unreachable("Don't know how to custom type legalize this operation!");
10811 case ISD::STRICT_FP_TO_SINT:
10812 case ISD::STRICT_FP_TO_UINT:
10813 case ISD::FP_TO_SINT:
10814 case ISD::FP_TO_UINT: {
10815 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
10816 "Unexpected custom legalisation");
10817 bool IsStrict = N->isStrictFPOpcode();
10818 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
10819 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
10820 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
10821 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
10822 TargetLowering::TypeSoftenFloat) {
10823 if (!isTypeLegal(Op0.getValueType()))
10824 return;
10825 if (IsStrict) {
10826 SDValue Chain = N->getOperand(0);
10827 // In absence of Zfh, promote f16 to f32, then convert.
10828 if (Op0.getValueType() == MVT::f16 &&
10829 !Subtarget.hasStdExtZfhOrZhinx()) {
10830 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
10831 {Chain, Op0});
10832 Chain = Op0.getValue(1);
10834 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
10835 : RISCVISD::STRICT_FCVT_WU_RV64;
10836 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
10837 SDValue Res = DAG.getNode(
10838 Opc, DL, VTs, Chain, Op0,
10839 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
10840 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10841 Results.push_back(Res.getValue(1));
10842 return;
10844 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
10845 // convert.
10846 if ((Op0.getValueType() == MVT::f16 &&
10847 !Subtarget.hasStdExtZfhOrZhinx()) ||
10848 Op0.getValueType() == MVT::bf16)
10849 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
10851 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
10852 SDValue Res =
10853 DAG.getNode(Opc, DL, MVT::i64, Op0,
10854 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
10855 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10856 return;
10858 // If the FP type needs to be softened, emit a library call using the 'si'
10859 // version. If we left it to default legalization we'd end up with 'di'. If
10860 // the FP type doesn't need to be softened just let generic type
10861 // legalization promote the result type.
10862 RTLIB::Libcall LC;
10863 if (IsSigned)
10864 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
10865 else
10866 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
10867 MakeLibCallOptions CallOptions;
10868 EVT OpVT = Op0.getValueType();
10869 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
10870 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
10871 SDValue Result;
10872 std::tie(Result, Chain) =
10873 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
10874 Results.push_back(Result);
10875 if (IsStrict)
10876 Results.push_back(Chain);
10877 break;
10879 case ISD::LROUND: {
10880 SDValue Op0 = N->getOperand(0);
10881 EVT Op0VT = Op0.getValueType();
10882 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
10883 TargetLowering::TypeSoftenFloat) {
10884 if (!isTypeLegal(Op0VT))
10885 return;
10887 // In absence of Zfh, promote f16 to f32, then convert.
10888 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
10889 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
10891 SDValue Res =
10892 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
10893 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
10894 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10895 return;
10897 // If the FP type needs to be softened, emit a library call to lround. We'll
10898 // need to truncate the result. We assume any value that doesn't fit in i32
10899 // is allowed to return an unspecified value.
10900 RTLIB::Libcall LC =
10901 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
10902 MakeLibCallOptions CallOptions;
10903 EVT OpVT = Op0.getValueType();
10904 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
10905 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
10906 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
10907 Results.push_back(Result);
10908 break;
10910 case ISD::READCYCLECOUNTER: {
10911 assert(!Subtarget.is64Bit() &&
10912 "READCYCLECOUNTER only has custom type legalization on riscv32");
10914 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10915 SDValue RCW =
10916 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
10918 Results.push_back(
10919 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
10920 Results.push_back(RCW.getValue(2));
10921 break;
10923 case ISD::LOAD: {
10924 if (!ISD::isNON_EXTLoad(N))
10925 return;
10927 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
10928 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
10929 LoadSDNode *Ld = cast<LoadSDNode>(N);
10931 SDLoc dl(N);
10932 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
10933 Ld->getBasePtr(), Ld->getMemoryVT(),
10934 Ld->getMemOperand());
10935 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
10936 Results.push_back(Res.getValue(1));
10937 return;
10939 case ISD::MUL: {
10940 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
10941 unsigned XLen = Subtarget.getXLen();
10942 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
10943 if (Size > XLen) {
10944 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
10945 SDValue LHS = N->getOperand(0);
10946 SDValue RHS = N->getOperand(1);
10947 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
10949 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
10950 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
10951 // We need exactly one side to be unsigned.
10952 if (LHSIsU == RHSIsU)
10953 return;
10955 auto MakeMULPair = [&](SDValue S, SDValue U) {
10956 MVT XLenVT = Subtarget.getXLenVT();
10957 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
10958 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
10959 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
10960 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
10961 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
10964 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
10965 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
10967 // The other operand should be signed, but still prefer MULH when
10968 // possible.
10969 if (RHSIsU && LHSIsS && !RHSIsS)
10970 Results.push_back(MakeMULPair(LHS, RHS));
10971 else if (LHSIsU && RHSIsS && !LHSIsS)
10972 Results.push_back(MakeMULPair(RHS, LHS));
10974 return;
10976 [[fallthrough]];
10978 case ISD::ADD:
10979 case ISD::SUB:
10980 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
10981 "Unexpected custom legalisation");
10982 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
10983 break;
10984 case ISD::SHL:
10985 case ISD::SRA:
10986 case ISD::SRL:
10987 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
10988 "Unexpected custom legalisation");
10989 if (N->getOperand(1).getOpcode() != ISD::Constant) {
10990 // If we can use a BSET instruction, allow default promotion to apply.
10991 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
10992 isOneConstant(N->getOperand(0)))
10993 break;
10994 Results.push_back(customLegalizeToWOp(N, DAG));
10995 break;
10998 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
10999 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
11000 // shift amount.
11001 if (N->getOpcode() == ISD::SHL) {
11002 SDLoc DL(N);
11003 SDValue NewOp0 =
11004 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11005 SDValue NewOp1 =
11006 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
11007 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
11008 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11009 DAG.getValueType(MVT::i32));
11010 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11013 break;
11014 case ISD::ROTL:
11015 case ISD::ROTR:
11016 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11017 "Unexpected custom legalisation");
11018 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
11019 Subtarget.hasVendorXTHeadBb()) &&
11020 "Unexpected custom legalization");
11021 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
11022 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
11023 return;
11024 Results.push_back(customLegalizeToWOp(N, DAG));
11025 break;
11026 case ISD::CTTZ:
11027 case ISD::CTTZ_ZERO_UNDEF:
11028 case ISD::CTLZ:
11029 case ISD::CTLZ_ZERO_UNDEF: {
11030 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11031 "Unexpected custom legalisation");
11033 SDValue NewOp0 =
11034 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11035 bool IsCTZ =
11036 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
11037 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
11038 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
11039 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11040 return;
11042 case ISD::SDIV:
11043 case ISD::UDIV:
11044 case ISD::UREM: {
11045 MVT VT = N->getSimpleValueType(0);
11046 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
11047 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
11048 "Unexpected custom legalisation");
11049 // Don't promote division/remainder by constant since we should expand those
11050 // to multiply by magic constant.
11051 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
11052 if (N->getOperand(1).getOpcode() == ISD::Constant &&
11053 !isIntDivCheap(N->getValueType(0), Attr))
11054 return;
11056 // If the input is i32, use ANY_EXTEND since the W instructions don't read
11057 // the upper 32 bits. For other types we need to sign or zero extend
11058 // based on the opcode.
11059 unsigned ExtOpc = ISD::ANY_EXTEND;
11060 if (VT != MVT::i32)
11061 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
11062 : ISD::ZERO_EXTEND;
11064 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
11065 break;
11067 case ISD::SADDO: {
11068 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11069 "Unexpected custom legalisation");
11071 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
11072 // use the default legalization.
11073 if (!isa<ConstantSDNode>(N->getOperand(1)))
11074 return;
11076 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11077 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11078 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
11079 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11080 DAG.getValueType(MVT::i32));
11082 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
11084 // For an addition, the result should be less than one of the operands (LHS)
11085 // if and only if the other operand (RHS) is negative, otherwise there will
11086 // be overflow.
11087 // For a subtraction, the result should be less than one of the operands
11088 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11089 // otherwise there will be overflow.
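// For example, i32 INT_MAX + 1: the sign-extended 32-bit result wraps to
// INT_MIN, so Res < LHS while RHS is not negative; the XOR of the two
// conditions below is therefore 1, signalling overflow.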
11090 EVT OType = N->getValueType(1);
11091 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
11092 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
11094 SDValue Overflow =
11095 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
11096 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11097 Results.push_back(Overflow);
11098 return;
11100 case ISD::UADDO:
11101 case ISD::USUBO: {
11102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11103 "Unexpected custom legalisation");
11104 bool IsAdd = N->getOpcode() == ISD::UADDO;
11105 // Create an ADDW or SUBW.
11106 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11107 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11108 SDValue Res =
11109 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
11110 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11111 DAG.getValueType(MVT::i32));
11113 SDValue Overflow;
11114 if (IsAdd && isOneConstant(RHS)) {
11115 // Special case uaddo X, 1 overflowed if the addition result is 0.
11116 // The general case (X + C) < C is not necessarily beneficial. Although we
11117 // reduce the live range of X, we may introduce the materialization of
11118 // constant C, especially when the setcc result is used by a branch. We have
11119 // no compare-with-constant-and-branch instructions.
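// For example, uaddo(X, 1): the 32-bit sum is zero only when X was
// 0xffffffff, which is exactly the overflowing case.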
11120 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
11121 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
11122 } else if (IsAdd && isAllOnesConstant(RHS)) {
11123 // Special case uaddo X, -1 overflowed if X != 0.
11124 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
11125 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
11126 } else {
11127 // Sign extend the LHS and perform an unsigned compare with the ADDW
11128 // result. Since the inputs are sign extended from i32, this is equivalent
11129 // to comparing the lower 32 bits.
11130 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11131 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
11132 IsAdd ? ISD::SETULT : ISD::SETUGT);
11135 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11136 Results.push_back(Overflow);
11137 return;
11139 case ISD::UADDSAT:
11140 case ISD::USUBSAT: {
11141 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11142 "Unexpected custom legalisation");
11143 if (Subtarget.hasStdExtZbb()) {
11144 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
11145 // sign extend allows overflow of the lower 32 bits to be detected on
11146 // the promoted size.
11147 SDValue LHS =
11148 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11149 SDValue RHS =
11150 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11151 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
11152 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11153 return;
11156 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
11157 // promotion for UADDO/USUBO.
11158 Results.push_back(expandAddSubSat(N, DAG));
11159 return;
11161 case ISD::ABS: {
11162 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11163 "Unexpected custom legalisation");
11165 if (Subtarget.hasStdExtZbb()) {
11166 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
11167 // This allows us to remember that the result is sign extended. Expanding
11168 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
11169 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
11170 N->getOperand(0));
11171 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
11172 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
11173 return;
11176 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
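// For example, X = -5: Y = -1, xor(X, Y) = 4, and sub(4, Y) = 5 == |X|.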
11177 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11179 // Freeze the source so we can increase its use count.
11180 Src = DAG.getFreeze(Src);
11182 // Copy sign bit to all bits using the sraiw pattern.
11183 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
11184 DAG.getValueType(MVT::i32));
11185 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
11186 DAG.getConstant(31, DL, MVT::i64));
11188 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
11189 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
11191 // NOTE: The result is only required to be anyextended, but sext is
11192 // consistent with type legalization of sub.
11193 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
11194 DAG.getValueType(MVT::i32));
11195 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11196 return;
11198 case ISD::BITCAST: {
11199 EVT VT = N->getValueType(0);
11200 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
11201 SDValue Op0 = N->getOperand(0);
11202 EVT Op0VT = Op0.getValueType();
11203 MVT XLenVT = Subtarget.getXLenVT();
11204 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
11205 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
11206 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11207 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11208 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
11209 Subtarget.hasStdExtZfbfmin()) {
11210 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11211 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11212 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
11213 Subtarget.hasStdExtFOrZfinx()) {
11214 SDValue FPConv =
11215 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
11216 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
11217 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
11218 Subtarget.hasStdExtZfa()) {
11219 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
11220 DAG.getVTList(MVT::i32, MVT::i32), Op0);
11221 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
11222 NewReg.getValue(0), NewReg.getValue(1));
11223 Results.push_back(RetReg);
11224 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
11225 isTypeLegal(Op0VT)) {
11226 // Custom-legalize bitcasts from fixed-length vector types to illegal
11227 // scalar types in order to improve codegen. Bitcast the vector to a
11228 // one-element vector type whose element type is the same as the result
11229 // type, and extract the first element.
11230 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
11231 if (isTypeLegal(BVT)) {
11232 SDValue BVec = DAG.getBitcast(BVT, Op0);
11233 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
11234 DAG.getConstant(0, DL, XLenVT)));
11237 break;
11239 case RISCVISD::BREV8: {
11240 MVT VT = N->getSimpleValueType(0);
11241 MVT XLenVT = Subtarget.getXLenVT();
11242 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
11243 "Unexpected custom legalisation");
11244 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11245 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
11246 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
11247 // ReplaceNodeResults requires we maintain the same type for the return
11248 // value.
11249 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
11250 break;
11252 case ISD::EXTRACT_VECTOR_ELT: {
11253 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
11254 // type is illegal (currently only vXi64 RV32).
11255 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
11256 // transferred to the destination register. We issue two of these from the
11257 // upper and lower halves of the SEW-bit vector element, slid down to the
11258 // first element.
11259 SDValue Vec = N->getOperand(0);
11260 SDValue Idx = N->getOperand(1);
11262 // The vector type hasn't been legalized yet so we can't issue target
11263 // specific nodes if it needs legalization.
11264 // FIXME: We would manually legalize if it's important.
11265 if (!isTypeLegal(Vec.getValueType()))
11266 return;
11268 MVT VecVT = Vec.getSimpleValueType();
11270 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
11271 VecVT.getVectorElementType() == MVT::i64 &&
11272 "Unexpected EXTRACT_VECTOR_ELT legalization");
11274 // If this is a fixed vector, we need to convert it to a scalable vector.
11275 MVT ContainerVT = VecVT;
11276 if (VecVT.isFixedLengthVector()) {
11277 ContainerVT = getContainerForFixedLengthVector(VecVT);
11278 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11281 MVT XLenVT = Subtarget.getXLenVT();
11283 // Use a VL of 1 to avoid processing more elements than we need.
11284 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
11286 // Unless the index is known to be 0, we must slide the vector down to get
11287 // the desired element into index 0.
11288 if (!isNullConstant(Idx)) {
11289 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11290 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
11293 // Extract the lower XLEN bits of the correct vector element.
11294 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11296 // To extract the upper XLEN bits of the vector element, shift the first
11297 // element right by 32 bits and re-extract the lower XLEN bits.
11298 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11299 DAG.getUNDEF(ContainerVT),
11300 DAG.getConstant(32, DL, XLenVT), VL);
11301 SDValue LShr32 =
11302 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
11303 DAG.getUNDEF(ContainerVT), Mask, VL);
11305 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11307 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11308 break;
11310 case ISD::INTRINSIC_WO_CHAIN: {
11311 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
11312 switch (IntNo) {
11313 default:
11314 llvm_unreachable(
11315 "Don't know how to custom type legalize this intrinsic!");
11316 case Intrinsic::experimental_get_vector_length: {
11317 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
11318 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11319 return;
11321 case Intrinsic::riscv_orc_b:
11322 case Intrinsic::riscv_brev8:
11323 case Intrinsic::riscv_sha256sig0:
11324 case Intrinsic::riscv_sha256sig1:
11325 case Intrinsic::riscv_sha256sum0:
11326 case Intrinsic::riscv_sha256sum1:
11327 case Intrinsic::riscv_sm3p0:
11328 case Intrinsic::riscv_sm3p1: {
11329 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11330 return;
11331 unsigned Opc;
11332 switch (IntNo) {
11333 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11334 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11335 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11336 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11337 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11338 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11339 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11340 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11343 SDValue NewOp =
11344 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11345 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
11346 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11347 return;
11349 case Intrinsic::riscv_sm4ks:
11350 case Intrinsic::riscv_sm4ed: {
11351 unsigned Opc =
11352 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11353 SDValue NewOp0 =
11354 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11355 SDValue NewOp1 =
11356 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11357 SDValue Res =
11358 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
11359 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11360 return;
11362 case Intrinsic::riscv_clmul: {
11363 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11364 return;
11366 SDValue NewOp0 =
11367 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11368 SDValue NewOp1 =
11369 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11370 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
11371 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11372 return;
11374 case Intrinsic::riscv_clmulh:
11375 case Intrinsic::riscv_clmulr: {
11376 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11377 return;
11379 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
11380 // to the full 128-bit clmul result of multiplying two xlen values.
11381 // Perform clmulr or clmulh on the shifted values. Finally, extract the
11382 // upper 32 bits.
11384 // The alternative is to mask the inputs to 32 bits and use clmul, but
11385 // that requires two shifts to mask each input without zext.w.
11386 // FIXME: If the inputs are known zero extended or could be freely
11387 // zero extended, the mask form would be better.
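// For example, for clmulh: clmulh(a << 32, b << 32) equals the full 64-bit
// clmul(a, b), so the srl by 32 below leaves its upper half, which is the
// desired 32-bit clmulh(a, b).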
11388 SDValue NewOp0 =
11389 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11390 SDValue NewOp1 =
11391 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11392 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
11393 DAG.getConstant(32, DL, MVT::i64));
11394 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
11395 DAG.getConstant(32, DL, MVT::i64));
11396 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
11397 : RISCVISD::CLMULR;
11398 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
11399 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
11400 DAG.getConstant(32, DL, MVT::i64));
11401 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11402 return;
11404 case Intrinsic::riscv_vmv_x_s: {
11405 EVT VT = N->getValueType(0);
11406 MVT XLenVT = Subtarget.getXLenVT();
11407 if (VT.bitsLT(XLenVT)) {
11408 // Simple case: just extract using vmv.x.s and truncate.
11409 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
11410 Subtarget.getXLenVT(), N->getOperand(1));
11411 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
11412 return;
11415 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
11416 "Unexpected custom legalization");
11418 // We need to do the move in two steps.
11419 SDValue Vec = N->getOperand(1);
11420 MVT VecVT = Vec.getSimpleValueType();
11422 // First extract the lower XLEN bits of the element.
11423 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11425 // To extract the upper XLEN bits of the vector element, shift the first
11426 // element right by 32 bits and re-extract the lower XLEN bits.
11427 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
11429 SDValue ThirtyTwoV =
11430 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
11431 DAG.getConstant(32, DL, XLenVT), VL);
11432 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
11433 DAG.getUNDEF(VecVT), Mask, VL);
11434 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11436 Results.push_back(
11437 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11438 break;
11441 break;
11443 case ISD::VECREDUCE_ADD:
11444 case ISD::VECREDUCE_AND:
11445 case ISD::VECREDUCE_OR:
11446 case ISD::VECREDUCE_XOR:
11447 case ISD::VECREDUCE_SMAX:
11448 case ISD::VECREDUCE_UMAX:
11449 case ISD::VECREDUCE_SMIN:
11450 case ISD::VECREDUCE_UMIN:
11451 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
11452 Results.push_back(V);
11453 break;
11454 case ISD::VP_REDUCE_ADD:
11455 case ISD::VP_REDUCE_AND:
11456 case ISD::VP_REDUCE_OR:
11457 case ISD::VP_REDUCE_XOR:
11458 case ISD::VP_REDUCE_SMAX:
11459 case ISD::VP_REDUCE_UMAX:
11460 case ISD::VP_REDUCE_SMIN:
11461 case ISD::VP_REDUCE_UMIN:
11462 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
11463 Results.push_back(V);
11464 break;
11465 case ISD::GET_ROUNDING: {
11466 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
11467 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
11468 Results.push_back(Res.getValue(0));
11469 Results.push_back(Res.getValue(1));
11470 break;
11475 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
11476 /// which corresponds to it.
11477 static unsigned getVecReduceOpcode(unsigned Opc) {
11478 switch (Opc) {
11479 default:
11480 llvm_unreachable("Unhandled binary to transfrom reduction");
11481 case ISD::ADD:
11482 return ISD::VECREDUCE_ADD;
11483 case ISD::UMAX:
11484 return ISD::VECREDUCE_UMAX;
11485 case ISD::SMAX:
11486 return ISD::VECREDUCE_SMAX;
11487 case ISD::UMIN:
11488 return ISD::VECREDUCE_UMIN;
11489 case ISD::SMIN:
11490 return ISD::VECREDUCE_SMIN;
11491 case ISD::AND:
11492 return ISD::VECREDUCE_AND;
11493 case ISD::OR:
11494 return ISD::VECREDUCE_OR;
11495 case ISD::XOR:
11496 return ISD::VECREDUCE_XOR;
11497 case ISD::FADD:
11498 // Note: This is the associative form of the generic reduction opcode.
11499 return ISD::VECREDUCE_FADD;
11503 /// Perform two related transforms whose purpose is to incrementally recognize
11504 /// an explode_vector followed by scalar reduction as a vector reduction node.
11505 /// This exists to recover from a deficiency in SLP which can't handle
11506 /// forests with multiple roots sharing common nodes. In some cases, one
11507 /// of the trees will be vectorized, and the other will remain (unprofitably)
11508 /// scalarized.
11509 static SDValue
11510 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
11511 const RISCVSubtarget &Subtarget) {
11513 // This transform needs to run before all integer types have been legalized
11514 // to i64 (so that the vector element type matches the add type), and while
11515 // it's safe to introduce odd sized vector types.
11516 if (DAG.NewNodesMustHaveLegalTypes)
11517 return SDValue();
11519 // Without V, this transform isn't useful. We could form the (illegal)
11520 // operations and let them be scalarized again, but there's really no point.
11521 if (!Subtarget.hasVInstructions())
11522 return SDValue();
11524 const SDLoc DL(N);
11525 const EVT VT = N->getValueType(0);
11526 const unsigned Opc = N->getOpcode();
11528 // For FADD, we only handle the case with reassociation allowed. We
11529 // could handle strict reduction order, but at the moment, there's no
11530 // known reason to, and the complexity isn't worth it.
11531 // TODO: Handle fminnum and fmaxnum here
11532 if (!VT.isInteger() &&
11533 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
11534 return SDValue();
11536 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
11537 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
11538 "Inconsistent mappings");
11539 SDValue LHS = N->getOperand(0);
11540 SDValue RHS = N->getOperand(1);
11542 if (!LHS.hasOneUse() || !RHS.hasOneUse())
11543 return SDValue();
11545 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11546 std::swap(LHS, RHS);
11548 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11549 !isa<ConstantSDNode>(RHS.getOperand(1)))
11550 return SDValue();
11552 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
11553 SDValue SrcVec = RHS.getOperand(0);
11554 EVT SrcVecVT = SrcVec.getValueType();
11555 assert(SrcVecVT.getVectorElementType() == VT);
11556 if (SrcVecVT.isScalableVector())
11557 return SDValue();
11559 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
11560 return SDValue();
11562 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
11563 // reduce_op (extract_subvector [2 x VT] from V). This will form the
11564 // root of our reduction tree. TODO: We could extend this to any two
11565 // adjacent aligned constant indices if desired.
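// For example, (add (extractelt v4i32 V, 0), (extractelt v4i32 V, 1)) becomes
// (vecreduce_add (extract_subvector v2i32 V, 0)).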
11566 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11567 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
11568 uint64_t LHSIdx =
11569 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
11570 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
11571 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
11572 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
11573 DAG.getVectorIdxConstant(0, DL));
11574 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
11578 // Match (binop (reduce (extract_subvector V, 0)),
11579 //        (extract_vector_elt V, sizeof(SubVec)))
11580 // into a reduction of one more element from the original vector V.
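// For example, (add (vecreduce_add (extract_subvector v2i32 V, 0)),
// (extractelt V, 2)) becomes (vecreduce_add (extract_subvector v3i32 V, 0)).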
11581 if (LHS.getOpcode() != ReduceOpc)
11582 return SDValue();
11584 SDValue ReduceVec = LHS.getOperand(0);
11585 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
11586 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
11587 isNullConstant(ReduceVec.getOperand(1)) &&
11588 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
11589 // For illegal types (e.g. 3xi32), most will be combined again into a
11590 // wider (hopefully legal) type. If this is a terminal state, we are
11591 // relying on type legalization here to produce something reasonable
11592 // and this lowering quality could probably be improved. (TODO)
11593 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
11594 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
11595 DAG.getVectorIdxConstant(0, DL));
11596 auto Flags = ReduceVec->getFlags();
11597 Flags.intersectWith(N->getFlags());
11598 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
11601 return SDValue();
11605 // Try to fold (<bop> x, (reduction.<bop> vec, start))
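// For example, (add X, (extractelt (vecreduce_add_vl ..., (splat 0), ...), 0))
// can use X as the reduction's start value instead, removing the scalar add,
// provided the existing start value is the neutral element (0 for add).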
11606 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
11607 const RISCVSubtarget &Subtarget) {
11608 auto BinOpToRVVReduce = [](unsigned Opc) {
11609 switch (Opc) {
11610 default:
11611 llvm_unreachable("Unhandled binary to transfrom reduction");
11612 case ISD::ADD:
11613 return RISCVISD::VECREDUCE_ADD_VL;
11614 case ISD::UMAX:
11615 return RISCVISD::VECREDUCE_UMAX_VL;
11616 case ISD::SMAX:
11617 return RISCVISD::VECREDUCE_SMAX_VL;
11618 case ISD::UMIN:
11619 return RISCVISD::VECREDUCE_UMIN_VL;
11620 case ISD::SMIN:
11621 return RISCVISD::VECREDUCE_SMIN_VL;
11622 case ISD::AND:
11623 return RISCVISD::VECREDUCE_AND_VL;
11624 case ISD::OR:
11625 return RISCVISD::VECREDUCE_OR_VL;
11626 case ISD::XOR:
11627 return RISCVISD::VECREDUCE_XOR_VL;
11628 case ISD::FADD:
11629 return RISCVISD::VECREDUCE_FADD_VL;
11630 case ISD::FMAXNUM:
11631 return RISCVISD::VECREDUCE_FMAX_VL;
11632 case ISD::FMINNUM:
11633 return RISCVISD::VECREDUCE_FMIN_VL;
11637 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
11638 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11639 isNullConstant(V.getOperand(1)) &&
11640 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
11643 unsigned Opc = N->getOpcode();
11644 unsigned ReduceIdx;
11645 if (IsReduction(N->getOperand(0), Opc))
11646 ReduceIdx = 0;
11647 else if (IsReduction(N->getOperand(1), Opc))
11648 ReduceIdx = 1;
11649 else
11650 return SDValue();
11652 // Skip if this is an FADD that disallows reassociation, which this combine requires.
11653 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
11654 return SDValue();
11656 SDValue Extract = N->getOperand(ReduceIdx);
11657 SDValue Reduce = Extract.getOperand(0);
11658 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
11659 return SDValue();
11661 SDValue ScalarV = Reduce.getOperand(2);
11662 EVT ScalarVT = ScalarV.getValueType();
11663 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
11664 ScalarV.getOperand(0)->isUndef() &&
11665 isNullConstant(ScalarV.getOperand(2)))
11666 ScalarV = ScalarV.getOperand(1);
11668 // Make sure that ScalarV is a splat with VL=1.
11669 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
11670 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
11671 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
11672 return SDValue();
11674 if (!isNonZeroAVL(ScalarV.getOperand(2)))
11675 return SDValue();
11677 // Check that the scalar of ScalarV is the neutral element.
11678 // TODO: Deal with value other than neutral element.
11679 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
11681 return SDValue();
11683 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
11684 // FIXME: We might be able to improve this if operand 0 is undef.
11685 if (!isNonZeroAVL(Reduce.getOperand(5)))
11686 return SDValue();
11688 SDValue NewStart = N->getOperand(1 - ReduceIdx);
11690 SDLoc DL(N);
11691 SDValue NewScalarV =
11692 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
11693 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
11695 // If we looked through an INSERT_SUBVECTOR we need to restore it.
11696 if (ScalarVT != ScalarV.getValueType())
11697 NewScalarV =
11698 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
11699 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
11701 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
11702 NewScalarV, Reduce.getOperand(3),
11703 Reduce.getOperand(4), Reduce.getOperand(5)};
11704 SDValue NewReduce =
11705 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
11706 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
11707 Extract.getOperand(1));
11710 // Optimize (add (shl x, c0), (shl y, c1)) ->
11711 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
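// E.g., with c0 = 2 and c1 = 5: (add (shl x, 2), (shl y, 5))
// -> (shl (add (shl y, 3), x), 2), i.e. an SH3ADD followed by an SLLI by 2.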
11712 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
11713 const RISCVSubtarget &Subtarget) {
11714 // Perform this optimization only if the Zba extension is enabled.
11715 if (!Subtarget.hasStdExtZba())
11716 return SDValue();
11718 // Skip for vector types and larger types.
11719 EVT VT = N->getValueType(0);
11720 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
11721 return SDValue();
11723 // The two operand nodes must be SHL and have no other use.
11724 SDValue N0 = N->getOperand(0);
11725 SDValue N1 = N->getOperand(1);
11726 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
11727 !N0->hasOneUse() || !N1->hasOneUse())
11728 return SDValue();
11730 // Check c0 and c1.
11731 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
11732 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
11733 if (!N0C || !N1C)
11734 return SDValue();
11735 int64_t C0 = N0C->getSExtValue();
11736 int64_t C1 = N1C->getSExtValue();
11737 if (C0 <= 0 || C1 <= 0)
11738 return SDValue();
11740 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
11741 int64_t Bits = std::min(C0, C1);
11742 int64_t Diff = std::abs(C0 - C1);
11743 if (Diff != 1 && Diff != 2 && Diff != 3)
11744 return SDValue();
11746 // Build nodes.
11747 SDLoc DL(N);
11748 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
11749 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
11750 SDValue NA0 =
11751 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
11752 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
11753 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
11756 // Combine a constant select operand into its use:
11758 // (and (select cond, -1, c), x)
11759 // -> (select cond, x, (and x, c)) [AllOnes=1]
11760 // (or (select cond, 0, c), x)
11761 // -> (select cond, x, (or x, c)) [AllOnes=0]
11762 // (xor (select cond, 0, c), x)
11763 // -> (select cond, x, (xor x, c)) [AllOnes=0]
11764 // (add (select cond, 0, c), x)
11765 // -> (select cond, x, (add x, c)) [AllOnes=0]
11766 // (sub x, (select cond, 0, c))
11767 // -> (select cond, x, (sub x, c)) [AllOnes=0]
11768 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
11769 SelectionDAG &DAG, bool AllOnes,
11770 const RISCVSubtarget &Subtarget) {
11771 EVT VT = N->getValueType(0);
11773 // Skip vectors.
11774 if (VT.isVector())
11775 return SDValue();
11777 if (!Subtarget.hasShortForwardBranchOpt()) {
11778 // (select cond, x, (and x, c)) has custom lowering with Zicond.
11779 if ((!Subtarget.hasStdExtZicond() &&
11780 !Subtarget.hasVendorXVentanaCondOps()) ||
11781 N->getOpcode() != ISD::AND)
11782 return SDValue();
11784 // Maybe harmful when the condition code has multiple uses.
11785 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
11786 return SDValue();
11788 // Maybe harmful when VT is wider than XLen.
11789 if (VT.getSizeInBits() > Subtarget.getXLen())
11790 return SDValue();
11793 if ((Slct.getOpcode() != ISD::SELECT &&
11794 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
11795 !Slct.hasOneUse())
11796 return SDValue();
11798 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
11799 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
11802 bool SwapSelectOps;
11803 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
11804 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
11805 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
11806 SDValue NonConstantVal;
11807 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
11808 SwapSelectOps = false;
11809 NonConstantVal = FalseVal;
11810 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
11811 SwapSelectOps = true;
11812 NonConstantVal = TrueVal;
11813 } else
11814 return SDValue();
11816 // Slct is now known to be the desired identity constant when CC is true.
11817 TrueVal = OtherOp;
11818 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
11819 // Unless SwapSelectOps says the condition should be false.
11820 if (SwapSelectOps)
11821 std::swap(TrueVal, FalseVal);
11823 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
11824 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
11825 {Slct.getOperand(0), Slct.getOperand(1),
11826 Slct.getOperand(2), TrueVal, FalseVal});
11828 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
11829 {Slct.getOperand(0), TrueVal, FalseVal});
11832 // Attempt combineSelectAndUse on each operand of a commutative operator N.
11833 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
11834 bool AllOnes,
11835 const RISCVSubtarget &Subtarget) {
11836 SDValue N0 = N->getOperand(0);
11837 SDValue N1 = N->getOperand(1);
11838 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
11839 return Result;
11840 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
11841 return Result;
11842 return SDValue();
11845 // Transform (add (mul x, c0), c1) ->
11846 // (add (mul (add x, c1/c0), c0), c1%c0).
11847 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
11848 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
11849 // to an infinite loop in DAGCombine if transformed.
11850 // Or transform (add (mul x, c0), c1) ->
11851 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
11852 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
11853 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
11854 // lead to an infinite loop in DAGCombine if transformed.
11855 // Or transform (add (mul x, c0), c1) ->
11856 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
11857 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
11858 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
11859 // lead to an infinite loop in DAGCombine if transformed.
11860 // Or transform (add (mul x, c0), c1) ->
11861 // (mul (add x, c1/c0), c0).
11862 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
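// E.g., with c0 = 100 and c1 = 4099: c1 is not simm12, but c1/c0 = 40 and
// c1%c0 = 99 are, and c0*(c1/c0) = 4000 is not, so we rewrite
// (add (mul x, 100), 4099) -> (add (mul (add x, 40), 100), 99).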
11863 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
11864 const RISCVSubtarget &Subtarget) {
11865 // Skip for vector types and larger types.
11866 EVT VT = N->getValueType(0);
11867 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
11868 return SDValue();
11869 // The first operand node must be a MUL and have no other use.
11870 SDValue N0 = N->getOperand(0);
11871 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
11872 return SDValue();
11873 // Check if c0 and c1 match the conditions above.
11874 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
11875 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
11876 if (!N0C || !N1C)
11877 return SDValue();
11878 // If N0C has multiple uses it's possible one of the cases in
11879 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
11880 // in an infinite loop.
11881 if (!N0C->hasOneUse())
11882 return SDValue();
11883 int64_t C0 = N0C->getSExtValue();
11884 int64_t C1 = N1C->getSExtValue();
11885 int64_t CA, CB;
11886 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
11887 return SDValue();
11888 // Search for proper CA (non-zero) and CB that both are simm12.
11889 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
11890 !isInt<12>(C0 * (C1 / C0))) {
11891 CA = C1 / C0;
11892 CB = C1 % C0;
11893 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
11894 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
11895 CA = C1 / C0 + 1;
11896 CB = C1 % C0 - C0;
11897 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
11898 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
11899 CA = C1 / C0 - 1;
11900 CB = C1 % C0 + C0;
11901 } else
11902 return SDValue();
11903 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
11904 SDLoc DL(N);
11905 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
11906 DAG.getConstant(CA, DL, VT));
11907 SDValue New1 =
11908 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
11909 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
11912 // Try to turn (add (xor bool, 1), -1) into (neg bool).
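// For bool in {0, 1}, (xor bool, 1) computes 1 - bool, so adding -1 gives
// 0 - bool, i.e. (neg bool).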
11913 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
11914 SDValue N0 = N->getOperand(0);
11915 SDValue N1 = N->getOperand(1);
11916 EVT VT = N->getValueType(0);
11917 SDLoc DL(N);
11919 // RHS should be -1.
11920 if (!isAllOnesConstant(N1))
11921 return SDValue();
11923 // Look for (xor X, 1).
11924 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
11925 return SDValue();
11927 // First xor input should be 0 or 1.
11928 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
11929 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
11930 return SDValue();
11932 // Emit a negate of the setcc.
11933 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
11934 N0.getOperand(0));
11937 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
11938 const RISCVSubtarget &Subtarget) {
11939 if (SDValue V = combineAddOfBooleanXor(N, DAG))
11940 return V;
11941 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
11942 return V;
11943 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
11944 return V;
11945 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
11946 return V;
11947 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
11948 return V;
11950 // fold (add (select lhs, rhs, cc, 0, y), x) ->
11951 // (select lhs, rhs, cc, x, (add x, y))
11952 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
11955 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
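// E.g., (sub 5, (seteq x, y)) -> (add (setne x, y), 4), using
// (seteq x, y) == 1 - (setne x, y).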
11956 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
11957 SDValue N0 = N->getOperand(0);
11958 SDValue N1 = N->getOperand(1);
11959 EVT VT = N->getValueType(0);
11960 SDLoc DL(N);
11962 // Require a constant LHS.
11963 auto *N0C = dyn_cast<ConstantSDNode>(N0);
11964 if (!N0C)
11965 return SDValue();
11967 // All our optimizations involve subtracting 1 from the immediate and forming
11968 // an ADDI. Make sure the new immediate is valid for an ADDI.
11969 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
11970 if (!ImmValMinus1.isSignedIntN(12))
11971 return SDValue();
11973 SDValue NewLHS;
11974 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
11975 // (sub constant, (setcc x, y, eq/neq)) ->
11976 // (add (setcc x, y, neq/eq), constant - 1)
11977 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
11978 EVT SetCCOpVT = N1.getOperand(0).getValueType();
11979 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
11980 return SDValue();
11981 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
11982 NewLHS =
11983 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
11984 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
11985 N1.getOperand(0).getOpcode() == ISD::SETCC) {
11986 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
11987 // Since setcc returns a bool the xor is equivalent to 1-setcc.
11988 NewLHS = N1.getOperand(0);
11989 } else
11990 return SDValue();
11992 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
11993 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
11996 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
11997 const RISCVSubtarget &Subtarget) {
11998 if (SDValue V = combineSubOfBoolean(N, DAG))
11999 return V;
12001 SDValue N0 = N->getOperand(0);
12002 SDValue N1 = N->getOperand(1);
12003 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
12004 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
12005 isNullConstant(N1.getOperand(1))) {
12006 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
12007 if (CCVal == ISD::SETLT) {
12008 EVT VT = N->getValueType(0);
12009 SDLoc DL(N);
12010 unsigned ShAmt = N0.getValueSizeInBits() - 1;
12011 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
12012 DAG.getConstant(ShAmt, DL, VT));
12016 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
12017 // (select lhs, rhs, cc, x, (sub x, y))
12018 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
12021 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
12022 // Legalizing setcc can introduce xors like this. Doing this transform reduces
12023 // the number of xors and may allow the xor to fold into a branch condition.
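// E.g., (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1) and
//       (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1).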
12024 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
12025 SDValue N0 = N->getOperand(0);
12026 SDValue N1 = N->getOperand(1);
12027 bool IsAnd = N->getOpcode() == ISD::AND;
12029 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
12030 return SDValue();
12032 if (!N0.hasOneUse() || !N1.hasOneUse())
12033 return SDValue();
12035 SDValue N01 = N0.getOperand(1);
12036 SDValue N11 = N1.getOperand(1);
12038 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
12039 // (xor X, -1) based on the upper bits of the other operand being 0. If the
12040 // operation is And, allow one of the Xors to use -1.
12041 if (isOneConstant(N01)) {
12042 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
12043 return SDValue();
12044 } else if (isOneConstant(N11)) {
12045 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
12046 if (!(IsAnd && isAllOnesConstant(N01)))
12047 return SDValue();
12048 } else
12049 return SDValue();
12051 EVT VT = N->getValueType(0);
12053 SDValue N00 = N0.getOperand(0);
12054 SDValue N10 = N1.getOperand(0);
12056 // The LHS of the xors needs to be 0/1.
12057 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12058 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
12059 return SDValue();
12061 // Invert the opcode and insert a new xor.
12062 SDLoc DL(N);
12063 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12064 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
12065 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
12068 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
12069 const RISCVSubtarget &Subtarget) {
12070 SDValue N0 = N->getOperand(0);
12071 EVT VT = N->getValueType(0);
12073 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
12074 // extending X. This is safe since we only need the LSB after the shift and
12075 // shift amounts larger than 31 would produce poison. If we wait until
12076 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12077 // to use a BEXT instruction.
12078 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
12079 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
12080 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12081 SDLoc DL(N0);
12082 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12083 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12084 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12085 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
12088 return SDValue();
12091 // Combines two comparison operations and a logic operation into one selection
12092 // operation (min, max) and logic operation. Returns the newly constructed node
12093 // if the conditions for the optimization are satisfied.
12094 static SDValue performANDCombine(SDNode *N,
12095 TargetLowering::DAGCombinerInfo &DCI,
12096 const RISCVSubtarget &Subtarget) {
12097 SelectionDAG &DAG = DCI.DAG;
12099 SDValue N0 = N->getOperand(0);
12100 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
12101 // extending X. This is safe since we only need the LSB after the shift and
12102 // shift amounts larger than 31 would produce poison. If we wait until
12103 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12104 // to use a BEXT instruction.
12105 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12106 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
12107 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
12108 N0.hasOneUse()) {
12109 SDLoc DL(N);
12110 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12111 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12112 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12113 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
12114 DAG.getConstant(1, DL, MVT::i64));
12115 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12118 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12119 return V;
12120 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12121 return V;
12123 if (DCI.isAfterLegalizeDAG())
12124 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12125 return V;
12127 // fold (and (select lhs, rhs, cc, -1, y), x) ->
12128 // (select lhs, rhs, cc, x, (and x, y))
12129 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
12132 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
12133 // FIXME: Generalize to other binary operators with same operand.
12134 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
12135 SelectionDAG &DAG) {
12136 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
12138 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
12139 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
12140 !N0.hasOneUse() || !N1.hasOneUse())
12141 return SDValue();
12143 // Should have the same condition.
12144 SDValue Cond = N0.getOperand(1);
12145 if (Cond != N1.getOperand(1))
12146 return SDValue();
12148 SDValue TrueV = N0.getOperand(0);
12149 SDValue FalseV = N1.getOperand(0);
12151 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
12152 TrueV.getOperand(1) != FalseV.getOperand(1) ||
12153 !isOneConstant(TrueV.getOperand(1)) ||
12154 !TrueV.hasOneUse() || !FalseV.hasOneUse())
12155 return SDValue();
12157 EVT VT = N->getValueType(0);
12158 SDLoc DL(N);
12160 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
12161 Cond);
12162 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
12163 Cond);
12164 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
12165 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
12168 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
12169 const RISCVSubtarget &Subtarget) {
12170 SelectionDAG &DAG = DCI.DAG;
12172 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12173 return V;
12174 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12175 return V;
12177 if (DCI.isAfterLegalizeDAG())
12178 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12179 return V;
12181 // Look for an OR of CZERO_EQZ/NEZ with the same condition (the select idiom).
12182 // We may be able to pull a common operation out of the true and false value.
12183 SDValue N0 = N->getOperand(0);
12184 SDValue N1 = N->getOperand(1);
12185 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
12186 return V;
12187 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
12188 return V;
12190 // fold (or (select cond, 0, y), x) ->
12191 // (select cond, x, (or x, y))
12192 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12195 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
12196 const RISCVSubtarget &Subtarget) {
12197 SDValue N0 = N->getOperand(0);
12198 SDValue N1 = N->getOperand(1);
12200 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
12201 // NOTE: Assumes ROL being legal means ROLW is legal.
12202 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12203 if (N0.getOpcode() == RISCVISD::SLLW &&
12204 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
12205 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
12206 SDLoc DL(N);
12207 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
12208 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
12211 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
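// This holds because !(constant < y) == (y <= constant) == (y < constant + 1)
// for signed compares, and the simm12 check on (constant + 1) below rules out
// overflow.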
12212 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
12213 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
12214 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12215 if (ConstN00 && CC == ISD::SETLT) {
12216 EVT VT = N0.getValueType();
12217 SDLoc DL(N0);
12218 const APInt &Imm = ConstN00->getAPIntValue();
12219 if ((Imm + 1).isSignedIntN(12))
12220 return DAG.getSetCC(DL, VT, N0.getOperand(1),
12221 DAG.getConstant(Imm + 1, DL, VT), CC);
12225 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12226 return V;
12227 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12228 return V;
12230 // fold (xor (select cond, 0, y), x) ->
12231 // (select cond, x, (xor x, y))
12232 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12235 /// According to the property that indexed load/store instructions zero-extend
12236 /// their indices, try to narrow the type of the index operand.
12237 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
12238 if (isIndexTypeSigned(IndexType))
12239 return false;
12241 if (!N->hasOneUse())
12242 return false;
12244 EVT VT = N.getValueType();
12245 SDLoc DL(N);
12247 // In general, what we're doing here is seeing if we can sink a truncate to
12248 // a smaller element type into the expression tree building our index.
12249 // TODO: We can generalize this and handle a bunch more cases if useful.
12251 // Narrow a buildvector to the narrowest element type. This requires less
12252 // work and less register pressure at high LMUL, and creates smaller constants
12253 // which may be cheaper to materialize.
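// E.g., a v4i64 constant index vector {0, 8, 16, 24} has at most 5 active
// bits, so it can be truncated to v4i8.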
12254 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
12255 KnownBits Known = DAG.computeKnownBits(N);
12256 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
12257 LLVMContext &C = *DAG.getContext();
12258 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
12259 if (ResultVT.bitsLT(VT.getVectorElementType())) {
12260 N = DAG.getNode(ISD::TRUNCATE, DL,
12261 VT.changeVectorElementType(ResultVT), N);
12262 return true;
12266 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
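// E.g., (shl (zext v4i8 X to v4i64), splat 2) only needs 8 + 2 = 10 bits per
// element, so the extend and shift can instead be done in v4i16.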
12267 if (N.getOpcode() != ISD::SHL)
12268 return false;
12270 SDValue N0 = N.getOperand(0);
12271 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
12272 N0.getOpcode() != RISCVISD::VZEXT_VL)
12273 return false;
12274 if (!N0->hasOneUse())
12275 return false;
12277 APInt ShAmt;
12278 SDValue N1 = N.getOperand(1);
12279 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
12280 return false;
12282 SDValue Src = N0.getOperand(0);
12283 EVT SrcVT = Src.getValueType();
12284 unsigned SrcElen = SrcVT.getScalarSizeInBits();
12285 unsigned ShAmtV = ShAmt.getZExtValue();
12286 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
12287 NewElen = std::max(NewElen, 8U);
12289 // Skip if NewElen is not narrower than the original extended type.
12290 if (NewElen >= N0.getValueType().getScalarSizeInBits())
12291 return false;
12293 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
12294 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
12296 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
12297 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
12298 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
12299 return true;
12302 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
12303 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
12304 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
12305 // can become a sext.w instead of a shift pair.
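// E.g., for C1 = 0x80000000 the new constant C1' is 0xffffffff80000000, which
// a single LUI can materialize on RV64.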
12306 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
12307 const RISCVSubtarget &Subtarget) {
12308 SDValue N0 = N->getOperand(0);
12309 SDValue N1 = N->getOperand(1);
12310 EVT VT = N->getValueType(0);
12311 EVT OpVT = N0.getValueType();
12313 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
12314 return SDValue();
12316 // RHS needs to be a constant.
12317 auto *N1C = dyn_cast<ConstantSDNode>(N1);
12318 if (!N1C)
12319 return SDValue();
12321 // LHS needs to be (and X, 0xffffffff).
12322 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
12323 !isa<ConstantSDNode>(N0.getOperand(1)) ||
12324 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
12325 return SDValue();
12327 // Looking for an equality compare.
12328 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12329 if (!isIntEqualitySetCC(Cond))
12330 return SDValue();
12332 // Don't do this if the sign bit is provably zero; it will be turned back into
12333 // an AND.
12334 APInt SignMask = APInt::getOneBitSet(64, 31);
12335 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
12336 return SDValue();
12338 const APInt &C1 = N1C->getAPIntValue();
12340 SDLoc dl(N);
12341 // If the constant is larger than 2^32 - 1 it is impossible for both sides
12342 // to be equal.
12343 if (C1.getActiveBits() > 32)
12344 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
12346 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
12347 N0.getOperand(0), DAG.getValueType(MVT::i32));
12348 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
12349 dl, OpVT), Cond);
12352 static SDValue
12353 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
12354 const RISCVSubtarget &Subtarget) {
12355 SDValue Src = N->getOperand(0);
12356 EVT VT = N->getValueType(0);
12358 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
12359 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12360 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
12361 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
12362 Src.getOperand(0));
12364 return SDValue();
12367 namespace {
12368 // Forward declaration of the structure holding the necessary information to
12369 // apply a combine.
12370 struct CombineResult;
12372 /// Helper class for folding sign/zero extensions.
12373 /// In particular, this class is used for the following combines:
12374 /// add_vl -> vwadd(u) | vwadd(u)_w
12375 /// sub_vl -> vwsub(u) | vwsub(u)_w
12376 /// mul_vl -> vwmul(u) | vwmul_su
12378 /// An object of this class represents an operand of the operation we want to
12379 /// combine.
12380 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
12381 /// NodeExtensionHelper for `a` and one for `b`.
12383 /// This class abstracts away how the extension is materialized and
12384 /// how its Mask, VL, and number of users affect the combines.
12386 /// In particular:
12387 /// - VWADD_W is conceptually == add(op0, sext(op1))
12388 /// - VWADDU_W == add(op0, zext(op1))
12389 /// - VWSUB_W == sub(op0, sext(op1))
12390 /// - VWSUBU_W == sub(op0, zext(op1))
12392 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
12393 /// zext|sext(smaller_value).
12394 struct NodeExtensionHelper {
12395 /// Records if this operand is like being zero extended.
12396 bool SupportsZExt;
12397 /// Records if this operand is like being sign extended.
12398 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
12399 /// instance, a splat constant (e.g., 3) would support being both sign and
12400 /// zero extended.
12401 bool SupportsSExt;
12402 /// This boolean captures whether we care if this operand would still be
12403 /// around after the folding happens.
12404 bool EnforceOneUse;
12405 /// Records if this operand's mask needs to match the mask of the operation
12406 /// that it will fold into.
12407 bool CheckMask;
12408 /// Value of the Mask for this operand.
12409 /// It may be SDValue().
12410 SDValue Mask;
12411 /// Value of the vector length operand.
12412 /// It may be SDValue().
12413 SDValue VL;
12414 /// Original value that this NodeExtensionHelper represents.
12415 SDValue OrigOperand;
12417 /// Get the value feeding the extension or the value itself.
12418 /// E.g., for zext(a), this would return a.
12419 SDValue getSource() const {
12420 switch (OrigOperand.getOpcode()) {
12421 case RISCVISD::VSEXT_VL:
12422 case RISCVISD::VZEXT_VL:
12423 return OrigOperand.getOperand(0);
12424 default:
12425 return OrigOperand;
12429 /// Check if this instance represents a splat.
12430 bool isSplat() const {
12431 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
12434 /// Get or create a value that can feed \p Root with the given extension \p
12435 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
12436 /// \see ::getSource().
12437 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
12438 std::optional<bool> SExt) const {
12439 if (!SExt.has_value())
12440 return OrigOperand;
12442 MVT NarrowVT = getNarrowType(Root);
12444 SDValue Source = getSource();
12445 if (Source.getValueType() == NarrowVT)
12446 return Source;
12448 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
12450 // If we need an extension, we should be changing the type.
12451 SDLoc DL(Root);
12452 auto [Mask, VL] = getMaskAndVL(Root);
12453 switch (OrigOperand.getOpcode()) {
12454 case RISCVISD::VSEXT_VL:
12455 case RISCVISD::VZEXT_VL:
12456 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
12457 case RISCVISD::VMV_V_X_VL:
12458 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
12459 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
12460 default:
12461 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
12462 // and that operand should already have the right NarrowVT so no
12463 // extension should be required at this point.
12464 llvm_unreachable("Unsupported opcode");
12468 /// Helper function to get the narrow type for \p Root.
12469 /// The narrow type is the type of \p Root where we divided the size of each
12470 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
12471 /// \pre The size of the type of the elements of Root must be a multiple of 2
12472 /// and be greater than 16.
12473 static MVT getNarrowType(const SDNode *Root) {
12474 MVT VT = Root->getSimpleValueType(0);
12476 // Determine the narrow size.
12477 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
12478 assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
12479 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
12480 VT.getVectorElementCount());
12481 return NarrowVT;
12484 /// Return the opcode required to materialize the folding of the sign
12485 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
12486 /// both operands for \p Opcode.
12487 /// Put differently, get the opcode to materialize:
12488 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
12489 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
12490 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
12491 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
12492 switch (Opcode) {
12493 case RISCVISD::ADD_VL:
12494 case RISCVISD::VWADD_W_VL:
12495 case RISCVISD::VWADDU_W_VL:
12496 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
12497 case RISCVISD::MUL_VL:
12498 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
12499 case RISCVISD::SUB_VL:
12500 case RISCVISD::VWSUB_W_VL:
12501 case RISCVISD::VWSUBU_W_VL:
12502 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
12503 default:
12504 llvm_unreachable("Unexpected opcode");
12508 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
12509 /// newOpcode(a, b).
12510 static unsigned getSUOpcode(unsigned Opcode) {
12511 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
12512 return RISCVISD::VWMULSU_VL;
12515 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
12516 /// newOpcode(a, b).
12517 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
12518 switch (Opcode) {
12519 case RISCVISD::ADD_VL:
12520 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
12521 case RISCVISD::SUB_VL:
12522 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
12523 default:
12524 llvm_unreachable("Unexpected opcode");
12528 using CombineToTry = std::function<std::optional<CombineResult>(
12529 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
12530 const NodeExtensionHelper & /*RHS*/)>;
12532 /// Check if this node needs to be fully folded or extended for all users.
12533 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
12535 /// Helper method to set the various fields of this struct based on the
12536 /// type of \p Root.
12537 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
12538 SupportsZExt = false;
12539 SupportsSExt = false;
12540 EnforceOneUse = true;
12541 CheckMask = true;
12542 switch (OrigOperand.getOpcode()) {
12543 case RISCVISD::VZEXT_VL:
12544 SupportsZExt = true;
12545 Mask = OrigOperand.getOperand(1);
12546 VL = OrigOperand.getOperand(2);
12547 break;
12548 case RISCVISD::VSEXT_VL:
12549 SupportsSExt = true;
12550 Mask = OrigOperand.getOperand(1);
12551 VL = OrigOperand.getOperand(2);
12552 break;
12553 case RISCVISD::VMV_V_X_VL: {
12554 // Historically, we didn't care about splat values not disappearing during
12555 // combines.
12556 EnforceOneUse = false;
12557 CheckMask = false;
12558 VL = OrigOperand.getOperand(2);
12560 // The operand is a splat of a scalar.
12562 // The passthru must be undef for the operation to be tail agnostic.
12563 if (!OrigOperand.getOperand(0).isUndef())
12564 break;
12566 // Get the scalar value.
12567 SDValue Op = OrigOperand.getOperand(1);
12569 // See if we have enough sign bits or zero bits in the scalar to use a
12570 // widening opcode by splatting to smaller element size.
12571 MVT VT = Root->getSimpleValueType(0);
12572 unsigned EltBits = VT.getScalarSizeInBits();
12573 unsigned ScalarBits = Op.getValueSizeInBits();
12574 // Make sure we're getting all element bits from the scalar register.
12575 // FIXME: Support implicit sign extension of vmv.v.x?
12576 if (ScalarBits < EltBits)
12577 break;
12579 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
12580 // If the narrow type cannot be expressed with a legal VMV,
12581 // this is not a valid candidate.
12582 if (NarrowSize < 8)
12583 break;
12585 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
12586 SupportsSExt = true;
12587 if (DAG.MaskedValueIsZero(Op,
12588 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
12589 SupportsZExt = true;
12590 break;
12592 default:
12593 break;
12597 /// Check if \p Root supports any extension folding combines.
12598 static bool isSupportedRoot(const SDNode *Root) {
12599 switch (Root->getOpcode()) {
12600 case RISCVISD::ADD_VL:
12601 case RISCVISD::MUL_VL:
12602 case RISCVISD::VWADD_W_VL:
12603 case RISCVISD::VWADDU_W_VL:
12604 case RISCVISD::SUB_VL:
12605 case RISCVISD::VWSUB_W_VL:
12606 case RISCVISD::VWSUBU_W_VL:
12607 return true;
12608 default:
12609 return false;
12613 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
12614 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
12615 assert(isSupportedRoot(Root) && "Trying to build a helper with an "
12616 "unsupported root");
12617 assert(OperandIdx < 2 && "Requesting something other than LHS or RHS");
12618 OrigOperand = Root->getOperand(OperandIdx);
12620 unsigned Opc = Root->getOpcode();
12621 switch (Opc) {
12622 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
12623 // <ADD|SUB>(LHS, S|ZEXT(RHS))
12624 case RISCVISD::VWADD_W_VL:
12625 case RISCVISD::VWADDU_W_VL:
12626 case RISCVISD::VWSUB_W_VL:
12627 case RISCVISD::VWSUBU_W_VL:
12628 if (OperandIdx == 1) {
12629 SupportsZExt =
12630 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
12631 SupportsSExt = !SupportsZExt;
12632 std::tie(Mask, VL) = getMaskAndVL(Root);
12633 CheckMask = true;
12634 // There's no existing extension here, so we don't have to worry about
12635 // making sure it gets removed.
12636 EnforceOneUse = false;
12637 break;
12639 [[fallthrough]];
12640 default:
12641 fillUpExtensionSupport(Root, DAG);
12642 break;
12646 /// Check if this operand is compatible with the given vector length \p VL.
12647 bool isVLCompatible(SDValue VL) const {
12648 return this->VL != SDValue() && this->VL == VL;
12651 /// Check if this operand is compatible with the given \p Mask.
12652 bool isMaskCompatible(SDValue Mask) const {
12653 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
12656 /// Helper function to get the Mask and VL from \p Root.
12657 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
12658 assert(isSupportedRoot(Root) && "Unexpected root");
12659 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
12662 /// Check if the Mask and VL of this operand are compatible with \p Root.
12663 bool areVLAndMaskCompatible(const SDNode *Root) const {
12664 auto [Mask, VL] = getMaskAndVL(Root);
12665 return isMaskCompatible(Mask) && isVLCompatible(VL);
12668 /// Helper function to check if \p N is commutative with respect to the
12669 /// foldings that are supported by this class.
12670 static bool isCommutative(const SDNode *N) {
12671 switch (N->getOpcode()) {
12672 case RISCVISD::ADD_VL:
12673 case RISCVISD::MUL_VL:
12674 case RISCVISD::VWADD_W_VL:
12675 case RISCVISD::VWADDU_W_VL:
12676 return true;
12677 case RISCVISD::SUB_VL:
12678 case RISCVISD::VWSUB_W_VL:
12679 case RISCVISD::VWSUBU_W_VL:
12680 return false;
12681 default:
12682 llvm_unreachable("Unexpected opcode");
12686 /// Get a list of combine to try for folding extensions in \p Root.
12687 /// Note that each returned CombineToTry function doesn't actually modify
12688 /// anything. Instead they produce an optional CombineResult that, if not
12689 /// std::nullopt, needs to be materialized for the combine to be applied.
12690 /// \see CombineResult::materialize.
12691 /// If the related CombineToTry function returns std::nullopt, that means the
12692 /// combine didn't match.
12693 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
12696 /// Helper structure that holds all the necessary information to materialize a
12697 /// combine that does some extension folding.
12698 struct CombineResult {
12699 /// Opcode to be generated when materializing the combine.
12700 unsigned TargetOpcode;
12701 // No value means no extension is needed. If extension is needed, the value
12702 // indicates if it needs to be sign extended.
12703 std::optional<bool> SExtLHS;
12704 std::optional<bool> SExtRHS;
12705 /// Root of the combine.
12706 SDNode *Root;
12707 /// LHS of the TargetOpcode.
12708 NodeExtensionHelper LHS;
12709 /// RHS of the TargetOpcode.
12710 NodeExtensionHelper RHS;
12712 CombineResult(unsigned TargetOpcode, SDNode *Root,
12713 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
12714 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
12715 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
12716 Root(Root), LHS(LHS), RHS(RHS) {}
12718 /// Return a value that uses TargetOpcode and that can be used to replace
12719 /// Root.
12720 /// The actual replacement is *not* done in that method.
12721 SDValue materialize(SelectionDAG &DAG) const {
12722 SDValue Mask, VL, Merge;
12723 std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
12724 Merge = Root->getOperand(2);
12725 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
12726 LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
12727 RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
12728 Mask, VL);
12732 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
12733 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
12734 /// are zext) and LHS and RHS can be folded into Root.
12735 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
12737 /// \note If the pattern can match with both zext and sext, the returned
12738 /// CombineResult will feature the zext result.
12740 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12741 /// can be used to apply the pattern.
12742 static std::optional<CombineResult>
12743 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
12744 const NodeExtensionHelper &RHS, bool AllowSExt,
12745 bool AllowZExt) {
12746 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
12747 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
12748 return std::nullopt;
12749 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
12750 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
12751 Root->getOpcode(), /*IsSExt=*/false),
12752 Root, LHS, /*SExtLHS=*/false, RHS,
12753 /*SExtRHS=*/false);
12754 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
12755 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
12756 Root->getOpcode(), /*IsSExt=*/true),
12757 Root, LHS, /*SExtLHS=*/true, RHS,
12758 /*SExtRHS=*/true);
12759 return std::nullopt;
12762 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
12763 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
12764 /// are zext) and LHS and RHS can be folded into Root.
12766 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12767 /// can be used to apply the pattern.
12768 static std::optional<CombineResult>
12769 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
12770 const NodeExtensionHelper &RHS) {
12771 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
12772 /*AllowZExt=*/true);
12775 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
12777 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12778 /// can be used to apply the pattern.
12779 static std::optional<CombineResult>
12780 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
12781 const NodeExtensionHelper &RHS) {
12782 if (!RHS.areVLAndMaskCompatible(Root))
12783 return std::nullopt;
12785 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
12786 // sext/zext?
12787 // Control this behavior behind an option (AllowSplatInVW_W) for testing
12788 // purposes.
12789 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
12790 return CombineResult(
12791 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
12792 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
12793 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
12794 return CombineResult(
12795 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
12796 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
12797 return std::nullopt;
12800 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
12802 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12803 /// can be used to apply the pattern.
12804 static std::optional<CombineResult>
12805 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
12806 const NodeExtensionHelper &RHS) {
12807 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
12808 /*AllowZExt=*/false);
12811 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
12813 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12814 /// can be used to apply the pattern.
12815 static std::optional<CombineResult>
12816 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
12817 const NodeExtensionHelper &RHS) {
12818 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
12819 /*AllowZExt=*/true);
12822 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
12824 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12825 /// can be used to apply the pattern.
12826 static std::optional<CombineResult>
12827 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
12828 const NodeExtensionHelper &RHS) {
12829 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
12830 return std::nullopt;
12831 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
12832 return std::nullopt;
12833 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
12834 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
12837 SmallVector<NodeExtensionHelper::CombineToTry>
12838 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
12839 SmallVector<CombineToTry> Strategies;
12840 switch (Root->getOpcode()) {
12841 case RISCVISD::ADD_VL:
12842 case RISCVISD::SUB_VL:
12843 // add|sub -> vwadd(u)|vwsub(u)
12844 Strategies.push_back(canFoldToVWWithSameExtension);
12845 // add|sub -> vwadd(u)_w|vwsub(u)_w
12846 Strategies.push_back(canFoldToVW_W);
12847 break;
12848 case RISCVISD::MUL_VL:
12849 // mul -> vwmul(u)
12850 Strategies.push_back(canFoldToVWWithSameExtension);
12851 // mul -> vwmulsu
12852 Strategies.push_back(canFoldToVW_SU);
12853 break;
12854 case RISCVISD::VWADD_W_VL:
12855 case RISCVISD::VWSUB_W_VL:
12856 // vwadd_w|vwsub_w -> vwadd|vwsub
12857 Strategies.push_back(canFoldToVWWithSEXT);
12858 break;
12859 case RISCVISD::VWADDU_W_VL:
12860 case RISCVISD::VWSUBU_W_VL:
12861 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
12862 Strategies.push_back(canFoldToVWWithZEXT);
12863 break;
12864 default:
12865 llvm_unreachable("Unexpected opcode");
12867 return Strategies;
12869 } // End anonymous namespace.
12871 /// Combine a binary operation to its equivalent VW or VW_W form.
12872 /// The supported combines are:
12873 /// add_vl -> vwadd(u) | vwadd(u)_w
12874 /// sub_vl -> vwsub(u) | vwsub(u)_w
12875 /// mul_vl -> vwmul(u) | vwmul_su
12876 /// vwadd(u)_w -> vwadd(u)
12877 /// vwsub(u)_w -> vwsub(u)
12878 static SDValue
12879 combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
12880 SelectionDAG &DAG = DCI.DAG;
12882 assert(NodeExtensionHelper::isSupportedRoot(N) &&
12883 "Shouldn't have called this method");
12884 SmallVector<SDNode *> Worklist;
12885 SmallSet<SDNode *, 8> Inserted;
12886 Worklist.push_back(N);
12887 Inserted.insert(N);
12888 SmallVector<CombineResult> CombinesToApply;
12890 while (!Worklist.empty()) {
12891 SDNode *Root = Worklist.pop_back_val();
12892 if (!NodeExtensionHelper::isSupportedRoot(Root))
12893 return SDValue();
12895 NodeExtensionHelper LHS(N, 0, DAG);
12896 NodeExtensionHelper RHS(N, 1, DAG);
12897 auto AppendUsersIfNeeded = [&Worklist,
12898 &Inserted](const NodeExtensionHelper &Op) {
12899 if (Op.needToPromoteOtherUsers()) {
12900 for (SDNode *TheUse : Op.OrigOperand->uses()) {
12901 if (Inserted.insert(TheUse).second)
12902 Worklist.push_back(TheUse);
12907 // Control the compile time by limiting the number of nodes we look at in
12908 // total.
12909 if (Inserted.size() > ExtensionMaxWebSize)
12910 return SDValue();
12912 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
12913 NodeExtensionHelper::getSupportedFoldings(N);
12915 assert(!FoldingStrategies.empty() && "Nothing to be folded");
12916 bool Matched = false;
12917 for (int Attempt = 0;
12918 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
12919 ++Attempt) {
12921 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
12922 FoldingStrategies) {
12923 std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
12924 if (Res) {
12925 Matched = true;
12926 CombinesToApply.push_back(*Res);
12927 // All the inputs that are extended need to be folded, otherwise
12928 // we would be left with both the old input (since it may still be used)
12929 // and the new one.
12930 if (Res->SExtLHS.has_value())
12931 AppendUsersIfNeeded(LHS);
12932 if (Res->SExtRHS.has_value())
12933 AppendUsersIfNeeded(RHS);
12934 break;
12937 std::swap(LHS, RHS);
12939 // Right now we use an all-or-nothing approach.
12940 if (!Matched)
12941 return SDValue();
12943 // Store the value for the replacement of the input node separately.
12944 SDValue InputRootReplacement;
12945 // We do the RAUW after we materialize all the combines, because some replaced
12946 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
12947 // some of these nodes may appear in the NodeExtensionHelpers of some of the
12948 // yet-to-be-visited CombinesToApply roots.
12949 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
12950 ValuesToReplace.reserve(CombinesToApply.size());
12951 for (CombineResult Res : CombinesToApply) {
12952 SDValue NewValue = Res.materialize(DAG);
12953 if (!InputRootReplacement) {
12954 assert(Res.Root == N &&
12955 "First element is expected to be the current node");
12956 InputRootReplacement = NewValue;
12957 } else {
12958 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
12961 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
12962 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
12963 DCI.AddToWorklist(OldNewValues.second.getNode());
12965 return InputRootReplacement;
12968 // Helper function for performMemPairCombine.
12969 // Try to combine the memory loads/stores LSNode1 and LSNode2
12970 // into a single memory pair operation.
12971 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
12972 LSBaseSDNode *LSNode2, SDValue BasePtr,
12973 uint64_t Imm) {
12974 SmallPtrSet<const SDNode *, 32> Visited;
12975 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
12977 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
12978 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
12979 return SDValue();
12981 MachineFunction &MF = DAG.getMachineFunction();
12982 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
12984 // The new operation has twice the width.
12985 MVT XLenVT = Subtarget.getXLenVT();
12986 EVT MemVT = LSNode1->getMemoryVT();
12987 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
12988 MachineMemOperand *MMO = LSNode1->getMemOperand();
12989 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
12990 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
12992 if (LSNode1->getOpcode() == ISD::LOAD) {
12993 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
12994 unsigned Opcode;
12995 if (MemVT == MVT::i32)
12996 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
12997 else
12998 Opcode = RISCVISD::TH_LDD;
13000 SDValue Res = DAG.getMemIntrinsicNode(
13001 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
13002 {LSNode1->getChain(), BasePtr,
13003 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13004 NewMemVT, NewMMO);
13006 SDValue Node1 =
13007 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
13008 SDValue Node2 =
13009 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
13011 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
13012 return Node1;
13013 } else {
13014 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
13016 SDValue Res = DAG.getMemIntrinsicNode(
13017 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
13018 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
13019 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13020 NewMemVT, NewMMO);
13022 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
13023 return Res;
13027 // Try to combine two adjacent loads/stores to a single pair instruction from
13028 // the XTHeadMemPair vendor extension.
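// E.g., two i64 loads from (base + 16) and (base + 24) satisfy the offset
// constraints checked below and are merged into a single TH_LDD node.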
13029 static SDValue performMemPairCombine(SDNode *N,
13030 TargetLowering::DAGCombinerInfo &DCI) {
13031 SelectionDAG &DAG = DCI.DAG;
13032 MachineFunction &MF = DAG.getMachineFunction();
13033 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
13035 // Target does not support load/store pair.
13036 if (!Subtarget.hasVendorXTHeadMemPair())
13037 return SDValue();
13039 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
13040 EVT MemVT = LSNode1->getMemoryVT();
13041 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
13043 // No volatile, indexed or atomic loads/stores.
13044 if (!LSNode1->isSimple() || LSNode1->isIndexed())
13045 return SDValue();
13047 // Function to get a base + constant representation from a memory value.
13048 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
13049 if (Ptr->getOpcode() == ISD::ADD)
13050 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
13051 return {Ptr->getOperand(0), C1->getZExtValue()};
13052 return {Ptr, 0};
13055 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
13057 SDValue Chain = N->getOperand(0);
13058 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
13059 UI != UE; ++UI) {
13060 SDUse &Use = UI.getUse();
13061 if (Use.getUser() != N && Use.getResNo() == 0 &&
13062 Use.getUser()->getOpcode() == N->getOpcode()) {
13063 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
13065 // No volatile, indexed or atomic loads/stores.
13066 if (!LSNode2->isSimple() || LSNode2->isIndexed())
13067 continue;
13069 // Check if LSNode1 and LSNode2 have the same type and extension.
13070 if (LSNode1->getOpcode() == ISD::LOAD)
13071 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
13072 cast<LoadSDNode>(LSNode1)->getExtensionType())
13073 continue;
13075 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
13076 continue;
13078 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
13080 // Check if the base pointer is the same for both instructions.
13081 if (Base1 != Base2)
13082 continue;
13084 // Check if the offsets match the XTHeadMemPair encoding constraints.
13085 bool Valid = false;
13086 if (MemVT == MVT::i32) {
13087 // Check for adjacent i32 values and a 2-bit index.
13088 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
13089 Valid = true;
13090 } else if (MemVT == MVT::i64) {
13091 // Check for adjacent i64 values and a 2-bit index.
13092 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
13093 Valid = true;
13096 if (!Valid)
13097 continue;
13099 // Try to combine.
13100 if (SDValue Res =
13101 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
13102 return Res;
13106 return SDValue();
13109 // Fold
13110 // (fp_to_int (froundeven X)) -> fcvt X, rne
13111 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
13112 // (fp_to_int (ffloor X)) -> fcvt X, rdn
13113 // (fp_to_int (fceil X)) -> fcvt X, rup
13114 // (fp_to_int (fround X)) -> fcvt X, rmm
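// For example, on RV64 with F, (fp_to_sint (ffloor X:f32)) producing i64
// becomes an FCVT_X node with rounding mode rdn, which is expected to select
// to a single "fcvt.l.s a0, fa0, rdn" instead of a separate floor followed by
// a conversion (a sketch; the exact selection depends on the enabled
// extensions and types).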
13115 static SDValue performFP_TO_INTCombine(SDNode *N,
13116 TargetLowering::DAGCombinerInfo &DCI,
13117 const RISCVSubtarget &Subtarget) {
13118 SelectionDAG &DAG = DCI.DAG;
13119 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13120 MVT XLenVT = Subtarget.getXLenVT();
13122 SDValue Src = N->getOperand(0);
13124 // Don't do this for strict-fp Src.
13125 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13126 return SDValue();
13128 // Ensure the FP type is legal.
13129 if (!TLI.isTypeLegal(Src.getValueType()))
13130 return SDValue();
13132 // Don't do this for f16 with Zfhmin and not Zfh.
13133 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13134 return SDValue();
13136 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13137 // If the result is invalid, we didn't find a foldable instruction.
13138 // If the result is dynamic, then we found an frint which we don't yet
13139 // support. It will cause 7 to be written to the FRM CSR for vector.
13140 // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below.
13141 if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN)
13142 return SDValue();
13144 SDLoc DL(N);
13145 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13146 EVT VT = N->getValueType(0);
13148 if (VT.isVector() && TLI.isTypeLegal(VT)) {
13149 MVT SrcVT = Src.getSimpleValueType();
13150 MVT SrcContainerVT = SrcVT;
13151 MVT ContainerVT = VT.getSimpleVT();
13152 SDValue XVal = Src.getOperand(0);
13154 // For widening and narrowing conversions we just combine it into a
13155 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
13156 // end up getting lowered to their appropriate pseudo instructions based on
13157 // their operand types
13158 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
13159 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
13160 return SDValue();
13162 // Make fixed-length vectors scalable first
13163 if (SrcVT.isFixedLengthVector()) {
13164 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
13165 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
13166 ContainerVT =
13167 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
13170 auto [Mask, VL] =
13171 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
13173 SDValue FpToInt;
13174 if (FRM == RISCVFPRndMode::RTZ) {
13175 // Use the dedicated trunc static rounding mode if we're truncating so we
13176 // don't need to generate calls to fsrmi/fsrm
13177 unsigned Opc =
13178 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
13179 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
13180 } else {
13181 unsigned Opc =
13182 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
13183 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
13184 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
13187 // If converted from fixed-length to scalable, convert back
13188 if (VT.isFixedLengthVector())
13189 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
13191 return FpToInt;
13194 // Only handle XLen or i32 types. Other types narrower than XLen will
13195 // eventually be legalized to XLenVT.
13196 if (VT != MVT::i32 && VT != XLenVT)
13197 return SDValue();
13199 unsigned Opc;
13200 if (VT == XLenVT)
13201 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13202 else
13203 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13205 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
13206 DAG.getTargetConstant(FRM, DL, XLenVT));
13207 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
13210 // Fold
13211 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
13212 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
13213 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
13214 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
13215 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
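// For example, (fp_to_int_sat (ftrunc X)) with an XLen-sized result becomes
// FCVT_X with rtz, wrapped in a SETUO-based select that returns 0 when X is
// NaN; the conversion itself already saturates to the register size.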
13216 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
13217 TargetLowering::DAGCombinerInfo &DCI,
13218 const RISCVSubtarget &Subtarget) {
13219 SelectionDAG &DAG = DCI.DAG;
13220 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13221 MVT XLenVT = Subtarget.getXLenVT();
13223 // Only handle XLen types. Other types narrower than XLen will eventually be
13224 // legalized to XLenVT.
13225 EVT DstVT = N->getValueType(0);
13226 if (DstVT != XLenVT)
13227 return SDValue();
13229 SDValue Src = N->getOperand(0);
13231 // Don't do this for strict-fp Src.
13232 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13233 return SDValue();
13235 // Ensure the FP type is also legal.
13236 if (!TLI.isTypeLegal(Src.getValueType()))
13237 return SDValue();
13239 // Don't do this for f16 with Zfhmin and not Zfh.
13240 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13241 return SDValue();
13243 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13245 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13246 if (FRM == RISCVFPRndMode::Invalid)
13247 return SDValue();
13249 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
13251 unsigned Opc;
13252 if (SatVT == DstVT)
13253 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13254 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
13255 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13256 else
13257 return SDValue();
13258 // FIXME: Support other SatVTs by clamping before or after the conversion.
13260 Src = Src.getOperand(0);
13262 SDLoc DL(N);
13263 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
13264 DAG.getTargetConstant(FRM, DL, XLenVT));
13266 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
13267 // extend.
13268 if (Opc == RISCVISD::FCVT_WU_RV64)
13269 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
13271 // RISC-V FP-to-int conversions saturate to the destination register size, but
13272 // don't produce 0 for nan.
13273 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
13274 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
13277 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
13278 // smaller than XLenVT.
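// For example, for an i16 value, (bitreverse (bswap X)) swaps the two bytes
// and then reverses all 16 bits, which is equivalent to reversing the bits
// within each byte while keeping the byte order, i.e. a single BREV8.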
13279 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
13280 const RISCVSubtarget &Subtarget) {
13281 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
13283 SDValue Src = N->getOperand(0);
13284 if (Src.getOpcode() != ISD::BSWAP)
13285 return SDValue();
13287 EVT VT = N->getValueType(0);
13288 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
13289 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
13290 return SDValue();
13292 SDLoc DL(N);
13293 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
13296 // Convert from one FMA opcode to another based on whether we are negating the
13297 // multiply result and/or the accumulator.
13298 // NOTE: Only supports RVV operations with VL.
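// For example, VFMADD_VL computes (A * B) + C; negating the product gives
// -(A * B) + C, i.e. VFNMSUB_VL, and additionally negating the accumulator
// gives -(A * B) - C, i.e. VFNMADD_VL. The switches below encode exactly this
// mapping (including the STRICT_ variants).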
13299 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
13300 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
13301 if (NegMul) {
13302 // clang-format off
13303 switch (Opcode) {
13304 default: llvm_unreachable("Unexpected opcode");
13305 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
13306 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
13307 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
13308 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
13309 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
13310 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
13311 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
13312 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
13314 // clang-format on
13317 // Negating the accumulator changes ADD<->SUB.
13318 if (NegAcc) {
13319 // clang-format off
13320 switch (Opcode) {
13321 default: llvm_unreachable("Unexpected opcode");
13322 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
13323 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
13324 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
13325 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
13326 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
13327 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
13328 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
13329 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
13331 // clang-format on
13334 return Opcode;
13337 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
13338 // Fold FNEG_VL into FMA opcodes.
13339 // The first operand of strict-fp is chain.
13340 unsigned Offset = N->isTargetStrictFPOpcode();
13341 SDValue A = N->getOperand(0 + Offset);
13342 SDValue B = N->getOperand(1 + Offset);
13343 SDValue C = N->getOperand(2 + Offset);
13344 SDValue Mask = N->getOperand(3 + Offset);
13345 SDValue VL = N->getOperand(4 + Offset);
13347 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
13348 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
13349 V.getOperand(2) == VL) {
13350 // Return the negated input.
13351 V = V.getOperand(0);
13352 return true;
13355 return false;
13358 bool NegA = invertIfNegative(A);
13359 bool NegB = invertIfNegative(B);
13360 bool NegC = invertIfNegative(C);
13362 // If no operands are negated, we're done.
13363 if (!NegA && !NegB && !NegC)
13364 return SDValue();
13366 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
13367 if (N->isTargetStrictFPOpcode())
13368 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
13369 {N->getOperand(0), A, B, C, Mask, VL});
13370 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
13371 VL);
13374 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
13375 const RISCVSubtarget &Subtarget) {
13376 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
13377 return V;
13379 if (N->getValueType(0).isScalableVector() &&
13380 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13381 (Subtarget.hasVInstructionsF16Minimal() &&
13382 !Subtarget.hasVInstructionsF16())) {
13383 return SDValue();
13386 // FIXME: Ignore strict opcodes for now.
13387 if (N->isTargetStrictFPOpcode())
13388 return SDValue();
13390 // Try to form widening FMA.
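// e.g. (vfmadd_vl (fp_extend_vl A), (fp_extend_vl B), C, Mask, VL)
//        -> (vfwmadd_vl A, B, C, Mask, VL)
// provided both extends use the same Mask/VL and have no other uses.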
13391 SDValue Op0 = N->getOperand(0);
13392 SDValue Op1 = N->getOperand(1);
13393 SDValue Mask = N->getOperand(3);
13394 SDValue VL = N->getOperand(4);
13396 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
13397 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
13398 return SDValue();
13400 // TODO: Refactor to handle more complex cases similar to
13401 // combineBinOp_VLToVWBinOp_VL.
13402 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
13403 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
13404 return SDValue();
13406 // Check the mask and VL are the same.
13407 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
13408 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
13409 return SDValue();
13411 unsigned NewOpc;
13412 switch (N->getOpcode()) {
13413 default:
13414 llvm_unreachable("Unexpected opcode");
13415 case RISCVISD::VFMADD_VL:
13416 NewOpc = RISCVISD::VFWMADD_VL;
13417 break;
13418 case RISCVISD::VFNMSUB_VL:
13419 NewOpc = RISCVISD::VFWNMSUB_VL;
13420 break;
13421 case RISCVISD::VFNMADD_VL:
13422 NewOpc = RISCVISD::VFWNMADD_VL;
13423 break;
13424 case RISCVISD::VFMSUB_VL:
13425 NewOpc = RISCVISD::VFWMSUB_VL;
13426 break;
13429 Op0 = Op0.getOperand(0);
13430 Op1 = Op1.getOperand(0);
13432 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
13433 N->getOperand(2), Mask, VL);
13436 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
13437 const RISCVSubtarget &Subtarget) {
13438 if (N->getValueType(0).isScalableVector() &&
13439 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13440 (Subtarget.hasVInstructionsF16Minimal() &&
13441 !Subtarget.hasVInstructionsF16())) {
13442 return SDValue();
13445 // FIXME: Ignore strict opcodes for now.
13446 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
13448 // Try to form widening multiply.
13449 SDValue Op0 = N->getOperand(0);
13450 SDValue Op1 = N->getOperand(1);
13451 SDValue Merge = N->getOperand(2);
13452 SDValue Mask = N->getOperand(3);
13453 SDValue VL = N->getOperand(4);
13455 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
13456 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
13457 return SDValue();
13459 // TODO: Refactor to handle more complex cases similar to
13460 // combineBinOp_VLToVWBinOp_VL.
13461 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
13462 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
13463 return SDValue();
13465 // Check the mask and VL are the same.
13466 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
13467 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
13468 return SDValue();
13470 Op0 = Op0.getOperand(0);
13471 Op1 = Op1.getOperand(0);
13473 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
13474 Op1, Merge, Mask, VL);
13477 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
13478 const RISCVSubtarget &Subtarget) {
13479 if (N->getValueType(0).isScalableVector() &&
13480 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13481 (Subtarget.hasVInstructionsF16Minimal() &&
13482 !Subtarget.hasVInstructionsF16())) {
13483 return SDValue();
13486 SDValue Op0 = N->getOperand(0);
13487 SDValue Op1 = N->getOperand(1);
13488 SDValue Merge = N->getOperand(2);
13489 SDValue Mask = N->getOperand(3);
13490 SDValue VL = N->getOperand(4);
13492 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
13494 // Look for foldable FP_EXTENDS.
13495 bool Op0IsExtend =
13496 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
13497 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
13498 bool Op1IsExtend =
13499 (Op0 == Op1 && Op0IsExtend) ||
13500 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
13502 // Check the mask and VL.
13503 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
13504 Op0IsExtend = false;
13505 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
13506 Op1IsExtend = false;
13508 // Canonicalize.
13509 if (!Op1IsExtend) {
13510 // Sub requires at least operand 1 to be an extend.
13511 if (!IsAdd)
13512 return SDValue();
13514 // Add is commutable; if the other operand is foldable, swap them.
13515 if (!Op0IsExtend)
13516 return SDValue();
13518 std::swap(Op0, Op1);
13519 std::swap(Op0IsExtend, Op1IsExtend);
13522 // Op1 is a foldable extend. Op0 might be foldable.
13523 Op1 = Op1.getOperand(0);
13524 if (Op0IsExtend)
13525 Op0 = Op0.getOperand(0);
13527 unsigned Opc;
13528 if (IsAdd)
13529 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
13530 else
13531 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
13533 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
13534 VL);
13537 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
13538 const RISCVSubtarget &Subtarget) {
13539 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
13541 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
13542 return SDValue();
13544 if (!isa<ConstantSDNode>(N->getOperand(1)))
13545 return SDValue();
13546 uint64_t ShAmt = N->getConstantOperandVal(1);
13547 if (ShAmt > 32)
13548 return SDValue();
13550 SDValue N0 = N->getOperand(0);
13552 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
13553 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
13554 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
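// e.g. (sra (sext_inreg (shl X, 3), i32), 4) -> (sra (shl X, 35), 36), which
// is expected to select to slli+srai (both compressible) rather than
// slliw+sraiw.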
13555 if (ShAmt < 32 &&
13556 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
13557 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
13558 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
13559 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
13560 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
13561 if (LShAmt < 32) {
13562 SDLoc ShlDL(N0.getOperand(0));
13563 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
13564 N0.getOperand(0).getOperand(0),
13565 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
13566 SDLoc DL(N);
13567 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
13568 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
13572 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
13573 // FIXME: Should this be a generic combine? There's a similar combine on X86.
13575 // Also try these folds where an add or sub is in the middle.
13576 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
13577 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
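// e.g. with C == 2: (sra (shl X, 32), 30) -> (shl (sext_inreg X, i32), 2).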
13578 SDValue Shl;
13579 ConstantSDNode *AddC = nullptr;
13581 // We might have an ADD or SUB between the SRA and SHL.
13582 bool IsAdd = N0.getOpcode() == ISD::ADD;
13583 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
13584 // Other operand needs to be a constant we can modify.
13585 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
13586 if (!AddC)
13587 return SDValue();
13589 // AddC needs to have at least 32 trailing zeros.
13590 if (AddC->getAPIntValue().countr_zero() < 32)
13591 return SDValue();
13593 // Every user should be a shift by a constant less than or equal to 32. This
13594 // ensures we'll do this optimization for each of them to produce an
13595 // add/sub+sext_inreg they can all share.
13596 for (SDNode *U : N0->uses()) {
13597 if (U->getOpcode() != ISD::SRA ||
13598 !isa<ConstantSDNode>(U->getOperand(1)) ||
13599 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
13600 return SDValue();
13603 Shl = N0.getOperand(IsAdd ? 0 : 1);
13604 } else {
13605 // Not an ADD or SUB.
13606 Shl = N0;
13609 // Look for a shift left by 32.
13610 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
13611 Shl.getConstantOperandVal(1) != 32)
13612 return SDValue();
13614 // If we didn't look through an add/sub, then the shl should have one use.
13615 // If we did look through an add/sub, the sext_inreg we create is free so
13616 // we're only creating 2 new instructions. It's enough to only remove the
13617 // original sra+add/sub.
13618 if (!AddC && !Shl.hasOneUse())
13619 return SDValue();
13621 SDLoc DL(N);
13622 SDValue In = Shl.getOperand(0);
13624 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
13625 // constant.
13626 if (AddC) {
13627 SDValue ShiftedAddC =
13628 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
13629 if (IsAdd)
13630 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
13631 else
13632 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
13635 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
13636 DAG.getValueType(MVT::i32));
13637 if (ShAmt == 32)
13638 return SExt;
13640 return DAG.getNode(
13641 ISD::SHL, DL, MVT::i64, SExt,
13642 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
13645 // Invert (and/or (setcc X, Y, CC), (xor Z, 1)) to (or/and (setcc X, Y, !CC), Z)
13646 // if the result is used as the condition of a br_cc or select_cc we can invert,
13647 // inverting the setcc is free, and Z is 0/1. The caller will invert the
13648 // br_cc/select_cc.
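// For example, with Z known to be 0/1:
//   (and (setcc X, Y, eq), (xor Z, 1)) -> (or (setcc X, Y, ne), Z)
// and the caller then inverts the condition of the consuming br_cc/select_cc.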
13649 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
13650 bool IsAnd = Cond.getOpcode() == ISD::AND;
13651 if (!IsAnd && Cond.getOpcode() != ISD::OR)
13652 return SDValue();
13654 if (!Cond.hasOneUse())
13655 return SDValue();
13657 SDValue Setcc = Cond.getOperand(0);
13658 SDValue Xor = Cond.getOperand(1);
13659 // Canonicalize setcc to LHS.
13660 if (Setcc.getOpcode() != ISD::SETCC)
13661 std::swap(Setcc, Xor);
13662 // LHS should be a setcc and RHS should be an xor.
13663 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
13664 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
13665 return SDValue();
13667 // If the condition is an And, SimplifyDemandedBits may have changed
13668 // (xor Z, 1) to (not Z).
13669 SDValue Xor1 = Xor.getOperand(1);
13670 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
13671 return SDValue();
13673 EVT VT = Cond.getValueType();
13674 SDValue Xor0 = Xor.getOperand(0);
13676 // The LHS of the xor needs to be 0/1.
13677 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13678 if (!DAG.MaskedValueIsZero(Xor0, Mask))
13679 return SDValue();
13681 // We can only invert integer setccs.
13682 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
13683 if (!SetCCOpVT.isScalarInteger())
13684 return SDValue();
13686 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
13687 if (ISD::isIntEqualitySetCC(CCVal)) {
13688 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13689 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
13690 Setcc.getOperand(1), CCVal);
13691 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
13692 // Invert (setlt 0, X) by converting to (setlt X, 1).
13693 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
13694 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
13695 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
13696 // Invert (setlt X, 1) by converting to (setlt 0, X).
13697 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
13698 DAG.getConstant(0, SDLoc(Setcc), VT),
13699 Setcc.getOperand(0), CCVal);
13700 } else
13701 return SDValue();
13703 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13704 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
13707 // Perform common combines for BR_CC and SELECT_CC conditions.
13708 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
13709 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
13710 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13712 // Since an arithmetic right shift always preserves the sign bit,
13713 // the shift can be omitted:
13714 // Fold (setlt (sra X, N), 0) -> (setlt X, 0) and
13715 // (setge (sra X, N), 0) -> (setge X, 0).
13716 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
13717 LHS.getOpcode() == ISD::SRA) {
13718 LHS = LHS.getOperand(0);
13719 return true;
13722 if (!ISD::isIntEqualitySetCC(CCVal))
13723 return false;
13725 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
13726 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
13727 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
13728 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
13729 // If we're looking for eq 0 instead of ne 0, we need to invert the
13730 // condition.
13731 bool Invert = CCVal == ISD::SETEQ;
13732 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
13733 if (Invert)
13734 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
13736 RHS = LHS.getOperand(1);
13737 LHS = LHS.getOperand(0);
13738 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
13740 CC = DAG.getCondCode(CCVal);
13741 return true;
13744 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
13745 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
13746 RHS = LHS.getOperand(1);
13747 LHS = LHS.getOperand(0);
13748 return true;
13751 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
13752 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
13753 LHS.getOperand(1).getOpcode() == ISD::Constant) {
13754 SDValue LHS0 = LHS.getOperand(0);
13755 if (LHS0.getOpcode() == ISD::AND &&
13756 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
13757 uint64_t Mask = LHS0.getConstantOperandVal(1);
13758 uint64_t ShAmt = LHS.getConstantOperandVal(1);
13759 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
13760 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
13761 CC = DAG.getCondCode(CCVal);
13763 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
13764 LHS = LHS0.getOperand(0);
13765 if (ShAmt != 0)
13766 LHS =
13767 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
13768 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
13769 return true;
13774 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
13775 // This can occur when legalizing some floating point comparisons.
13776 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
13777 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
13778 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
13779 CC = DAG.getCondCode(CCVal);
13780 RHS = DAG.getConstant(0, DL, LHS.getValueType());
13781 return true;
13784 if (isNullConstant(RHS)) {
13785 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
13786 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
13787 CC = DAG.getCondCode(CCVal);
13788 LHS = NewCond;
13789 return true;
13793 return false;
13796 // Fold
13797 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
13798 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
13799 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
13800 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
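// The same idea applies to the shift opcodes handled below, e.g.
// (select C, (shl Y, X), Y) -> (shl Y, (select C, X, 0)), using 0 as the
// identity operand for the non-commutative cases.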
13801 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
13802 SDValue TrueVal, SDValue FalseVal,
13803 bool Swapped) {
13804 bool Commutative = true;
13805 unsigned Opc = TrueVal.getOpcode();
13806 switch (Opc) {
13807 default:
13808 return SDValue();
13809 case ISD::SHL:
13810 case ISD::SRA:
13811 case ISD::SRL:
13812 case ISD::SUB:
13813 Commutative = false;
13814 break;
13815 case ISD::ADD:
13816 case ISD::OR:
13817 case ISD::XOR:
13818 break;
13821 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
13822 return SDValue();
13824 unsigned OpToFold;
13825 if (FalseVal == TrueVal.getOperand(0))
13826 OpToFold = 0;
13827 else if (Commutative && FalseVal == TrueVal.getOperand(1))
13828 OpToFold = 1;
13829 else
13830 return SDValue();
13832 EVT VT = N->getValueType(0);
13833 SDLoc DL(N);
13834 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
13835 EVT OtherOpVT = OtherOp->getValueType(0);
13836 SDValue IdentityOperand =
13837 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
13838 if (!Commutative)
13839 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
13840 assert(IdentityOperand && "No identity operand!");
13842 if (Swapped)
13843 std::swap(OtherOp, IdentityOperand);
13844 SDValue NewSel =
13845 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
13846 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
13849 // This tries to get rid of the `select` and `icmp` that are used to handle
13850 // targets that do not support `cttz(0)`/`ctlz(0)`.
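// For example, (select (seteq X, 0), 0, (cttz X)) becomes
// (and (cttz X), BitWidth - 1); since ISD::CTTZ/ISD::CTLZ of zero is defined
// to be the bit width, the masked result is 0 exactly when X is 0.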
13851 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
13852 SDValue Cond = N->getOperand(0);
13854 // This represents either CTTZ or CTLZ instruction.
13855 SDValue CountZeroes;
13857 SDValue ValOnZero;
13859 if (Cond.getOpcode() != ISD::SETCC)
13860 return SDValue();
13862 if (!isNullConstant(Cond->getOperand(1)))
13863 return SDValue();
13865 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
13866 if (CCVal == ISD::CondCode::SETEQ) {
13867 CountZeroes = N->getOperand(2);
13868 ValOnZero = N->getOperand(1);
13869 } else if (CCVal == ISD::CondCode::SETNE) {
13870 CountZeroes = N->getOperand(1);
13871 ValOnZero = N->getOperand(2);
13872 } else {
13873 return SDValue();
13876 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
13877 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
13878 CountZeroes = CountZeroes.getOperand(0);
13880 if (CountZeroes.getOpcode() != ISD::CTTZ &&
13881 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
13882 CountZeroes.getOpcode() != ISD::CTLZ &&
13883 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
13884 return SDValue();
13886 if (!isNullConstant(ValOnZero))
13887 return SDValue();
13889 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
13890 if (Cond->getOperand(0) != CountZeroesArgument)
13891 return SDValue();
13893 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
13894 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
13895 CountZeroes.getValueType(), CountZeroesArgument);
13896 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
13897 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
13898 CountZeroes.getValueType(), CountZeroesArgument);
13901 unsigned BitWidth = CountZeroes.getValueSizeInBits();
13902 SDValue BitWidthMinusOne =
13903 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
13905 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
13906 CountZeroes, BitWidthMinusOne);
13907 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
13910 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
13911 const RISCVSubtarget &Subtarget) {
13912 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
13913 return Folded;
13915 if (Subtarget.hasShortForwardBranchOpt())
13916 return SDValue();
13918 SDValue TrueVal = N->getOperand(1);
13919 SDValue FalseVal = N->getOperand(2);
13920 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
13921 return V;
13922 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
13925 /// If we have a build_vector where each lane is binop X, C, where C
13926 /// is a constant (but not necessarily the same constant on all lanes),
13927 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ...).
13928 /// We assume that materializing a constant build vector will be no more
13929 /// expensive than performing O(n) binops.
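/// For example:
///   (build_vector (add X0, 1), (add X1, 2), (add X2, 3), (add X3, 4))
///     -> (add (build_vector X0, X1, X2, X3), (build_vector 1, 2, 3, 4))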
13930 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
13931 const RISCVSubtarget &Subtarget,
13932 const RISCVTargetLowering &TLI) {
13933 SDLoc DL(N);
13934 EVT VT = N->getValueType(0);
13936 assert(!VT.isScalableVector() && "unexpected build vector");
13938 if (VT.getVectorNumElements() == 1)
13939 return SDValue();
13941 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
13942 if (!TLI.isBinOp(Opcode))
13943 return SDValue();
13945 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
13946 return SDValue();
13948 SmallVector<SDValue> LHSOps;
13949 SmallVector<SDValue> RHSOps;
13950 for (SDValue Op : N->ops()) {
13951 if (Op.isUndef()) {
13952 // We can't form a divide or remainder from undef.
13953 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
13954 return SDValue();
13956 LHSOps.push_back(Op);
13957 RHSOps.push_back(Op);
13958 continue;
13961 // TODO: We can handle operations which have a neutral rhs value
13962 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
13963 // of profitability in a more explicit manner.
13964 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
13965 return SDValue();
13967 LHSOps.push_back(Op.getOperand(0));
13968 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
13969 !isa<ConstantFPSDNode>(Op.getOperand(1)))
13970 return SDValue();
13971 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
13972 // have different LHS and RHS types.
13973 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
13974 return SDValue();
13975 RHSOps.push_back(Op.getOperand(1));
13978 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
13979 DAG.getBuildVector(VT, DL, RHSOps));
13982 // If we're concatenating a series of vector loads like
13983 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...,
13984 // then we can turn this into a strided load by widening the vector elements:
13985 // vlse32 p, stride=n
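// For example (a sketch): concat_vectors of four one-use v4i8 loads from p,
// p+16, p+32 and p+48 can become a v4i32 strided load from p with stride 16,
// which is then bitcast back to v16i8.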
13986 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
13987 const RISCVSubtarget &Subtarget,
13988 const RISCVTargetLowering &TLI) {
13989 SDLoc DL(N);
13990 EVT VT = N->getValueType(0);
13992 // Only perform this combine on legal MVTs.
13993 if (!TLI.isTypeLegal(VT))
13994 return SDValue();
13996 // TODO: Potentially extend this to scalable vectors
13997 if (VT.isScalableVector())
13998 return SDValue();
14000 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
14001 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
14002 !SDValue(BaseLd, 0).hasOneUse())
14003 return SDValue();
14005 EVT BaseLdVT = BaseLd->getValueType(0);
14007 // Go through the loads and check that they're strided
14008 SmallVector<LoadSDNode *> Lds;
14009 Lds.push_back(BaseLd);
14010 Align Align = BaseLd->getAlign();
14011 for (SDValue Op : N->ops().drop_front()) {
14012 auto *Ld = dyn_cast<LoadSDNode>(Op);
14013 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
14014 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
14015 Ld->getValueType(0) != BaseLdVT)
14016 return SDValue();
14018 Lds.push_back(Ld);
14020 // The common alignment is the most restrictive (smallest) of all the loads
14021 Align = std::min(Align, Ld->getAlign());
14024 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
14025 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
14026 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
14027 // If the load ptrs can be decomposed into a common (Base + Index) with a
14028 // common constant stride, then return the constant stride.
14029 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
14030 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
14031 if (BIO1.equalBaseIndex(BIO2, DAG))
14032 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
14034 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
14035 SDValue P1 = Ld1->getBasePtr();
14036 SDValue P2 = Ld2->getBasePtr();
14037 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
14038 return {{P2.getOperand(1), false}};
14039 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
14040 return {{P1.getOperand(1), true}};
14042 return std::nullopt;
14045 // Get the distance between the first and second loads
14046 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
14047 if (!BaseDiff)
14048 return SDValue();
14050 // Check all the loads are the same distance apart
14051 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
14052 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
14053 return SDValue();
14055 // TODO: At this point, we've successfully matched a generalized gather
14056 // load. Maybe we should emit that, and then move the specialized
14057 // matchers above and below into a DAG combine?
14059 // Get the widened scalar type, e.g. v4i8 -> i32
14060 unsigned WideScalarBitWidth =
14061 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
14062 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
14064 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
14065 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
14066 if (!TLI.isTypeLegal(WideVecVT))
14067 return SDValue();
14069 // Check that the operation is legal
14070 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
14071 return SDValue();
14073 auto [StrideVariant, MustNegateStride] = *BaseDiff;
14074 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
14075 ? std::get<SDValue>(StrideVariant)
14076 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
14077 Lds[0]->getOffset().getValueType());
14078 if (MustNegateStride)
14079 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
14081 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
14082 SDValue IntID =
14083 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
14084 Subtarget.getXLenVT());
14086 SDValue AllOneMask =
14087 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
14088 DAG.getConstant(1, DL, MVT::i1));
14090 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
14091 BaseLd->getBasePtr(), Stride, AllOneMask};
14093 uint64_t MemSize;
14094 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
14095 ConstStride && ConstStride->getSExtValue() >= 0)
14096 // total size = (elsize * n) + (stride - elsize) * (n-1)
14097 // = elsize + stride * (n-1)
14098 MemSize = WideScalarVT.getSizeInBits() +
14099 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
14100 else
14101 // If Stride isn't constant, then we can't know how much it will load
14102 MemSize = MemoryLocation::UnknownSize;
14104 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
14105 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
14106 Align);
14108 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
14109 Ops, WideVecVT, MMO);
14110 for (SDValue Ld : N->ops())
14111 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
14113 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
14116 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
14117 const RISCVSubtarget &Subtarget) {
14118 assert(N->getOpcode() == RISCVISD::ADD_VL);
14119 SDValue Addend = N->getOperand(0);
14120 SDValue MulOp = N->getOperand(1);
14121 SDValue AddMergeOp = N->getOperand(2);
14123 if (!AddMergeOp.isUndef())
14124 return SDValue();
14126 auto IsVWMulOpc = [](unsigned Opc) {
14127 switch (Opc) {
14128 case RISCVISD::VWMUL_VL:
14129 case RISCVISD::VWMULU_VL:
14130 case RISCVISD::VWMULSU_VL:
14131 return true;
14132 default:
14133 return false;
14137 if (!IsVWMulOpc(MulOp.getOpcode()))
14138 std::swap(Addend, MulOp);
14140 if (!IsVWMulOpc(MulOp.getOpcode()))
14141 return SDValue();
14143 SDValue MulMergeOp = MulOp.getOperand(2);
14145 if (!MulMergeOp.isUndef())
14146 return SDValue();
14148 SDValue AddMask = N->getOperand(3);
14149 SDValue AddVL = N->getOperand(4);
14150 SDValue MulMask = MulOp.getOperand(3);
14151 SDValue MulVL = MulOp.getOperand(4);
14153 if (AddMask != MulMask || AddVL != MulVL)
14154 return SDValue();
14156 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
14157 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
14158 "Unexpected opcode after VWMACC_VL");
14159 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
14160 "Unexpected opcode after VWMACC_VL!");
14161 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
14162 "Unexpected opcode after VWMUL_VL!");
14163 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
14164 "Unexpected opcode after VWMUL_VL!");
14166 SDLoc DL(N);
14167 EVT VT = N->getValueType(0);
14168 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
14169 AddVL};
14170 return DAG.getNode(Opc, DL, VT, Ops);
14173 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
14174 ISD::MemIndexType &IndexType,
14175 RISCVTargetLowering::DAGCombinerInfo &DCI) {
14176 if (!DCI.isBeforeLegalize())
14177 return false;
14179 SelectionDAG &DAG = DCI.DAG;
14180 const MVT XLenVT =
14181 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
14183 const EVT IndexVT = Index.getValueType();
14185 // RISC-V indexed loads only support the "unsigned unscaled" addressing
14186 // mode, so anything else must be manually legalized.
14187 if (!isIndexTypeSigned(IndexType))
14188 return false;
14190 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
14191 // Any index legalization should first promote to XLenVT, so we don't lose
14192 // bits when scaling. This may create an illegal index type so we let
14193 // LLVM's legalization take care of the splitting.
14194 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
14195 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
14196 IndexVT.changeVectorElementType(XLenVT), Index);
14198 IndexType = ISD::UNSIGNED_SCALED;
14199 return true;
14202 /// Match the index vector of a scatter or gather node as the shuffle mask
14203 /// which performs the rearrangement if possible. Will only match if
14204 /// all lanes are touched, and thus replacing the scatter or gather with
14205 /// a unit strided access and shuffle is legal.
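/// For example, a gather of v4i32 with an all-ones mask and constant byte
/// indices (4, 0, 12, 8) touches every lane, so it can be rewritten as a unit
/// strided load followed by a shuffle with mask (1, 0, 3, 2).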
14206 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
14207 SmallVector<int> &ShuffleMask) {
14208 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
14209 return false;
14210 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
14211 return false;
14213 const unsigned ElementSize = VT.getScalarStoreSize();
14214 const unsigned NumElems = VT.getVectorNumElements();
14216 // Create the shuffle mask and check all bits active
14217 assert(ShuffleMask.empty());
14218 BitVector ActiveLanes(NumElems);
14219 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
14220 // TODO: We've found an active bit of UB, and could be
14221 // more aggressive here if desired.
14222 if (Index->getOperand(i)->isUndef())
14223 return false;
14224 uint64_t C = Index->getConstantOperandVal(i);
14225 if (C % ElementSize != 0)
14226 return false;
14227 C = C / ElementSize;
14228 if (C >= NumElems)
14229 return false;
14230 ShuffleMask.push_back(C);
14231 ActiveLanes.set(C);
14233 return ActiveLanes.all();
14236 /// Match the index of a gather or scatter operation as an operation
14237 /// with twice the element width and half the number of elements. This is
14238 /// generally profitable (if legal) because these operations are linear
14239 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
14240 /// come out ahead.
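/// For example, an i32 gather with constant byte offsets (0, 4, 16, 20) can
/// be treated as an i64 gather with offsets (0, 16): each even offset is
/// aligned to the wider element and each odd offset is the previous one plus
/// the narrow element size, subject to the ELEN and alignment checks below.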
14241 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
14242 Align BaseAlign, const RISCVSubtarget &ST) {
14243 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
14244 return false;
14245 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
14246 return false;
14248 // Attempt a doubling. If we can use an element type 4x or 8x in
14249 // size, this will happen via multiple iterations of the transform.
14250 const unsigned NumElems = VT.getVectorNumElements();
14251 if (NumElems % 2 != 0)
14252 return false;
14254 const unsigned ElementSize = VT.getScalarStoreSize();
14255 const unsigned WiderElementSize = ElementSize * 2;
14256 if (WiderElementSize > ST.getELen()/8)
14257 return false;
14259 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
14260 return false;
14262 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
14263 // TODO: We've found an active bit of UB, and could be
14264 // more aggressive here if desired.
14265 if (Index->getOperand(i)->isUndef())
14266 return false;
14267 // TODO: This offset check is too strict if we support fully
14268 // misaligned memory operations.
14269 uint64_t C = Index->getConstantOperandVal(i);
14270 if (i % 2 == 0) {
14271 if (C % WiderElementSize != 0)
14272 return false;
14273 continue;
14275 uint64_t Last = Index->getConstantOperandVal(i-1);
14276 if (C != Last + ElementSize)
14277 return false;
14279 return true;
14283 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
14284 DAGCombinerInfo &DCI) const {
14285 SelectionDAG &DAG = DCI.DAG;
14286 const MVT XLenVT = Subtarget.getXLenVT();
14287 SDLoc DL(N);
14289 // Helper to call SimplifyDemandedBits on an operand of N where only some low
14290 // bits are demanded. N will be added to the Worklist if it was not deleted.
14291 // Caller should return SDValue(N, 0) if this returns true.
14292 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
14293 SDValue Op = N->getOperand(OpNo);
14294 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
14295 if (!SimplifyDemandedBits(Op, Mask, DCI))
14296 return false;
14298 if (N->getOpcode() != ISD::DELETED_NODE)
14299 DCI.AddToWorklist(N);
14300 return true;
14303 switch (N->getOpcode()) {
14304 default:
14305 break;
14306 case RISCVISD::SplitF64: {
14307 SDValue Op0 = N->getOperand(0);
14308 // If the input to SplitF64 is just BuildPairF64 then the operation is
14309 // redundant. Instead, use BuildPairF64's operands directly.
14310 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
14311 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
14313 if (Op0->isUndef()) {
14314 SDValue Lo = DAG.getUNDEF(MVT::i32);
14315 SDValue Hi = DAG.getUNDEF(MVT::i32);
14316 return DCI.CombineTo(N, Lo, Hi);
14319 // It's cheaper to materialise two 32-bit integers than to load a double
14320 // from the constant pool and transfer it to integer registers through the
14321 // stack.
14322 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
14323 APInt V = C->getValueAPF().bitcastToAPInt();
14324 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
14325 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
14326 return DCI.CombineTo(N, Lo, Hi);
14329 // This is a target-specific version of a DAGCombine performed in
14330 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14331 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14332 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14333 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
14334 !Op0.getNode()->hasOneUse())
14335 break;
14336 SDValue NewSplitF64 =
14337 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
14338 Op0.getOperand(0));
14339 SDValue Lo = NewSplitF64.getValue(0);
14340 SDValue Hi = NewSplitF64.getValue(1);
14341 APInt SignBit = APInt::getSignMask(32);
14342 if (Op0.getOpcode() == ISD::FNEG) {
14343 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
14344 DAG.getConstant(SignBit, DL, MVT::i32));
14345 return DCI.CombineTo(N, Lo, NewHi);
14347 assert(Op0.getOpcode() == ISD::FABS);
14348 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
14349 DAG.getConstant(~SignBit, DL, MVT::i32));
14350 return DCI.CombineTo(N, Lo, NewHi);
14352 case RISCVISD::SLLW:
14353 case RISCVISD::SRAW:
14354 case RISCVISD::SRLW:
14355 case RISCVISD::RORW:
14356 case RISCVISD::ROLW: {
14357 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
14358 if (SimplifyDemandedLowBitsHelper(0, 32) ||
14359 SimplifyDemandedLowBitsHelper(1, 5))
14360 return SDValue(N, 0);
14362 break;
14364 case RISCVISD::CLZW:
14365 case RISCVISD::CTZW: {
14366 // Only the lower 32 bits of the first operand are read
14367 if (SimplifyDemandedLowBitsHelper(0, 32))
14368 return SDValue(N, 0);
14369 break;
14371 case RISCVISD::FMV_W_X_RV64: {
14372 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
14373 // conversion is unnecessary and can be replaced with the
14374 // FMV_X_ANYEXTW_RV64 operand.
14375 SDValue Op0 = N->getOperand(0);
14376 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
14377 return Op0.getOperand(0);
14378 break;
14380 case RISCVISD::FMV_X_ANYEXTH:
14381 case RISCVISD::FMV_X_ANYEXTW_RV64: {
14382 SDLoc DL(N);
14383 SDValue Op0 = N->getOperand(0);
14384 MVT VT = N->getSimpleValueType(0);
14385 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
14386 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
14387 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
14388 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
14389 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
14390 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14391 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
14392 assert(Op0.getOperand(0).getValueType() == VT &&
14393 "Unexpected value type!");
14394 return Op0.getOperand(0);
14397 // This is a target-specific version of a DAGCombine performed in
14398 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14399 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14400 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14401 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
14402 !Op0.getNode()->hasOneUse())
14403 break;
14404 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
14405 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
14406 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
14407 if (Op0.getOpcode() == ISD::FNEG)
14408 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
14409 DAG.getConstant(SignBit, DL, VT));
14411 assert(Op0.getOpcode() == ISD::FABS);
14412 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
14413 DAG.getConstant(~SignBit, DL, VT));
14415 case ISD::ADD:
14416 return performADDCombine(N, DAG, Subtarget);
14417 case ISD::SUB:
14418 return performSUBCombine(N, DAG, Subtarget);
14419 case ISD::AND:
14420 return performANDCombine(N, DCI, Subtarget);
14421 case ISD::OR:
14422 return performORCombine(N, DCI, Subtarget);
14423 case ISD::XOR:
14424 return performXORCombine(N, DAG, Subtarget);
14425 case ISD::FADD:
14426 case ISD::UMAX:
14427 case ISD::UMIN:
14428 case ISD::SMAX:
14429 case ISD::SMIN:
14430 case ISD::FMAXNUM:
14431 case ISD::FMINNUM: {
14432 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14433 return V;
14434 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14435 return V;
14436 return SDValue();
14438 case ISD::SETCC:
14439 return performSETCCCombine(N, DAG, Subtarget);
14440 case ISD::SIGN_EXTEND_INREG:
14441 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
14442 case ISD::ZERO_EXTEND:
14443 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
14444 // type legalization. This is safe because fp_to_uint produces poison if
14445 // it overflows.
14446 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
14447 SDValue Src = N->getOperand(0);
14448 if (Src.getOpcode() == ISD::FP_TO_UINT &&
14449 isTypeLegal(Src.getOperand(0).getValueType()))
14450 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
14451 Src.getOperand(0));
14452 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
14453 isTypeLegal(Src.getOperand(1).getValueType())) {
14454 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14455 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
14456 Src.getOperand(0), Src.getOperand(1));
14457 DCI.CombineTo(N, Res);
14458 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
14459 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
14460 return SDValue(N, 0); // Return N so it doesn't get rechecked.
14463 return SDValue();
14464 case RISCVISD::TRUNCATE_VECTOR_VL: {
14465 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
14466 // This benefits the cases where X and Y are both low-precision vectors of the
14467 // same value type. Since the truncate would be lowered into n levels of
14468 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such a
14469 // pattern would otherwise be expanded into a series of "vsetvli" and "vnsrl"
14470 // instructions later.
14471 auto IsTruncNode = [](SDValue V) {
14472 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
14473 return false;
14474 SDValue VL = V.getOperand(2);
14475 auto *C = dyn_cast<ConstantSDNode>(VL);
14476 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
14477 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
14478 (isa<RegisterSDNode>(VL) &&
14479 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
14480 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
14481 IsVLMAXForVMSET;
14484 SDValue Op = N->getOperand(0);
14486 // We first need to find the innermost TRUNCATE_VECTOR_VL node
14487 // to recognize such a pattern.
14488 while (IsTruncNode(Op)) {
14489 if (!Op.hasOneUse())
14490 return SDValue();
14491 Op = Op.getOperand(0);
14494 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
14495 SDValue N0 = Op.getOperand(0);
14496 SDValue N1 = Op.getOperand(1);
14497 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
14498 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
14499 SDValue N00 = N0.getOperand(0);
14500 SDValue N10 = N1.getOperand(0);
14501 if (N00.getValueType().isVector() &&
14502 N00.getValueType() == N10.getValueType() &&
14503 N->getValueType(0) == N10.getValueType()) {
14504 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
14505 SDValue SMin = DAG.getNode(
14506 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
14507 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
14508 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
14512 break;
14514 case ISD::TRUNCATE:
14515 return performTRUNCATECombine(N, DAG, Subtarget);
14516 case ISD::SELECT:
14517 return performSELECTCombine(N, DAG, Subtarget);
14518 case RISCVISD::CZERO_EQZ:
14519 case RISCVISD::CZERO_NEZ:
14520 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
14521 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
14522 if (N->getOperand(1).getOpcode() == ISD::XOR &&
14523 isOneConstant(N->getOperand(1).getOperand(1))) {
14524 SDValue Cond = N->getOperand(1).getOperand(0);
14525 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
14526 if (DAG.MaskedValueIsZero(Cond, Mask)) {
14527 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
14528 ? RISCVISD::CZERO_NEZ
14529 : RISCVISD::CZERO_EQZ;
14530 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
14531 N->getOperand(0), Cond);
14534 return SDValue();
14536 case RISCVISD::SELECT_CC: {
14537 // Transform
14538 SDValue LHS = N->getOperand(0);
14539 SDValue RHS = N->getOperand(1);
14540 SDValue CC = N->getOperand(2);
14541 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
14542 SDValue TrueV = N->getOperand(3);
14543 SDValue FalseV = N->getOperand(4);
14544 SDLoc DL(N);
14545 EVT VT = N->getValueType(0);
14547 // If the True and False values are the same, we don't need a select_cc.
14548 if (TrueV == FalseV)
14549 return TrueV;
14551 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
14552 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
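// e.g. on RV64, (select (setlt X, 0), 5, 3) -> (add (and (sra X, 63), 2), 3):
// (sra X, 63) is all-ones exactly when X is negative, giving 2 + 3 = 5, and 0
// otherwise, giving 3 (both constants must fit in simm12, as checked below).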
14553 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
14554 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
14555 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
14556 if (CCVal == ISD::CondCode::SETGE)
14557 std::swap(TrueV, FalseV);
14559 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
14560 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
14561 // Only handle simm12; if it is not in this range, it can be considered as a
14562 // register.
14563 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
14564 isInt<12>(TrueSImm - FalseSImm)) {
14565 SDValue SRA =
14566 DAG.getNode(ISD::SRA, DL, VT, LHS,
14567 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
14568 SDValue AND =
14569 DAG.getNode(ISD::AND, DL, VT, SRA,
14570 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
14571 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
14574 if (CCVal == ISD::CondCode::SETGE)
14575 std::swap(TrueV, FalseV);
14578 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
14579 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
14580 {LHS, RHS, CC, TrueV, FalseV});
14582 if (!Subtarget.hasShortForwardBranchOpt()) {
14583 // (select c, -1, y) -> -c | y
14584 if (isAllOnesConstant(TrueV)) {
14585 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
14586 SDValue Neg = DAG.getNegative(C, DL, VT);
14587 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
14589 // (select c, y, -1) -> -!c | y
14590 if (isAllOnesConstant(FalseV)) {
14591 SDValue C =
14592 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
14593 SDValue Neg = DAG.getNegative(C, DL, VT);
14594 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
14597 // (select c, 0, y) -> -!c & y
14598 if (isNullConstant(TrueV)) {
14599 SDValue C =
14600 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
14601 SDValue Neg = DAG.getNegative(C, DL, VT);
14602 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
14604 // (select c, y, 0) -> -c & y
14605 if (isNullConstant(FalseV)) {
14606 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
14607 SDValue Neg = DAG.getNegative(C, DL, VT);
14608 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
14610 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
14611 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
14612 if (((isOneConstant(FalseV) && LHS == TrueV &&
14613 CCVal == ISD::CondCode::SETNE) ||
14614 (isOneConstant(TrueV) && LHS == FalseV &&
14615 CCVal == ISD::CondCode::SETEQ)) &&
14616 isNullConstant(RHS)) {
14617 // freeze it to be safe.
14618 LHS = DAG.getFreeze(LHS);
14619 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
14620 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
14624 // If both true/false are an xor with 1, pull through the select.
14625 // This can occur after op legalization if both operands are setccs that
14626 // require an xor to invert.
14627 // FIXME: Generalize to other binary ops with identical operand?
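    // That is:
    //   (select_cc lhs, rhs, cc, (xor a, 1), (xor b, 1))
    //     -> (xor (select_cc lhs, rhs, cc, a, b), 1)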
14628 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
14629 TrueV.getOperand(1) == FalseV.getOperand(1) &&
14630 isOneConstant(TrueV.getOperand(1)) &&
14631 TrueV.hasOneUse() && FalseV.hasOneUse()) {
14632 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
14633 TrueV.getOperand(0), FalseV.getOperand(0));
14634 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
14637 return SDValue();
14639 case RISCVISD::BR_CC: {
14640 SDValue LHS = N->getOperand(1);
14641 SDValue RHS = N->getOperand(2);
14642 SDValue CC = N->getOperand(3);
14643 SDLoc DL(N);
14645 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
14646 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
14647 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
14649 return SDValue();
14651 case ISD::BITREVERSE:
14652 return performBITREVERSECombine(N, DAG, Subtarget);
14653 case ISD::FP_TO_SINT:
14654 case ISD::FP_TO_UINT:
14655 return performFP_TO_INTCombine(N, DCI, Subtarget);
14656 case ISD::FP_TO_SINT_SAT:
14657 case ISD::FP_TO_UINT_SAT:
14658 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
14659 case ISD::FCOPYSIGN: {
14660 EVT VT = N->getValueType(0);
14661 if (!VT.isVector())
14662 break;
14663     // There is a form of VFSGNJ which injects the negated sign of its second
14664     // operand. Try to bubble any FNEG up past the extend/round so that this
14665     // optimized pattern can be formed. Avoid modifying cases where the
14666     // FP_ROUND has TRUNC=1.
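    // Illustrative rewrite performed below:
    //   (fcopysign X, (fp_extend (fneg Y)))
    //     -> (fcopysign X, (fneg (fp_extend Y)))
    // which can then be matched as a single vfsgnjn instead of a separate
    // negate feeding a vfsgnj.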
14667 SDValue In2 = N->getOperand(1);
14668 // Avoid cases where the extend/round has multiple uses, as duplicating
14669 // those is typically more expensive than removing a fneg.
14670 if (!In2.hasOneUse())
14671 break;
14672 if (In2.getOpcode() != ISD::FP_EXTEND &&
14673 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
14674 break;
14675 In2 = In2.getOperand(0);
14676 if (In2.getOpcode() != ISD::FNEG)
14677 break;
14678 SDLoc DL(N);
14679 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
14680 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
14681 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
14683 case ISD::MGATHER: {
14684 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
14685 const EVT VT = N->getValueType(0);
14686 SDValue Index = MGN->getIndex();
14687 SDValue ScaleOp = MGN->getScale();
14688 ISD::MemIndexType IndexType = MGN->getIndexType();
14689 assert(!MGN->isIndexScaled() &&
14690 "Scaled gather/scatter should not be formed");
14692 SDLoc DL(N);
14693 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14694 return DAG.getMaskedGather(
14695 N->getVTList(), MGN->getMemoryVT(), DL,
14696 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
14697 MGN->getBasePtr(), Index, ScaleOp},
14698 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
14700 if (narrowIndex(Index, IndexType, DAG))
14701 return DAG.getMaskedGather(
14702 N->getVTList(), MGN->getMemoryVT(), DL,
14703 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
14704 MGN->getBasePtr(), Index, ScaleOp},
14705 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
14707 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
14708 MGN->getExtensionType() == ISD::NON_EXTLOAD) {
14709 if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
14710 SimpleVID && SimpleVID->StepDenominator == 1) {
14711 const int64_t StepNumerator = SimpleVID->StepNumerator;
14712 const int64_t Addend = SimpleVID->Addend;
14714 // Note: We don't need to check alignment here since (by assumption
14715         // from the existence of the gather), our offsets must be sufficiently
14716 // aligned.
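        // Illustrative example: a gather whose index operand is the affine
        // sequence <A, A+S, A+2*S, ...> is rewritten below as a
        // riscv_masked_strided_load from (BasePtr + A) with constant stride S,
        // avoiding materialisation of the index vector.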
14718 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
14719 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
14720 assert(IndexType == ISD::UNSIGNED_SCALED);
14721 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
14722 DAG.getConstant(Addend, DL, PtrVT));
14724 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
14725 SDValue IntID =
14726 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
14727 XLenVT);
14728 SDValue Ops[] =
14729 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
14730 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
14731 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
14732 Ops, VT, MGN->getMemOperand());
14736 SmallVector<int> ShuffleMask;
14737 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
14738 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
14739 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
14740 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
14741 MGN->getMask(), DAG.getUNDEF(VT),
14742 MGN->getMemoryVT(), MGN->getMemOperand(),
14743 ISD::UNINDEXED, ISD::NON_EXTLOAD);
14744 SDValue Shuffle =
14745 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
14746 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
14749 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
14750 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
14751 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
14752 SmallVector<SDValue> NewIndices;
14753 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
14754 NewIndices.push_back(Index.getOperand(i));
14755 EVT IndexVT = Index.getValueType()
14756 .getHalfNumVectorElementsVT(*DAG.getContext());
14757 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
14759 unsigned ElementSize = VT.getScalarStoreSize();
14760 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
14761 auto EltCnt = VT.getVectorElementCount();
14762 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
14763 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
14764 EltCnt.divideCoefficientBy(2));
14765 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
14766 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
14767 EltCnt.divideCoefficientBy(2));
14768 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
14770 SDValue Gather =
14771 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
14772 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
14773 Index, ScaleOp},
14774 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
14775 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
14776 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
14778 break;
14780 case ISD::MSCATTER:{
14781 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
14782 SDValue Index = MSN->getIndex();
14783 SDValue ScaleOp = MSN->getScale();
14784 ISD::MemIndexType IndexType = MSN->getIndexType();
14785 assert(!MSN->isIndexScaled() &&
14786 "Scaled gather/scatter should not be formed");
14788 SDLoc DL(N);
14789 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14790 return DAG.getMaskedScatter(
14791 N->getVTList(), MSN->getMemoryVT(), DL,
14792 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
14793 Index, ScaleOp},
14794 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
14796 if (narrowIndex(Index, IndexType, DAG))
14797 return DAG.getMaskedScatter(
14798 N->getVTList(), MSN->getMemoryVT(), DL,
14799 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
14800 Index, ScaleOp},
14801 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
14803 EVT VT = MSN->getValue()->getValueType(0);
14804 SmallVector<int> ShuffleMask;
14805 if (!MSN->isTruncatingStore() &&
14806 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
14807 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
14808 DAG.getUNDEF(VT), ShuffleMask);
14809 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
14810 DAG.getUNDEF(XLenVT), MSN->getMask(),
14811 MSN->getMemoryVT(), MSN->getMemOperand(),
14812 ISD::UNINDEXED, false);
14814 break;
14816 case ISD::VP_GATHER: {
14817 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
14818 SDValue Index = VPGN->getIndex();
14819 SDValue ScaleOp = VPGN->getScale();
14820 ISD::MemIndexType IndexType = VPGN->getIndexType();
14821 assert(!VPGN->isIndexScaled() &&
14822 "Scaled gather/scatter should not be formed");
14824 SDLoc DL(N);
14825 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14826 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
14827 {VPGN->getChain(), VPGN->getBasePtr(), Index,
14828 ScaleOp, VPGN->getMask(),
14829 VPGN->getVectorLength()},
14830 VPGN->getMemOperand(), IndexType);
14832 if (narrowIndex(Index, IndexType, DAG))
14833 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
14834 {VPGN->getChain(), VPGN->getBasePtr(), Index,
14835 ScaleOp, VPGN->getMask(),
14836 VPGN->getVectorLength()},
14837 VPGN->getMemOperand(), IndexType);
14839 break;
14841 case ISD::VP_SCATTER: {
14842 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
14843 SDValue Index = VPSN->getIndex();
14844 SDValue ScaleOp = VPSN->getScale();
14845 ISD::MemIndexType IndexType = VPSN->getIndexType();
14846 assert(!VPSN->isIndexScaled() &&
14847 "Scaled gather/scatter should not be formed");
14849 SDLoc DL(N);
14850 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14851 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
14852 {VPSN->getChain(), VPSN->getValue(),
14853 VPSN->getBasePtr(), Index, ScaleOp,
14854 VPSN->getMask(), VPSN->getVectorLength()},
14855 VPSN->getMemOperand(), IndexType);
14857 if (narrowIndex(Index, IndexType, DAG))
14858 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
14859 {VPSN->getChain(), VPSN->getValue(),
14860 VPSN->getBasePtr(), Index, ScaleOp,
14861 VPSN->getMask(), VPSN->getVectorLength()},
14862 VPSN->getMemOperand(), IndexType);
14863 break;
14865 case RISCVISD::SRA_VL:
14866 case RISCVISD::SRL_VL:
14867 case RISCVISD::SHL_VL: {
14868 SDValue ShAmt = N->getOperand(1);
14869 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
14870 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
14871 SDLoc DL(N);
14872 SDValue VL = N->getOperand(4);
14873 EVT VT = N->getValueType(0);
14874 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
14875 ShAmt.getOperand(1), VL);
14876 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
14877 N->getOperand(2), N->getOperand(3), N->getOperand(4));
14879 break;
14881 case ISD::SRA:
14882 if (SDValue V = performSRACombine(N, DAG, Subtarget))
14883 return V;
14884 [[fallthrough]];
14885 case ISD::SRL:
14886 case ISD::SHL: {
14887 SDValue ShAmt = N->getOperand(1);
14888 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
14889 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
14890 SDLoc DL(N);
14891 EVT VT = N->getValueType(0);
14892 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
14893 ShAmt.getOperand(1),
14894 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
14895 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
14897 break;
14899 case RISCVISD::ADD_VL:
14900 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
14901 return V;
14902 return combineToVWMACC(N, DAG, Subtarget);
14903 case RISCVISD::SUB_VL:
14904 case RISCVISD::VWADD_W_VL:
14905 case RISCVISD::VWADDU_W_VL:
14906 case RISCVISD::VWSUB_W_VL:
14907 case RISCVISD::VWSUBU_W_VL:
14908 case RISCVISD::MUL_VL:
14909 return combineBinOp_VLToVWBinOp_VL(N, DCI);
14910 case RISCVISD::VFMADD_VL:
14911 case RISCVISD::VFNMADD_VL:
14912 case RISCVISD::VFMSUB_VL:
14913 case RISCVISD::VFNMSUB_VL:
14914 case RISCVISD::STRICT_VFMADD_VL:
14915 case RISCVISD::STRICT_VFNMADD_VL:
14916 case RISCVISD::STRICT_VFMSUB_VL:
14917 case RISCVISD::STRICT_VFNMSUB_VL:
14918 return performVFMADD_VLCombine(N, DAG, Subtarget);
14919 case RISCVISD::FMUL_VL:
14920 return performVFMUL_VLCombine(N, DAG, Subtarget);
14921 case RISCVISD::FADD_VL:
14922 case RISCVISD::FSUB_VL:
14923 return performFADDSUB_VLCombine(N, DAG, Subtarget);
14924 case ISD::LOAD:
14925 case ISD::STORE: {
14926 if (DCI.isAfterLegalizeDAG())
14927 if (SDValue V = performMemPairCombine(N, DCI))
14928 return V;
14930 if (N->getOpcode() != ISD::STORE)
14931 break;
14933 auto *Store = cast<StoreSDNode>(N);
14934 SDValue Chain = Store->getChain();
14935 EVT MemVT = Store->getMemoryVT();
14936 SDValue Val = Store->getValue();
14937 SDLoc DL(N);
14939 bool IsScalarizable =
14940 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
14941 Store->isSimple() &&
14942 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
14943 isPowerOf2_64(MemVT.getSizeInBits()) &&
14944 MemVT.getSizeInBits() <= Subtarget.getXLen();
14946 // If sufficiently aligned we can scalarize stores of constant vectors of
14947 // any power-of-two size up to XLen bits, provided that they aren't too
14948 // expensive to materialize.
14949 // vsetivli zero, 2, e8, m1, ta, ma
14950 // vmv.v.i v8, 4
14951 // vse64.v v8, (a0)
14952 // ->
14953 // li a1, 1028
14954 // sh a1, 0(a0)
14955 if (DCI.isBeforeLegalize() && IsScalarizable &&
14956 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
14957 // Get the constant vector bits
14958 APInt NewC(Val.getValueSizeInBits(), 0);
14959 uint64_t EltSize = Val.getScalarValueSizeInBits();
14960 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
14961 if (Val.getOperand(i).isUndef())
14962 continue;
14963 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
14964 i * EltSize);
14966 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
14968 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
14969 Subtarget.getFeatureBits(), true) <= 2 &&
14970 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
14971 NewVT, *Store->getMemOperand())) {
14972 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
14973 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
14974 Store->getPointerInfo(), Store->getOriginalAlign(),
14975 Store->getMemOperand()->getFlags());
14979 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
14980 // vsetivli zero, 2, e16, m1, ta, ma
14981 // vle16.v v8, (a0)
14982 // vse16.v v8, (a1)
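    // ->
    // lw   a2, 0(a0)
    // sw   a2, 0(a1)
    // (illustrative scalarized form; register choice and XLEN >= 32 assumed)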
14983 if (auto *L = dyn_cast<LoadSDNode>(Val);
14984 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
14985 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
14986 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
14987 L->getMemoryVT() == MemVT) {
14988 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
14989 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
14990 NewVT, *Store->getMemOperand()) &&
14991 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
14992 NewVT, *L->getMemOperand())) {
14993 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
14994 L->getPointerInfo(), L->getOriginalAlign(),
14995 L->getMemOperand()->getFlags());
14996 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
14997 Store->getPointerInfo(), Store->getOriginalAlign(),
14998 Store->getMemOperand()->getFlags());
15002 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
15003 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
15004 // any illegal types.
15005 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
15006 (DCI.isAfterLegalizeDAG() &&
15007 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15008 isNullConstant(Val.getOperand(1)))) {
15009 SDValue Src = Val.getOperand(0);
15010 MVT VecVT = Src.getSimpleValueType();
15011 // VecVT should be scalable and memory VT should match the element type.
15012 if (VecVT.isScalableVector() &&
15013 MemVT == VecVT.getVectorElementType()) {
15014 SDLoc DL(N);
15015 MVT MaskVT = getMaskTypeFor(VecVT);
15016 return DAG.getStoreVP(
15017 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
15018 DAG.getConstant(1, DL, MaskVT),
15019 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
15020 Store->getMemOperand(), Store->getAddressingMode(),
15021 Store->isTruncatingStore(), /*IsCompress*/ false);
15025 break;
15027 case ISD::SPLAT_VECTOR: {
15028 EVT VT = N->getValueType(0);
15029 // Only perform this combine on legal MVT types.
15030 if (!isTypeLegal(VT))
15031 break;
15032 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
15033 DAG, Subtarget))
15034 return Gather;
15035 break;
15037 case ISD::BUILD_VECTOR:
15038 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
15039 return V;
15040 break;
15041 case ISD::CONCAT_VECTORS:
15042 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
15043 return V;
15044 break;
15045 case RISCVISD::VFMV_V_F_VL: {
15046 const MVT VT = N->getSimpleValueType(0);
15047 SDValue Passthru = N->getOperand(0);
15048 SDValue Scalar = N->getOperand(1);
15049 SDValue VL = N->getOperand(2);
15051 // If VL is 1, we can use vfmv.s.f.
15052 if (isOneConstant(VL))
15053 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
15054 break;
15056 case RISCVISD::VMV_V_X_VL: {
15057 const MVT VT = N->getSimpleValueType(0);
15058 SDValue Passthru = N->getOperand(0);
15059 SDValue Scalar = N->getOperand(1);
15060 SDValue VL = N->getOperand(2);
15062 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
15063 // scalar input.
15064 unsigned ScalarSize = Scalar.getValueSizeInBits();
15065 unsigned EltWidth = VT.getScalarSizeInBits();
15066 if (ScalarSize > EltWidth && Passthru.isUndef())
15067 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
15068 return SDValue(N, 0);
15070     // If VL is 1 and the scalar value won't benefit from an immediate, we can
15071     // use vmv.s.x.
15072 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15073 if (isOneConstant(VL) &&
15074 (!Const || Const->isZero() ||
15075 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
15076 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
15078 break;
15080 case RISCVISD::VFMV_S_F_VL: {
15081 SDValue Src = N->getOperand(1);
15082 // Try to remove vector->scalar->vector if the scalar->vector is inserting
15083 // into an undef vector.
15084 // TODO: Could use a vslide or vmv.v.v for non-undef.
15085 if (N->getOperand(0).isUndef() &&
15086 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15087 isNullConstant(Src.getOperand(1)) &&
15088 Src.getOperand(0).getValueType().isScalableVector()) {
15089 EVT VT = N->getValueType(0);
15090 EVT SrcVT = Src.getOperand(0).getValueType();
15091 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
15092 // Widths match, just return the original vector.
15093 if (SrcVT == VT)
15094 return Src.getOperand(0);
15095 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
15097 [[fallthrough]];
15099 case RISCVISD::VMV_S_X_VL: {
15100 const MVT VT = N->getSimpleValueType(0);
15101 SDValue Passthru = N->getOperand(0);
15102 SDValue Scalar = N->getOperand(1);
15103 SDValue VL = N->getOperand(2);
15105     // Use M1 or smaller to avoid over-constraining register allocation.
15106 const MVT M1VT = getLMUL1VT(VT);
15107 if (M1VT.bitsLT(VT)) {
15108 SDValue M1Passthru =
15109 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
15110 DAG.getVectorIdxConstant(0, DL));
15111 SDValue Result =
15112 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
15113 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
15114 DAG.getConstant(0, DL, XLenVT));
15115 return Result;
15118 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
15119 // higher would involve overly constraining the register allocator for
15120 // no purpose.
15121 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15122 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
15123 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
15124 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
15126 break;
15128 case ISD::INTRINSIC_VOID:
15129 case ISD::INTRINSIC_W_CHAIN:
15130 case ISD::INTRINSIC_WO_CHAIN: {
15131 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
15132 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
15133 switch (IntNo) {
15134 // By default we do not combine any intrinsic.
15135 default:
15136 return SDValue();
15137 case Intrinsic::riscv_masked_strided_load: {
15138 MVT VT = N->getSimpleValueType(0);
15139 auto *Load = cast<MemIntrinsicSDNode>(N);
15140 SDValue PassThru = N->getOperand(2);
15141 SDValue Base = N->getOperand(3);
15142 SDValue Stride = N->getOperand(4);
15143 SDValue Mask = N->getOperand(5);
15145 // If the stride is equal to the element size in bytes, we can use
15146 // a masked.load.
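      // For example, a strided load of e32 elements with a 4-byte stride reads
      // consecutive words, so it is equivalent to a unit-stride masked load
      // and is rewritten as such below.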
15147 const unsigned ElementSize = VT.getScalarStoreSize();
15148 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
15149 StrideC && StrideC->getZExtValue() == ElementSize)
15150 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
15151 DAG.getUNDEF(XLenVT), Mask, PassThru,
15152 Load->getMemoryVT(), Load->getMemOperand(),
15153 ISD::UNINDEXED, ISD::NON_EXTLOAD);
15154 return SDValue();
15156 case Intrinsic::riscv_masked_strided_store: {
15157 auto *Store = cast<MemIntrinsicSDNode>(N);
15158 SDValue Value = N->getOperand(2);
15159 SDValue Base = N->getOperand(3);
15160 SDValue Stride = N->getOperand(4);
15161 SDValue Mask = N->getOperand(5);
15163 // If the stride is equal to the element size in bytes, we can use
15164 // a masked.store.
15165 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
15166 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
15167 StrideC && StrideC->getZExtValue() == ElementSize)
15168 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
15169 DAG.getUNDEF(XLenVT), Mask,
15170 Store->getMemoryVT(), Store->getMemOperand(),
15171 ISD::UNINDEXED, false);
15172 return SDValue();
15174 case Intrinsic::riscv_vcpop:
15175 case Intrinsic::riscv_vcpop_mask:
15176 case Intrinsic::riscv_vfirst:
15177 case Intrinsic::riscv_vfirst_mask: {
15178 SDValue VL = N->getOperand(2);
15179 if (IntNo == Intrinsic::riscv_vcpop_mask ||
15180 IntNo == Intrinsic::riscv_vfirst_mask)
15181 VL = N->getOperand(3);
15182 if (!isNullConstant(VL))
15183 return SDValue();
15184 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
15185 SDLoc DL(N);
15186 EVT VT = N->getValueType(0);
15187 if (IntNo == Intrinsic::riscv_vfirst ||
15188 IntNo == Intrinsic::riscv_vfirst_mask)
15189 return DAG.getConstant(-1, DL, VT);
15190 return DAG.getConstant(0, DL, VT);
15194 case ISD::BITCAST: {
15195 assert(Subtarget.useRVVForFixedLengthVectors());
15196 SDValue N0 = N->getOperand(0);
15197 EVT VT = N->getValueType(0);
15198 EVT SrcVT = N0.getValueType();
15199 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
15200 // type, widen both sides to avoid a trip through memory.
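    // For example, (i4 (bitcast (v4i1 X))) becomes
    //   (i4 (trunc (i8 (bitcast (v8i1 (concat_vectors X, undef))))))
    // so the cast stays in registers.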
15201 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
15202 VT.isScalarInteger()) {
15203 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
15204 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
15205 Ops[0] = N0;
15206 SDLoc DL(N);
15207 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
15208 N0 = DAG.getBitcast(MVT::i8, N0);
15209 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
15212 return SDValue();
15216 return SDValue();
15219 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
15220 EVT XVT, unsigned KeptBits) const {
15221   // For vectors, we don't have a preference.
15222 if (XVT.isVector())
15223 return false;
15225 if (XVT != MVT::i32 && XVT != MVT::i64)
15226 return false;
15228 // We can use sext.w for RV64 or an srai 31 on RV32.
15229 if (KeptBits == 32 || KeptBits == 64)
15230 return true;
15232 // With Zbb we can use sext.h/sext.b.
15233 return Subtarget.hasStdExtZbb() &&
15234 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
15235 KeptBits == 16);
15238 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
15239 const SDNode *N, CombineLevel Level) const {
15240 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
15241 N->getOpcode() == ISD::SRL) &&
15242 "Expected shift op");
15244 // The following folds are only desirable if `(OP _, c1 << c2)` can be
15245 // materialised in fewer instructions than `(OP _, c1)`:
15247 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
15248 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
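  // Illustrative example (assuming the simm12 addi immediate range):
  //   (shl (add x, 37), 2) -> (add (shl x, 2), 148)
  // is desirable since 148 is still a legal add immediate, whereas
  //   (shl (add x, 2047), 4) -> (add (shl x, 4), 32752)
  // is rejected below because 2047 is free to materialise but 32752 is not.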
15249 SDValue N0 = N->getOperand(0);
15250 EVT Ty = N0.getValueType();
15251 if (Ty.isScalarInteger() &&
15252 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
15253 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15254 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15255 if (C1 && C2) {
15256 const APInt &C1Int = C1->getAPIntValue();
15257 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
15259 // We can materialise `c1 << c2` into an add immediate, so it's "free",
15260 // and the combine should happen, to potentially allow further combines
15261 // later.
15262 if (ShiftedC1Int.getSignificantBits() <= 64 &&
15263 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
15264 return true;
15266 // We can materialise `c1` in an add immediate, so it's "free", and the
15267 // combine should be prevented.
15268 if (C1Int.getSignificantBits() <= 64 &&
15269 isLegalAddImmediate(C1Int.getSExtValue()))
15270 return false;
15272 // Neither constant will fit into an immediate, so find materialisation
15273 // costs.
15274 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
15275 Subtarget.getFeatureBits(),
15276 /*CompressionCost*/true);
15277 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
15278 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
15279 /*CompressionCost*/true);
15281 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
15282 // combine should be prevented.
15283 if (C1Cost < ShiftedC1Cost)
15284 return false;
15287 return true;
15290 bool RISCVTargetLowering::targetShrinkDemandedConstant(
15291 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
15292 TargetLoweringOpt &TLO) const {
15293 // Delay this optimization as late as possible.
15294 if (!TLO.LegalOps)
15295 return false;
15297 EVT VT = Op.getValueType();
15298 if (VT.isVector())
15299 return false;
15301 unsigned Opcode = Op.getOpcode();
15302 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
15303 return false;
15305 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15306 if (!C)
15307 return false;
15309 const APInt &Mask = C->getAPIntValue();
15311 // Clear all non-demanded bits initially.
15312 APInt ShrunkMask = Mask & DemandedBits;
15314 // Try to make a smaller immediate by setting undemanded bits.
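  // Illustrative example: for (and X, 0xFFFC) where only bits 2..15 are
  // demanded, the undemanded bits allow the mask to be widened to 0xFFFF,
  // which can be matched as zext.h (or slli+srli) instead of materialising
  // 0xFFFC with lui+addi.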
15316 APInt ExpandedMask = Mask | ~DemandedBits;
15318 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
15319 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
15321 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
15322 if (NewMask == Mask)
15323 return true;
15324 SDLoc DL(Op);
15325 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
15326 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
15327 Op.getOperand(0), NewC);
15328 return TLO.CombineTo(Op, NewOp);
15331 // If the shrunk mask fits in sign extended 12 bits, let the target
15332 // independent code apply it.
15333 if (ShrunkMask.isSignedIntN(12))
15334 return false;
15336   // AND has a few special cases for zext.
15337 if (Opcode == ISD::AND) {
15338 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
15339 // otherwise use SLLI + SRLI.
15340 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
15341 if (IsLegalMask(NewMask))
15342 return UseMask(NewMask);
15344 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
15345 if (VT == MVT::i64) {
15346 APInt NewMask = APInt(64, 0xffffffff);
15347 if (IsLegalMask(NewMask))
15348 return UseMask(NewMask);
15352 // For the remaining optimizations, we need to be able to make a negative
15353 // number through a combination of mask and undemanded bits.
15354 if (!ExpandedMask.isNegative())
15355 return false;
15357   // Determine the fewest number of bits we need to represent the negative number.
15358 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
15360   // Try to make a 12-bit negative immediate. If that fails, try to make a
15361   // 32-bit negative immediate unless the shrunk immediate already fits in 32 bits.
15362   // If we can't create a simm12, we shouldn't change opaque constants.
15363 APInt NewMask = ShrunkMask;
15364 if (MinSignedBits <= 12)
15365 NewMask.setBitsFrom(11);
15366 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
15367 NewMask.setBitsFrom(31);
15368 else
15369 return false;
15371 // Check that our new mask is a subset of the demanded mask.
15372 assert(IsLegalMask(NewMask));
15373 return UseMask(NewMask);
15376 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
15377 static const uint64_t GREVMasks[] = {
15378 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
15379 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
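  // With ShAmt == 7 the three low stages swap at distances 1, 2 and 4, which
  // reverses the bits within each byte (brev8). The IsGORC form ORs the
  // original value back in at every stage, so ShAmt == 7 instead sets all
  // bits of any byte that contains a set bit (orc.b).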
15381 for (unsigned Stage = 0; Stage != 6; ++Stage) {
15382 unsigned Shift = 1 << Stage;
15383 if (ShAmt & Shift) {
15384 uint64_t Mask = GREVMasks[Stage];
15385 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
15386 if (IsGORC)
15387 Res |= x;
15388 x = Res;
15392 return x;
15395 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15396 KnownBits &Known,
15397 const APInt &DemandedElts,
15398 const SelectionDAG &DAG,
15399 unsigned Depth) const {
15400 unsigned BitWidth = Known.getBitWidth();
15401 unsigned Opc = Op.getOpcode();
15402 assert((Opc >= ISD::BUILTIN_OP_END ||
15403 Opc == ISD::INTRINSIC_WO_CHAIN ||
15404 Opc == ISD::INTRINSIC_W_CHAIN ||
15405 Opc == ISD::INTRINSIC_VOID) &&
15406 "Should use MaskedValueIsZero if you don't know whether Op"
15407 " is a target node!");
15409 Known.resetAll();
15410 switch (Opc) {
15411 default: break;
15412 case RISCVISD::SELECT_CC: {
15413 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
15414 // If we don't know any bits, early out.
15415 if (Known.isUnknown())
15416 break;
15417 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
15419 // Only known if known in both the LHS and RHS.
15420 Known = Known.intersectWith(Known2);
15421 break;
15423 case RISCVISD::CZERO_EQZ:
15424 case RISCVISD::CZERO_NEZ:
15425 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15426 // Result is either all zero or operand 0. We can propagate zeros, but not
15427 // ones.
15428 Known.One.clearAllBits();
15429 break;
15430 case RISCVISD::REMUW: {
15431 KnownBits Known2;
15432 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
15433 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
15434 // We only care about the lower 32 bits.
15435 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
15436 // Restore the original width by sign extending.
15437 Known = Known.sext(BitWidth);
15438 break;
15440 case RISCVISD::DIVUW: {
15441 KnownBits Known2;
15442 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
15443 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
15444 // We only care about the lower 32 bits.
15445 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
15446 // Restore the original width by sign extending.
15447 Known = Known.sext(BitWidth);
15448 break;
15450 case RISCVISD::CTZW: {
15451 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15452 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
15453 unsigned LowBits = llvm::bit_width(PossibleTZ);
15454 Known.Zero.setBitsFrom(LowBits);
15455 break;
15457 case RISCVISD::CLZW: {
15458 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15459 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
15460 unsigned LowBits = llvm::bit_width(PossibleLZ);
15461 Known.Zero.setBitsFrom(LowBits);
15462 break;
15464 case RISCVISD::BREV8:
15465 case RISCVISD::ORC_B: {
15466 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
15467 // control value of 7 is equivalent to brev8 and orc.b.
15468 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15469 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
15470 // To compute zeros, we need to invert the value and invert it back after.
15471 Known.Zero =
15472 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
15473 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
15474 break;
15476 case RISCVISD::READ_VLENB: {
15477 // We can use the minimum and maximum VLEN values to bound VLENB. We
15478 // know VLEN must be a power of two.
15479 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
15480 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
15481 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
15482 Known.Zero.setLowBits(Log2_32(MinVLenB));
15483 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
15484 if (MaxVLenB == MinVLenB)
15485 Known.One.setBit(Log2_32(MinVLenB));
15486 break;
15488 case RISCVISD::FPCLASS: {
15489 // fclass will only set one of the low 10 bits.
15490 Known.Zero.setBitsFrom(10);
15491 break;
15493 case ISD::INTRINSIC_W_CHAIN:
15494 case ISD::INTRINSIC_WO_CHAIN: {
15495 unsigned IntNo =
15496 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
15497 switch (IntNo) {
15498 default:
15499 // We can't do anything for most intrinsics.
15500 break;
15501 case Intrinsic::riscv_vsetvli:
15502 case Intrinsic::riscv_vsetvlimax:
15503       // Assume that VL output is <= 65536.
15504 // TODO: Take SEW and LMUL into account.
15505 if (BitWidth > 17)
15506 Known.Zero.setBitsFrom(17);
15507 break;
15509 break;
15514 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
15515 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
15516 unsigned Depth) const {
15517 switch (Op.getOpcode()) {
15518 default:
15519 break;
15520 case RISCVISD::SELECT_CC: {
15521 unsigned Tmp =
15522 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
15523 if (Tmp == 1) return 1; // Early out.
15524 unsigned Tmp2 =
15525 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
15526 return std::min(Tmp, Tmp2);
15528 case RISCVISD::CZERO_EQZ:
15529 case RISCVISD::CZERO_NEZ:
15530 // Output is either all zero or operand 0. We can propagate sign bit count
15531 // from operand 0.
15532 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
15533 case RISCVISD::ABSW: {
15534 // We expand this at isel to negw+max. The result will have 33 sign bits
15535 // if the input has at least 33 sign bits.
15536 unsigned Tmp =
15537 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
15538 if (Tmp < 33) return 1;
15539 return 33;
15541 case RISCVISD::SLLW:
15542 case RISCVISD::SRAW:
15543 case RISCVISD::SRLW:
15544 case RISCVISD::DIVW:
15545 case RISCVISD::DIVUW:
15546 case RISCVISD::REMUW:
15547 case RISCVISD::ROLW:
15548 case RISCVISD::RORW:
15549 case RISCVISD::FCVT_W_RV64:
15550 case RISCVISD::FCVT_WU_RV64:
15551 case RISCVISD::STRICT_FCVT_W_RV64:
15552 case RISCVISD::STRICT_FCVT_WU_RV64:
15553 // TODO: As the result is sign-extended, this is conservatively correct. A
15554 // more precise answer could be calculated for SRAW depending on known
15555 // bits in the shift amount.
15556 return 33;
15557 case RISCVISD::VMV_X_S: {
15558 // The number of sign bits of the scalar result is computed by obtaining the
15559 // element type of the input vector operand, subtracting its width from the
15560 // XLEN, and then adding one (sign bit within the element type). If the
15561 // element type is wider than XLen, the least-significant XLEN bits are
15562 // taken.
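    // For example, with XLEN == 64 and an i8 element type the result is known
    // to have 64 - 8 + 1 = 57 sign bits.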
15563 unsigned XLen = Subtarget.getXLen();
15564 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
15565 if (EltBits <= XLen)
15566 return XLen - EltBits + 1;
15567 break;
15569 case ISD::INTRINSIC_W_CHAIN: {
15570 unsigned IntNo = Op.getConstantOperandVal(1);
15571 switch (IntNo) {
15572 default:
15573 break;
15574 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
15575 case Intrinsic::riscv_masked_atomicrmw_add_i64:
15576 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
15577 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
15578 case Intrinsic::riscv_masked_atomicrmw_max_i64:
15579 case Intrinsic::riscv_masked_atomicrmw_min_i64:
15580 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
15581 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
15582 case Intrinsic::riscv_masked_cmpxchg_i64:
15583 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
15584 // narrow atomic operation. These are implemented using atomic
15585 // operations at the minimum supported atomicrmw/cmpxchg width whose
15586       // operations at the minimum supported atomicrmw/cmpxchg width whose
15587       // result is then sign-extended to XLEN. With +A, the minimum width is
15588       // 32 for both RV64 and RV32.
15588 assert(Subtarget.getXLen() == 64);
15589 assert(getMinCmpXchgSizeInBits() == 32);
15590 assert(Subtarget.hasStdExtA());
15591 return 33;
15596 return 1;
15599 const Constant *
15600 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
15601 assert(Ld && "Unexpected null LoadSDNode");
15602 if (!ISD::isNormalLoad(Ld))
15603 return nullptr;
15605 SDValue Ptr = Ld->getBasePtr();
15607 // Only constant pools with no offset are supported.
15608 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
15609 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
15610 if (!CNode || CNode->isMachineConstantPoolEntry() ||
15611 CNode->getOffset() != 0)
15612 return nullptr;
15614 return CNode;
15617 // Simple case, LLA.
15618 if (Ptr.getOpcode() == RISCVISD::LLA) {
15619 auto *CNode = GetSupportedConstantPool(Ptr);
15620 if (!CNode || CNode->getTargetFlags() != 0)
15621 return nullptr;
15623 return CNode->getConstVal();
15626 // Look for a HI and ADD_LO pair.
15627 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
15628 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
15629 return nullptr;
15631 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
15632 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
15634 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
15635 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
15636 return nullptr;
15638 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
15639 return nullptr;
15641 return CNodeLo->getConstVal();
15644 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
15645 MachineBasicBlock *BB) {
15646 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
15648 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
15649 // Should the count have wrapped while it was being read, we need to try
15650 // again.
15651 // ...
15652 // read:
15653 // rdcycleh x3 # load high word of cycle
15654 // rdcycle x2 # load low word of cycle
15655 // rdcycleh x4 # load high word of cycle
15656 // bne x3, x4, read # check if high word reads match, otherwise try again
15657 // ...
15659 MachineFunction &MF = *BB->getParent();
15660 const BasicBlock *LLVM_BB = BB->getBasicBlock();
15661 MachineFunction::iterator It = ++BB->getIterator();
15663 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
15664 MF.insert(It, LoopMBB);
15666 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
15667 MF.insert(It, DoneMBB);
15669 // Transfer the remainder of BB and its successor edges to DoneMBB.
15670 DoneMBB->splice(DoneMBB->begin(), BB,
15671 std::next(MachineBasicBlock::iterator(MI)), BB->end());
15672 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
15674 BB->addSuccessor(LoopMBB);
15676 MachineRegisterInfo &RegInfo = MF.getRegInfo();
15677 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
15678 Register LoReg = MI.getOperand(0).getReg();
15679 Register HiReg = MI.getOperand(1).getReg();
15680 DebugLoc DL = MI.getDebugLoc();
15682 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
15683 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
15684 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
15685 .addReg(RISCV::X0);
15686 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
15687 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
15688 .addReg(RISCV::X0);
15689 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
15690 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
15691 .addReg(RISCV::X0);
15693 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
15694 .addReg(HiReg)
15695 .addReg(ReadAgainReg)
15696 .addMBB(LoopMBB);
15698 LoopMBB->addSuccessor(LoopMBB);
15699 LoopMBB->addSuccessor(DoneMBB);
15701 MI.eraseFromParent();
15703 return DoneMBB;
15706 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
15707 MachineBasicBlock *BB,
15708 const RISCVSubtarget &Subtarget) {
15709 assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
15710 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
15711 "Unexpected instruction");
15713 MachineFunction &MF = *BB->getParent();
15714 DebugLoc DL = MI.getDebugLoc();
15715 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
15716 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
15717 Register LoReg = MI.getOperand(0).getReg();
15718 Register HiReg = MI.getOperand(1).getReg();
15719 Register SrcReg = MI.getOperand(2).getReg();
15721 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
15722 ? &RISCV::GPRPF64RegClass
15723 : &RISCV::FPR64RegClass;
15724 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
15726 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
15727 RI, Register());
15728 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
15729 MachineMemOperand *MMOLo =
15730 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
15731 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
15732 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
15733 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
15734 .addFrameIndex(FI)
15735 .addImm(0)
15736 .addMemOperand(MMOLo);
15737 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
15738 .addFrameIndex(FI)
15739 .addImm(4)
15740 .addMemOperand(MMOHi);
15741 MI.eraseFromParent(); // The pseudo instruction is gone now.
15742 return BB;
15745 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
15746 MachineBasicBlock *BB,
15747 const RISCVSubtarget &Subtarget) {
15748 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
15749 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
15750 "Unexpected instruction");
15752 MachineFunction &MF = *BB->getParent();
15753 DebugLoc DL = MI.getDebugLoc();
15754 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
15755 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
15756 Register DstReg = MI.getOperand(0).getReg();
15757 Register LoReg = MI.getOperand(1).getReg();
15758 Register HiReg = MI.getOperand(2).getReg();
15760 const TargetRegisterClass *DstRC =
15761 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
15762 : &RISCV::FPR64RegClass;
15763 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
15765 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
15766 MachineMemOperand *MMOLo =
15767 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
15768 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
15769 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
15770 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
15771 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
15772 .addFrameIndex(FI)
15773 .addImm(0)
15774 .addMemOperand(MMOLo);
15775 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
15776 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
15777 .addFrameIndex(FI)
15778 .addImm(4)
15779 .addMemOperand(MMOHi);
15780 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
15781 MI.eraseFromParent(); // The pseudo instruction is gone now.
15782 return BB;
15785 static bool isSelectPseudo(MachineInstr &MI) {
15786 switch (MI.getOpcode()) {
15787 default:
15788 return false;
15789 case RISCV::Select_GPR_Using_CC_GPR:
15790 case RISCV::Select_FPR16_Using_CC_GPR:
15791 case RISCV::Select_FPR16INX_Using_CC_GPR:
15792 case RISCV::Select_FPR32_Using_CC_GPR:
15793 case RISCV::Select_FPR32INX_Using_CC_GPR:
15794 case RISCV::Select_FPR64_Using_CC_GPR:
15795 case RISCV::Select_FPR64INX_Using_CC_GPR:
15796 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
15797 return true;
15801 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
15802 unsigned RelOpcode, unsigned EqOpcode,
15803 const RISCVSubtarget &Subtarget) {
15804 DebugLoc DL = MI.getDebugLoc();
15805 Register DstReg = MI.getOperand(0).getReg();
15806 Register Src1Reg = MI.getOperand(1).getReg();
15807 Register Src2Reg = MI.getOperand(2).getReg();
15808 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
15809 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
15810 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
15812 // Save the current FFLAGS.
15813 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
15815 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
15816 .addReg(Src1Reg)
15817 .addReg(Src2Reg);
15818 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
15819 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
15821 // Restore the FFLAGS.
15822 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
15823 .addReg(SavedFFlags, RegState::Kill);
15825 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
15826 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
15827 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
15828 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
15829 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
15830 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
15832 // Erase the pseudoinstruction.
15833 MI.eraseFromParent();
15834 return BB;
15837 static MachineBasicBlock *
15838 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
15839 MachineBasicBlock *ThisMBB,
15840 const RISCVSubtarget &Subtarget) {
15841   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
15842 // Without this, custom-inserter would have generated:
15844 // A
15845 // | \
15846 // | B
15847 // | /
15848 // C
15849 // | \
15850 // | D
15851 // | /
15852 // E
15854 // A: X = ...; Y = ...
15855 // B: empty
15856 // C: Z = PHI [X, A], [Y, B]
15857 // D: empty
15858 // E: PHI [X, C], [Z, D]
15860 // If we lower both Select_FPRX_ in a single step, we can instead generate:
15862 // A
15863 // | \
15864 // | C
15865 // | /|
15866 // |/ |
15867 // | |
15868 // | D
15869 // | /
15870 // E
15872 // A: X = ...; Y = ...
15873 // D: empty
15874 // E: PHI [X, A], [X, C], [Y, D]
15876 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
15877 const DebugLoc &DL = First.getDebugLoc();
15878 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
15879 MachineFunction *F = ThisMBB->getParent();
15880 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
15881 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
15882 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
15883 MachineFunction::iterator It = ++ThisMBB->getIterator();
15884 F->insert(It, FirstMBB);
15885 F->insert(It, SecondMBB);
15886 F->insert(It, SinkMBB);
15888 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
15889 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
15890 std::next(MachineBasicBlock::iterator(First)),
15891 ThisMBB->end());
15892 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
15894 // Fallthrough block for ThisMBB.
15895 ThisMBB->addSuccessor(FirstMBB);
15896 // Fallthrough block for FirstMBB.
15897 FirstMBB->addSuccessor(SecondMBB);
15898 ThisMBB->addSuccessor(SinkMBB);
15899 FirstMBB->addSuccessor(SinkMBB);
15900 // This is fallthrough.
15901 SecondMBB->addSuccessor(SinkMBB);
15903 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
15904 Register FLHS = First.getOperand(1).getReg();
15905 Register FRHS = First.getOperand(2).getReg();
15906 // Insert appropriate branch.
15907 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
15908 .addReg(FLHS)
15909 .addReg(FRHS)
15910 .addMBB(SinkMBB);
15912 Register SLHS = Second.getOperand(1).getReg();
15913 Register SRHS = Second.getOperand(2).getReg();
15914 Register Op1Reg4 = First.getOperand(4).getReg();
15915 Register Op1Reg5 = First.getOperand(5).getReg();
15917 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
15918 // Insert appropriate branch.
15919 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
15920 .addReg(SLHS)
15921 .addReg(SRHS)
15922 .addMBB(SinkMBB);
15924 Register DestReg = Second.getOperand(0).getReg();
15925 Register Op2Reg4 = Second.getOperand(4).getReg();
15926 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
15927 .addReg(Op2Reg4)
15928 .addMBB(ThisMBB)
15929 .addReg(Op1Reg4)
15930 .addMBB(FirstMBB)
15931 .addReg(Op1Reg5)
15932 .addMBB(SecondMBB);
15934 // Now remove the Select_FPRX_s.
15935 First.eraseFromParent();
15936 Second.eraseFromParent();
15937 return SinkMBB;
15940 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
15941 MachineBasicBlock *BB,
15942 const RISCVSubtarget &Subtarget) {
15943 // To "insert" Select_* instructions, we actually have to insert the triangle
15944 // control-flow pattern. The incoming instructions know the destination vreg
15945 // to set, the condition code register to branch on, the true/false values to
15946 // select between, and the condcode to use to select the appropriate branch.
15948 // We produce the following control flow:
15949 // HeadMBB
15950 // | \
15951 // | IfFalseMBB
15952 // | /
15953 // TailMBB
15955 // When we find a sequence of selects we attempt to optimize their emission
15956 // by sharing the control flow. Currently we only handle cases where we have
15957 // multiple selects with the exact same condition (same LHS, RHS and CC).
15958 // The selects may be interleaved with other instructions if the other
15959 // instructions meet some requirements we deem safe:
15960   // - They are debug instructions; otherwise,
15961   // - They are not pseudo instructions, and
15962   // - They do not have side-effects, do not access memory, and their inputs do
15963   //   not depend on the results of the select pseudo-instructions.
15964 // The TrueV/FalseV operands of the selects cannot depend on the result of
15965 // previous selects in the sequence.
15966 // These conditions could be further relaxed. See the X86 target for a
15967 // related approach and more information.
15969 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
15970 // is checked here and handled by a separate function -
15971 // EmitLoweredCascadedSelect.
15972 Register LHS = MI.getOperand(1).getReg();
15973 Register RHS = MI.getOperand(2).getReg();
15974 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
15976 SmallVector<MachineInstr *, 4> SelectDebugValues;
15977 SmallSet<Register, 4> SelectDests;
15978 SelectDests.insert(MI.getOperand(0).getReg());
15980 MachineInstr *LastSelectPseudo = &MI;
15981 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
15982 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
15983 Next->getOpcode() == MI.getOpcode() &&
15984 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
15985 Next->getOperand(5).isKill()) {
15986 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
15989 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
15990 SequenceMBBI != E; ++SequenceMBBI) {
15991 if (SequenceMBBI->isDebugInstr())
15992 continue;
15993 if (isSelectPseudo(*SequenceMBBI)) {
15994 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
15995 SequenceMBBI->getOperand(2).getReg() != RHS ||
15996 SequenceMBBI->getOperand(3).getImm() != CC ||
15997 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
15998 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
15999 break;
16000 LastSelectPseudo = &*SequenceMBBI;
16001 SequenceMBBI->collectDebugValues(SelectDebugValues);
16002 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
16003 continue;
16005 if (SequenceMBBI->hasUnmodeledSideEffects() ||
16006 SequenceMBBI->mayLoadOrStore() ||
16007 SequenceMBBI->usesCustomInsertionHook())
16008 break;
16009 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
16010 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
16012 break;
16015 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16016 const BasicBlock *LLVM_BB = BB->getBasicBlock();
16017 DebugLoc DL = MI.getDebugLoc();
16018 MachineFunction::iterator I = ++BB->getIterator();
16020 MachineBasicBlock *HeadMBB = BB;
16021 MachineFunction *F = BB->getParent();
16022 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
16023 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
16025 F->insert(I, IfFalseMBB);
16026 F->insert(I, TailMBB);
16028 // Transfer debug instructions associated with the selects to TailMBB.
16029 for (MachineInstr *DebugInstr : SelectDebugValues) {
16030 TailMBB->push_back(DebugInstr->removeFromParent());
16033 // Move all instructions after the sequence to TailMBB.
16034 TailMBB->splice(TailMBB->end(), HeadMBB,
16035 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
16036 // Update machine-CFG edges by transferring all successors of the current
16037 // block to the new block which will contain the Phi nodes for the selects.
16038 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
16039 // Set the successors for HeadMBB.
16040 HeadMBB->addSuccessor(IfFalseMBB);
16041 HeadMBB->addSuccessor(TailMBB);
16043 // Insert appropriate branch.
16044 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
16045 .addReg(LHS)
16046 .addReg(RHS)
16047 .addMBB(TailMBB);
16049 // IfFalseMBB just falls through to TailMBB.
16050 IfFalseMBB->addSuccessor(TailMBB);
16052 // Create PHIs for all of the select pseudo-instructions.
16053 auto SelectMBBI = MI.getIterator();
16054 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
16055 auto InsertionPoint = TailMBB->begin();
16056 while (SelectMBBI != SelectEnd) {
16057 auto Next = std::next(SelectMBBI);
16058 if (isSelectPseudo(*SelectMBBI)) {
16059 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
16060 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
16061 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
16062 .addReg(SelectMBBI->getOperand(4).getReg())
16063 .addMBB(HeadMBB)
16064 .addReg(SelectMBBI->getOperand(5).getReg())
16065 .addMBB(IfFalseMBB);
16066 SelectMBBI->eraseFromParent();
16068 SelectMBBI = Next;
16071 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
16072 return TailMBB;
16075 static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
16076 unsigned Opcode) {
16077 DebugLoc DL = MI.getDebugLoc();
16079 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16081 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16082 Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16084 assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
16085 unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
16087 // Update FRM and save the old value.
16088 BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
16089 .addImm(MI.getOperand(FRMIdx).getImm());
16091   // Emit a VFCVT with FRM == DYN.
16092 auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
16094 for (unsigned I = 0; I < MI.getNumOperands(); I++)
16095 if (I != FRMIdx)
16096 MIB = MIB.add(MI.getOperand(I));
16097 else
16098 MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
16100 MIB.add(MachineOperand::CreateReg(RISCV::FRM,
16101 /*IsDef*/ false,
16102 /*IsImp*/ true));
16104 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16105 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16107 // Restore FRM.
16108 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
16109 .addReg(SavedFRM, RegState::Kill);
16111 // Erase the pseudoinstruction.
16112 MI.eraseFromParent();
16113 return BB;
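// Expand PseudoVFROUND_NOEXCEPT_*_MASK: save FFLAGS, round by converting the
// masked source to integer (CVTXOpc) and back to FP (CVTFOpc) with frm == DYN,
// then restore FFLAGS so the int<->FP round trip leaves no exception flags
// visible.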
16116 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
16117 MachineBasicBlock *BB,
16118 unsigned CVTXOpc,
16119 unsigned CVTFOpc) {
16120 DebugLoc DL = MI.getDebugLoc();
16122 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16124 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16125 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16127 // Save the old value of FFLAGS.
16128 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
16130 assert(MI.getNumOperands() == 7);
16132 // Emit a VFCVT_X_F
16133 const TargetRegisterInfo *TRI =
16134 BB->getParent()->getSubtarget().getRegisterInfo();
16135 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
16136 Register Tmp = MRI.createVirtualRegister(RC);
16137 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
16138 .add(MI.getOperand(1))
16139 .add(MI.getOperand(2))
16140 .add(MI.getOperand(3))
16141 .add(MachineOperand::CreateImm(7)) // frm = DYN
16142 .add(MI.getOperand(4))
16143 .add(MI.getOperand(5))
16144 .add(MI.getOperand(6))
16145 .add(MachineOperand::CreateReg(RISCV::FRM,
16146 /*IsDef*/ false,
16147 /*IsImp*/ true));
16149 // Emit a VFCVT_F_X
16150 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
16151 .add(MI.getOperand(0))
16152 .add(MI.getOperand(1))
16153 .addReg(Tmp)
16154 .add(MI.getOperand(3))
16155 .add(MachineOperand::CreateImm(7)) // frm = DYN
16156 .add(MI.getOperand(4))
16157 .add(MI.getOperand(5))
16158 .add(MI.getOperand(6))
16159 .add(MachineOperand::CreateReg(RISCV::FRM,
16160 /*IsDef*/ false,
16161 /*IsImp*/ true));
16163 // Restore FFLAGS.
16164 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
16165 .addReg(SavedFFLAGS, RegState::Kill);
16167 // Erase the pseudoinstruction.
16168 MI.eraseFromParent();
16169 return BB;
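// Expand a PseudoFROUND_* into a compare-and-branch around a scalar
// int<->FP round trip. Roughly (register names are illustrative only):
//
//   MBB:
//     %fabs = FSGNJXOpc %src, %src       ; |src|
//     %cmp  = CmpOpc    %fabs, %max      ; FLT |src|, max-operand
//     BEQ   %cmp, $x0, DoneMBB           ; compare false (incl. NaN): keep src
//   CvtMBB:
//     %i    = F2IOpc %src, frm
//     %f    = I2FOpc %i, frm
//     %cvt  = FSGNJOpc %f, %src          ; reattach the sign of %src
//   DoneMBB:
//     %dst  = PHI [%src, MBB], [%cvt, CvtMBB]
//
// The opcodes and register class are selected per type by the switch below.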
16172 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
16173 const RISCVSubtarget &Subtarget) {
16174 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
16175 const TargetRegisterClass *RC;
16176 switch (MI.getOpcode()) {
16177 default:
16178 llvm_unreachable("Unexpected opcode");
16179 case RISCV::PseudoFROUND_H:
16180 CmpOpc = RISCV::FLT_H;
16181 F2IOpc = RISCV::FCVT_W_H;
16182 I2FOpc = RISCV::FCVT_H_W;
16183 FSGNJOpc = RISCV::FSGNJ_H;
16184 FSGNJXOpc = RISCV::FSGNJX_H;
16185 RC = &RISCV::FPR16RegClass;
16186 break;
16187 case RISCV::PseudoFROUND_H_INX:
16188 CmpOpc = RISCV::FLT_H_INX;
16189 F2IOpc = RISCV::FCVT_W_H_INX;
16190 I2FOpc = RISCV::FCVT_H_W_INX;
16191 FSGNJOpc = RISCV::FSGNJ_H_INX;
16192 FSGNJXOpc = RISCV::FSGNJX_H_INX;
16193 RC = &RISCV::GPRF16RegClass;
16194 break;
16195 case RISCV::PseudoFROUND_S:
16196 CmpOpc = RISCV::FLT_S;
16197 F2IOpc = RISCV::FCVT_W_S;
16198 I2FOpc = RISCV::FCVT_S_W;
16199 FSGNJOpc = RISCV::FSGNJ_S;
16200 FSGNJXOpc = RISCV::FSGNJX_S;
16201 RC = &RISCV::FPR32RegClass;
16202 break;
16203 case RISCV::PseudoFROUND_S_INX:
16204 CmpOpc = RISCV::FLT_S_INX;
16205 F2IOpc = RISCV::FCVT_W_S_INX;
16206 I2FOpc = RISCV::FCVT_S_W_INX;
16207 FSGNJOpc = RISCV::FSGNJ_S_INX;
16208 FSGNJXOpc = RISCV::FSGNJX_S_INX;
16209 RC = &RISCV::GPRF32RegClass;
16210 break;
16211 case RISCV::PseudoFROUND_D:
16212 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
16213 CmpOpc = RISCV::FLT_D;
16214 F2IOpc = RISCV::FCVT_L_D;
16215 I2FOpc = RISCV::FCVT_D_L;
16216 FSGNJOpc = RISCV::FSGNJ_D;
16217 FSGNJXOpc = RISCV::FSGNJX_D;
16218 RC = &RISCV::FPR64RegClass;
16219 break;
16220 case RISCV::PseudoFROUND_D_INX:
16221 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
16222 CmpOpc = RISCV::FLT_D_INX;
16223 F2IOpc = RISCV::FCVT_L_D_INX;
16224 I2FOpc = RISCV::FCVT_D_L_INX;
16225 FSGNJOpc = RISCV::FSGNJ_D_INX;
16226 FSGNJXOpc = RISCV::FSGNJX_D_INX;
16227 RC = &RISCV::GPRRegClass;
16228 break;
16231 const BasicBlock *BB = MBB->getBasicBlock();
16232 DebugLoc DL = MI.getDebugLoc();
16233 MachineFunction::iterator I = ++MBB->getIterator();
16235 MachineFunction *F = MBB->getParent();
16236 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
16237 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
16239 F->insert(I, CvtMBB);
16240 F->insert(I, DoneMBB);
16241 // Move all instructions after the sequence to DoneMBB.
16242 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
16243 MBB->end());
16244 // Update machine-CFG edges by transferring all successors of the current
16245 // block to the new block which will contain the Phi node for the result.
16246 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
16247 // Set the successors for MBB.
16248 MBB->addSuccessor(CvtMBB);
16249 MBB->addSuccessor(DoneMBB);
16251 Register DstReg = MI.getOperand(0).getReg();
16252 Register SrcReg = MI.getOperand(1).getReg();
16253 Register MaxReg = MI.getOperand(2).getReg();
16254 int64_t FRM = MI.getOperand(3).getImm();
16256 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16257 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
16259 Register FabsReg = MRI.createVirtualRegister(RC);
16260 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
16262 // Compare the FP value to the max value.
16263 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16264 auto MIB =
16265 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
16266 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16267 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16269 // Insert branch.
16270 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
16271 .addReg(CmpReg)
16272 .addReg(RISCV::X0)
16273 .addMBB(DoneMBB);
16275 CvtMBB->addSuccessor(DoneMBB);
16277 // Convert to integer.
16278 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16279 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
16280 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16281 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16283 // Convert back to FP.
16284 Register I2FReg = MRI.createVirtualRegister(RC);
16285 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
16286 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16287 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16289 // Restore the sign bit.
16290 Register CvtReg = MRI.createVirtualRegister(RC);
16291 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
16293 // Merge the results.
16294 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
16295 .addReg(SrcReg)
16296 .addMBB(MBB)
16297 .addReg(CvtReg)
16298 .addMBB(CvtMBB);
16300 MI.eraseFromParent();
16301 return DoneMBB;
16304 MachineBasicBlock *
16305 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
16306 MachineBasicBlock *BB) const {
16307 switch (MI.getOpcode()) {
16308 default:
16309 llvm_unreachable("Unexpected instr type to insert");
16310 case RISCV::ReadCycleWide:
16311 assert(!Subtarget.is64Bit() &&
16312 "ReadCycleWrite is only to be used on riscv32");
16313 return emitReadCycleWidePseudo(MI, BB);
16314 case RISCV::Select_GPR_Using_CC_GPR:
16315 case RISCV::Select_FPR16_Using_CC_GPR:
16316 case RISCV::Select_FPR16INX_Using_CC_GPR:
16317 case RISCV::Select_FPR32_Using_CC_GPR:
16318 case RISCV::Select_FPR32INX_Using_CC_GPR:
16319 case RISCV::Select_FPR64_Using_CC_GPR:
16320 case RISCV::Select_FPR64INX_Using_CC_GPR:
16321 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
16322 return emitSelectPseudo(MI, BB, Subtarget);
16323 case RISCV::BuildPairF64Pseudo:
16324 case RISCV::BuildPairF64Pseudo_INX:
16325 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
16326 case RISCV::SplitF64Pseudo:
16327 case RISCV::SplitF64Pseudo_INX:
16328 return emitSplitF64Pseudo(MI, BB, Subtarget);
16329 case RISCV::PseudoQuietFLE_H:
16330 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
16331 case RISCV::PseudoQuietFLE_H_INX:
16332 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
16333 case RISCV::PseudoQuietFLT_H:
16334 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
16335 case RISCV::PseudoQuietFLT_H_INX:
16336 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
16337 case RISCV::PseudoQuietFLE_S:
16338 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
16339 case RISCV::PseudoQuietFLE_S_INX:
16340 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
16341 case RISCV::PseudoQuietFLT_S:
16342 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
16343 case RISCV::PseudoQuietFLT_S_INX:
16344 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
16345 case RISCV::PseudoQuietFLE_D:
16346 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
16347 case RISCV::PseudoQuietFLE_D_INX:
16348 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
16349 case RISCV::PseudoQuietFLE_D_IN32X:
16350 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
16351 Subtarget);
16352 case RISCV::PseudoQuietFLT_D:
16353 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
16354 case RISCV::PseudoQuietFLT_D_INX:
16355 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
16356 case RISCV::PseudoQuietFLT_D_IN32X:
16357 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
16358 Subtarget);
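// The macros below expand one switch case per LMUL (plus the corresponding
// _MASK variant), mapping each rounding-mode (RM) VFCVT pseudo onto
// emitVFCVT_RM with the matching non-RM pseudo opcode.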
16360 #define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
16361 case RISCV::RMOpc##_##LMUL: \
16362 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
16363 case RISCV::RMOpc##_##LMUL##_MASK: \
16364 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
16366 #define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16367 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
16368 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
16369 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
16370 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
16371 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
16373 #define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
16374 PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16375 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
16377 #define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
16378 PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16379 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
16381 // VFCVT
16382 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
16383 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
16384 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
16385 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
16387 // VFWCVT
16388 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
16389 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
16391 // VFNCVT
16392 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
16393 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
16394 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
16395 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
16397 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
16398 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
16399 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
16400 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
16401 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
16402 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
16403 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
16404 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
16405 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
16406 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
16407 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
16408 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
16409 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
16410 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
16411 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
16412 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
16413 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
16414 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
16415 case RISCV::PseudoFROUND_H:
16416 case RISCV::PseudoFROUND_H_INX:
16417 case RISCV::PseudoFROUND_S:
16418 case RISCV::PseudoFROUND_S_INX:
16419 case RISCV::PseudoFROUND_D:
16420 case RISCV::PseudoFROUND_D_INX:
16421 case RISCV::PseudoFROUND_D_IN32X:
16422 return emitFROUND(MI, BB, Subtarget);
16423 case TargetOpcode::STATEPOINT:
16424 case TargetOpcode::STACKMAP:
16425 case TargetOpcode::PATCHPOINT:
16426 if (!Subtarget.is64Bit())
16427 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
16428 "supported on 64-bit targets");
16429 return emitPatchPoint(MI, BB);
16433 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
16434 SDNode *Node) const {
16435 // Add FRM dependency to any instructions with dynamic rounding mode.
16436 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
16437 if (Idx < 0) {
16438 // Vector pseudos have FRM index indicated by TSFlags.
16439 Idx = RISCVII::getFRMOpNum(MI.getDesc());
16440 if (Idx < 0)
16441 return;
16443 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
16444 return;
16445 // If the instruction already reads FRM, don't add another read.
16446 if (MI.readsRegister(RISCV::FRM))
16447 return;
16448 MI.addOperand(
16449 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
16452 // Calling Convention Implementation.
16453 // The expectations for frontend ABI lowering vary from target to target.
16454 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
16455 // details, but this is a longer term goal. For now, we simply try to keep the
16456 // role of the frontend as simple and well-defined as possible. The rules can
16457 // be summarised as:
16458 // * Never split up large scalar arguments. We handle them here.
16459 // * If a hardfloat calling convention is being used, and the struct may be
16460 // passed in a pair of registers (fp+fp, int+fp), and both registers are
16461 // available, then pass as two separate arguments. If either the GPRs or FPRs
16462 // are exhausted, then pass according to the rule below.
16463 // * If a struct could never be passed in registers or directly in a stack
16464 // slot (as it is larger than 2*XLEN and the floating point rules don't
16465 // apply), then pass it using a pointer with the byval attribute.
16466 // * If a struct is less than 2*XLEN, then coerce to either a two-element
16467 // word-sized array or a 2*XLEN scalar (depending on alignment).
16468 // * The frontend can determine whether a struct is returned by reference or
16469 // not based on its size and fields. If it will be returned by reference, the
16470 // frontend must modify the prototype so a pointer with the sret annotation is
16471 // passed as the first argument. This is not necessary for large scalar
16472 // returns.
16473 // * Struct return values and varargs should be coerced to structs containing
16474 // register-size fields in the same situations they would be for fixed
16475 // arguments.
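//
// A purely illustrative example of the rules above (the RISC-V psABI is the
// normative reference): on LP64D, an argument of type
//   struct S { double d; int32_t i; };
// is passed as two separate values (one in an FPR, one in a GPR) while both
// register classes still have free argument registers; once the FPRs or GPRs
// are exhausted it is passed according to the size-based rules above, and a
// struct larger than 2*XLEN that does not qualify for the FP rules is passed
// byval via a pointer.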
16477 static const MCPhysReg ArgGPRs[] = {
16478 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
16479 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
16481 static const MCPhysReg ArgFPR16s[] = {
16482 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
16483 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
16485 static const MCPhysReg ArgFPR32s[] = {
16486 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
16487 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
16489 static const MCPhysReg ArgFPR64s[] = {
16490 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
16491 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
16493 // This is an interim calling convention and it may be changed in the future.
16494 static const MCPhysReg ArgVRs[] = {
16495 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
16496 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
16497 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
16498 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
16499 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
16500 RISCV::V20M2, RISCV::V22M2};
16501 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
16502 RISCV::V20M4};
16503 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
16505 // Pass a 2*XLEN argument that has been split into two XLEN values through
16506 // registers or the stack as necessary.
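// (for example an i64 argument on RV32, or an i128 argument on RV64, already
// split into two XLEN-sized halves).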
16507 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
16508 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
16509 MVT ValVT2, MVT LocVT2,
16510 ISD::ArgFlagsTy ArgFlags2) {
16511 unsigned XLenInBytes = XLen / 8;
16512 if (Register Reg = State.AllocateReg(ArgGPRs)) {
16513 // At least one half can be passed via register.
16514 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
16515 VA1.getLocVT(), CCValAssign::Full));
16516 } else {
16517 // Both halves must be passed on the stack, with proper alignment.
16518 Align StackAlign =
16519 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
16520 State.addLoc(
16521 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
16522 State.AllocateStack(XLenInBytes, StackAlign),
16523 VA1.getLocVT(), CCValAssign::Full));
16524 State.addLoc(CCValAssign::getMem(
16525 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
16526 LocVT2, CCValAssign::Full));
16527 return false;
16530 if (Register Reg = State.AllocateReg(ArgGPRs)) {
16531 // The second half can also be passed via register.
16532 State.addLoc(
16533 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
16534 } else {
16535 // The second half is passed via the stack, without additional alignment.
16536 State.addLoc(CCValAssign::getMem(
16537 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
16538 LocVT2, CCValAssign::Full));
16541 return false;
16544 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
16545 std::optional<unsigned> FirstMaskArgument,
16546 CCState &State, const RISCVTargetLowering &TLI) {
16547 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
16548 if (RC == &RISCV::VRRegClass) {
16549 // Assign the first mask argument to V0.
16550 // This is an interim calling convention and it may be changed in the
16551 // future.
16552 if (FirstMaskArgument && ValNo == *FirstMaskArgument)
16553 return State.AllocateReg(RISCV::V0);
16554 return State.AllocateReg(ArgVRs);
16556 if (RC == &RISCV::VRM2RegClass)
16557 return State.AllocateReg(ArgVRM2s);
16558 if (RC == &RISCV::VRM4RegClass)
16559 return State.AllocateReg(ArgVRM4s);
16560 if (RC == &RISCV::VRM8RegClass)
16561 return State.AllocateReg(ArgVRM8s);
16562 llvm_unreachable("Unhandled register class for ValueType");
16565 // Implements the RISC-V calling convention. Returns true upon failure.
16566 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
16567 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
16568 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
16569 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
16570 std::optional<unsigned> FirstMaskArgument) {
16571 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
16572 assert(XLen == 32 || XLen == 64);
16573 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
16575 // Static chain parameter must not be passed in normal argument registers,
16576 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
16577 if (ArgFlags.isNest()) {
16578 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
16579 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
16580 return false;
16584 // Any return value split into more than two values can't be returned
16585 // directly. Vectors are returned via the available vector registers.
16586 if (!LocVT.isVector() && IsRet && ValNo > 1)
16587 return true;
16589 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing
16590 // a variadic argument, or if no F16/F32 argument registers are available.
16591 bool UseGPRForF16_F32 = true;
16592 // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if
16593 // passing a variadic argument, or if no F64 argument registers are available.
16594 bool UseGPRForF64 = true;
16596 switch (ABI) {
16597 default:
16598 llvm_unreachable("Unexpected ABI");
16599 case RISCVABI::ABI_ILP32:
16600 case RISCVABI::ABI_LP64:
16601 break;
16602 case RISCVABI::ABI_ILP32F:
16603 case RISCVABI::ABI_LP64F:
16604 UseGPRForF16_F32 = !IsFixed;
16605 break;
16606 case RISCVABI::ABI_ILP32D:
16607 case RISCVABI::ABI_LP64D:
16608 UseGPRForF16_F32 = !IsFixed;
16609 UseGPRForF64 = !IsFixed;
16610 break;
16613 // FPR16, FPR32, and FPR64 alias each other.
16614 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
16615 UseGPRForF16_F32 = true;
16616 UseGPRForF64 = true;
16619 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
16620 // similar local variables rather than directly checking against the target
16621 // ABI.
16623 if (UseGPRForF16_F32 &&
16624 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
16625 LocVT = XLenVT;
16626 LocInfo = CCValAssign::BCvt;
16627 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
16628 LocVT = MVT::i64;
16629 LocInfo = CCValAssign::BCvt;
16632 // If this is a variadic argument, the RISC-V calling convention requires
16633 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
16634 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
16635 // be used regardless of whether the original argument was split during
16636 // legalisation or not. The argument will not be passed by registers if the
16637 // original type is larger than 2*XLEN, so the register alignment rule does
16638 // not apply.
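// For example, on ILP32 a variadic double is passed in an aligned (even/odd)
// GPR pair such as a2+a3; if the next free register would be a3, it is
// skipped so that the value starts in a4.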
16639 unsigned TwoXLenInBytes = (2 * XLen) / 8;
16640 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
16641 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
16642 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
16643 // Skip 'odd' register if necessary.
16644 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
16645 State.AllocateReg(ArgGPRs);
16648 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
16649 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
16650 State.getPendingArgFlags();
16652 assert(PendingLocs.size() == PendingArgFlags.size() &&
16653 "PendingLocs and PendingArgFlags out of sync");
16655 // Handle passing f64 on RV32D with a soft float ABI or when floating point
16656 // registers are exhausted.
16657 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
16658 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
16659 // Depending on available argument GPRs, f64 may be passed in a pair of
16660 // GPRs, split between a GPR and the stack, or passed completely on the
16661 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
16662 // cases.
16663 Register Reg = State.AllocateReg(ArgGPRs);
16664 if (!Reg) {
16665 unsigned StackOffset = State.AllocateStack(8, Align(8));
16666 State.addLoc(
16667 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
16668 return false;
16670 LocVT = MVT::i32;
16671 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
16672 Register HiReg = State.AllocateReg(ArgGPRs);
16673 if (HiReg) {
16674 State.addLoc(
16675 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
16676 } else {
16677 unsigned StackOffset = State.AllocateStack(4, Align(4));
16678 State.addLoc(
16679 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
16681 return false;
16684 // Fixed-length vectors are located in the corresponding scalable-vector
16685 // container types.
16686 if (ValVT.isFixedLengthVector())
16687 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
16689 // Split arguments might be passed indirectly, so keep track of the pending
16690 // values. Split vectors are passed via a mix of registers and indirectly, so
16691 // treat them as we would any other argument.
16692 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
16693 LocVT = XLenVT;
16694 LocInfo = CCValAssign::Indirect;
16695 PendingLocs.push_back(
16696 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
16697 PendingArgFlags.push_back(ArgFlags);
16698 if (!ArgFlags.isSplitEnd()) {
16699 return false;
16703 // If the split argument only had two elements, it should be passed directly
16704 // in registers or on the stack.
16705 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
16706 PendingLocs.size() <= 2) {
16707 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
16708 // Apply the normal calling convention rules to the first half of the
16709 // split argument.
16710 CCValAssign VA = PendingLocs[0];
16711 ISD::ArgFlagsTy AF = PendingArgFlags[0];
16712 PendingLocs.clear();
16713 PendingArgFlags.clear();
16714 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
16715 ArgFlags);
16718 // Allocate to a register if possible, or else a stack slot.
16719 Register Reg;
16720 unsigned StoreSizeBytes = XLen / 8;
16721 Align StackAlign = Align(XLen / 8);
16723 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
16724 Reg = State.AllocateReg(ArgFPR16s);
16725 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
16726 Reg = State.AllocateReg(ArgFPR32s);
16727 else if (ValVT == MVT::f64 && !UseGPRForF64)
16728 Reg = State.AllocateReg(ArgFPR64s);
16729 else if (ValVT.isVector()) {
16730 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
16731 if (!Reg) {
16732 // For return values, the vector must be passed fully via registers or
16733 // via the stack.
16734 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
16735 // but we're using all of them.
16736 if (IsRet)
16737 return true;
16738 // Try using a GPR to pass the address
16739 if ((Reg = State.AllocateReg(ArgGPRs))) {
16740 LocVT = XLenVT;
16741 LocInfo = CCValAssign::Indirect;
16742 } else if (ValVT.isScalableVector()) {
16743 LocVT = XLenVT;
16744 LocInfo = CCValAssign::Indirect;
16745 } else {
16746 // Pass fixed-length vectors on the stack.
16747 LocVT = ValVT;
16748 StoreSizeBytes = ValVT.getStoreSize();
16749 // Align vectors to their element sizes, being careful for vXi1
16750 // vectors.
16751 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
16754 } else {
16755 Reg = State.AllocateReg(ArgGPRs);
16758 unsigned StackOffset =
16759 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
16761 // If we reach this point and PendingLocs is non-empty, we must be at the
16762 // end of a split argument that must be passed indirectly.
16763 if (!PendingLocs.empty()) {
16764 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
16765 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
16767 for (auto &It : PendingLocs) {
16768 if (Reg)
16769 It.convertToReg(Reg);
16770 else
16771 It.convertToMem(StackOffset);
16772 State.addLoc(It);
16774 PendingLocs.clear();
16775 PendingArgFlags.clear();
16776 return false;
16779 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
16780 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
16781 "Expected an XLenVT or vector types at this stage");
16783 if (Reg) {
16784 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
16785 return false;
16788 // When a scalar floating-point value is passed on the stack, no
16789 // bit-conversion is needed.
16790 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
16791 assert(!ValVT.isVector());
16792 LocVT = ValVT;
16793 LocInfo = CCValAssign::Full;
16795 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
16796 return false;
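// Return the index of the first argument whose type is a vector of i1 (a mask
// vector), if any. The interim vector calling convention pre-assigns that
// argument to V0 (see allocateRVVReg above).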
16799 template <typename ArgTy>
16800 static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
16801 for (const auto &ArgIdx : enumerate(Args)) {
16802 MVT ArgVT = ArgIdx.value().VT;
16803 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
16804 return ArgIdx.index();
16806 return std::nullopt;
16809 void RISCVTargetLowering::analyzeInputArgs(
16810 MachineFunction &MF, CCState &CCInfo,
16811 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
16812 RISCVCCAssignFn Fn) const {
16813 unsigned NumArgs = Ins.size();
16814 FunctionType *FType = MF.getFunction().getFunctionType();
16816 std::optional<unsigned> FirstMaskArgument;
16817 if (Subtarget.hasVInstructions())
16818 FirstMaskArgument = preAssignMask(Ins);
16820 for (unsigned i = 0; i != NumArgs; ++i) {
16821 MVT ArgVT = Ins[i].VT;
16822 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
16824 Type *ArgTy = nullptr;
16825 if (IsRet)
16826 ArgTy = FType->getReturnType();
16827 else if (Ins[i].isOrigArg())
16828 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
16830 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
16831 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
16832 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
16833 FirstMaskArgument)) {
16834 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
16835 << ArgVT << '\n');
16836 llvm_unreachable(nullptr);
16841 void RISCVTargetLowering::analyzeOutputArgs(
16842 MachineFunction &MF, CCState &CCInfo,
16843 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
16844 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
16845 unsigned NumArgs = Outs.size();
16847 std::optional<unsigned> FirstMaskArgument;
16848 if (Subtarget.hasVInstructions())
16849 FirstMaskArgument = preAssignMask(Outs);
16851 for (unsigned i = 0; i != NumArgs; i++) {
16852 MVT ArgVT = Outs[i].VT;
16853 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
16854 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
16856 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
16857 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
16858 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
16859 FirstMaskArgument)) {
16860 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
16861 << ArgVT << "\n");
16862 llvm_unreachable(nullptr);
16867 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
16868 // values.
16869 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
16870 const CCValAssign &VA, const SDLoc &DL,
16871 const RISCVSubtarget &Subtarget) {
16872 switch (VA.getLocInfo()) {
16873 default:
16874 llvm_unreachable("Unexpected CCValAssign::LocInfo");
16875 case CCValAssign::Full:
16876 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
16877 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
16878 break;
16879 case CCValAssign::BCvt:
16880 if (VA.getLocVT().isInteger() &&
16881 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
16882 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
16883 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
16884 if (RV64LegalI32) {
16885 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
16886 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
16887 } else {
16888 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
16890 } else {
16891 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
16893 break;
16895 return Val;
16898 // The caller is responsible for loading the full value if the argument is
16899 // passed with CCValAssign::Indirect.
16900 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
16901 const CCValAssign &VA, const SDLoc &DL,
16902 const ISD::InputArg &In,
16903 const RISCVTargetLowering &TLI) {
16904 MachineFunction &MF = DAG.getMachineFunction();
16905 MachineRegisterInfo &RegInfo = MF.getRegInfo();
16906 EVT LocVT = VA.getLocVT();
16907 SDValue Val;
16908 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
16909 Register VReg = RegInfo.createVirtualRegister(RC);
16910 RegInfo.addLiveIn(VA.getLocReg(), VReg);
16911 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
16913 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
16914 if (In.isOrigArg()) {
16915 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
16916 if (OrigArg->getType()->isIntegerTy()) {
16917 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
16918 // An input zero-extended from a type narrower than i32 (e.g. i31) can also be considered sign-extended.
16919 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
16920 (BitWidth < 32 && In.Flags.isZExt())) {
16921 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
16922 RVFI->addSExt32Register(VReg);
16927 if (VA.getLocInfo() == CCValAssign::Indirect)
16928 return Val;
16930 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
16933 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
16934 const CCValAssign &VA, const SDLoc &DL,
16935 const RISCVSubtarget &Subtarget) {
16936 EVT LocVT = VA.getLocVT();
16938 switch (VA.getLocInfo()) {
16939 default:
16940 llvm_unreachable("Unexpected CCValAssign::LocInfo");
16941 case CCValAssign::Full:
16942 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
16943 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
16944 break;
16945 case CCValAssign::BCvt:
16946 if (LocVT.isInteger() &&
16947 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
16948 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
16949 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
16950 if (RV64LegalI32) {
16951 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
16952 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
16953 } else {
16954 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
16956 } else {
16957 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
16959 break;
16961 return Val;
16964 // The caller is responsible for loading the full value if the argument is
16965 // passed with CCValAssign::Indirect.
16966 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
16967 const CCValAssign &VA, const SDLoc &DL) {
16968 MachineFunction &MF = DAG.getMachineFunction();
16969 MachineFrameInfo &MFI = MF.getFrameInfo();
16970 EVT LocVT = VA.getLocVT();
16971 EVT ValVT = VA.getValVT();
16972 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
16973 if (ValVT.isScalableVector()) {
16974 // When the value is a scalable vector, the stack slot holds a pointer to
16975 // the vector value rather than the value itself, so load using the pointer
16976 // type (LocVT) instead of the scalable vector type.
16977 ValVT = LocVT;
16979 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
16980 /*IsImmutable=*/true);
16981 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
16982 SDValue Val;
16984 ISD::LoadExtType ExtType;
16985 switch (VA.getLocInfo()) {
16986 default:
16987 llvm_unreachable("Unexpected CCValAssign::LocInfo");
16988 case CCValAssign::Full:
16989 case CCValAssign::Indirect:
16990 case CCValAssign::BCvt:
16991 ExtType = ISD::NON_EXTLOAD;
16992 break;
16994 Val = DAG.getExtLoad(
16995 ExtType, DL, LocVT, Chain, FIN,
16996 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
16997 return Val;
17000 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
17001 const CCValAssign &VA,
17002 const CCValAssign &HiVA,
17003 const SDLoc &DL) {
17004 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
17005 "Unexpected VA");
17006 MachineFunction &MF = DAG.getMachineFunction();
17007 MachineFrameInfo &MFI = MF.getFrameInfo();
17008 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17010 assert(VA.isRegLoc() && "Expected register VA assignment");
17012 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17013 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
17014 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
17015 SDValue Hi;
17016 if (HiVA.isMemLoc()) {
17017 // Second half of f64 is passed on the stack.
17018 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
17019 /*IsImmutable=*/true);
17020 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
17021 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
17022 MachinePointerInfo::getFixedStack(MF, FI));
17023 } else {
17024 // Second half of f64 is passed in another GPR.
17025 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17026 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
17027 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
17029 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
17032 // FastCC gives less than a 1% performance improvement on some particular
17033 // benchmarks, but it may theoretically benefit other cases.
17034 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
17035 unsigned ValNo, MVT ValVT, MVT LocVT,
17036 CCValAssign::LocInfo LocInfo,
17037 ISD::ArgFlagsTy ArgFlags, CCState &State,
17038 bool IsFixed, bool IsRet, Type *OrigTy,
17039 const RISCVTargetLowering &TLI,
17040 std::optional<unsigned> FirstMaskArgument) {
17042 // X5 and X6 might be used for save-restore libcall.
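// FastCC therefore extends the normal a0-a7 argument registers with the
// remaining caller-saved temporaries t2 and t3-t6 (X7, X28-X31).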
17043 static const MCPhysReg GPRList[] = {
17044 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
17045 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
17046 RISCV::X29, RISCV::X30, RISCV::X31};
17048 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17049 if (unsigned Reg = State.AllocateReg(GPRList)) {
17050 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17051 return false;
17055 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
17057 if (LocVT == MVT::f16 &&
17058 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
17059 static const MCPhysReg FPR16List[] = {
17060 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
17061 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
17062 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
17063 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
17064 if (unsigned Reg = State.AllocateReg(FPR16List)) {
17065 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17066 return false;
17070 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17071 static const MCPhysReg FPR32List[] = {
17072 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
17073 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
17074 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
17075 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
17076 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17077 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17078 return false;
17082 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17083 static const MCPhysReg FPR64List[] = {
17084 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
17085 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
17086 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
17087 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
17088 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17089 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17090 return false;
17094 // Check if there is an available GPR before hitting the stack.
17095 if ((LocVT == MVT::f16 &&
17096 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
17097 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17098 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
17099 Subtarget.hasStdExtZdinx())) {
17100 if (unsigned Reg = State.AllocateReg(GPRList)) {
17101 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17102 return false;
17106 if (LocVT == MVT::f16) {
17107 unsigned Offset2 = State.AllocateStack(2, Align(2));
17108 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
17109 return false;
17112 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
17113 unsigned Offset4 = State.AllocateStack(4, Align(4));
17114 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
17115 return false;
17118 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
17119 unsigned Offset5 = State.AllocateStack(8, Align(8));
17120 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
17121 return false;
17124 if (LocVT.isVector()) {
17125 if (unsigned Reg =
17126 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
17127 // Fixed-length vectors are located in the corresponding scalable-vector
17128 // container types.
17129 if (ValVT.isFixedLengthVector())
17130 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
17131 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17132 } else {
17133 // Try and pass the address via a "fast" GPR.
17134 if (unsigned GPRReg = State.AllocateReg(GPRList)) {
17135 LocInfo = CCValAssign::Indirect;
17136 LocVT = TLI.getSubtarget().getXLenVT();
17137 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
17138 } else if (ValVT.isFixedLengthVector()) {
17139 auto StackAlign =
17140 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
17141 unsigned StackOffset =
17142 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
17143 State.addLoc(
17144 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17145 } else {
17146 // Can't pass scalable vectors on the stack.
17147 return true;
17151 return false;
17154 return true; // CC didn't match.
17157 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
17158 CCValAssign::LocInfo LocInfo,
17159 ISD::ArgFlagsTy ArgFlags, CCState &State) {
17160 if (ArgFlags.isNest()) {
17161 report_fatal_error(
17162 "Attribute 'nest' is not supported in GHC calling convention");
17165 static const MCPhysReg GPRList[] = {
17166 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
17167 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
17169 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17170 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
17171 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
17172 if (unsigned Reg = State.AllocateReg(GPRList)) {
17173 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17174 return false;
17178 const RISCVSubtarget &Subtarget =
17179 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
17181 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17182 // Pass in STG registers: F1, ..., F6
17183 // fs0 ... fs5
17184 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
17185 RISCV::F18_F, RISCV::F19_F,
17186 RISCV::F20_F, RISCV::F21_F};
17187 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17188 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17189 return false;
17193 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17194 // Pass in STG registers: D1, ..., D6
17195 // fs6 ... fs11
17196 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
17197 RISCV::F24_D, RISCV::F25_D,
17198 RISCV::F26_D, RISCV::F27_D};
17199 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17200 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17201 return false;
17205 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17206 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
17207 Subtarget.is64Bit())) {
17208 if (unsigned Reg = State.AllocateReg(GPRList)) {
17209 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17210 return false;
17214 report_fatal_error("No registers left in GHC calling convention");
17215 return true;
17218 // Transform physical registers into virtual registers.
17219 SDValue RISCVTargetLowering::LowerFormalArguments(
17220 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
17221 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
17222 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
17224 MachineFunction &MF = DAG.getMachineFunction();
17226 switch (CallConv) {
17227 default:
17228 report_fatal_error("Unsupported calling convention");
17229 case CallingConv::C:
17230 case CallingConv::Fast:
17231 case CallingConv::SPIR_KERNEL:
17232 break;
17233 case CallingConv::GHC:
17234 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
17235 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
17236 "(Zdinx/D) instruction set extensions");
17239 const Function &Func = MF.getFunction();
17240 if (Func.hasFnAttribute("interrupt")) {
17241 if (!Func.arg_empty())
17242 report_fatal_error(
17243 "Functions with the interrupt attribute cannot have arguments!");
17245 StringRef Kind =
17246 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
17248 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
17249 report_fatal_error(
17250 "Function interrupt attribute argument not supported!");
17253 EVT PtrVT = getPointerTy(DAG.getDataLayout());
17254 MVT XLenVT = Subtarget.getXLenVT();
17255 unsigned XLenInBytes = Subtarget.getXLen() / 8;
17256 // Used with varargs to accumulate store chains.
17257 std::vector<SDValue> OutChains;
17259 // Assign locations to all of the incoming arguments.
17260 SmallVector<CCValAssign, 16> ArgLocs;
17261 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
17263 if (CallConv == CallingConv::GHC)
17264 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
17265 else
17266 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
17267 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
17268 : RISCV::CC_RISCV);
17270 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
17271 CCValAssign &VA = ArgLocs[i];
17272 SDValue ArgValue;
17273 // Passing f64 on RV32D with a soft float ABI must be handled as a special
17274 // case.
17275 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17276 assert(VA.needsCustom());
17277 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
17278 } else if (VA.isRegLoc())
17279 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
17280 else
17281 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
17283 if (VA.getLocInfo() == CCValAssign::Indirect) {
17284 // If the original argument was split and passed by reference (e.g. i128
17285 // on RV32), we need to load all parts of it here (using the same
17286 // address). Vectors may be partly split to registers and partly to the
17287 // stack, in which case the base address is partly offset and subsequent
17288 // loads are relative to that.
17289 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
17290 MachinePointerInfo()));
17291 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
17292 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
17293 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
17294 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
17295 CCValAssign &PartVA = ArgLocs[i + 1];
17296 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
17297 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
17298 if (PartVA.getValVT().isScalableVector())
17299 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
17300 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
17301 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
17302 MachinePointerInfo()));
17303 ++i;
17304 ++InsIdx;
17306 continue;
17308 InVals.push_back(ArgValue);
17311 if (any_of(ArgLocs,
17312 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
17313 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
17315 if (IsVarArg) {
17316 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
17317 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
17318 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
17319 MachineFrameInfo &MFI = MF.getFrameInfo();
17320 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17321 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
17323 // Offset of the first variable argument from stack pointer, and size of
17324 // the vararg save area. For now, the varargs save area is either zero or
17325 // large enough to hold a0-a7.
17326 int VaArgOffset, VarArgsSaveSize;
17328 // If all registers are allocated, then all varargs must be passed on the
17329 // stack and we don't need to save any argregs.
17330 if (ArgRegs.size() == Idx) {
17331 VaArgOffset = CCInfo.getStackSize();
17332 VarArgsSaveSize = 0;
17333 } else {
17334 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
17335 VaArgOffset = -VarArgsSaveSize;
17338 // Record the frame index of the first variable argument,
17339 // which is needed when lowering VASTART.
17340 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
17341 RVFI->setVarArgsFrameIndex(FI);
17343 // If saving an odd number of registers then create an extra stack slot to
17344 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
17345 // offsets to even-numbered registers remain 2*XLEN-aligned.
17346 if (Idx % 2) {
17347 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
17348 VarArgsSaveSize += XLenInBytes;
17351 // Copy the integer registers that may have been used for passing varargs
17352 // to the vararg save area.
17353 for (unsigned I = Idx; I < ArgRegs.size();
17354 ++I, VaArgOffset += XLenInBytes) {
17355 const Register Reg = RegInfo.createVirtualRegister(RC);
17356 RegInfo.addLiveIn(ArgRegs[I], Reg);
17357 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
17358 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
17359 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
17360 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
17361 MachinePointerInfo::getFixedStack(MF, FI));
17362 cast<StoreSDNode>(Store.getNode())
17363 ->getMemOperand()
17364 ->setValue((Value *)nullptr);
17365 OutChains.push_back(Store);
17367 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
17370 // All stores are grouped in one node to allow the matching between
17371 // the size of Ins and InVals. This only happens for vararg functions.
17372 if (!OutChains.empty()) {
17373 OutChains.push_back(Chain);
17374 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
17377 return Chain;
17380 /// isEligibleForTailCallOptimization - Check whether the call is eligible
17381 /// for tail call optimization.
17382 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
17383 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
17384 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
17385 const SmallVector<CCValAssign, 16> &ArgLocs) const {
17387 auto CalleeCC = CLI.CallConv;
17388 auto &Outs = CLI.Outs;
17389 auto &Caller = MF.getFunction();
17390 auto CallerCC = Caller.getCallingConv();
17392 // Exception-handling functions need a special set of instructions to
17393 // indicate a return to the hardware. Tail-calling another function would
17394 // probably break this.
17395 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
17396 // should be expanded as new function attributes are introduced.
17397 if (Caller.hasFnAttribute("interrupt"))
17398 return false;
17400 // Do not tail call opt if the stack is used to pass parameters.
17401 if (CCInfo.getStackSize() != 0)
17402 return false;
17404 // Do not tail call opt if any parameters need to be passed indirectly.
17405 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
17406 // passed indirectly. So the address of the value will be passed in a
17407 // register, or if not available, then the address is put on the stack. In
17408 // order to pass indirectly, space on the stack often needs to be allocated
17409 // to store the value. In that case the CCInfo.getStackSize() != 0 check
17410 // above is not enough, and we also need to check whether any of the
17411 // CCValAssign entries in ArgLocs are passed CCValAssign::Indirect.
17412 for (auto &VA : ArgLocs)
17413 if (VA.getLocInfo() == CCValAssign::Indirect)
17414 return false;
17416 // Do not tail call opt if either caller or callee uses struct return
17417 // semantics.
17418 auto IsCallerStructRet = Caller.hasStructRetAttr();
17419 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
17420 if (IsCallerStructRet || IsCalleeStructRet)
17421 return false;
17423 // The callee has to preserve all registers the caller needs to preserve.
17424 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
17425 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
17426 if (CalleeCC != CallerCC) {
17427 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
17428 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
17429 return false;
17432 // Byval parameters hand the function a pointer directly into the stack area
17433 // we want to reuse during a tail call. Working around this *is* possible
17434 // but less efficient and uglier in LowerCall.
17435 for (auto &Arg : Outs)
17436 if (Arg.Flags.isByVal())
17437 return false;
17439 return true;
17442 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
17443 return DAG.getDataLayout().getPrefTypeAlign(
17444 VT.getTypeForEVT(*DAG.getContext()));
17447 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
17448 // and output parameter nodes.
17449 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
17450 SmallVectorImpl<SDValue> &InVals) const {
17451 SelectionDAG &DAG = CLI.DAG;
17452 SDLoc &DL = CLI.DL;
17453 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
17454 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
17455 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
17456 SDValue Chain = CLI.Chain;
17457 SDValue Callee = CLI.Callee;
17458 bool &IsTailCall = CLI.IsTailCall;
17459 CallingConv::ID CallConv = CLI.CallConv;
17460 bool IsVarArg = CLI.IsVarArg;
17461 EVT PtrVT = getPointerTy(DAG.getDataLayout());
17462 MVT XLenVT = Subtarget.getXLenVT();
17464 MachineFunction &MF = DAG.getMachineFunction();
17466 // Analyze the operands of the call, assigning locations to each operand.
17467 SmallVector<CCValAssign, 16> ArgLocs;
17468 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
17470 if (CallConv == CallingConv::GHC)
17471 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
17472 else
17473 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
17474 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
17475 : RISCV::CC_RISCV);
17477 // Check if it's really possible to do a tail call.
17478 if (IsTailCall)
17479 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
17481 if (IsTailCall)
17482 ++NumTailCalls;
17483 else if (CLI.CB && CLI.CB->isMustTailCall())
17484 report_fatal_error("failed to perform tail call elimination on a call "
17485 "site marked musttail");
17487 // Get a count of how many bytes are to be pushed on the stack.
17488 unsigned NumBytes = ArgCCInfo.getStackSize();
17490 // Create local copies for byval args
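// (a byval argument is passed by copying the pointee: a stack object is
// created in the caller's frame, the data is memcpy'd into it, and the
// address of that copy is what is passed in place of the original pointer).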
17491 SmallVector<SDValue, 8> ByValArgs;
17492 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
17493 ISD::ArgFlagsTy Flags = Outs[i].Flags;
17494 if (!Flags.isByVal())
17495 continue;
17497 SDValue Arg = OutVals[i];
17498 unsigned Size = Flags.getByValSize();
17499 Align Alignment = Flags.getNonZeroByValAlign();
17501 int FI =
17502 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
17503 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
17504 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
17506 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
17507 /*IsVolatile=*/false,
17508 /*AlwaysInline=*/false, IsTailCall,
17509 MachinePointerInfo(), MachinePointerInfo());
17510 ByValArgs.push_back(FIPtr);
17513 if (!IsTailCall)
17514 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
17516 // Copy argument values to their designated locations.
17517 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
17518 SmallVector<SDValue, 8> MemOpChains;
17519 SDValue StackPtr;
17520 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
17521 ++i, ++OutIdx) {
17522 CCValAssign &VA = ArgLocs[i];
17523 SDValue ArgValue = OutVals[OutIdx];
17524 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
17526 // Handle passing f64 on RV32D with a soft float ABI as a special case.
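// (Illustrative) The f64 value is split with RISCVISD::SplitF64 into two i32
// halves: the low half goes into the GPR chosen by the CCValAssign, while the
// high half goes either into the next assigned GPR or, if no register is
// left, into the outgoing argument area on the stack.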
17527 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17528 assert(VA.isRegLoc() && "Expected register VA assignment");
17529 assert(VA.needsCustom());
17530 SDValue SplitF64 = DAG.getNode(
17531 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
17532 SDValue Lo = SplitF64.getValue(0);
17533 SDValue Hi = SplitF64.getValue(1);
17535 Register RegLo = VA.getLocReg();
17536 RegsToPass.push_back(std::make_pair(RegLo, Lo));
17538 // Get the CCValAssign for the Hi part.
17539 CCValAssign &HiVA = ArgLocs[++i];
17541 if (HiVA.isMemLoc()) {
17542 // Second half of f64 is passed on the stack.
17543 if (!StackPtr.getNode())
17544 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
17545 SDValue Address =
17546 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
17547 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
17548 // Emit the store.
17549 MemOpChains.push_back(
17550 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
17551 } else {
17552 // Second half of f64 is passed in another GPR.
17553 Register RegHigh = HiVA.getLocReg();
17554 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
17556 continue;
17559 // Promote the value if needed.
17560 // For now, only handle fully promoted and indirect arguments.
17561 if (VA.getLocInfo() == CCValAssign::Indirect) {
17562 // Store the argument in a stack slot and pass its address.
17563 Align StackAlign =
17564 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
17565 getPrefTypeAlign(ArgValue.getValueType(), DAG));
17566 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
17567 // If the original argument was split (e.g. i128), we need
17568 // to store the required parts of it here (and pass just one address).
17569 // Vectors may be partly split to registers and partly to the stack, in
17570 // which case the base address is partly offset and subsequent stores are
17571 // relative to that.
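// (Illustrative) For example, when a large integer such as i128 is passed
// indirectly, each of its XLen-sized parts is stored into the single stack
// temporary created below, and only the address of that slot is passed.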
17572 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
17573 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
17574 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
17575 // Calculate the total size to store. We don't know this up front, so
17576 // walk the remaining parts in the loop below and accumulate their sizes
17577 // and preferred alignments.
17578 SmallVector<std::pair<SDValue, SDValue>> Parts;
17579 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
17580 SDValue PartValue = OutVals[OutIdx + 1];
17581 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
17582 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
17583 EVT PartVT = PartValue.getValueType();
17584 if (PartVT.isScalableVector())
17585 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
17586 StoredSize += PartVT.getStoreSize();
17587 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
17588 Parts.push_back(std::make_pair(PartValue, Offset));
17589 ++i;
17590 ++OutIdx;
17592 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
17593 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
17594 MemOpChains.push_back(
17595 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
17596 MachinePointerInfo::getFixedStack(MF, FI)));
17597 for (const auto &Part : Parts) {
17598 SDValue PartValue = Part.first;
17599 SDValue PartOffset = Part.second;
17600 SDValue Address =
17601 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
17602 MemOpChains.push_back(
17603 DAG.getStore(Chain, DL, PartValue, Address,
17604 MachinePointerInfo::getFixedStack(MF, FI)));
17606 ArgValue = SpillSlot;
17607 } else {
17608 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
17611 // Use local copy if it is a byval arg.
17612 if (Flags.isByVal())
17613 ArgValue = ByValArgs[j++];
17615 if (VA.isRegLoc()) {
17616 // Queue up the argument copies and emit them at the end.
17617 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
17618 } else {
17619 assert(VA.isMemLoc() && "Argument not register or memory");
17620 assert(!IsTailCall && "Tail call not allowed if stack is used "
17621 "for passing parameters");
17623 // Work out the address of the stack slot.
17624 if (!StackPtr.getNode())
17625 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
17626 SDValue Address =
17627 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
17628 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
17630 // Emit the store.
17631 MemOpChains.push_back(
17632 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
17636 // Join the stores, which are independent of one another.
17637 if (!MemOpChains.empty())
17638 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
17640 SDValue Glue;
17642 // Build a sequence of copy-to-reg nodes, chained and glued together.
17643 for (auto &Reg : RegsToPass) {
17644 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
17645 Glue = Chain.getValue(1);
17648 // Validate that none of the argument registers have been marked as
17649 // reserved; if so, report an error. Do the same for the return address
17650 // register if this is not a tail call.
17651 validateCCReservedRegs(RegsToPass, MF);
17652 if (!IsTailCall &&
17653 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
17654 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
17655 MF.getFunction(),
17656 "Return address register required, but has been reserved."});
17658 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
17659 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
17660 // split it, and so that the direct call can be matched by PseudoCALL.
17661 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
17662 const GlobalValue *GV = S->getGlobal();
17664 unsigned OpFlags = RISCVII::MO_CALL;
17665 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
17666 OpFlags = RISCVII::MO_PLT;
17668 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
17669 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
17670 unsigned OpFlags = RISCVII::MO_CALL;
17672 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
17673 nullptr))
17674 OpFlags = RISCVII::MO_PLT;
17676 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
17679 // The first call operand is the chain and the second is the target address.
17680 SmallVector<SDValue, 8> Ops;
17681 Ops.push_back(Chain);
17682 Ops.push_back(Callee);
17684 // Add argument registers to the end of the list so that they are
17685 // known live into the call.
17686 for (auto &Reg : RegsToPass)
17687 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
17689 if (!IsTailCall) {
17690 // Add a register mask operand representing the call-preserved registers.
17691 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
17692 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
17693 assert(Mask && "Missing call preserved mask for calling convention");
17694 Ops.push_back(DAG.getRegisterMask(Mask));
17697 // Glue the call to the argument copies, if any.
17698 if (Glue.getNode())
17699 Ops.push_back(Glue);
17701 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
17702 "Unexpected CFI type for a direct call");
17704 // Emit the call.
17705 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
17707 if (IsTailCall) {
17708 MF.getFrameInfo().setHasTailCall();
17709 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
17710 if (CLI.CFIType)
17711 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
17712 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
17713 return Ret;
17716 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
17717 if (CLI.CFIType)
17718 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
17719 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
17720 Glue = Chain.getValue(1);
17722 // Mark the end of the call, which is glued to the call itself.
17723 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
17724 Glue = Chain.getValue(1);
17726 // Assign locations to each value returned by this call.
17727 SmallVector<CCValAssign, 16> RVLocs;
17728 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
17729 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
17731 // Copy all of the result registers out of their specified physreg.
17732 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
17733 auto &VA = RVLocs[i];
17734 // Copy the value out
17735 SDValue RetValue =
17736 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
17737 // Glue the RetValue to the end of the call sequence
17738 Chain = RetValue.getValue(1);
17739 Glue = RetValue.getValue(2);
17741 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17742 assert(VA.needsCustom());
17743 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
17744 MVT::i32, Glue);
17745 Chain = RetValue2.getValue(1);
17746 Glue = RetValue2.getValue(2);
17747 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
17748 RetValue2);
17751 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
17753 InVals.push_back(RetValue);
17756 return Chain;
17759 bool RISCVTargetLowering::CanLowerReturn(
17760 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
17761 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
17762 SmallVector<CCValAssign, 16> RVLocs;
17763 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
17765 std::optional<unsigned> FirstMaskArgument;
17766 if (Subtarget.hasVInstructions())
17767 FirstMaskArgument = preAssignMask(Outs);
17769 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
17770 MVT VT = Outs[i].VT;
17771 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
17772 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
17773 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
17774 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
17775 *this, FirstMaskArgument))
17776 return false;
17778 return true;
17781 SDValue
17782 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
17783 bool IsVarArg,
17784 const SmallVectorImpl<ISD::OutputArg> &Outs,
17785 const SmallVectorImpl<SDValue> &OutVals,
17786 const SDLoc &DL, SelectionDAG &DAG) const {
17787 MachineFunction &MF = DAG.getMachineFunction();
17788 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
17790 // Stores the assignment of the return value to a location.
17791 SmallVector<CCValAssign, 16> RVLocs;
17793 // Info about the registers and stack slot.
17794 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
17795 *DAG.getContext());
17797 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
17798 nullptr, RISCV::CC_RISCV);
17800 if (CallConv == CallingConv::GHC && !RVLocs.empty())
17801 report_fatal_error("GHC functions return void only");
17803 SDValue Glue;
17804 SmallVector<SDValue, 4> RetOps(1, Chain);
17806 // Copy the result values into the output registers.
17807 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
17808 SDValue Val = OutVals[OutIdx];
17809 CCValAssign &VA = RVLocs[i];
17810 assert(VA.isRegLoc() && "Can only return in registers!");
17812 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17813 // Handle returning f64 on RV32D with a soft float ABI.
17814 assert(VA.isRegLoc() && "Expected return via registers");
17815 assert(VA.needsCustom());
17816 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
17817 DAG.getVTList(MVT::i32, MVT::i32), Val);
17818 SDValue Lo = SplitF64.getValue(0);
17819 SDValue Hi = SplitF64.getValue(1);
17820 Register RegLo = VA.getLocReg();
17821 Register RegHi = RVLocs[++i].getLocReg();
17823 if (STI.isRegisterReservedByUser(RegLo) ||
17824 STI.isRegisterReservedByUser(RegHi))
17825 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
17826 MF.getFunction(),
17827 "Return value register required, but has been reserved."});
17829 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
17830 Glue = Chain.getValue(1);
17831 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
17832 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
17833 Glue = Chain.getValue(1);
17834 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
17835 } else {
17836 // Handle a 'normal' return.
17837 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
17838 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
17840 if (STI.isRegisterReservedByUser(VA.getLocReg()))
17841 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
17842 MF.getFunction(),
17843 "Return value register required, but has been reserved."});
17845 // Guarantee that all emitted copies are stuck together.
17846 Glue = Chain.getValue(1);
17847 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
17851 RetOps[0] = Chain; // Update chain.
17853 // Add the glue node if we have it.
17854 if (Glue.getNode()) {
17855 RetOps.push_back(Glue);
17858 if (any_of(RVLocs,
17859 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
17860 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
17862 unsigned RetOpc = RISCVISD::RET_GLUE;
17863 // Interrupt service routines use different return instructions.
17864 const Function &Func = DAG.getMachineFunction().getFunction();
17865 if (Func.hasFnAttribute("interrupt")) {
17866 if (!Func.getReturnType()->isVoidTy())
17867 report_fatal_error(
17868 "Functions with the interrupt attribute must have void return type!");
17870 MachineFunction &MF = DAG.getMachineFunction();
17871 StringRef Kind =
17872 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
17874 if (Kind == "supervisor")
17875 RetOpc = RISCVISD::SRET_GLUE;
17876 else
17877 RetOpc = RISCVISD::MRET_GLUE;
17880 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
17883 void RISCVTargetLowering::validateCCReservedRegs(
17884 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
17885 MachineFunction &MF) const {
17886 const Function &F = MF.getFunction();
17887 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
17889 if (llvm::any_of(Regs, [&STI](auto Reg) {
17890 return STI.isRegisterReservedByUser(Reg.first);
17892 F.getContext().diagnose(DiagnosticInfoUnsupported{
17893 F, "Argument register required, but has been reserved."});
17896 // Check if the result of the node is only used as a return value, as
17897 // otherwise we can't perform a tail-call.
17898 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
17899 if (N->getNumValues() != 1)
17900 return false;
17901 if (!N->hasNUsesOfValue(1, 0))
17902 return false;
17904 SDNode *Copy = *N->use_begin();
17906 if (Copy->getOpcode() == ISD::BITCAST) {
17907 return isUsedByReturnOnly(Copy, Chain);
17910 // TODO: Handle additional opcodes in order to support tail-calling libcalls
17911 // with soft float ABIs.
17912 if (Copy->getOpcode() != ISD::CopyToReg) {
17913 return false;
17916 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
17917 // isn't safe to perform a tail call.
17918 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
17919 return false;
17921 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
17922 bool HasRet = false;
17923 for (SDNode *Node : Copy->uses()) {
17924 if (Node->getOpcode() != RISCVISD::RET_GLUE)
17925 return false;
17926 HasRet = true;
17928 if (!HasRet)
17929 return false;
17931 Chain = Copy->getOperand(0);
17932 return true;
17935 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17936 return CI->isTailCall();
17939 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
17940 #define NODE_NAME_CASE(NODE) \
17941 case RISCVISD::NODE: \
17942 return "RISCVISD::" #NODE;
17943 // clang-format off
17944 switch ((RISCVISD::NodeType)Opcode) {
17945 case RISCVISD::FIRST_NUMBER:
17946 break;
17947 NODE_NAME_CASE(RET_GLUE)
17948 NODE_NAME_CASE(SRET_GLUE)
17949 NODE_NAME_CASE(MRET_GLUE)
17950 NODE_NAME_CASE(CALL)
17951 NODE_NAME_CASE(SELECT_CC)
17952 NODE_NAME_CASE(BR_CC)
17953 NODE_NAME_CASE(BuildPairF64)
17954 NODE_NAME_CASE(SplitF64)
17955 NODE_NAME_CASE(TAIL)
17956 NODE_NAME_CASE(ADD_LO)
17957 NODE_NAME_CASE(HI)
17958 NODE_NAME_CASE(LLA)
17959 NODE_NAME_CASE(ADD_TPREL)
17960 NODE_NAME_CASE(MULHSU)
17961 NODE_NAME_CASE(SLLW)
17962 NODE_NAME_CASE(SRAW)
17963 NODE_NAME_CASE(SRLW)
17964 NODE_NAME_CASE(DIVW)
17965 NODE_NAME_CASE(DIVUW)
17966 NODE_NAME_CASE(REMUW)
17967 NODE_NAME_CASE(ROLW)
17968 NODE_NAME_CASE(RORW)
17969 NODE_NAME_CASE(CLZW)
17970 NODE_NAME_CASE(CTZW)
17971 NODE_NAME_CASE(ABSW)
17972 NODE_NAME_CASE(FMV_H_X)
17973 NODE_NAME_CASE(FMV_X_ANYEXTH)
17974 NODE_NAME_CASE(FMV_X_SIGNEXTH)
17975 NODE_NAME_CASE(FMV_W_X_RV64)
17976 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
17977 NODE_NAME_CASE(FCVT_X)
17978 NODE_NAME_CASE(FCVT_XU)
17979 NODE_NAME_CASE(FCVT_W_RV64)
17980 NODE_NAME_CASE(FCVT_WU_RV64)
17981 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
17982 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
17983 NODE_NAME_CASE(FP_ROUND_BF16)
17984 NODE_NAME_CASE(FP_EXTEND_BF16)
17985 NODE_NAME_CASE(FROUND)
17986 NODE_NAME_CASE(FPCLASS)
17987 NODE_NAME_CASE(FMAX)
17988 NODE_NAME_CASE(FMIN)
17989 NODE_NAME_CASE(READ_CYCLE_WIDE)
17990 NODE_NAME_CASE(BREV8)
17991 NODE_NAME_CASE(ORC_B)
17992 NODE_NAME_CASE(ZIP)
17993 NODE_NAME_CASE(UNZIP)
17994 NODE_NAME_CASE(CLMUL)
17995 NODE_NAME_CASE(CLMULH)
17996 NODE_NAME_CASE(CLMULR)
17997 NODE_NAME_CASE(SHA256SIG0)
17998 NODE_NAME_CASE(SHA256SIG1)
17999 NODE_NAME_CASE(SHA256SUM0)
18000 NODE_NAME_CASE(SHA256SUM1)
18001 NODE_NAME_CASE(SM4KS)
18002 NODE_NAME_CASE(SM4ED)
18003 NODE_NAME_CASE(SM3P0)
18004 NODE_NAME_CASE(SM3P1)
18005 NODE_NAME_CASE(TH_LWD)
18006 NODE_NAME_CASE(TH_LWUD)
18007 NODE_NAME_CASE(TH_LDD)
18008 NODE_NAME_CASE(TH_SWD)
18009 NODE_NAME_CASE(TH_SDD)
18010 NODE_NAME_CASE(VMV_V_V_VL)
18011 NODE_NAME_CASE(VMV_V_X_VL)
18012 NODE_NAME_CASE(VFMV_V_F_VL)
18013 NODE_NAME_CASE(VMV_X_S)
18014 NODE_NAME_CASE(VMV_S_X_VL)
18015 NODE_NAME_CASE(VFMV_S_F_VL)
18016 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
18017 NODE_NAME_CASE(READ_VLENB)
18018 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
18019 NODE_NAME_CASE(VSLIDEUP_VL)
18020 NODE_NAME_CASE(VSLIDE1UP_VL)
18021 NODE_NAME_CASE(VSLIDEDOWN_VL)
18022 NODE_NAME_CASE(VSLIDE1DOWN_VL)
18023 NODE_NAME_CASE(VFSLIDE1UP_VL)
18024 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
18025 NODE_NAME_CASE(VID_VL)
18026 NODE_NAME_CASE(VFNCVT_ROD_VL)
18027 NODE_NAME_CASE(VECREDUCE_ADD_VL)
18028 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
18029 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
18030 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
18031 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
18032 NODE_NAME_CASE(VECREDUCE_AND_VL)
18033 NODE_NAME_CASE(VECREDUCE_OR_VL)
18034 NODE_NAME_CASE(VECREDUCE_XOR_VL)
18035 NODE_NAME_CASE(VECREDUCE_FADD_VL)
18036 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
18037 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
18038 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
18039 NODE_NAME_CASE(ADD_VL)
18040 NODE_NAME_CASE(AND_VL)
18041 NODE_NAME_CASE(MUL_VL)
18042 NODE_NAME_CASE(OR_VL)
18043 NODE_NAME_CASE(SDIV_VL)
18044 NODE_NAME_CASE(SHL_VL)
18045 NODE_NAME_CASE(SREM_VL)
18046 NODE_NAME_CASE(SRA_VL)
18047 NODE_NAME_CASE(SRL_VL)
18048 NODE_NAME_CASE(ROTL_VL)
18049 NODE_NAME_CASE(ROTR_VL)
18050 NODE_NAME_CASE(SUB_VL)
18051 NODE_NAME_CASE(UDIV_VL)
18052 NODE_NAME_CASE(UREM_VL)
18053 NODE_NAME_CASE(XOR_VL)
18054 NODE_NAME_CASE(SADDSAT_VL)
18055 NODE_NAME_CASE(UADDSAT_VL)
18056 NODE_NAME_CASE(SSUBSAT_VL)
18057 NODE_NAME_CASE(USUBSAT_VL)
18058 NODE_NAME_CASE(FADD_VL)
18059 NODE_NAME_CASE(FSUB_VL)
18060 NODE_NAME_CASE(FMUL_VL)
18061 NODE_NAME_CASE(FDIV_VL)
18062 NODE_NAME_CASE(FNEG_VL)
18063 NODE_NAME_CASE(FABS_VL)
18064 NODE_NAME_CASE(FSQRT_VL)
18065 NODE_NAME_CASE(FCLASS_VL)
18066 NODE_NAME_CASE(VFMADD_VL)
18067 NODE_NAME_CASE(VFNMADD_VL)
18068 NODE_NAME_CASE(VFMSUB_VL)
18069 NODE_NAME_CASE(VFNMSUB_VL)
18070 NODE_NAME_CASE(VFWMADD_VL)
18071 NODE_NAME_CASE(VFWNMADD_VL)
18072 NODE_NAME_CASE(VFWMSUB_VL)
18073 NODE_NAME_CASE(VFWNMSUB_VL)
18074 NODE_NAME_CASE(FCOPYSIGN_VL)
18075 NODE_NAME_CASE(SMIN_VL)
18076 NODE_NAME_CASE(SMAX_VL)
18077 NODE_NAME_CASE(UMIN_VL)
18078 NODE_NAME_CASE(UMAX_VL)
18079 NODE_NAME_CASE(BITREVERSE_VL)
18080 NODE_NAME_CASE(BSWAP_VL)
18081 NODE_NAME_CASE(CTLZ_VL)
18082 NODE_NAME_CASE(CTTZ_VL)
18083 NODE_NAME_CASE(CTPOP_VL)
18084 NODE_NAME_CASE(VFMIN_VL)
18085 NODE_NAME_CASE(VFMAX_VL)
18086 NODE_NAME_CASE(MULHS_VL)
18087 NODE_NAME_CASE(MULHU_VL)
18088 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
18089 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
18090 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
18091 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
18092 NODE_NAME_CASE(VFCVT_X_F_VL)
18093 NODE_NAME_CASE(VFCVT_XU_F_VL)
18094 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
18095 NODE_NAME_CASE(SINT_TO_FP_VL)
18096 NODE_NAME_CASE(UINT_TO_FP_VL)
18097 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
18098 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
18099 NODE_NAME_CASE(FP_EXTEND_VL)
18100 NODE_NAME_CASE(FP_ROUND_VL)
18101 NODE_NAME_CASE(STRICT_FADD_VL)
18102 NODE_NAME_CASE(STRICT_FSUB_VL)
18103 NODE_NAME_CASE(STRICT_FMUL_VL)
18104 NODE_NAME_CASE(STRICT_FDIV_VL)
18105 NODE_NAME_CASE(STRICT_FSQRT_VL)
18106 NODE_NAME_CASE(STRICT_VFMADD_VL)
18107 NODE_NAME_CASE(STRICT_VFNMADD_VL)
18108 NODE_NAME_CASE(STRICT_VFMSUB_VL)
18109 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
18110 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
18111 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
18112 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
18113 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
18114 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
18115 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
18116 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
18117 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
18118 NODE_NAME_CASE(STRICT_FSETCC_VL)
18119 NODE_NAME_CASE(STRICT_FSETCCS_VL)
18120 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
18121 NODE_NAME_CASE(VWMUL_VL)
18122 NODE_NAME_CASE(VWMULU_VL)
18123 NODE_NAME_CASE(VWMULSU_VL)
18124 NODE_NAME_CASE(VWADD_VL)
18125 NODE_NAME_CASE(VWADDU_VL)
18126 NODE_NAME_CASE(VWSUB_VL)
18127 NODE_NAME_CASE(VWSUBU_VL)
18128 NODE_NAME_CASE(VWADD_W_VL)
18129 NODE_NAME_CASE(VWADDU_W_VL)
18130 NODE_NAME_CASE(VWSUB_W_VL)
18131 NODE_NAME_CASE(VWSUBU_W_VL)
18132 NODE_NAME_CASE(VWSLL_VL)
18133 NODE_NAME_CASE(VFWMUL_VL)
18134 NODE_NAME_CASE(VFWADD_VL)
18135 NODE_NAME_CASE(VFWSUB_VL)
18136 NODE_NAME_CASE(VFWADD_W_VL)
18137 NODE_NAME_CASE(VFWSUB_W_VL)
18138 NODE_NAME_CASE(VWMACC_VL)
18139 NODE_NAME_CASE(VWMACCU_VL)
18140 NODE_NAME_CASE(VWMACCSU_VL)
18141 NODE_NAME_CASE(VNSRL_VL)
18142 NODE_NAME_CASE(SETCC_VL)
18143 NODE_NAME_CASE(VSELECT_VL)
18144 NODE_NAME_CASE(VP_MERGE_VL)
18145 NODE_NAME_CASE(VMAND_VL)
18146 NODE_NAME_CASE(VMOR_VL)
18147 NODE_NAME_CASE(VMXOR_VL)
18148 NODE_NAME_CASE(VMCLR_VL)
18149 NODE_NAME_CASE(VMSET_VL)
18150 NODE_NAME_CASE(VRGATHER_VX_VL)
18151 NODE_NAME_CASE(VRGATHER_VV_VL)
18152 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
18153 NODE_NAME_CASE(VSEXT_VL)
18154 NODE_NAME_CASE(VZEXT_VL)
18155 NODE_NAME_CASE(VCPOP_VL)
18156 NODE_NAME_CASE(VFIRST_VL)
18157 NODE_NAME_CASE(READ_CSR)
18158 NODE_NAME_CASE(WRITE_CSR)
18159 NODE_NAME_CASE(SWAP_CSR)
18160 NODE_NAME_CASE(CZERO_EQZ)
18161 NODE_NAME_CASE(CZERO_NEZ)
18163 // clang-format on
18164 return nullptr;
18165 #undef NODE_NAME_CASE
18168 /// getConstraintType - Given a constraint letter, return the type of
18169 /// constraint it is for this target.
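/// The single-letter constraints handled below are 'f' (an FP register),
/// 'I'/'J'/'K' (immediates: 12-bit signed, zero, and 5-bit unsigned,
/// respectively), 'A' (a memory operand addressed by a register) and 'S'
/// (a symbolic address); the multi-letter "vr" and "vm" constraints select
/// vector register and vector mask operands.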
18170 RISCVTargetLowering::ConstraintType
18171 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
18172 if (Constraint.size() == 1) {
18173 switch (Constraint[0]) {
18174 default:
18175 break;
18176 case 'f':
18177 return C_RegisterClass;
18178 case 'I':
18179 case 'J':
18180 case 'K':
18181 return C_Immediate;
18182 case 'A':
18183 return C_Memory;
18184 case 'S': // A symbolic address
18185 return C_Other;
18187 } else {
18188 if (Constraint == "vr" || Constraint == "vm")
18189 return C_RegisterClass;
18191 return TargetLowering::getConstraintType(Constraint);
18194 std::pair<unsigned, const TargetRegisterClass *>
18195 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18196 StringRef Constraint,
18197 MVT VT) const {
18198 // First, see if this is a constraint that directly corresponds to a RISC-V
18199 // register class.
18200 if (Constraint.size() == 1) {
18201 switch (Constraint[0]) {
18202 case 'r':
18203 // TODO: Support fixed vectors up to XLen for P extension?
18204 if (VT.isVector())
18205 break;
18206 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
18207 case 'f':
18208 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
18209 return std::make_pair(0U, &RISCV::FPR16RegClass);
18210 if (Subtarget.hasStdExtF() && VT == MVT::f32)
18211 return std::make_pair(0U, &RISCV::FPR32RegClass);
18212 if (Subtarget.hasStdExtD() && VT == MVT::f64)
18213 return std::make_pair(0U, &RISCV::FPR64RegClass);
18214 break;
18215 default:
18216 break;
18218 } else if (Constraint == "vr") {
18219 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
18220 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
18221 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
18222 return std::make_pair(0U, RC);
18224 } else if (Constraint == "vm") {
18225 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
18226 return std::make_pair(0U, &RISCV::VMV0RegClass);
18229 // Clang will correctly decode the usage of register name aliases into their
18230 // official names. However, other frontends like `rustc` do not. This allows
18231 // users of these frontends to use the ABI names for registers in LLVM-style
18232 // register constraints.
18233 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
18234 .Case("{zero}", RISCV::X0)
18235 .Case("{ra}", RISCV::X1)
18236 .Case("{sp}", RISCV::X2)
18237 .Case("{gp}", RISCV::X3)
18238 .Case("{tp}", RISCV::X4)
18239 .Case("{t0}", RISCV::X5)
18240 .Case("{t1}", RISCV::X6)
18241 .Case("{t2}", RISCV::X7)
18242 .Cases("{s0}", "{fp}", RISCV::X8)
18243 .Case("{s1}", RISCV::X9)
18244 .Case("{a0}", RISCV::X10)
18245 .Case("{a1}", RISCV::X11)
18246 .Case("{a2}", RISCV::X12)
18247 .Case("{a3}", RISCV::X13)
18248 .Case("{a4}", RISCV::X14)
18249 .Case("{a5}", RISCV::X15)
18250 .Case("{a6}", RISCV::X16)
18251 .Case("{a7}", RISCV::X17)
18252 .Case("{s2}", RISCV::X18)
18253 .Case("{s3}", RISCV::X19)
18254 .Case("{s4}", RISCV::X20)
18255 .Case("{s5}", RISCV::X21)
18256 .Case("{s6}", RISCV::X22)
18257 .Case("{s7}", RISCV::X23)
18258 .Case("{s8}", RISCV::X24)
18259 .Case("{s9}", RISCV::X25)
18260 .Case("{s10}", RISCV::X26)
18261 .Case("{s11}", RISCV::X27)
18262 .Case("{t3}", RISCV::X28)
18263 .Case("{t4}", RISCV::X29)
18264 .Case("{t5}", RISCV::X30)
18265 .Case("{t6}", RISCV::X31)
18266 .Default(RISCV::NoRegister);
18267 if (XRegFromAlias != RISCV::NoRegister)
18268 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
18270 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
18271 // TableGen record rather than the AsmName to choose registers for InlineAsm
18272 // constraints, and we want to match those names to the widest floating-point
18273 // register type available, manually select floating-point registers here.
18275 // The second case in each entry below is the ABI name of the register, so
18276 // that frontends can also use the ABI names in register constraint lists.
18277 if (Subtarget.hasStdExtF()) {
18278 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
18279 .Cases("{f0}", "{ft0}", RISCV::F0_F)
18280 .Cases("{f1}", "{ft1}", RISCV::F1_F)
18281 .Cases("{f2}", "{ft2}", RISCV::F2_F)
18282 .Cases("{f3}", "{ft3}", RISCV::F3_F)
18283 .Cases("{f4}", "{ft4}", RISCV::F4_F)
18284 .Cases("{f5}", "{ft5}", RISCV::F5_F)
18285 .Cases("{f6}", "{ft6}", RISCV::F6_F)
18286 .Cases("{f7}", "{ft7}", RISCV::F7_F)
18287 .Cases("{f8}", "{fs0}", RISCV::F8_F)
18288 .Cases("{f9}", "{fs1}", RISCV::F9_F)
18289 .Cases("{f10}", "{fa0}", RISCV::F10_F)
18290 .Cases("{f11}", "{fa1}", RISCV::F11_F)
18291 .Cases("{f12}", "{fa2}", RISCV::F12_F)
18292 .Cases("{f13}", "{fa3}", RISCV::F13_F)
18293 .Cases("{f14}", "{fa4}", RISCV::F14_F)
18294 .Cases("{f15}", "{fa5}", RISCV::F15_F)
18295 .Cases("{f16}", "{fa6}", RISCV::F16_F)
18296 .Cases("{f17}", "{fa7}", RISCV::F17_F)
18297 .Cases("{f18}", "{fs2}", RISCV::F18_F)
18298 .Cases("{f19}", "{fs3}", RISCV::F19_F)
18299 .Cases("{f20}", "{fs4}", RISCV::F20_F)
18300 .Cases("{f21}", "{fs5}", RISCV::F21_F)
18301 .Cases("{f22}", "{fs6}", RISCV::F22_F)
18302 .Cases("{f23}", "{fs7}", RISCV::F23_F)
18303 .Cases("{f24}", "{fs8}", RISCV::F24_F)
18304 .Cases("{f25}", "{fs9}", RISCV::F25_F)
18305 .Cases("{f26}", "{fs10}", RISCV::F26_F)
18306 .Cases("{f27}", "{fs11}", RISCV::F27_F)
18307 .Cases("{f28}", "{ft8}", RISCV::F28_F)
18308 .Cases("{f29}", "{ft9}", RISCV::F29_F)
18309 .Cases("{f30}", "{ft10}", RISCV::F30_F)
18310 .Cases("{f31}", "{ft11}", RISCV::F31_F)
18311 .Default(RISCV::NoRegister);
18312 if (FReg != RISCV::NoRegister) {
18313 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
18314 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
18315 unsigned RegNo = FReg - RISCV::F0_F;
18316 unsigned DReg = RISCV::F0_D + RegNo;
18317 return std::make_pair(DReg, &RISCV::FPR64RegClass);
18319 if (VT == MVT::f32 || VT == MVT::Other)
18320 return std::make_pair(FReg, &RISCV::FPR32RegClass);
18321 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
18322 unsigned RegNo = FReg - RISCV::F0_F;
18323 unsigned HReg = RISCV::F0_H + RegNo;
18324 return std::make_pair(HReg, &RISCV::FPR16RegClass);
18329 if (Subtarget.hasVInstructions()) {
18330 Register VReg = StringSwitch<Register>(Constraint.lower())
18331 .Case("{v0}", RISCV::V0)
18332 .Case("{v1}", RISCV::V1)
18333 .Case("{v2}", RISCV::V2)
18334 .Case("{v3}", RISCV::V3)
18335 .Case("{v4}", RISCV::V4)
18336 .Case("{v5}", RISCV::V5)
18337 .Case("{v6}", RISCV::V6)
18338 .Case("{v7}", RISCV::V7)
18339 .Case("{v8}", RISCV::V8)
18340 .Case("{v9}", RISCV::V9)
18341 .Case("{v10}", RISCV::V10)
18342 .Case("{v11}", RISCV::V11)
18343 .Case("{v12}", RISCV::V12)
18344 .Case("{v13}", RISCV::V13)
18345 .Case("{v14}", RISCV::V14)
18346 .Case("{v15}", RISCV::V15)
18347 .Case("{v16}", RISCV::V16)
18348 .Case("{v17}", RISCV::V17)
18349 .Case("{v18}", RISCV::V18)
18350 .Case("{v19}", RISCV::V19)
18351 .Case("{v20}", RISCV::V20)
18352 .Case("{v21}", RISCV::V21)
18353 .Case("{v22}", RISCV::V22)
18354 .Case("{v23}", RISCV::V23)
18355 .Case("{v24}", RISCV::V24)
18356 .Case("{v25}", RISCV::V25)
18357 .Case("{v26}", RISCV::V26)
18358 .Case("{v27}", RISCV::V27)
18359 .Case("{v28}", RISCV::V28)
18360 .Case("{v29}", RISCV::V29)
18361 .Case("{v30}", RISCV::V30)
18362 .Case("{v31}", RISCV::V31)
18363 .Default(RISCV::NoRegister);
18364 if (VReg != RISCV::NoRegister) {
18365 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
18366 return std::make_pair(VReg, &RISCV::VMRegClass);
18367 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
18368 return std::make_pair(VReg, &RISCV::VRRegClass);
18369 for (const auto *RC :
18370 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
18371 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
18372 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
18373 return std::make_pair(VReg, RC);
18379 std::pair<Register, const TargetRegisterClass *> Res =
18380 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18382 // If we picked one of the Zfinx register classes, remap it to the GPR class.
18383 // FIXME: When Zfinx is supported in CodeGen this will need to take the
18384 // Subtarget into account.
18385 if (Res.second == &RISCV::GPRF16RegClass ||
18386 Res.second == &RISCV::GPRF32RegClass ||
18387 Res.second == &RISCV::GPRPF64RegClass)
18388 return std::make_pair(Res.first, &RISCV::GPRRegClass);
18390 return Res;
18393 InlineAsm::ConstraintCode
18394 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
18395 // Currently only support length 1 constraints.
18396 if (ConstraintCode.size() == 1) {
18397 switch (ConstraintCode[0]) {
18398 case 'A':
18399 return InlineAsm::ConstraintCode::A;
18400 default:
18401 break;
18405 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
18408 void RISCVTargetLowering::LowerAsmOperandForConstraint(
18409 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
18410 SelectionDAG &DAG) const {
18411 // Currently only support length 1 constraints.
18412 if (Constraint.size() == 1) {
18413 switch (Constraint[0]) {
18414 case 'I':
18415 // Validate & create a 12-bit signed immediate operand.
18416 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
18417 uint64_t CVal = C->getSExtValue();
18418 if (isInt<12>(CVal))
18419 Ops.push_back(
18420 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
18422 return;
18423 case 'J':
18424 // Validate & create an integer zero operand.
18425 if (isNullConstant(Op))
18426 Ops.push_back(
18427 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
18428 return;
18429 case 'K':
18430 // Validate & create a 5-bit unsigned immediate operand.
18431 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
18432 uint64_t CVal = C->getZExtValue();
18433 if (isUInt<5>(CVal))
18434 Ops.push_back(
18435 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
18437 return;
18438 case 'S':
18439 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
18440 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
18441 GA->getValueType(0)));
18442 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
18443 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
18444 BA->getValueType(0)));
18446 return;
18447 default:
18448 break;
18451 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18454 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
18455 Instruction *Inst,
18456 AtomicOrdering Ord) const {
18457 if (Subtarget.hasStdExtZtso()) {
18458 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
18459 return Builder.CreateFence(Ord);
18460 return nullptr;
18463 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
18464 return Builder.CreateFence(Ord);
18465 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
18466 return Builder.CreateFence(AtomicOrdering::Release);
18467 return nullptr;
18470 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
18471 Instruction *Inst,
18472 AtomicOrdering Ord) const {
18473 if (Subtarget.hasStdExtZtso()) {
18474 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
18475 return Builder.CreateFence(Ord);
18476 return nullptr;
18479 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
18480 return Builder.CreateFence(AtomicOrdering::Acquire);
18481 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
18482 Ord == AtomicOrdering::SequentiallyConsistent)
18483 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
18484 return nullptr;
18487 TargetLowering::AtomicExpansionKind
18488 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18489 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
18490 // point operations can't be used in an lr/sc sequence without breaking the
18491 // forward-progress guarantee.
18492 if (AI->isFloatingPointOperation() ||
18493 AI->getOperation() == AtomicRMWInst::UIncWrap ||
18494 AI->getOperation() == AtomicRMWInst::UDecWrap)
18495 return AtomicExpansionKind::CmpXChg;
18497 // Don't expand forced atomics, we want to have __sync libcalls instead.
18498 if (Subtarget.hasForcedAtomics())
18499 return AtomicExpansionKind::None;
18501 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18502 if (Size == 8 || Size == 16)
18503 return AtomicExpansionKind::MaskedIntrinsic;
18504 return AtomicExpansionKind::None;
18507 static Intrinsic::ID
18508 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
18509 if (XLen == 32) {
18510 switch (BinOp) {
18511 default:
18512 llvm_unreachable("Unexpected AtomicRMW BinOp");
18513 case AtomicRMWInst::Xchg:
18514 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
18515 case AtomicRMWInst::Add:
18516 return Intrinsic::riscv_masked_atomicrmw_add_i32;
18517 case AtomicRMWInst::Sub:
18518 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
18519 case AtomicRMWInst::Nand:
18520 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
18521 case AtomicRMWInst::Max:
18522 return Intrinsic::riscv_masked_atomicrmw_max_i32;
18523 case AtomicRMWInst::Min:
18524 return Intrinsic::riscv_masked_atomicrmw_min_i32;
18525 case AtomicRMWInst::UMax:
18526 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
18527 case AtomicRMWInst::UMin:
18528 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
18532 if (XLen == 64) {
18533 switch (BinOp) {
18534 default:
18535 llvm_unreachable("Unexpected AtomicRMW BinOp");
18536 case AtomicRMWInst::Xchg:
18537 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
18538 case AtomicRMWInst::Add:
18539 return Intrinsic::riscv_masked_atomicrmw_add_i64;
18540 case AtomicRMWInst::Sub:
18541 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
18542 case AtomicRMWInst::Nand:
18543 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
18544 case AtomicRMWInst::Max:
18545 return Intrinsic::riscv_masked_atomicrmw_max_i64;
18546 case AtomicRMWInst::Min:
18547 return Intrinsic::riscv_masked_atomicrmw_min_i64;
18548 case AtomicRMWInst::UMax:
18549 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
18550 case AtomicRMWInst::UMin:
18551 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
18555 llvm_unreachable("Unexpected XLen\n");
18558 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
18559 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18560 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18561 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
18562 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
18563 // mask, as this produces better code than the LR/SC loop emitted by
18564 // int_riscv_masked_atomicrmw_xchg.
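// For example, an atomicrmw xchg with constant 0 becomes an AND with the
// inverted mask (clearing just the selected bytes within the aligned word),
// and an xchg with -1 becomes an OR with the mask (setting them).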
18565 if (AI->getOperation() == AtomicRMWInst::Xchg &&
18566 isa<ConstantInt>(AI->getValOperand())) {
18567 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
18568 if (CVal->isZero())
18569 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
18570 Builder.CreateNot(Mask, "Inv_Mask"),
18571 AI->getAlign(), Ord);
18572 if (CVal->isMinusOne())
18573 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
18574 AI->getAlign(), Ord);
18577 unsigned XLen = Subtarget.getXLen();
18578 Value *Ordering =
18579 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
18580 Type *Tys[] = {AlignedAddr->getType()};
18581 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
18582 AI->getModule(),
18583 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
18585 if (XLen == 64) {
18586 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
18587 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
18588 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
18591 Value *Result;
18593 // Must pass the shift amount needed to sign extend the loaded value prior
18594 // to performing a signed comparison for min/max. ShiftAmt is the number of
18595 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
18596 // is the number of bits to left+right shift the value in order to
18597 // sign-extend.
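// For example (illustrative), for an i8 atomic min on RV32 whose byte sits
// at bit offset 8 (ShiftAmt = 8), SextShamt = 32 - 8 - 8 = 16.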
18598 if (AI->getOperation() == AtomicRMWInst::Min ||
18599 AI->getOperation() == AtomicRMWInst::Max) {
18600 const DataLayout &DL = AI->getModule()->getDataLayout();
18601 unsigned ValWidth =
18602 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
18603 Value *SextShamt =
18604 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
18605 Result = Builder.CreateCall(LrwOpScwLoop,
18606 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
18607 } else {
18608 Result =
18609 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
18612 if (XLen == 64)
18613 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
18614 return Result;
18617 TargetLowering::AtomicExpansionKind
18618 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
18619 AtomicCmpXchgInst *CI) const {
18620 // Don't expand forced atomics, we want to have __sync libcalls instead.
18621 if (Subtarget.hasForcedAtomics())
18622 return AtomicExpansionKind::None;
18624 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
18625 if (Size == 8 || Size == 16)
18626 return AtomicExpansionKind::MaskedIntrinsic;
18627 return AtomicExpansionKind::None;
18630 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18631 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18632 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18633 unsigned XLen = Subtarget.getXLen();
18634 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
18635 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
18636 if (XLen == 64) {
18637 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
18638 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
18639 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
18640 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
18642 Type *Tys[] = {AlignedAddr->getType()};
18643 Function *MaskedCmpXchg =
18644 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
18645 Value *Result = Builder.CreateCall(
18646 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
18647 if (XLen == 64)
18648 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
18649 return Result;
18652 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
18653 EVT DataVT) const {
18654 // We have indexed loads for all legal index types. Indices are always
18655 // zero extended.
18656 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
18657 isTypeLegal(Extend.getValueType()) &&
18658 isTypeLegal(Extend.getOperand(0).getValueType());
18661 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
18662 EVT VT) const {
18663 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
18664 return false;
18666 switch (FPVT.getSimpleVT().SimpleTy) {
18667 case MVT::f16:
18668 return Subtarget.hasStdExtZfhOrZfhmin();
18669 case MVT::f32:
18670 return Subtarget.hasStdExtF();
18671 case MVT::f64:
18672 return Subtarget.hasStdExtD();
18673 default:
18674 return false;
18678 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
18679 // If we are using the small code model, we can reduce size of jump table
18680 // entry to 4 bytes.
18681 if (Subtarget.is64Bit() && !isPositionIndependent() &&
18682 getTargetMachine().getCodeModel() == CodeModel::Small) {
18683 return MachineJumpTableInfo::EK_Custom32;
18685 return TargetLowering::getJumpTableEncoding();
18688 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
18689 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
18690 unsigned uid, MCContext &Ctx) const {
18691 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
18692 getTargetMachine().getCodeModel() == CodeModel::Small);
18693 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
18696 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
18697 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
18698 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
18699 // a power of two as well.
18700 // FIXME: This doesn't work for zve32, but that's already broken
18701 // elsewhere for the same reason.
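// For example, VLEN = 128 gives vscale = 128 / 64 = 2, and VLEN = 256 gives
// vscale = 4.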
18702 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
18703 static_assert(RISCV::RVVBitsPerBlock == 64,
18704 "RVVBitsPerBlock changed, audit needed");
18705 return true;
18708 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
18709 SDValue &Offset,
18710 ISD::MemIndexedMode &AM,
18711 bool &IsInc,
18712 SelectionDAG &DAG) const {
18713 // Target does not support indexed loads.
18714 if (!Subtarget.hasVendorXTHeadMemIdx())
18715 return false;
18717 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
18718 return false;
18720 Base = Op->getOperand(0);
18721 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
18722 int64_t RHSC = RHS->getSExtValue();
18723 if (Op->getOpcode() == ISD::SUB)
18724 RHSC = -(uint64_t)RHSC;
18726 // The constants that can be encoded in the THeadMemIdx instructions
18727 // are of the form (sign_extend(imm5) << imm2).
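// For example (illustrative), an offset of 120 (= 15 << 3) is encodable,
// while 17 is not, since 17 is odd and does not fit in a signed 5-bit
// immediate.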
18728 bool isLegalIndexedOffset = false;
18729 for (unsigned i = 0; i < 4; i++)
18730 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
18731 isLegalIndexedOffset = true;
18732 break;
18735 if (!isLegalIndexedOffset)
18736 return false;
18738 IsInc = (Op->getOpcode() == ISD::ADD);
18739 Offset = Op->getOperand(1);
18740 return true;
18743 return false;
18746 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
18747 SDValue &Offset,
18748 ISD::MemIndexedMode &AM,
18749 SelectionDAG &DAG) const {
18750 EVT VT;
18751 SDValue Ptr;
18752 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18753 VT = LD->getMemoryVT();
18754 Ptr = LD->getBasePtr();
18755 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18756 VT = ST->getMemoryVT();
18757 Ptr = ST->getBasePtr();
18758 } else
18759 return false;
18761 bool IsInc;
18762 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
18763 return false;
18765 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
18766 return true;
18769 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
18770 SDValue &Base,
18771 SDValue &Offset,
18772 ISD::MemIndexedMode &AM,
18773 SelectionDAG &DAG) const {
18774 EVT VT;
18775 SDValue Ptr;
18776 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18777 VT = LD->getMemoryVT();
18778 Ptr = LD->getBasePtr();
18779 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18780 VT = ST->getMemoryVT();
18781 Ptr = ST->getBasePtr();
18782 } else
18783 return false;
18785 bool IsInc;
18786 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
18787 return false;
18788 // Post-indexing updates the base, so it's not a valid transform
18789 // if that's not the same as the load's pointer.
18790 if (Ptr != Base)
18791 return false;
18793 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
18794 return true;
18797 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18798 EVT VT) const {
18799 EVT SVT = VT.getScalarType();
18801 if (!SVT.isSimple())
18802 return false;
18804 switch (SVT.getSimpleVT().SimpleTy) {
18805 case MVT::f16:
18806 return VT.isVector() ? Subtarget.hasVInstructionsF16()
18807 : Subtarget.hasStdExtZfhOrZhinx();
18808 case MVT::f32:
18809 return Subtarget.hasStdExtFOrZfinx();
18810 case MVT::f64:
18811 return Subtarget.hasStdExtDOrZdinx();
18812 default:
18813 break;
18816 return false;
18819 Register RISCVTargetLowering::getExceptionPointerRegister(
18820 const Constant *PersonalityFn) const {
18821 return RISCV::X10;
18824 Register RISCVTargetLowering::getExceptionSelectorRegister(
18825 const Constant *PersonalityFn) const {
18826 return RISCV::X11;
18829 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
18830 // Return false to suppress the unnecessary extensions if a LibCall
18831 // argument or return value is a float narrower than XLEN on a soft FP ABI.
18832 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
18833 Type.getSizeInBits() < Subtarget.getXLen()))
18834 return false;
18836 return true;
18839 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
18840 if (Subtarget.is64Bit() && Type == MVT::i32)
18841 return true;
18843 return IsSigned;
18846 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18847 SDValue C) const {
18848 // Check integral scalar types.
18849 const bool HasExtMOrZmmul =
18850 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
18851 if (!VT.isScalarInteger())
18852 return false;
18854 // Omit the optimization if the subtarget has the M extension and the data
18855 // size exceeds XLen.
18856 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
18857 return false;
18859 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18860 // Break the MUL to a SLLI and an ADD/SUB.
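// For example, x * 17 becomes (x << 4) + x and x * 15 becomes (x << 4) - x.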
18861 const APInt &Imm = ConstNode->getAPIntValue();
18862 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
18863 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
18864 return true;
18866 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
18867 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
18868 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
18869 (Imm - 8).isPowerOf2()))
18870 return true;
18872 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
18873 // a pair of LUI/ADDI.
18874 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
18875 ConstNode->hasOneUse()) {
18876 APInt ImmS = Imm.ashr(Imm.countr_zero());
18877 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
18878 (1 - ImmS).isPowerOf2())
18879 return true;
18883 return false;
18886 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
18887 SDValue ConstNode) const {
18888 // Let the DAGCombiner decide for vectors.
18889 EVT VT = AddNode.getValueType();
18890 if (VT.isVector())
18891 return true;
18893 // Let the DAGCombiner decide for larger types.
18894 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
18895 return true;
18897 // It is worse if c1 is simm12 while c1*c2 is not.
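// For example (illustrative), folding (x + 1000) * 5 into (x * 5) + 5000
// would replace an ADDI-encodable 1000 with 5000, which no longer fits in a
// 12-bit signed immediate and must be materialized separately.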
18898 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
18899 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
18900 const APInt &C1 = C1Node->getAPIntValue();
18901 const APInt &C2 = C2Node->getAPIntValue();
18902 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
18903 return false;
18905 // Default to true and let the DAGCombiner decide.
18906 return true;
18909 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
18910 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
18911 unsigned *Fast) const {
18912 if (!VT.isVector()) {
18913 if (Fast)
18914 *Fast = Subtarget.enableUnalignedScalarMem();
18915 return Subtarget.enableUnalignedScalarMem();
18918 // All vector implementations must support element alignment
18919 EVT ElemVT = VT.getVectorElementType();
18920 if (Alignment >= ElemVT.getStoreSize()) {
18921 if (Fast)
18922 *Fast = 1;
18923 return true;
18926 // Note: We lower an unmasked unaligned vector access to an equally sized
18927 // e8 element type access. Given this, we effectively support all unmasked
18928 // misaligned accesses. TODO: Work through the codegen implications of
18929 // allowing such accesses to be formed and of considering them fast.
18930 if (Fast)
18931 *Fast = Subtarget.enableUnalignedVectorMem();
18932 return Subtarget.enableUnalignedVectorMem();
18936 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
18937 const AttributeList &FuncAttributes) const {
18938 if (!Subtarget.hasVInstructions())
18939 return MVT::Other;
18941 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
18942 return MVT::Other;
18944 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
18945 // has an expansion threshold, and we want the number of hardware memory
18946 // operations to correspond roughly to that threshold. LMUL>1 operations
18947 // are typically expanded linearly internally, and thus correspond to more
18948 // than one actual memory operation. Note that store merging and load
18949 // combining will typically form larger LMUL operations from the LMUL1
18950 // operations emitted here, and that's okay because combining isn't
18951 // introducing new memory operations; it's just merging existing ones.
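// For example (illustrative), assuming a minimum VLEN of 128 and an ELEN of
// 64, MinVLenInBytes is 16, so a suitably aligned memcpy prefers v2i64 while
// a non-zero memset prefers v16i8.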
18952 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
18953 if (Op.size() < MinVLenInBytes)
18954 // TODO: Figure out short memops. For the moment, do the default thing
18955 // which ends up using scalar sequences.
18956 return MVT::Other;
18958 // Prefer i8 for non-zero memset as it allows us to avoid materializing
18959 // a large scalar constant and instead use vmv.v.x/i to do the
18960 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
18961 // maximize the chance we can encode the size in the vsetvli.
18962 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
18963 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
18965 // Do we have sufficient alignment for our preferred VT? If not, revert
18966 // to the largest size allowed by our alignment criteria.
18967 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
18968 Align RequiredAlign(PreferredVT.getStoreSize());
18969 if (Op.isFixedDstAlign())
18970 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
18971 if (Op.isMemcpy())
18972 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
18973 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
18975 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
18978 bool RISCVTargetLowering::splitValueIntoRegisterParts(
18979 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18980 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18981 bool IsABIRegCopy = CC.has_value();
18982 EVT ValueVT = Val.getValueType();
18983 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
18984 PartVT == MVT::f32) {
18985 // Cast the [b]f16 to i16, extend to i32, pad the upper bits with ones to
18986 // make a float NaN, and cast to f32.
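// For example, an f16 with bit pattern 0x3C00 (1.0) is passed in an f32
// register as 0xFFFF3C00, i.e. NaN-boxed in the upper 16 bits.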
18987 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
18988 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
18989 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
18990 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
18991 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18992 Parts[0] = Val;
18993 return true;
18996 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
18997 LLVMContext &Context = *DAG.getContext();
18998 EVT ValueEltVT = ValueVT.getVectorElementType();
18999 EVT PartEltVT = PartVT.getVectorElementType();
19000 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
19001 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
19002 if (PartVTBitSize % ValueVTBitSize == 0) {
19003 assert(PartVTBitSize >= ValueVTBitSize);
19004 // If the element types are different, first widen the value with an
19005 // insert_subvector (if needed) so that it has the same total size as
19006 // PartVT, and then bitcast it to PartVT.
19007 // For example, to copy a <vscale x 1 x i8> value into a <vscale x 4 x i16>
19008 // part, we widen <vscale x 1 x i8> to <vscale x 8 x i8> via
19009 // insert_subvector and then bitcast the result to <vscale x 4 x i16>.
19010 if (ValueEltVT != PartEltVT) {
19011 if (PartVTBitSize > ValueVTBitSize) {
19012 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
19013 assert(Count != 0 && "The number of elements should not be zero.");
19014 EVT SameEltTypeVT =
19015 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
19016 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
19017 DAG.getUNDEF(SameEltTypeVT), Val,
19018 DAG.getVectorIdxConstant(0, DL));
19020 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
19021 } else {
19022 Val =
19023 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
19024 Val, DAG.getVectorIdxConstant(0, DL));
19026 Parts[0] = Val;
19027 return true;
19030 return false;
19033 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
19034 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
19035 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
19036 bool IsABIRegCopy = CC.has_value();
19037 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
19038 PartVT == MVT::f32) {
19039 SDValue Val = Parts[0];
19041 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8> and can then extract the <vscale x 1 x i8>
      // subvector.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
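  // For example (illustrative), an unsigned divide by a non-power-of-two
  // constant would otherwise be expanded into a multiply-by-magic-constant
  // sequence of several instructions, whereas divu is a single (if slower)
  // instruction.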
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing zero_ext and sign_ext can prevent them from being matched
  // into widening instructions in some situations.
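  // For example (illustrative): a vector sign_extend feeding an add can be
  // combined into a widening add (vwadd); scalarizing the extension of a
  // splat first may block that combine.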
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
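  // The IR emitted here is roughly (illustrative):
  //   %tp = call ptr @llvm.thread.pointer()
  //   %guard.slot = getelementptr i8, ptr %tp, i32 -16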
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}

bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  }

  // Need to make sure that EMUL * NFIELDS ≤ 8
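  // For example (illustrative): with a factor-4 interleave and a container
  // type at LMUL=2, EMUL * NFIELDS = 2 * 4 = 8, which is allowed; at LMUL=4
  // it would be 16 and the access is rejected.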
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}

bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.enableUnalignedVectorMem() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}

static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};

/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                        %ptr, i64 4)
/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  IRBuilder<> Builder(LI);

  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());

  Function *VlsegNFunc =
      Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
                                {VTy, LI->getPointerOperandType(), XLenTy});

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());

  CallInst *VlsegN =
      Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});

  for (unsigned i = 0; i < Shuffles.size(); i++) {
    Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }

  return true;
}

static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};

/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                              %ptr, i32 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  IRBuilder<> Builder(SI);
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  Function *VssegNFunc =
      Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
                                {VTy, SI->getPointerOperandType(), XLenTy});

  auto Mask = SVI->getShuffleMask();
  SmallVector<Value *, 10> Ops;

  for (unsigned i = 0; i < Factor; i++) {
    Value *Shuffle = Builder.CreateShuffleVector(
        SVI->getOperand(0), SVI->getOperand(1),
        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
    Ops.push_back(Shuffle);
  }

  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType above.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});

  Builder.CreateCall(VssegNFunc, Ops);

  return true;
}

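/// Lower a deinterleave2 intrinsic applied to a load into a vlsegN intrinsic.
///
/// Illustrative sketch (exact intrinsic mangling omitted), Factor = 2:
/// %wide = load <vscale x 8 x i32>, ptr %ptr
/// %de = call {<vscale x 4 x i32>, <vscale x 4 x i32>}
///           @llvm.experimental.vector.deinterleave2(<vscale x 8 x i32> %wide)
///
/// Into a call to @llvm.riscv.vlseg2 on %ptr with VL of all-ones (i.e. VLMAX),
/// whose two results replace the results of the deinterleave.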
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                           LoadInst *LI) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

  // Only deinterleave2 supported at present.
  if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  Function *VlsegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
  SmallVector<Value *, 10> Ops;

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
  }

  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);

  return true;
}

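/// Lower an interleave2 intrinsic feeding a store into a vssegN intrinsic.
///
/// Illustrative sketch (exact intrinsic mangling omitted), Factor = 2:
/// %iv = call <vscale x 8 x i32>
///           @llvm.experimental.vector.interleave2(<vscale x 4 x i32> %a,
///                                                 <vscale x 4 x i32> %b)
/// store <vscale x 8 x i32> %iv, ptr %ptr
///
/// Into a call to @llvm.riscv.vsseg2 passing %a, %b, %ptr and VL of all-ones
/// (i.e. VLMAX).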
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                                          StoreInst *SI) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

  // Only interleave2 supported at present.
  if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}

MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);
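
  // Emit a KCFI_CHECK pseudo carrying the register that holds the call target
  // and the expected type id; the pseudo is expanded into the actual check
  // sequence later, when pseudo instructions are lowered.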
  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 is the default value and works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
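  // The level is remapped to two MMO flag bits below (level minus 2): e.g.
  // level 3 (__RISCV_NTLH_ALL_PRIVATE) becomes 1 and sets only
  // MONontemporalBit0, while level 5 (__RISCV_NTLH_ALL) becomes 3 and sets
  // both nontemporal bits.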
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);

  return TargetFlags;
}

bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}

bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}

unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}

bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
  // We don't support scalable vectors in GISel.
  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy())
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable