Revert "[RISCV][ISel] Combine scalable vector add/sub/mul with zero/sign extension...
[llvm-project.git] / llvm / lib / Target / RISCV / RISCVISelLowering.cpp
blob 27bb69dc9868c88f0b9cb91ab87c96014f5ac068
1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
31 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/DiagnosticInfo.h"
34 #include "llvm/IR/DiagnosticPrinter.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/InstructionCost.h"
43 #include "llvm/Support/KnownBits.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include <optional>
48 using namespace llvm;
50 #define DEBUG_TYPE "riscv-lower"
52 STATISTIC(NumTailCalls, "Number of tail calls");
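// Hidden command-line options for tuning the lowering heuristics below; they
// are developer-facing knobs rather than part of the stable driver interface.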
54 static cl::opt<unsigned> ExtensionMaxWebSize(
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
60 static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
66 static cl::opt<unsigned> NumRepeatedDivisors(
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
72 static cl::opt<int>
73 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
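// Experimental: when enabled, i32 is registered as a legal SelectionDAG type
// on RV64 and the RV64LegalI32-guarded actions below take effect; otherwise
// i32 values are type-legalized to i64 as usual.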
78 static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
86 if (Subtarget.isRVE())
87 report_fatal_error("Codegen not yet implemented for RVE");
89 RISCVABI::ABI ABI = Subtarget.getTargetABI();
90 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
92 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
93 !Subtarget.hasStdExtF()) {
94 errs() << "Hard-float 'f' ABI can't be used for a target that "
95 "doesn't support the F instruction set extension (ignoring "
96 "target-abi)\n";
97 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
98 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
99 !Subtarget.hasStdExtD()) {
100 errs() << "Hard-float 'd' ABI can't be used for a target that "
101 "doesn't support the D instruction set extension (ignoring "
102 "target-abi)\n";
103 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
106 switch (ABI) {
107 default:
108 report_fatal_error("Don't know how to lower this ABI");
109 case RISCVABI::ABI_ILP32:
110 case RISCVABI::ABI_ILP32F:
111 case RISCVABI::ABI_ILP32D:
112 case RISCVABI::ABI_LP64:
113 case RISCVABI::ABI_LP64F:
114 case RISCVABI::ABI_LP64D:
115 break;
118 MVT XLenVT = Subtarget.getXLenVT();
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
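// Scalable vector value types supported by RVV, grouped by element type.
// These tables drive the register-class assignment and operation-action setup
// that follows.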
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
185 addRegisterClass(VT, RC);
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
230 // Compute derived properties from the register classes.
231 computeRegisterProperties(STI.getRegisterInfo());
233 setStackPointerRegisterToSaveRestore(RISCV::X2);
235 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
239 MVT::i1, Promote);
241 // TODO: add all necessary setOperationAction calls.
242 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
244 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
245 setOperationAction(ISD::BR_CC, XLenVT, Expand);
246 if (RV64LegalI32 && Subtarget.is64Bit())
247 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
248 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
249 setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
250 if (RV64LegalI32 && Subtarget.is64Bit())
251 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
253 setCondCodeAction(ISD::SETLE, XLenVT, Expand);
254 setCondCodeAction(ISD::SETGT, XLenVT, Custom);
255 setCondCodeAction(ISD::SETGE, XLenVT, Expand);
256 setCondCodeAction(ISD::SETULE, XLenVT, Expand);
257 setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
258 setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
260 if (RV64LegalI32 && Subtarget.is64Bit())
261 setOperationAction(ISD::SETCC, MVT::i32, Promote);
263 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
265 setOperationAction(ISD::VASTART, MVT::Other, Custom);
266 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
268 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
270 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
272 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
273 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
275 if (Subtarget.is64Bit()) {
276 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
278 if (!RV64LegalI32) {
279 setOperationAction(ISD::LOAD, MVT::i32, Custom);
280 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
281 MVT::i32, Custom);
282 setOperationAction(ISD::SADDO, MVT::i32, Custom);
283 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
284 MVT::i32, Custom);
286 } else {
287 setLibcallName(
288 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
289 nullptr);
290 setLibcallName(RTLIB::MULO_I64, nullptr);
293 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
294 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
295 if (RV64LegalI32 && Subtarget.is64Bit())
296 setOperationAction(ISD::MUL, MVT::i32, Promote);
297 } else if (Subtarget.is64Bit()) {
298 setOperationAction(ISD::MUL, MVT::i128, Custom);
299 if (!RV64LegalI32)
300 setOperationAction(ISD::MUL, MVT::i32, Custom);
301 } else {
302 setOperationAction(ISD::MUL, MVT::i64, Custom);
305 if (!Subtarget.hasStdExtM()) {
306 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
307 XLenVT, Expand);
308 if (RV64LegalI32 && Subtarget.is64Bit())
309 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
310 Promote);
311 } else if (Subtarget.is64Bit()) {
312 if (!RV64LegalI32)
313 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
314 {MVT::i8, MVT::i16, MVT::i32}, Custom);
317 if (RV64LegalI32 && Subtarget.is64Bit()) {
318 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
319 setOperationAction(
320 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
321 Expand);
324 setOperationAction(
325 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
326 Expand);
328 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
329 Custom);
331 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
332 if (!RV64LegalI32 && Subtarget.is64Bit())
333 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
334 } else if (Subtarget.hasVendorXTHeadBb()) {
335 if (Subtarget.is64Bit())
336 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
337 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
338 } else if (Subtarget.hasVendorXCVbitmanip()) {
339 setOperationAction(ISD::ROTL, XLenVT, Expand);
340 } else {
341 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
342 if (RV64LegalI32 && Subtarget.is64Bit())
343 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
346 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
347 // pattern match it directly in isel.
348 setOperationAction(ISD::BSWAP, XLenVT,
349 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
350 Subtarget.hasVendorXTHeadBb())
351 ? Legal
352 : Expand);
353 if (RV64LegalI32 && Subtarget.is64Bit())
354 setOperationAction(ISD::BSWAP, MVT::i32,
355 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
356 Subtarget.hasVendorXTHeadBb())
357 ? Promote
358 : Expand);
361 if (Subtarget.hasVendorXCVbitmanip()) {
362 setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
363 } else {
364 // Zbkb can use rev8+brev8 to implement bitreverse.
365 setOperationAction(ISD::BITREVERSE, XLenVT,
366 Subtarget.hasStdExtZbkb() ? Custom : Expand);
369 if (Subtarget.hasStdExtZbb()) {
370 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
371 Legal);
372 if (RV64LegalI32 && Subtarget.is64Bit())
373 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
374 Promote);
376 if (Subtarget.is64Bit()) {
377 if (RV64LegalI32)
378 setOperationAction(ISD::CTTZ, MVT::i32, Legal);
379 else
380 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
382 } else if (!Subtarget.hasVendorXCVbitmanip()) {
383 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
384 if (RV64LegalI32 && Subtarget.is64Bit())
385 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
388 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
389 Subtarget.hasVendorXCVbitmanip()) {
390 // We need the custom lowering to make sure that the resulting sequence
391 // for the 32bit case is efficient on 64bit targets.
392 if (Subtarget.is64Bit()) {
393 if (RV64LegalI32) {
394 setOperationAction(ISD::CTLZ, MVT::i32,
395 Subtarget.hasStdExtZbb() ? Legal : Promote);
396 if (!Subtarget.hasStdExtZbb())
397 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
398 } else
399 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
401 } else {
402 setOperationAction(ISD::CTLZ, XLenVT, Expand);
403 if (RV64LegalI32 && Subtarget.is64Bit())
404 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
407 if (!RV64LegalI32 && Subtarget.is64Bit() &&
408 !Subtarget.hasShortForwardBranchOpt())
409 setOperationAction(ISD::ABS, MVT::i32, Custom);
411 // We can use PseudoCCSUB to implement ABS.
412 if (Subtarget.hasShortForwardBranchOpt())
413 setOperationAction(ISD::ABS, XLenVT, Legal);
415 if (!Subtarget.hasVendorXTHeadCondMov())
416 setOperationAction(ISD::SELECT, XLenVT, Custom);
418 if (RV64LegalI32 && Subtarget.is64Bit())
419 setOperationAction(ISD::SELECT, MVT::i32, Promote);
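// Shared scalar floating-point tables used when configuring f16/bf16/f32/f64
// below. FPCCToExpand lists the comparisons that must be synthesized from the
// feq/flt/fle instructions (by swapping operands or inverting the result).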
421 static const unsigned FPLegalNodeTypes[] = {
422 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
423 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
424 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
425 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
426 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
427 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
429 static const ISD::CondCode FPCCToExpand[] = {
430 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
431 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
432 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
434 static const unsigned FPOpToExpand[] = {
435 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
436 ISD::FREM};
438 static const unsigned FPRndMode[] = {
439 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
440 ISD::FROUNDEVEN};
442 if (Subtarget.hasStdExtZfhminOrZhinxmin())
443 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
445 static const unsigned ZfhminZfbfminPromoteOps[] = {
446 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
447 ISD::FSUB, ISD::FMUL, ISD::FMA,
448 ISD::FDIV, ISD::FSQRT, ISD::FABS,
449 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
450 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
451 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
452 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
453 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
454 ISD::FROUNDEVEN, ISD::SELECT};
456 if (Subtarget.hasStdExtZfbfmin()) {
457 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
458 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
459 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
460 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
461 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
462 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
463 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
464 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
465 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
466 setOperationAction(ISD::FREM, MVT::bf16, Promote);
467 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
468 // DAGCombiner::visitFP_ROUND probably needs improvements first.
469 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
472 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
473 if (Subtarget.hasStdExtZfhOrZhinx()) {
474 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
475 setOperationAction(FPRndMode, MVT::f16,
476 Subtarget.hasStdExtZfa() ? Legal : Custom);
477 setOperationAction(ISD::SELECT, MVT::f16, Custom);
478 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
479 } else {
480 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
481 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
482 ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
483 MVT::f16, Legal);
484 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
485 // DAGCombiner::visitFP_ROUND probably needs improvements first.
486 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
489 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
490 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
492 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
493 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
495 setOperationAction(ISD::FNEARBYINT, MVT::f16,
496 Subtarget.hasStdExtZfa() ? Legal : Promote);
497 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
498 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
499 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
500 ISD::FLOG10},
501 MVT::f16, Promote);
503 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
504 // complete support for all operations in LegalizeDAG.
505 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
506 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
507 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
508 ISD::STRICT_FTRUNC},
509 MVT::f16, Promote);
511 // We need to custom promote this.
512 if (Subtarget.is64Bit())
513 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
515 if (!Subtarget.hasStdExtZfa())
516 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
519 if (Subtarget.hasStdExtFOrZfinx()) {
520 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
521 setOperationAction(FPRndMode, MVT::f32,
522 Subtarget.hasStdExtZfa() ? Legal : Custom);
523 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
524 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
525 setOperationAction(ISD::SELECT, MVT::f32, Custom);
526 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
527 setOperationAction(FPOpToExpand, MVT::f32, Expand);
528 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
529 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
530 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
531 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
532 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
533 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
534 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
535 Subtarget.isSoftFPABI() ? LibCall : Custom);
536 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
537 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
539 if (Subtarget.hasStdExtZfa())
540 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
541 else
542 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
545 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
546 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
548 if (Subtarget.hasStdExtDOrZdinx()) {
549 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
551 if (Subtarget.hasStdExtZfa()) {
552 setOperationAction(FPRndMode, MVT::f64, Legal);
553 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
554 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
555 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
556 } else {
557 if (Subtarget.is64Bit())
558 setOperationAction(FPRndMode, MVT::f64, Custom);
560 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
563 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
564 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
565 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
566 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
567 setOperationAction(ISD::SELECT, MVT::f64, Custom);
568 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
569 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
570 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
571 setOperationAction(FPOpToExpand, MVT::f64, Expand);
572 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
573 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
574 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
575 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
576 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
577 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
578 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
579 Subtarget.isSoftFPABI() ? LibCall : Custom);
580 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
581 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
584 if (Subtarget.is64Bit()) {
585 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
586 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
587 MVT::i32, Custom);
588 setOperationAction(ISD::LROUND, MVT::i32, Custom);
591 if (Subtarget.hasStdExtFOrZfinx()) {
592 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
593 Custom);
595 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
596 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
597 XLenVT, Legal);
599 if (RV64LegalI32 && Subtarget.is64Bit())
600 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
601 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
602 MVT::i32, Legal);
604 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
605 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
608 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
609 ISD::JumpTable},
610 XLenVT, Custom);
612 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
614 if (Subtarget.is64Bit())
615 setOperationAction(ISD::Constant, MVT::i64, Custom);
617 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
618 // Unfortunately this can't be determined just from the ISA naming string.
619 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
620 Subtarget.is64Bit() ? Legal : Custom);
622 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
623 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
624 if (Subtarget.is64Bit())
625 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
627 if (Subtarget.hasStdExtZicbop()) {
628 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
631 if (Subtarget.hasStdExtA()) {
632 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
633 setMinCmpXchgSizeInBits(32);
634 } else if (Subtarget.hasForcedAtomics()) {
635 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
636 } else {
637 setMaxAtomicSizeInBitsSupported(0);
640 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
642 setBooleanContents(ZeroOrOneBooleanContent);
644 if (Subtarget.hasVInstructions()) {
645 setBooleanVectorContents(ZeroOrOneBooleanContent);
647 setOperationAction(ISD::VSCALE, XLenVT, Custom);
648 if (RV64LegalI32 && Subtarget.is64Bit())
649 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
651 // RVV intrinsics may have illegal operands.
652 // We also need to custom legalize vmv.x.s.
653 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
654 ISD::INTRINSIC_VOID},
655 {MVT::i8, MVT::i16}, Custom);
656 if (Subtarget.is64Bit())
657 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
658 MVT::i32, Custom);
659 else
660 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
661 MVT::i64, Custom);
663 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
664 MVT::Other, Custom);
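// Vector-predicated (VP) opcodes that are custom-lowered for every legal
// integer and floating-point vector type.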
666 static const unsigned IntegerVPOps[] = {
667 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
668 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
669 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
670 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
671 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
672 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
673 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
674 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
675 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
676 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
677 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
678 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
680 static const unsigned FloatingPointVPOps[] = {
681 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
682 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
683 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
684 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
685 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
686 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
687 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
688 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
689 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
690 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
691 ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
693 static const unsigned IntegerVecReduceOps[] = {
694 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
695 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
696 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
698 static const unsigned FloatingPointVecReduceOps[] = {
699 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
700 ISD::VECREDUCE_FMAX};
702 if (!Subtarget.is64Bit()) {
703 // We must custom-lower certain vXi64 operations on RV32 due to the vector
704 // element type being illegal.
705 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
706 MVT::i64, Custom);
708 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
710 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
711 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
712 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
713 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
714 MVT::i64, Custom);
717 for (MVT VT : BoolVecVTs) {
718 if (!isTypeLegal(VT))
719 continue;
721 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
723 // Mask VTs are custom-expanded into a series of standard nodes
724 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
725 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
726 ISD::SCALAR_TO_VECTOR},
727 VT, Custom);
729 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
730 Custom);
732 setOperationAction(ISD::SELECT, VT, Custom);
733 setOperationAction(
734 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
735 Expand);
737 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
739 setOperationAction(
740 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
741 Custom);
743 setOperationAction(
744 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
745 Custom);
747 // RVV has native int->float & float->int conversions where the
748 // element type sizes are within one power-of-two of each other. Any
749 // wider distances between type sizes have to be lowered as sequences
750 // which progressively narrow the gap in stages.
751 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
752 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
753 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
754 ISD::STRICT_FP_TO_UINT},
755 VT, Custom);
756 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
757 Custom);
759 // Expand all extending loads to types larger than this, and truncating
760 // stores from types larger than this.
761 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
762 setTruncStoreAction(VT, OtherVT, Expand);
763 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
764 OtherVT, Expand);
767 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
768 ISD::VP_TRUNCATE, ISD::VP_SETCC},
769 VT, Custom);
771 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
772 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
774 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
776 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
777 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
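// Splice of mask vectors is performed on an equivalently-shaped i8 vector.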
779 setOperationPromotedToType(
780 ISD::VECTOR_SPLICE, VT,
781 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
784 for (MVT VT : IntVecVTs) {
785 if (!isTypeLegal(VT))
786 continue;
788 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
789 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
791 // Vectors implement MULHS/MULHU.
792 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
794 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
795 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
796 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
798 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
799 Legal);
801 // Custom-lower extensions and truncations from/to mask types.
802 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
803 VT, Custom);
805 // RVV has native int->float & float->int conversions where the
806 // element type sizes are within one power-of-two of each other. Any
807 // wider distances between type sizes have to be lowered as sequences
808 // which progressively narrow the gap in stages.
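// (For example, a conversion between i8 and f64 elements cannot be done in a
// single instruction and is emitted as a chain through intermediate widths.)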
809 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
810 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
811 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
812 ISD::STRICT_FP_TO_UINT},
813 VT, Custom);
814 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
815 Custom);
816 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
817 setOperationAction(
818 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
820 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
821 // nodes which truncate by one power of two at a time.
822 setOperationAction(ISD::TRUNCATE, VT, Custom);
824 // Custom-lower insert/extract operations to simplify patterns.
825 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
826 Custom);
828 // Custom-lower reduction operations to set up the corresponding custom
829 // nodes' operands.
830 setOperationAction(IntegerVecReduceOps, VT, Custom);
832 setOperationAction(IntegerVPOps, VT, Custom);
834 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
836 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
837 VT, Custom);
839 setOperationAction(
840 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
841 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
842 VT, Custom);
844 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
845 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
846 VT, Custom);
848 setOperationAction(ISD::SELECT, VT, Custom);
849 setOperationAction(ISD::SELECT_CC, VT, Expand);
851 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
853 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
854 setTruncStoreAction(VT, OtherVT, Expand);
855 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
856 OtherVT, Expand);
859 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
860 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
862 // Splice
863 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
865 if (Subtarget.hasStdExtZvkb()) {
866 setOperationAction(ISD::BSWAP, VT, Legal);
867 setOperationAction(ISD::VP_BSWAP, VT, Custom);
868 } else {
869 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
870 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
873 if (Subtarget.hasStdExtZvbb()) {
874 setOperationAction(ISD::BITREVERSE, VT, Legal);
875 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
876 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
877 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
878 VT, Custom);
879 } else {
880 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
881 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
882 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
883 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
884 VT, Expand);
886 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
887 // in the range of f32.
888 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
889 if (isTypeLegal(FloatVT)) {
890 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
891 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
892 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
893 VT, Custom);
898 // Expand various CCs to best match the RVV ISA, which natively supports UNE
899 // but no other unordered comparisons, and supports all ordered comparisons
900 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
901 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
902 // and we pattern-match those back to the "original", swapping operands once
903 // more. This way we catch both operations and both "vf" and "fv" forms with
904 // fewer patterns.
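// E.g. a SETOGT comparison is expanded to SETOLT with its operands swapped,
// and isel recognizes the swapped form for both the "vf" and "fv" variants.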
905 static const ISD::CondCode VFPCCToExpand[] = {
906 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
907 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
908 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
911 // TODO: support more ops.
912 static const unsigned ZvfhminPromoteOps[] = {
913 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
914 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
915 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
916 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
917 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
918 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
919 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
921 // TODO: support more vp ops.
922 static const unsigned ZvfhminPromoteVPOps[] = {
923 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
924 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
925 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
926 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
927 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
928 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
929 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
930 ISD::VP_FNEARBYINT, ISD::VP_SETCC};
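// With only Zvfhmin available, the f16 vector operations in the two tables
// above are performed in f32 instead (or custom-split for nxv32f16, where the
// corresponding f32 type does not exist).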
932 // Sets common operation actions on RVV floating-point vector types.
933 const auto SetCommonVFPActions = [&](MVT VT) {
934 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
935 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
936 // sizes are within one power-of-two of each other. Therefore conversions
937 // between vXf16 and vXf64 must be lowered as sequences which convert via
938 // vXf32.
939 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
940 // Custom-lower insert/extract operations to simplify patterns.
941 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
942 Custom);
943 // Expand various condition codes (explained above).
944 setCondCodeAction(VFPCCToExpand, VT, Expand);
946 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
947 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
949 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
950 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
951 ISD::IS_FPCLASS},
952 VT, Custom);
954 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
956 // Expand FP operations that need libcalls.
957 setOperationAction(ISD::FREM, VT, Expand);
958 setOperationAction(ISD::FPOW, VT, Expand);
959 setOperationAction(ISD::FCOS, VT, Expand);
960 setOperationAction(ISD::FSIN, VT, Expand);
961 setOperationAction(ISD::FSINCOS, VT, Expand);
962 setOperationAction(ISD::FEXP, VT, Expand);
963 setOperationAction(ISD::FEXP2, VT, Expand);
964 setOperationAction(ISD::FEXP10, VT, Expand);
965 setOperationAction(ISD::FLOG, VT, Expand);
966 setOperationAction(ISD::FLOG2, VT, Expand);
967 setOperationAction(ISD::FLOG10, VT, Expand);
969 setOperationAction(ISD::FCOPYSIGN, VT, Legal);
971 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
973 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
974 VT, Custom);
976 setOperationAction(
977 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
978 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
979 VT, Custom);
981 setOperationAction(ISD::SELECT, VT, Custom);
982 setOperationAction(ISD::SELECT_CC, VT, Expand);
984 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
985 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
986 VT, Custom);
988 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
989 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
991 setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
993 setOperationAction(FloatingPointVPOps, VT, Custom);
995 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
996 Custom);
997 setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
998 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
999 VT, Legal);
1000 setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
1001 ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
1002 ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1003 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1004 VT, Custom);
1007 // Sets common extload/truncstore actions on RVV floating-point vector
1008 // types.
1009 const auto SetCommonVFPExtLoadTruncStoreActions =
1010 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1011 for (auto SmallVT : SmallerVTs) {
1012 setTruncStoreAction(VT, SmallVT, Expand);
1013 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1017 if (Subtarget.hasVInstructionsF16()) {
1018 for (MVT VT : F16VecVTs) {
1019 if (!isTypeLegal(VT))
1020 continue;
1021 SetCommonVFPActions(VT);
1023 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1024 for (MVT VT : F16VecVTs) {
1025 if (!isTypeLegal(VT))
1026 continue;
1027 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1028 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1029 Custom);
1030 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1031 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1032 Custom);
1033 setOperationAction(ISD::SELECT_CC, VT, Expand);
1034 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1035 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1036 VT, Custom);
1037 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1038 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1039 VT, Custom);
1040 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1041 // load/store
1042 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1044 // Custom split nxv32f16 since nxv32f32 is not legal.
1045 if (VT == MVT::nxv32f16) {
1046 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1047 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1048 continue;
1050 // Add more promote ops.
1051 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1052 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1053 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1057 if (Subtarget.hasVInstructionsF32()) {
1058 for (MVT VT : F32VecVTs) {
1059 if (!isTypeLegal(VT))
1060 continue;
1061 SetCommonVFPActions(VT);
1062 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1066 if (Subtarget.hasVInstructionsF64()) {
1067 for (MVT VT : F64VecVTs) {
1068 if (!isTypeLegal(VT))
1069 continue;
1070 SetCommonVFPActions(VT);
1071 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1072 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1076 if (Subtarget.useRVVForFixedLengthVectors()) {
1077 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1078 if (!useRVVForFixedLengthVectorVT(VT))
1079 continue;
1081 // By default everything must be expanded.
1082 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1083 setOperationAction(Op, VT, Expand);
1084 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1085 setTruncStoreAction(VT, OtherVT, Expand);
1086 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1087 OtherVT, Expand);
1090 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1091 // expansion to a build_vector of 0s.
1092 setOperationAction(ISD::UNDEF, VT, Custom);
1094 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1095 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1096 Custom);
1098 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
1099 Custom);
1101 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1102 VT, Custom);
1104 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1106 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1108 setOperationAction(ISD::SETCC, VT, Custom);
1110 setOperationAction(ISD::SELECT, VT, Custom);
1112 setOperationAction(ISD::TRUNCATE, VT, Custom);
1114 setOperationAction(ISD::BITCAST, VT, Custom);
1116 setOperationAction(
1117 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1118 Custom);
1120 setOperationAction(
1121 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1122 Custom);
1124 setOperationAction(
1126 ISD::SINT_TO_FP,
1127 ISD::UINT_TO_FP,
1128 ISD::FP_TO_SINT,
1129 ISD::FP_TO_UINT,
1130 ISD::STRICT_SINT_TO_FP,
1131 ISD::STRICT_UINT_TO_FP,
1132 ISD::STRICT_FP_TO_SINT,
1133 ISD::STRICT_FP_TO_UINT,
1135 VT, Custom);
1136 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1137 Custom);
1139 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1141 // Operations below differ between mask vectors and other vectors.
1142 if (VT.getVectorElementType() == MVT::i1) {
1143 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1144 ISD::OR, ISD::XOR},
1145 VT, Custom);
1147 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1148 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1149 VT, Custom);
1151 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1152 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1153 continue;
1156 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1157 // it before type legalization for i64 vectors on RV32. It will then be
1158 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1159 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1160 // improvements first.
1161 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1162 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1163 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1166 setOperationAction(
1167 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1169 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1170 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1171 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1172 ISD::VP_SCATTER},
1173 VT, Custom);
1175 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1176 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1177 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1178 VT, Custom);
1180 setOperationAction(
1181 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1183 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1184 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1185 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1187 setOperationAction(
1188 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
1189 Custom);
1191 setOperationAction(ISD::VSELECT, VT, Custom);
1192 setOperationAction(ISD::SELECT_CC, VT, Expand);
1194 setOperationAction(
1195 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1197 // Custom-lower reduction operations to set up the corresponding custom
1198 // nodes' operands.
1199 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1200 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1201 ISD::VECREDUCE_UMIN},
1202 VT, Custom);
1204 setOperationAction(IntegerVPOps, VT, Custom);
1206 if (Subtarget.hasStdExtZvkb())
1207 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1209 if (Subtarget.hasStdExtZvbb()) {
1210 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1211 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1212 VT, Custom);
1213 } else {
1214 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1215 // in the range of f32.
1216 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1217 if (isTypeLegal(FloatVT))
1218 setOperationAction(
1219 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1220 Custom);
1224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1225 // There are no extending loads or truncating stores.
1226 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1227 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1228 setTruncStoreAction(VT, InnerVT, Expand);
1231 if (!useRVVForFixedLengthVectorVT(VT))
1232 continue;
1234 // By default everything must be expanded.
1235 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1236 setOperationAction(Op, VT, Expand);
1238 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1239 // expansion to a build_vector of 0s.
1240 setOperationAction(ISD::UNDEF, VT, Custom);
1242 if (VT.getVectorElementType() == MVT::f16 &&
1243 !Subtarget.hasVInstructionsF16()) {
1244 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1245 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1246 Custom);
1247 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1248 setOperationAction(
1249 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1250 Custom);
1251 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1252 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1253 VT, Custom);
1254 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1255 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1256 VT, Custom);
1257 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1258 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1259 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1260 // Don't promote f16 vector operations to f32 if f32 vector type is
1261 // not legal.
1262 // TODO: could split the f16 vector into two vectors and do promotion.
1263 if (!isTypeLegal(F32VecVT))
1264 continue;
1265 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1266 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1267 continue;
1270 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1271 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1272 Custom);
1274 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1275 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1276 ISD::EXTRACT_VECTOR_ELT},
1277 VT, Custom);
1279 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1280 ISD::MGATHER, ISD::MSCATTER},
1281 VT, Custom);
1283 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1284 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1285 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1286 ISD::VP_SCATTER},
1287 VT, Custom);
1289 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1290 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1291 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1292 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1293 VT, Custom);
1295 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1297 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1298 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1299 VT, Custom);
1301 setCondCodeAction(VFPCCToExpand, VT, Expand);
1303 setOperationAction(ISD::SETCC, VT, Custom);
1304 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1305 setOperationAction(ISD::SELECT_CC, VT, Expand);
1307 setOperationAction(ISD::BITCAST, VT, Custom);
1309 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1311 setOperationAction(FloatingPointVPOps, VT, Custom);
1313 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1314 Custom);
1315 setOperationAction(
1316 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1317 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1318 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1319 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1320 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1321 VT, Custom);
1324 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1325 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1326 Custom);
1327 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1328 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1329 if (Subtarget.hasStdExtFOrZfinx())
1330 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1331 if (Subtarget.hasStdExtDOrZdinx())
1332 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1336 if (Subtarget.hasStdExtA()) {
1337 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1338 if (RV64LegalI32 && Subtarget.is64Bit())
1339 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1342 if (Subtarget.hasForcedAtomics()) {
1343 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1344 setOperationAction(
1345 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1346 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1347 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1348 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1349 XLenVT, LibCall);
1352 if (Subtarget.hasVendorXTHeadMemIdx()) {
1353 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1354 ++im) {
1355 setIndexedLoadAction(im, MVT::i8, Legal);
1356 setIndexedStoreAction(im, MVT::i8, Legal);
1357 setIndexedLoadAction(im, MVT::i16, Legal);
1358 setIndexedStoreAction(im, MVT::i16, Legal);
1359 setIndexedLoadAction(im, MVT::i32, Legal);
1360 setIndexedStoreAction(im, MVT::i32, Legal);
1362 if (Subtarget.is64Bit()) {
1363 setIndexedLoadAction(im, MVT::i64, Legal);
1364 setIndexedStoreAction(im, MVT::i64, Legal);
1369 // Function alignments.
1370 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1371 setMinFunctionAlignment(FunctionAlignment);
1372 // Set preferred alignments.
1373 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1374 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1376 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1377 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
1378 ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1379 if (Subtarget.is64Bit())
1380 setTargetDAGCombine(ISD::SRA);
1382 if (Subtarget.hasStdExtFOrZfinx())
1383 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1385 if (Subtarget.hasStdExtZbb())
1386 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1388 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1389 setTargetDAGCombine(ISD::TRUNCATE);
1391 if (Subtarget.hasStdExtZbkb())
1392 setTargetDAGCombine(ISD::BITREVERSE);
1393 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1394 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1395 if (Subtarget.hasStdExtFOrZfinx())
1396 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1397 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1398 if (Subtarget.hasVInstructions())
1399 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1400 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1401 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1402 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1403 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1404 ISD::INSERT_VECTOR_ELT});
1405 if (Subtarget.hasVendorXTHeadMemPair())
1406 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1407 if (Subtarget.useRVVForFixedLengthVectors())
1408 setTargetDAGCombine(ISD::BITCAST);
1410 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1411 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1413 // Disable strict node mutation.
1414 IsStrictFPEnabled = true;
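// For vector types handled by RVV, compares produce an i1 mask vector with
// the same element count; scalar compares use the pointer-sized integer type.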
1417 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1418 LLVMContext &Context,
1419 EVT VT) const {
1420 if (!VT.isVector())
1421 return getPointerTy(DL);
1422 if (Subtarget.hasVInstructions() &&
1423 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1424 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1425 return VT.changeVectorElementTypeToInteger();
1428 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1429 return Subtarget.getXLenVT();
1432 // Return false if we can lower get_vector_length to a vsetvli intrinsic.
1433 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1434 unsigned VF,
1435 bool IsScalable) const {
1436 if (!Subtarget.hasVInstructions())
1437 return true;
1439 if (!IsScalable)
1440 return true;
1442 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1443 return true;
1445 // Don't allow VF=1 if those types aren't legal.
1446 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1447 return true;
1449 // VLEN=32 support is incomplete.
1450 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1451 return true;
1453 // The maximum VF is for the smallest element width with LMUL=8.
1454 // VF must be a power of 2.
1455 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1456 return VF > MaxVF || !isPowerOf2_32(VF);
1459 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1460 const CallInst &I,
1461 MachineFunction &MF,
1462 unsigned Intrinsic) const {
1463 auto &DL = I.getModule()->getDataLayout();
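// Shared helper for the RVV load/store intrinsics handled below: records the
// pointer operand, the memory value type, and the load/store flags.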
1465 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1466 bool IsUnitStrided) {
1467 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1468 Info.ptrVal = I.getArgOperand(PtrOp);
1469 Type *MemTy;
1470 if (IsStore) {
1471 // Store value is the first operand.
1472 MemTy = I.getArgOperand(0)->getType();
1473 } else {
1474 // Use the return type. If it's a segment load, the return type is a struct.
1475 MemTy = I.getType();
1476 if (MemTy->isStructTy())
1477 MemTy = MemTy->getStructElementType(0);
1479 if (!IsUnitStrided)
1480 MemTy = MemTy->getScalarType();
1482 Info.memVT = getValueType(DL, MemTy);
1483 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1484 Info.size = MemoryLocation::UnknownSize;
1485 Info.flags |=
1486 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1487 return true;
1490 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1491 Info.flags |= MachineMemOperand::MONonTemporal;
1493 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1494 switch (Intrinsic) {
1495 default:
1496 return false;
1497 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1498 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1499 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1500 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1501 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1502 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1503 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1504 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1505 case Intrinsic::riscv_masked_cmpxchg_i32:
1506 Info.opc = ISD::INTRINSIC_W_CHAIN;
1507 Info.memVT = MVT::i32;
1508 Info.ptrVal = I.getArgOperand(0);
1509 Info.offset = 0;
1510 Info.align = Align(4);
1511 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1512 MachineMemOperand::MOVolatile;
1513 return true;
1514 case Intrinsic::riscv_masked_strided_load:
1515 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1516 /*IsUnitStrided*/ false);
1517 case Intrinsic::riscv_masked_strided_store:
1518 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1519 /*IsUnitStrided*/ false);
1520 case Intrinsic::riscv_seg2_load:
1521 case Intrinsic::riscv_seg3_load:
1522 case Intrinsic::riscv_seg4_load:
1523 case Intrinsic::riscv_seg5_load:
1524 case Intrinsic::riscv_seg6_load:
1525 case Intrinsic::riscv_seg7_load:
1526 case Intrinsic::riscv_seg8_load:
1527 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1528 /*IsUnitStrided*/ false);
1529 case Intrinsic::riscv_seg2_store:
1530 case Intrinsic::riscv_seg3_store:
1531 case Intrinsic::riscv_seg4_store:
1532 case Intrinsic::riscv_seg5_store:
1533 case Intrinsic::riscv_seg6_store:
1534 case Intrinsic::riscv_seg7_store:
1535 case Intrinsic::riscv_seg8_store:
1536 // Operands are (vec, ..., vec, ptr, vl)
1537 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1538 /*IsStore*/ true,
1539 /*IsUnitStrided*/ false);
1540 case Intrinsic::riscv_vle:
1541 case Intrinsic::riscv_vle_mask:
1542 case Intrinsic::riscv_vleff:
1543 case Intrinsic::riscv_vleff_mask:
1544 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1545 /*IsStore*/ false,
1546 /*IsUnitStrided*/ true);
1547 case Intrinsic::riscv_vse:
1548 case Intrinsic::riscv_vse_mask:
1549 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1550 /*IsStore*/ true,
1551 /*IsUnitStrided*/ true);
1552 case Intrinsic::riscv_vlse:
1553 case Intrinsic::riscv_vlse_mask:
1554 case Intrinsic::riscv_vloxei:
1555 case Intrinsic::riscv_vloxei_mask:
1556 case Intrinsic::riscv_vluxei:
1557 case Intrinsic::riscv_vluxei_mask:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1559 /*IsStore*/ false,
1560 /*IsUnitStrided*/ false);
1561 case Intrinsic::riscv_vsse:
1562 case Intrinsic::riscv_vsse_mask:
1563 case Intrinsic::riscv_vsoxei:
1564 case Intrinsic::riscv_vsoxei_mask:
1565 case Intrinsic::riscv_vsuxei:
1566 case Intrinsic::riscv_vsuxei_mask:
1567 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1568 /*IsStore*/ true,
1569 /*IsUnitStrided*/ false);
1570 case Intrinsic::riscv_vlseg2:
1571 case Intrinsic::riscv_vlseg3:
1572 case Intrinsic::riscv_vlseg4:
1573 case Intrinsic::riscv_vlseg5:
1574 case Intrinsic::riscv_vlseg6:
1575 case Intrinsic::riscv_vlseg7:
1576 case Intrinsic::riscv_vlseg8:
1577 case Intrinsic::riscv_vlseg2ff:
1578 case Intrinsic::riscv_vlseg3ff:
1579 case Intrinsic::riscv_vlseg4ff:
1580 case Intrinsic::riscv_vlseg5ff:
1581 case Intrinsic::riscv_vlseg6ff:
1582 case Intrinsic::riscv_vlseg7ff:
1583 case Intrinsic::riscv_vlseg8ff:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1585 /*IsStore*/ false,
1586 /*IsUnitStrided*/ false);
1587 case Intrinsic::riscv_vlseg2_mask:
1588 case Intrinsic::riscv_vlseg3_mask:
1589 case Intrinsic::riscv_vlseg4_mask:
1590 case Intrinsic::riscv_vlseg5_mask:
1591 case Intrinsic::riscv_vlseg6_mask:
1592 case Intrinsic::riscv_vlseg7_mask:
1593 case Intrinsic::riscv_vlseg8_mask:
1594 case Intrinsic::riscv_vlseg2ff_mask:
1595 case Intrinsic::riscv_vlseg3ff_mask:
1596 case Intrinsic::riscv_vlseg4ff_mask:
1597 case Intrinsic::riscv_vlseg5ff_mask:
1598 case Intrinsic::riscv_vlseg6ff_mask:
1599 case Intrinsic::riscv_vlseg7ff_mask:
1600 case Intrinsic::riscv_vlseg8ff_mask:
1601 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1602 /*IsStore*/ false,
1603 /*IsUnitStrided*/ false);
1604 case Intrinsic::riscv_vlsseg2:
1605 case Intrinsic::riscv_vlsseg3:
1606 case Intrinsic::riscv_vlsseg4:
1607 case Intrinsic::riscv_vlsseg5:
1608 case Intrinsic::riscv_vlsseg6:
1609 case Intrinsic::riscv_vlsseg7:
1610 case Intrinsic::riscv_vlsseg8:
1611 case Intrinsic::riscv_vloxseg2:
1612 case Intrinsic::riscv_vloxseg3:
1613 case Intrinsic::riscv_vloxseg4:
1614 case Intrinsic::riscv_vloxseg5:
1615 case Intrinsic::riscv_vloxseg6:
1616 case Intrinsic::riscv_vloxseg7:
1617 case Intrinsic::riscv_vloxseg8:
1618 case Intrinsic::riscv_vluxseg2:
1619 case Intrinsic::riscv_vluxseg3:
1620 case Intrinsic::riscv_vluxseg4:
1621 case Intrinsic::riscv_vluxseg5:
1622 case Intrinsic::riscv_vluxseg6:
1623 case Intrinsic::riscv_vluxseg7:
1624 case Intrinsic::riscv_vluxseg8:
1625 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1626 /*IsStore*/ false,
1627 /*IsUnitStrided*/ false);
1628 case Intrinsic::riscv_vlsseg2_mask:
1629 case Intrinsic::riscv_vlsseg3_mask:
1630 case Intrinsic::riscv_vlsseg4_mask:
1631 case Intrinsic::riscv_vlsseg5_mask:
1632 case Intrinsic::riscv_vlsseg6_mask:
1633 case Intrinsic::riscv_vlsseg7_mask:
1634 case Intrinsic::riscv_vlsseg8_mask:
1635 case Intrinsic::riscv_vloxseg2_mask:
1636 case Intrinsic::riscv_vloxseg3_mask:
1637 case Intrinsic::riscv_vloxseg4_mask:
1638 case Intrinsic::riscv_vloxseg5_mask:
1639 case Intrinsic::riscv_vloxseg6_mask:
1640 case Intrinsic::riscv_vloxseg7_mask:
1641 case Intrinsic::riscv_vloxseg8_mask:
1642 case Intrinsic::riscv_vluxseg2_mask:
1643 case Intrinsic::riscv_vluxseg3_mask:
1644 case Intrinsic::riscv_vluxseg4_mask:
1645 case Intrinsic::riscv_vluxseg5_mask:
1646 case Intrinsic::riscv_vluxseg6_mask:
1647 case Intrinsic::riscv_vluxseg7_mask:
1648 case Intrinsic::riscv_vluxseg8_mask:
1649 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1650 /*IsStore*/ false,
1651 /*IsUnitStrided*/ false);
1652 case Intrinsic::riscv_vsseg2:
1653 case Intrinsic::riscv_vsseg3:
1654 case Intrinsic::riscv_vsseg4:
1655 case Intrinsic::riscv_vsseg5:
1656 case Intrinsic::riscv_vsseg6:
1657 case Intrinsic::riscv_vsseg7:
1658 case Intrinsic::riscv_vsseg8:
1659 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1660 /*IsStore*/ true,
1661 /*IsUnitStrided*/ false);
1662 case Intrinsic::riscv_vsseg2_mask:
1663 case Intrinsic::riscv_vsseg3_mask:
1664 case Intrinsic::riscv_vsseg4_mask:
1665 case Intrinsic::riscv_vsseg5_mask:
1666 case Intrinsic::riscv_vsseg6_mask:
1667 case Intrinsic::riscv_vsseg7_mask:
1668 case Intrinsic::riscv_vsseg8_mask:
1669 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1670 /*IsStore*/ true,
1671 /*IsUnitStrided*/ false);
1672 case Intrinsic::riscv_vssseg2:
1673 case Intrinsic::riscv_vssseg3:
1674 case Intrinsic::riscv_vssseg4:
1675 case Intrinsic::riscv_vssseg5:
1676 case Intrinsic::riscv_vssseg6:
1677 case Intrinsic::riscv_vssseg7:
1678 case Intrinsic::riscv_vssseg8:
1679 case Intrinsic::riscv_vsoxseg2:
1680 case Intrinsic::riscv_vsoxseg3:
1681 case Intrinsic::riscv_vsoxseg4:
1682 case Intrinsic::riscv_vsoxseg5:
1683 case Intrinsic::riscv_vsoxseg6:
1684 case Intrinsic::riscv_vsoxseg7:
1685 case Intrinsic::riscv_vsoxseg8:
1686 case Intrinsic::riscv_vsuxseg2:
1687 case Intrinsic::riscv_vsuxseg3:
1688 case Intrinsic::riscv_vsuxseg4:
1689 case Intrinsic::riscv_vsuxseg5:
1690 case Intrinsic::riscv_vsuxseg6:
1691 case Intrinsic::riscv_vsuxseg7:
1692 case Intrinsic::riscv_vsuxseg8:
1693 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1694 /*IsStore*/ true,
1695 /*IsUnitStrided*/ false);
1696 case Intrinsic::riscv_vssseg2_mask:
1697 case Intrinsic::riscv_vssseg3_mask:
1698 case Intrinsic::riscv_vssseg4_mask:
1699 case Intrinsic::riscv_vssseg5_mask:
1700 case Intrinsic::riscv_vssseg6_mask:
1701 case Intrinsic::riscv_vssseg7_mask:
1702 case Intrinsic::riscv_vssseg8_mask:
1703 case Intrinsic::riscv_vsoxseg2_mask:
1704 case Intrinsic::riscv_vsoxseg3_mask:
1705 case Intrinsic::riscv_vsoxseg4_mask:
1706 case Intrinsic::riscv_vsoxseg5_mask:
1707 case Intrinsic::riscv_vsoxseg6_mask:
1708 case Intrinsic::riscv_vsoxseg7_mask:
1709 case Intrinsic::riscv_vsoxseg8_mask:
1710 case Intrinsic::riscv_vsuxseg2_mask:
1711 case Intrinsic::riscv_vsuxseg3_mask:
1712 case Intrinsic::riscv_vsuxseg4_mask:
1713 case Intrinsic::riscv_vsuxseg5_mask:
1714 case Intrinsic::riscv_vsuxseg6_mask:
1715 case Intrinsic::riscv_vsuxseg7_mask:
1716 case Intrinsic::riscv_vsuxseg8_mask:
1717 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1718 /*IsStore*/ true,
1719 /*IsUnitStrided*/ false);
1723 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1724 const AddrMode &AM, Type *Ty,
1725 unsigned AS,
1726 Instruction *I) const {
1727 // No global is ever allowed as a base.
1728 if (AM.BaseGV)
1729 return false;
1731 // RVV instructions only support register addressing.
1732 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1733 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1735 // Require a 12-bit signed offset.
1736 if (!isInt<12>(AM.BaseOffs))
1737 return false;
1739 switch (AM.Scale) {
1740 case 0: // "r+i" or just "i", depending on HasBaseReg.
1741 break;
1742 case 1:
1743 if (!AM.HasBaseReg) // allow "r+i".
1744 break;
1745 return false; // disallow "r+r" or "r+r+i".
1746 default:
1747 return false;
1750 return true;
1753 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1754 return isInt<12>(Imm);
1757 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1758 return isInt<12>(Imm);
1761 // On RV32, 64-bit integers are split into their high and low parts and held
1762 // in two different registers, so the trunc is free since the low register can
1763 // just be used.
1764 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1765 // isTruncateFree?
1766 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1767 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1768 return false;
1769 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1770 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1771 return (SrcBits == 64 && DestBits == 32);
1774 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1775 // We consider i64->i32 free on RV64 since we have good selection of W
1776 // instructions that make promoting operations back to i64 free in many cases.
1777 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1778 !DstVT.isInteger())
1779 return false;
1780 unsigned SrcBits = SrcVT.getSizeInBits();
1781 unsigned DestBits = DstVT.getSizeInBits();
1782 return (SrcBits == 64 && DestBits == 32);
1785 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1786 // Zexts are free if they can be combined with a load.
1787 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1788 // poorly with type legalization of compares preferring sext.
1789 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1790 EVT MemVT = LD->getMemoryVT();
1791 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1792 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1793 LD->getExtensionType() == ISD::ZEXTLOAD))
1794 return true;
1797 return TargetLowering::isZExtFree(Val, VT2);
1800 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1801 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1804 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1805 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1808 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1809 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1812 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1813 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1814 Subtarget.hasVendorXCVbitmanip();
1817 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1818 const Instruction &AndI) const {
1819 // We expect to be able to match a bit extraction instruction if the Zbs
1820 // extension is supported and the mask is a power of two. However, we
1821 // conservatively return false if the mask would fit in an ANDI instruction,
1822 // on the basis that it's possible the sinking+duplication of the AND in
1823 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1824 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
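  // For example (illustrative): a mask of 1 << 10 fits in ANDI, so we return
  // false; a mask of 1 << 20 is a power of two that does not fit, so we return
  // true and expect a BEXT[I]/th.tst to be matched instead.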
1825 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1826 return false;
1827 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1828 if (!Mask)
1829 return false;
1830 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1833 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1834 EVT VT = Y.getValueType();
1836 // FIXME: Support vectors once we have tests.
1837 if (VT.isVector())
1838 return false;
1840 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1841 !isa<ConstantSDNode>(Y);
1844 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1845 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1846 if (Subtarget.hasStdExtZbs())
1847 return X.getValueType().isScalarInteger();
1848 auto *C = dyn_cast<ConstantSDNode>(Y);
1849 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1850 if (Subtarget.hasVendorXTHeadBs())
1851 return C != nullptr;
1852 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
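  // (A bit position of at most 10 keeps the mask (1 << Y) within ANDI's signed
  // 12-bit immediate range.)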
1853 return C && C->getAPIntValue().ule(10);
1856 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1857 EVT VT) const {
1858 // Only enable for rvv.
1859 if (!VT.isVector() || !Subtarget.hasVInstructions())
1860 return false;
1862 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1863 return false;
1865 return true;
1868 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1869 Type *Ty) const {
1870 assert(Ty->isIntegerTy());
1872 unsigned BitSize = Ty->getIntegerBitWidth();
1873 if (BitSize > Subtarget.getXLen())
1874 return false;
1876 // Fast path, assume 32-bit immediates are cheap.
1877 int64_t Val = Imm.getSExtValue();
1878 if (isInt<32>(Val))
1879 return true;
1881   // A constant pool entry may be more aligned than the load we're trying to
1882 // replace. If we don't support unaligned scalar mem, prefer the constant
1883 // pool.
1884 // TODO: Can the caller pass down the alignment?
1885 if (!Subtarget.hasFastUnalignedAccess())
1886 return true;
1888 // Prefer to keep the load if it would require many instructions.
1889 // This uses the same threshold we use for constant pools but doesn't
1890 // check useConstantPoolForLargeInts.
1891 // TODO: Should we keep the load only when we're definitely going to emit a
1892 // constant pool?
1894 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
1895 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1898 bool RISCVTargetLowering::
1899 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1900 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1901 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1902 SelectionDAG &DAG) const {
1903 // One interesting pattern that we'd want to form is 'bit extract':
1904 // ((1 >> Y) & 1) ==/!= 0
1905 // But we also need to be careful not to try to reverse that fold.
1907 // Is this '((1 >> Y) & 1)'?
1908 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1909 return false; // Keep the 'bit extract' pattern.
1911 // Will this be '((1 >> Y) & 1)' after the transform?
1912 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1913 return true; // Do form the 'bit extract' pattern.
1915 // If 'X' is a constant, and we transform, then we will immediately
1916 // try to undo the fold, thus causing endless combine loop.
1917 // So only do the transform if X is not a constant. This matches the default
1918 // implementation of this function.
1919 return !XC;
1922 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1923 switch (Opcode) {
1924 case Instruction::Add:
1925 case Instruction::Sub:
1926 case Instruction::Mul:
1927 case Instruction::And:
1928 case Instruction::Or:
1929 case Instruction::Xor:
1930 case Instruction::FAdd:
1931 case Instruction::FSub:
1932 case Instruction::FMul:
1933 case Instruction::FDiv:
1934 case Instruction::ICmp:
1935 case Instruction::FCmp:
1936 return true;
1937 case Instruction::Shl:
1938 case Instruction::LShr:
1939 case Instruction::AShr:
1940 case Instruction::UDiv:
1941 case Instruction::SDiv:
1942 case Instruction::URem:
1943 case Instruction::SRem:
1944 return Operand == 1;
1945 default:
1946 return false;
1951 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1952 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1953 return false;
1955 if (canSplatOperand(I->getOpcode(), Operand))
1956 return true;
1958 auto *II = dyn_cast<IntrinsicInst>(I);
1959 if (!II)
1960 return false;
1962 switch (II->getIntrinsicID()) {
1963 case Intrinsic::fma:
1964 case Intrinsic::vp_fma:
1965 return Operand == 0 || Operand == 1;
1966 case Intrinsic::vp_shl:
1967 case Intrinsic::vp_lshr:
1968 case Intrinsic::vp_ashr:
1969 case Intrinsic::vp_udiv:
1970 case Intrinsic::vp_sdiv:
1971 case Intrinsic::vp_urem:
1972 case Intrinsic::vp_srem:
1973 return Operand == 1;
1974 // These intrinsics are commutative.
1975 case Intrinsic::vp_add:
1976 case Intrinsic::vp_mul:
1977 case Intrinsic::vp_and:
1978 case Intrinsic::vp_or:
1979 case Intrinsic::vp_xor:
1980 case Intrinsic::vp_fadd:
1981 case Intrinsic::vp_fmul:
1982 case Intrinsic::vp_icmp:
1983 case Intrinsic::vp_fcmp:
1984 // These intrinsics have 'vr' versions.
1985 case Intrinsic::vp_sub:
1986 case Intrinsic::vp_fsub:
1987 case Intrinsic::vp_fdiv:
1988 return Operand == 0 || Operand == 1;
1989 default:
1990 return false;
1994 /// Check if sinking \p I's operands to I's basic block is profitable, because
1995 /// the operands can be folded into a target instruction, e.g.
1996 /// splats of scalars can fold into vector instructions.
1997 bool RISCVTargetLowering::shouldSinkOperands(
1998 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1999 using namespace llvm::PatternMatch;
2001 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2002 return false;
2004 for (auto OpIdx : enumerate(I->operands())) {
2005 if (!canSplatOperand(I, OpIdx.index()))
2006 continue;
2008 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2009 // Make sure we are not already sinking this operand
2010 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2011 continue;
2013 // We are looking for a splat that can be sunk.
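    // That is, the canonical IR splat (illustrative IR; poison may appear in
    // place of undef):
    //   shufflevector (insertelement undef, %scalar, 0), undef, zeroinitializer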
2014 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
2015 m_Undef(), m_ZeroMask())))
2016 continue;
2018 // Don't sink i1 splats.
2019 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2020 continue;
2022 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2023 // and vector registers
2024 for (Use &U : Op->uses()) {
2025 Instruction *Insn = cast<Instruction>(U.getUser());
2026 if (!canSplatOperand(Insn, U.getOperandNo()))
2027 return false;
2030 Ops.push_back(&Op->getOperandUse(0));
2031 Ops.push_back(&OpIdx.value());
2033 return true;
2036 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2037 unsigned Opc = VecOp.getOpcode();
2039 // Assume target opcodes can't be scalarized.
2040 // TODO - do we have any exceptions?
2041 if (Opc >= ISD::BUILTIN_OP_END)
2042 return false;
2044 // If the vector op is not supported, try to convert to scalar.
2045 EVT VecVT = VecOp.getValueType();
2046 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2047 return true;
2049 // If the vector op is supported, but the scalar op is not, the transform may
2050 // not be worthwhile.
2051   // Permit a vector binary operation to be converted to a scalar binary
2052   // operation which is custom lowered with an illegal type.
2053 EVT ScalarVT = VecVT.getScalarType();
2054 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2055 isOperationCustom(Opc, ScalarVT);
2058 bool RISCVTargetLowering::isOffsetFoldingLegal(
2059 const GlobalAddressSDNode *GA) const {
2060 // In order to maximise the opportunity for common subexpression elimination,
2061 // keep a separate ADD node for the global address offset instead of folding
2062 // it in the global address node. Later peephole optimisations may choose to
2063 // fold it back in when profitable.
2064 return false;
2067 // Return one of the following:
2068 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2069 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2070 // positive counterpart, which will be materialized from the first returned
2071 // element. The second returned element indicates that an FNEG should
2072 // follow.
2073 // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
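// For example (illustrative): Imm == -0.5 has no direct FLI encoding, so this
// returns the FLI index of 0.5 together with the FNEG flag set.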
2074 std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2075 EVT VT) const {
2076 if (!Subtarget.hasStdExtZfa())
2077 return std::make_pair(-1, false);
2079 bool IsSupportedVT = false;
2080 if (VT == MVT::f16) {
2081 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2082 } else if (VT == MVT::f32) {
2083 IsSupportedVT = true;
2084 } else if (VT == MVT::f64) {
2085 assert(Subtarget.hasStdExtD() && "Expect D extension");
2086 IsSupportedVT = true;
2089 if (!IsSupportedVT)
2090 return std::make_pair(-1, false);
2092 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2093 if (Index < 0 && Imm.isNegative())
2094 // Try the combination of its positive counterpart + FNEG.
2095 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2096 else
2097 return std::make_pair(Index, false);
2100 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2101 bool ForCodeSize) const {
2102 bool IsLegalVT = false;
2103 if (VT == MVT::f16)
2104 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2105 else if (VT == MVT::f32)
2106 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2107 else if (VT == MVT::f64)
2108 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2109 else if (VT == MVT::bf16)
2110 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2112 if (!IsLegalVT)
2113 return false;
2115 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2116 return true;
2118 // Cannot create a 64 bit floating-point immediate value for rv32.
2119 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2120 // td can handle +0.0 or -0.0 already.
2121 // -0.0 can be created by fmv + fneg.
2122 return Imm.isZero();
2125 // Special case: fmv + fneg
2126 if (Imm.isNegZero())
2127 return true;
2129 // Building an integer and then converting requires a fmv at the end of
2130 // the integer sequence.
2131 const int Cost =
2132 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2133 Subtarget);
2134 return Cost <= FPImmCost;
2137 // TODO: This is very conservative.
2138 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2139 unsigned Index) const {
2140 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2141 return false;
2143 // Only support extracting a fixed from a fixed vector for now.
2144 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2145 return false;
2147 unsigned ResElts = ResVT.getVectorNumElements();
2148 unsigned SrcElts = SrcVT.getVectorNumElements();
2150   // Conservatively only handle extracting half of a vector.
2151 // TODO: Relax this.
2152 if ((ResElts * 2) != SrcElts)
2153 return false;
2155 // The smallest type we can slide is i8.
2156 // TODO: We can extract index 0 from a mask vector without a slide.
2157 if (ResVT.getVectorElementType() == MVT::i1)
2158 return false;
2160   // A slide can support an arbitrary index, but we only treat vslidedown.vi as
2161 // cheap.
2162 if (Index >= 32)
2163 return false;
2165 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2166 // the upper half of a vector until we have more test coverage.
2167 return Index == 0 || Index == ResElts;
2170 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2171 CallingConv::ID CC,
2172 EVT VT) const {
2173 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2174 // We might still end up using a GPR but that will be decided based on ABI.
2175 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2176 !Subtarget.hasStdExtZfhminOrZhinxmin())
2177 return MVT::f32;
2179 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2181 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2182 return MVT::i64;
2184 return PartVT;
2187 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2188 CallingConv::ID CC,
2189 EVT VT) const {
2190 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2191 // We might still end up using a GPR but that will be decided based on ABI.
2192 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2193 !Subtarget.hasStdExtZfhminOrZhinxmin())
2194 return 1;
2196 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2199 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2200 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2201 unsigned &NumIntermediates, MVT &RegisterVT) const {
2202 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2203 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2205 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2206 IntermediateVT = MVT::i64;
2208 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2209 RegisterVT = MVT::i64;
2211 return NumRegs;
2214 // Changes the condition code and swaps operands if necessary, so the SetCC
2215 // operation matches one of the comparisons supported directly by branches
2216 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2217 // with 1/-1.
2218 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2219 ISD::CondCode &CC, SelectionDAG &DAG) {
2220 // If this is a single bit test that can't be handled by ANDI, shift the
2221 // bit to be tested to the MSB and perform a signed compare with 0.
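  // For example (RV64, illustrative): (X & 0x4000000000000000) == 0 cannot use
  // ANDI, so it is rewritten as (X << 1) >= 0.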
2222 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2223 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2224 isa<ConstantSDNode>(LHS.getOperand(1))) {
2225 uint64_t Mask = LHS.getConstantOperandVal(1);
2226 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2227 unsigned ShAmt = 0;
2228 if (isPowerOf2_64(Mask)) {
2229 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2230 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2231 } else {
2232 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2235 LHS = LHS.getOperand(0);
2236 if (ShAmt != 0)
2237 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2238 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2239 return;
2243 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2244 int64_t C = RHSC->getSExtValue();
2245 switch (CC) {
2246 default: break;
2247 case ISD::SETGT:
2248 // Convert X > -1 to X >= 0.
2249 if (C == -1) {
2250 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2251 CC = ISD::SETGE;
2252 return;
2254 break;
2255 case ISD::SETLT:
2256 // Convert X < 1 to 0 >= X.
2257 if (C == 1) {
2258 RHS = LHS;
2259 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2260 CC = ISD::SETGE;
2261 return;
2263 break;
2267 switch (CC) {
2268 default:
2269 break;
2270 case ISD::SETGT:
2271 case ISD::SETLE:
2272 case ISD::SETUGT:
2273 case ISD::SETULE:
2274 CC = ISD::getSetCCSwappedOperands(CC);
2275 std::swap(LHS, RHS);
2276 break;
2280 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2281 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2282 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2283 if (VT.getVectorElementType() == MVT::i1)
2284 KnownSize *= 8;
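  // For example, nxv4i32 has a known minimum size of 128 bits and thus maps to
  // LMUL_2 below (assuming RVVBitsPerBlock == 64).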
2286 switch (KnownSize) {
2287 default:
2288 llvm_unreachable("Invalid LMUL.");
2289 case 8:
2290 return RISCVII::VLMUL::LMUL_F8;
2291 case 16:
2292 return RISCVII::VLMUL::LMUL_F4;
2293 case 32:
2294 return RISCVII::VLMUL::LMUL_F2;
2295 case 64:
2296 return RISCVII::VLMUL::LMUL_1;
2297 case 128:
2298 return RISCVII::VLMUL::LMUL_2;
2299 case 256:
2300 return RISCVII::VLMUL::LMUL_4;
2301 case 512:
2302 return RISCVII::VLMUL::LMUL_8;
2306 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2307 switch (LMul) {
2308 default:
2309 llvm_unreachable("Invalid LMUL.");
2310 case RISCVII::VLMUL::LMUL_F8:
2311 case RISCVII::VLMUL::LMUL_F4:
2312 case RISCVII::VLMUL::LMUL_F2:
2313 case RISCVII::VLMUL::LMUL_1:
2314 return RISCV::VRRegClassID;
2315 case RISCVII::VLMUL::LMUL_2:
2316 return RISCV::VRM2RegClassID;
2317 case RISCVII::VLMUL::LMUL_4:
2318 return RISCV::VRM4RegClassID;
2319 case RISCVII::VLMUL::LMUL_8:
2320 return RISCV::VRM8RegClassID;
2324 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2325 RISCVII::VLMUL LMUL = getLMUL(VT);
2326 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2327 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2328 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2329 LMUL == RISCVII::VLMUL::LMUL_1) {
2330 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2331 "Unexpected subreg numbering");
2332 return RISCV::sub_vrm1_0 + Index;
2334 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2335 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2336 "Unexpected subreg numbering");
2337 return RISCV::sub_vrm2_0 + Index;
2339 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2340 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2341 "Unexpected subreg numbering");
2342 return RISCV::sub_vrm4_0 + Index;
2344 llvm_unreachable("Invalid vector type.");
2347 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2348 if (VT.getVectorElementType() == MVT::i1)
2349 return RISCV::VRRegClassID;
2350 return getRegClassIDForLMUL(getLMUL(VT));
2353 // Attempt to decompose a subvector insert/extract between VecVT and
2354 // SubVecVT via subregister indices. Returns the subregister index that
2355 // can perform the subvector insert/extract with the given element index, as
2356 // well as the index corresponding to any leftover subvectors that must be
2357 // further inserted/extracted within the register class for SubVecVT.
2358 std::pair<unsigned, unsigned>
2359 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2360 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2361 const RISCVRegisterInfo *TRI) {
2362 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2363 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2364 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2365 "Register classes not ordered");
2366 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2367 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2368 // Try to compose a subregister index that takes us from the incoming
2369 // LMUL>1 register class down to the outgoing one. At each step we half
2370 // the LMUL:
2371 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2372 // Note that this is not guaranteed to find a subregister index, such as
2373 // when we are extracting from one VR type to another.
2374 unsigned SubRegIdx = RISCV::NoSubRegister;
2375 for (const unsigned RCID :
2376 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2377 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2378 VecVT = VecVT.getHalfNumVectorElementsVT();
2379 bool IsHi =
2380 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2381 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2382 getSubregIndexByMVT(VecVT, IsHi));
2383 if (IsHi)
2384 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2386 return {SubRegIdx, InsertExtractIdx};
2389 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2390 // stores for those types.
2391 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2392 return !Subtarget.useRVVForFixedLengthVectors() ||
2393 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2396 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2397 if (!ScalarTy.isSimple())
2398 return false;
2399 switch (ScalarTy.getSimpleVT().SimpleTy) {
2400 case MVT::iPTR:
2401 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2402 case MVT::i8:
2403 case MVT::i16:
2404 case MVT::i32:
2405 return true;
2406 case MVT::i64:
2407 return Subtarget.hasVInstructionsI64();
2408 case MVT::f16:
2409 return Subtarget.hasVInstructionsF16();
2410 case MVT::f32:
2411 return Subtarget.hasVInstructionsF32();
2412 case MVT::f64:
2413 return Subtarget.hasVInstructionsF64();
2414 default:
2415 return false;
2420 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2421 return NumRepeatedDivisors;
2424 static SDValue getVLOperand(SDValue Op) {
2425 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2426 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2427 "Unexpected opcode");
2428 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2429 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2430 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2431 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2432 if (!II)
2433 return SDValue();
2434 return Op.getOperand(II->VLOperand + 1 + HasChain);
2437 static bool useRVVForFixedLengthVectorVT(MVT VT,
2438 const RISCVSubtarget &Subtarget) {
2439 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2440 if (!Subtarget.useRVVForFixedLengthVectors())
2441 return false;
2443 // We only support a set of vector types with a consistent maximum fixed size
2444 // across all supported vector element types to avoid legalization issues.
2445 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2446 // fixed-length vector type we support is 1024 bytes.
2447 if (VT.getFixedSizeInBits() > 1024 * 8)
2448 return false;
2450 unsigned MinVLen = Subtarget.getRealMinVLen();
2452 MVT EltVT = VT.getVectorElementType();
2454 // Don't use RVV for vectors we cannot scalarize if required.
2455 switch (EltVT.SimpleTy) {
2456 // i1 is supported but has different rules.
2457 default:
2458 return false;
2459 case MVT::i1:
2460 // Masks can only use a single register.
2461 if (VT.getVectorNumElements() > MinVLen)
2462 return false;
2463 MinVLen /= 8;
2464 break;
2465 case MVT::i8:
2466 case MVT::i16:
2467 case MVT::i32:
2468 break;
2469 case MVT::i64:
2470 if (!Subtarget.hasVInstructionsI64())
2471 return false;
2472 break;
2473 case MVT::f16:
2474 if (!Subtarget.hasVInstructionsF16Minimal())
2475 return false;
2476 break;
2477 case MVT::f32:
2478 if (!Subtarget.hasVInstructionsF32())
2479 return false;
2480 break;
2481 case MVT::f64:
2482 if (!Subtarget.hasVInstructionsF64())
2483 return false;
2484 break;
2487 // Reject elements larger than ELEN.
2488 if (EltVT.getSizeInBits() > Subtarget.getELen())
2489 return false;
2491 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2492 // Don't use RVV for types that don't fit.
2493 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2494 return false;
2496 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2497 // the base fixed length RVV support in place.
2498 if (!VT.isPow2VectorType())
2499 return false;
2501 return true;
2504 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2505 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2508 // Return the largest legal scalable vector type that matches VT's element type.
2509 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2510 const RISCVSubtarget &Subtarget) {
2511 // This may be called before legal types are setup.
2512 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2513 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2514 "Expected legal fixed length vector!");
2516 unsigned MinVLen = Subtarget.getRealMinVLen();
2517 unsigned MaxELen = Subtarget.getELen();
2519 MVT EltVT = VT.getVectorElementType();
2520 switch (EltVT.SimpleTy) {
2521 default:
2522 llvm_unreachable("unexpected element type for RVV container");
2523 case MVT::i1:
2524 case MVT::i8:
2525 case MVT::i16:
2526 case MVT::i32:
2527 case MVT::i64:
2528 case MVT::f16:
2529 case MVT::f32:
2530 case MVT::f64: {
2531 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2532 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2533 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
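    // For example (assuming RVVBitsPerBlock == 64): v4i32 with MinVLen == 128
    // gives NumElts == (4 * 64) / 128 == 2, i.e. an nxv2i32 container.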
2534 unsigned NumElts =
2535 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2536 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2537 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2538 return MVT::getScalableVectorVT(EltVT, NumElts);
2543 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2544 const RISCVSubtarget &Subtarget) {
2545 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2546 Subtarget);
2549 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2550 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2553 // Grow V to consume an entire RVV register.
2554 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2555 const RISCVSubtarget &Subtarget) {
2556 assert(VT.isScalableVector() &&
2557 "Expected to convert into a scalable vector!");
2558 assert(V.getValueType().isFixedLengthVector() &&
2559 "Expected a fixed length vector operand!");
2560 SDLoc DL(V);
2561 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2562 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2565 // Shrink V so it's just big enough to maintain a VT's worth of data.
2566 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2567 const RISCVSubtarget &Subtarget) {
2568 assert(VT.isFixedLengthVector() &&
2569 "Expected to convert into a fixed length vector!");
2570 assert(V.getValueType().isScalableVector() &&
2571 "Expected a scalable vector operand!");
2572 SDLoc DL(V);
2573 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2574 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2577 /// Return the type of the mask type suitable for masking the provided
2578 /// vector type. This is simply an i1 element type vector of the same
2579 /// (possibly scalable) length.
2580 static MVT getMaskTypeFor(MVT VecVT) {
2581 assert(VecVT.isVector());
2582 ElementCount EC = VecVT.getVectorElementCount();
2583 return MVT::getVectorVT(MVT::i1, EC);
2586 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2587 /// vector length VL.
2588 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2589 SelectionDAG &DAG) {
2590 MVT MaskVT = getMaskTypeFor(VecVT);
2591 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2594 static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2595 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2596 // If we know the exact VLEN, our VL is exactly equal to VLMAX, and
2597 // we can't encode the AVL as an immediate, use the VLMAX encoding.
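  // For example, an AVL of 64 does not fit vsetivli's 5-bit immediate; if 64 is
  // also known to be exactly VLMAX, requesting VLMAX via X0 avoids
  // materializing 64 in a register.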
2598 const auto [MinVLMAX, MaxVLMAX] =
2599 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2600 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
2601 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2603 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2606 static std::pair<SDValue, SDValue>
2607 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2608 const RISCVSubtarget &Subtarget) {
2609 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2610 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2611 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2612 return {Mask, VL};
2615 static std::pair<SDValue, SDValue>
2616 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2617 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2618 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2619 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2620 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2621 return {Mask, VL};
2624 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2625 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2626 // the vector type that the fixed-length vector is contained in. Otherwise if
2627 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2628 static std::pair<SDValue, SDValue>
2629 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2630 const RISCVSubtarget &Subtarget) {
2631 if (VecVT.isFixedLengthVector())
2632 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2633 Subtarget);
2634 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2635 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2638 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2639 SelectionDAG &DAG) const {
2640 assert(VecVT.isScalableVector() && "Expected scalable vector");
2641 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2642 VecVT.getVectorElementCount());
2645 std::pair<unsigned, unsigned>
2646 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2647 const RISCVSubtarget &Subtarget) {
2648 assert(VecVT.isScalableVector() && "Expected scalable vector");
2650 unsigned EltSize = VecVT.getScalarSizeInBits();
2651 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2653 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2654 unsigned MaxVLMAX =
2655 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2657 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2658 unsigned MinVLMAX =
2659 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2661 return std::make_pair(MinVLMAX, MaxVLMAX);
2664 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very little
2665 // of either is (currently) supported. This can get us into an infinite loop
2666 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2667 // as a ..., etc.
2668 // Until either (or both) of these can reliably lower any node, reporting that
2669 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2670 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2671 // which is not desirable.
2672 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2673 EVT VT, unsigned DefinedValues) const {
2674 return false;
2677 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2678   // TODO: Here we assume reciprocal throughput is 1 for LMUL_1; it is really
2679   // implementation-defined.
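  // For example, with DLenFactor == 2 (DLEN == VLEN/2), an LMUL_1 operation
  // costs 2 and an MF2 (LMUL_F2) operation costs 1 under this model.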
2680 if (!VT.isVector())
2681 return InstructionCost::getInvalid();
2682 unsigned DLenFactor = Subtarget.getDLenFactor();
2683 unsigned Cost;
2684 if (VT.isScalableVector()) {
2685 unsigned LMul;
2686 bool Fractional;
2687 std::tie(LMul, Fractional) =
2688 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2689 if (Fractional)
2690 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2691 else
2692 Cost = (LMul * DLenFactor);
2693 } else {
2694 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2696 return Cost;
2700 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2701 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2702 /// the index (and possibly mask) operands are handled separately.
2703 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2704 return getLMULCost(VT) * getLMULCost(VT);
2707 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2708 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2709 /// or may track the vrgather.vv cost. It is implementation-dependent.
2710 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2711 return getLMULCost(VT);
2714 /// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2715 /// for the type VT. (This does not cover the vslide1up or vslide1down
2716 /// variants.) Slides may be linear in the number of vregs implied by LMUL,
2717 /// or may track the vrgather.vv cost. It is implementation-dependent.
2718 InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
2719 return getLMULCost(VT);
2722 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2723 const RISCVSubtarget &Subtarget) {
2724 // RISC-V FP-to-int conversions saturate to the destination register size, but
2725 // don't produce 0 for nan. We can use a conversion instruction and fix the
2726 // nan case with a compare and a select.
2727 SDValue Src = Op.getOperand(0);
2729 MVT DstVT = Op.getSimpleValueType();
2730 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2732 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2734 if (!DstVT.isVector()) {
2735     // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2736 // the result.
2737 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2738 Src.getValueType() == MVT::bf16) {
2739 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2742 unsigned Opc;
2743 if (SatVT == DstVT)
2744 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2745 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2746 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2747 else
2748 return SDValue();
2749 // FIXME: Support other SatVTs by clamping before or after the conversion.
2751 SDLoc DL(Op);
2752 SDValue FpToInt = DAG.getNode(
2753 Opc, DL, DstVT, Src,
2754 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2756 if (Opc == RISCVISD::FCVT_WU_RV64)
2757 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2759 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2760 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2761 ISD::CondCode::SETUO);
2764 // Vectors.
2766 MVT DstEltVT = DstVT.getVectorElementType();
2767 MVT SrcVT = Src.getSimpleValueType();
2768 MVT SrcEltVT = SrcVT.getVectorElementType();
2769 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2770 unsigned DstEltSize = DstEltVT.getSizeInBits();
2772 // Only handle saturating to the destination type.
2773 if (SatVT != DstEltVT)
2774 return SDValue();
2776   // FIXME: Don't support narrowing by more than 1 step for now.
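  // For example, an f64 -> i8 saturating conversion would require more than one
  // narrowing step and is rejected here.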
2777 if (SrcEltSize > (2 * DstEltSize))
2778 return SDValue();
2780 MVT DstContainerVT = DstVT;
2781 MVT SrcContainerVT = SrcVT;
2782 if (DstVT.isFixedLengthVector()) {
2783 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2784 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2785 assert(DstContainerVT.getVectorElementCount() ==
2786 SrcContainerVT.getVectorElementCount() &&
2787 "Expected same element count");
2788 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2791 SDLoc DL(Op);
2793 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2795 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2796 {Src, Src, DAG.getCondCode(ISD::SETNE),
2797 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2799 // Need to widen by more than 1 step, promote the FP type, then do a widening
2800 // convert.
2801 if (DstEltSize > (2 * SrcEltSize)) {
2802 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2803 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2804 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2807 unsigned RVVOpc =
2808 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2809 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2811 SDValue SplatZero = DAG.getNode(
2812 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2813 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2814 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2815 Res, VL);
2817 if (DstVT.isFixedLengthVector())
2818 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2820 return Res;
2823 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2824 switch (Opc) {
2825 case ISD::FROUNDEVEN:
2826 case ISD::STRICT_FROUNDEVEN:
2827 case ISD::VP_FROUNDEVEN:
2828 return RISCVFPRndMode::RNE;
2829 case ISD::FTRUNC:
2830 case ISD::STRICT_FTRUNC:
2831 case ISD::VP_FROUNDTOZERO:
2832 return RISCVFPRndMode::RTZ;
2833 case ISD::FFLOOR:
2834 case ISD::STRICT_FFLOOR:
2835 case ISD::VP_FFLOOR:
2836 return RISCVFPRndMode::RDN;
2837 case ISD::FCEIL:
2838 case ISD::STRICT_FCEIL:
2839 case ISD::VP_FCEIL:
2840 return RISCVFPRndMode::RUP;
2841 case ISD::FROUND:
2842 case ISD::STRICT_FROUND:
2843 case ISD::VP_FROUND:
2844 return RISCVFPRndMode::RMM;
2845 case ISD::FRINT:
2846 return RISCVFPRndMode::DYN;
2849 return RISCVFPRndMode::Invalid;
2852 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2853 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2854 // the integer domain and back, taking care to avoid converting values that are
2855 // nan or already correct.
2856 static SDValue
2857 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2858 const RISCVSubtarget &Subtarget) {
2859 MVT VT = Op.getSimpleValueType();
2860 assert(VT.isVector() && "Unexpected type");
2862 SDLoc DL(Op);
2864 SDValue Src = Op.getOperand(0);
2866 MVT ContainerVT = VT;
2867 if (VT.isFixedLengthVector()) {
2868 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2869 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2872 SDValue Mask, VL;
2873 if (Op->isVPOpcode()) {
2874 Mask = Op.getOperand(1);
2875 if (VT.isFixedLengthVector())
2876 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2877 Subtarget);
2878 VL = Op.getOperand(2);
2879 } else {
2880 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2883 // Freeze the source since we are increasing the number of uses.
2884 Src = DAG.getFreeze(Src);
2886 // We do the conversion on the absolute value and fix the sign at the end.
2887 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2889 // Determine the largest integer that can be represented exactly. This and
2890 // values larger than it don't have any fractional bits so don't need to
2891 // be converted.
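  // For example, f32 has a 24-bit significand, so MaxVal is 2^23; any f32 with
  // magnitude >= 2^23 is already an integer.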
2892 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2893 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2894 APFloat MaxVal = APFloat(FltSem);
2895 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2896 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2897 SDValue MaxValNode =
2898 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2899 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2900 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2902 // If abs(Src) was larger than MaxVal or nan, keep it.
2903 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2904 Mask =
2905 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2906 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2907 Mask, Mask, VL});
2909 // Truncate to integer and convert back to FP.
2910 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2911 MVT XLenVT = Subtarget.getXLenVT();
2912 SDValue Truncated;
2914 switch (Op.getOpcode()) {
2915 default:
2916 llvm_unreachable("Unexpected opcode");
2917 case ISD::FCEIL:
2918 case ISD::VP_FCEIL:
2919 case ISD::FFLOOR:
2920 case ISD::VP_FFLOOR:
2921 case ISD::FROUND:
2922 case ISD::FROUNDEVEN:
2923 case ISD::VP_FROUND:
2924 case ISD::VP_FROUNDEVEN:
2925 case ISD::VP_FROUNDTOZERO: {
2926 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2927 assert(FRM != RISCVFPRndMode::Invalid);
2928 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2929 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2930 break;
2932 case ISD::FTRUNC:
2933 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2934 Mask, VL);
2935 break;
2936 case ISD::FRINT:
2937 case ISD::VP_FRINT:
2938 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2939 break;
2940 case ISD::FNEARBYINT:
2941 case ISD::VP_FNEARBYINT:
2942 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2943 Mask, VL);
2944 break;
2947 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2948 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2949 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2950 Mask, VL);
2952 // Restore the original sign so that -0.0 is preserved.
2953 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2954 Src, Src, Mask, VL);
2956 if (!VT.isFixedLengthVector())
2957 return Truncated;
2959 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2962 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2963 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
2964 // qNaNs and converting the new source to integer and back to FP.
2965 static SDValue
2966 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2967 const RISCVSubtarget &Subtarget) {
2968 SDLoc DL(Op);
2969 MVT VT = Op.getSimpleValueType();
2970 SDValue Chain = Op.getOperand(0);
2971 SDValue Src = Op.getOperand(1);
2973 MVT ContainerVT = VT;
2974 if (VT.isFixedLengthVector()) {
2975 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2976 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2979 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2981 // Freeze the source since we are increasing the number of uses.
2982 Src = DAG.getFreeze(Src);
2984   // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
2985 MVT MaskVT = Mask.getSimpleValueType();
2986 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2987 DAG.getVTList(MaskVT, MVT::Other),
2988 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2989 DAG.getUNDEF(MaskVT), Mask, VL});
2990 Chain = Unorder.getValue(1);
2991 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
2992 DAG.getVTList(ContainerVT, MVT::Other),
2993 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2994 Chain = Src.getValue(1);
2996 // We do the conversion on the absolute value and fix the sign at the end.
2997 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2999 // Determine the largest integer that can be represented exactly. This and
3000 // values larger than it don't have any fractional bits so don't need to
3001 // be converted.
3002 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3003 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3004 APFloat MaxVal = APFloat(FltSem);
3005 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3006 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3007 SDValue MaxValNode =
3008 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3009 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3010 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3012 // If abs(Src) was larger than MaxVal or nan, keep it.
3013 Mask = DAG.getNode(
3014 RISCVISD::SETCC_VL, DL, MaskVT,
3015 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3017 // Truncate to integer and convert back to FP.
3018 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3019 MVT XLenVT = Subtarget.getXLenVT();
3020 SDValue Truncated;
3022 switch (Op.getOpcode()) {
3023 default:
3024 llvm_unreachable("Unexpected opcode");
3025 case ISD::STRICT_FCEIL:
3026 case ISD::STRICT_FFLOOR:
3027 case ISD::STRICT_FROUND:
3028 case ISD::STRICT_FROUNDEVEN: {
3029 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3030 assert(FRM != RISCVFPRndMode::Invalid);
3031 Truncated = DAG.getNode(
3032 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3033 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3034 break;
3036 case ISD::STRICT_FTRUNC:
3037 Truncated =
3038 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3039 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3040 break;
3041 case ISD::STRICT_FNEARBYINT:
3042 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3043 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3044 Mask, VL);
3045 break;
3047 Chain = Truncated.getValue(1);
3049 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3050 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3051 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3052 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3053 Truncated, Mask, VL);
3054 Chain = Truncated.getValue(1);
3057 // Restore the original sign so that -0.0 is preserved.
3058 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3059 Src, Src, Mask, VL);
3061 if (VT.isFixedLengthVector())
3062 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3063 return DAG.getMergeValues({Truncated, Chain}, DL);
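// Illustrative sketch (not taken verbatim from the lowering above) of the node
// sequence this produces for a strict vector ceil, where p is the element
// type's precision:
//   unord = strict_fsetcc_vl src, src, setune        ; find the NaN lanes
//   src'  = strict_fadd_vl src, src, mask=unord      ; quiet sNaN -> qNaN
//   m     = setcc_vl fabs_vl(src'), 2^(p-1), setolt  ; lanes that may still
//                                                    ; carry fractional bits
//   i     = strict_vfcvt_rm_x_f_vl src', m, FRM=RUP
//   r     = strict_sint_to_fp_vl i, m
//   res   = fcopysign_vl r, src'                     ; restore sign / -0.0
// Large-magnitude values and NaNs are excluded from the convert by m.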
3066 static SDValue
3067 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3068 const RISCVSubtarget &Subtarget) {
3069 MVT VT = Op.getSimpleValueType();
3070 if (VT.isVector())
3071 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3073 if (DAG.shouldOptForSize())
3074 return SDValue();
3076 SDLoc DL(Op);
3077 SDValue Src = Op.getOperand(0);
3079 // Create an integer the size of the mantissa with the MSB set. This and all
3080 // values larger than it don't have any fractional bits so don't need to be
3081 // converted.
3082 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3083 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3084 APFloat MaxVal = APFloat(FltSem);
3085 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3086 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3087 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3089 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3090 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3091 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
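// Worked example (illustrative): for f32 the precision is 24 bits (23 stored
// mantissa bits plus the implicit one), so MaxVal above is 2^23 = 8388608.0;
// for f64 (precision 53) it is 2^52. Every value whose magnitude is at least
// 2^(precision-1) is already an integer, so the FROUND node only has to round
// the lanes below that threshold.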
3094 // Expand vector LRINT and LLRINT by converting to the integer domain.
3095 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3096 const RISCVSubtarget &Subtarget) {
3097 MVT VT = Op.getSimpleValueType();
3098 assert(VT.isVector() && "Unexpected type");
3100 SDLoc DL(Op);
3101 SDValue Src = Op.getOperand(0);
3102 MVT ContainerVT = VT;
3104 if (VT.isFixedLengthVector()) {
3105 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3106 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3109 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3110 SDValue Truncated =
3111 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3113 if (!VT.isFixedLengthVector())
3114 return Truncated;
3116 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3119 static SDValue
3120 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3121 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3122 SDValue Offset, SDValue Mask, SDValue VL,
3123 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3124 if (Merge.isUndef())
3125 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3126 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3127 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3128 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3131 static SDValue
3132 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3133 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3134 SDValue VL,
3135 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3136 if (Merge.isUndef())
3137 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3138 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3139 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3140 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3143 static MVT getLMUL1VT(MVT VT) {
3144 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3145 "Unexpected vector MVT");
3146 return MVT::getScalableVectorVT(
3147 VT.getVectorElementType(),
3148 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
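// Worked example (illustrative): with RVVBitsPerBlock = 64, the LMUL=4 type
// nxv8i32 maps to getLMUL1VT(nxv8i32) = nxv2i32 (64 / 32 = 2 elements per
// single vector register), and nxv16i8 maps to nxv8i8.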
3151 struct VIDSequence {
3152 int64_t StepNumerator;
3153 unsigned StepDenominator;
3154 int64_t Addend;
3157 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3158 uint32_t BitWidth) {
3159 APSInt ValInt(BitWidth, !APF.isNegative());
3160 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3161 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3162 // the rounding mode changes the output value, then it is not an exact
3163 // integer.
3164 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3165 bool IsExact;
3166 // If it is out of signed integer range, it will return an invalid operation.
3167 // If it is not an exact integer, IsExact is false.
3168 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3169 APFloatBase::opInvalidOp) ||
3170 !IsExact)
3171 return std::nullopt;
3172 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
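// Illustrative examples for the helper above: getExactInteger(3.0, 32) yields
// 3 and getExactInteger(-2.0, 32) yields 0xFFFFFFFE (the two's-complement bit
// pattern), while 2.5 is not an exact integer and yields std::nullopt.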
3175 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3176 // to the (non-zero) step S and start value X. This can then be lowered as the
3177 // RVV sequence (VID * S) + X, for example.
3178 // The step S is represented as an integer numerator divided by a positive
3179 // denominator. Note that the implementation currently only identifies
3180 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3181 // cannot detect 2/3, for example.
3182 // Note that this method will also match potentially unappealing index
3183 // sequences, like <i32 0, i32 50939494>; it is left to the caller to
3184 // determine whether this is worth generating code for.
3185 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
3186 unsigned NumElts = Op.getNumOperands();
3187 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3188 bool IsInteger = Op.getValueType().isInteger();
3190 std::optional<unsigned> SeqStepDenom;
3191 std::optional<int64_t> SeqStepNum, SeqAddend;
3192 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3193 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
3194 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3195 // Assume undef elements match the sequence; we just have to be careful
3196 // when interpolating across them.
3197 if (Op.getOperand(Idx).isUndef())
3198 continue;
3200 uint64_t Val;
3201 if (IsInteger) {
3202 // The BUILD_VECTOR must be all constants.
3203 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
3204 return std::nullopt;
3205 Val = Op.getConstantOperandVal(Idx) &
3206 maskTrailingOnes<uint64_t>(EltSizeInBits);
3207 } else {
3208 // The BUILD_VECTOR must be all constants.
3209 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
3210 return std::nullopt;
3211 if (auto ExactInteger = getExactInteger(
3212 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3213 EltSizeInBits))
3214 Val = *ExactInteger;
3215 else
3216 return std::nullopt;
3219 if (PrevElt) {
3220 // Calculate the step since the last non-undef element, and ensure
3221 // it's consistent across the entire sequence.
3222 unsigned IdxDiff = Idx - PrevElt->second;
3223 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
3225 // A zero value difference means that we're somewhere in the middle
3226 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3227 // step change before evaluating the sequence.
3228 if (ValDiff == 0)
3229 continue;
3231 int64_t Remainder = ValDiff % IdxDiff;
3232 // Normalize the step if it's greater than 1.
3233 if (Remainder != ValDiff) {
3234 // The difference must cleanly divide the element span.
3235 if (Remainder != 0)
3236 return std::nullopt;
3237 ValDiff /= IdxDiff;
3238 IdxDiff = 1;
3241 if (!SeqStepNum)
3242 SeqStepNum = ValDiff;
3243 else if (ValDiff != SeqStepNum)
3244 return std::nullopt;
3246 if (!SeqStepDenom)
3247 SeqStepDenom = IdxDiff;
3248 else if (IdxDiff != *SeqStepDenom)
3249 return std::nullopt;
3252 // Record this non-undef element for later.
3253 if (!PrevElt || PrevElt->first != Val)
3254 PrevElt = std::make_pair(Val, Idx);
3257 // We need to have logged a step for this to count as a legal index sequence.
3258 if (!SeqStepNum || !SeqStepDenom)
3259 return std::nullopt;
3261 // Loop back through the sequence and validate elements we might have skipped
3262 // while waiting for a valid step. While doing this, log any sequence addend.
3263 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3264 if (Op.getOperand(Idx).isUndef())
3265 continue;
3266 uint64_t Val;
3267 if (IsInteger) {
3268 Val = Op.getConstantOperandVal(Idx) &
3269 maskTrailingOnes<uint64_t>(EltSizeInBits);
3270 } else {
3271 Val = *getExactInteger(
3272 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3273 EltSizeInBits);
3275 uint64_t ExpectedVal =
3276 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3277 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
3278 if (!SeqAddend)
3279 SeqAddend = Addend;
3280 else if (Addend != SeqAddend)
3281 return std::nullopt;
3284 assert(SeqAddend && "Must have an addend if we have a step");
3286 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
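// Illustrative examples for isSimpleVIDSequence: <0, 2, 4, 6> matches with
// {StepNumerator=2, StepDenominator=1, Addend=0}, <3, 4, 5, 6> matches with
// {1, 1, 3}, and <0, 0, 1, 1> matches the fractional case {1, 2, 0} since
// element i equals (i * 1) / 2 + 0. A plain-C++ sketch (hypothetical helper,
// not part of this file) of the value the returned triple encodes:
#if 0
#include <cstdint>
static int64_t vidSequenceElement(int64_t StepNumerator,
                                  unsigned StepDenominator, int64_t Addend,
                                  unsigned Idx) {
  // Mirrors the ExpectedVal computation in the validation loop above.
  return (int64_t)(Idx * (uint64_t)StepNumerator) / StepDenominator + Addend;
}
#endif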
3289 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3290 // and lower it as a VRGATHER_VX_VL from the source vector.
3291 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3292 SelectionDAG &DAG,
3293 const RISCVSubtarget &Subtarget) {
3294 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3295 return SDValue();
3296 SDValue Vec = SplatVal.getOperand(0);
3297 // Only perform this optimization on vectors of the same size for simplicity.
3298 // Don't perform this optimization for i1 vectors.
3299 // FIXME: Support i1 vectors, maybe by promoting to i8?
3300 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3301 return SDValue();
3302 SDValue Idx = SplatVal.getOperand(1);
3303 // The index must be a legal type.
3304 if (Idx.getValueType() != Subtarget.getXLenVT())
3305 return SDValue();
3307 MVT ContainerVT = VT;
3308 if (VT.isFixedLengthVector()) {
3309 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3310 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3313 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3315 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3316 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3318 if (!VT.isFixedLengthVector())
3319 return Gather;
3321 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
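// Illustrative example: splatting t = extractelt v, idx (with idx already in
// XLenVT) becomes a single vrgather.vx of v by the scalar index, avoiding a
// scalar extract followed by a vmv.v.x/vfmv.v.f re-splat.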
3325 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3326 /// which constitute a large proportion of the elements. In such cases we can
3327 /// splat a vector with the dominant element and make up the shortfall with
3328 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3329 /// Note that this includes vectors of 2 elements by association. The
3330 /// upper-most element is the "dominant" one, allowing us to use a splat to
3331 /// "insert" the upper element, and an insert of the lower element at position
3332 /// 0, which improves codegen.
3333 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3334 const RISCVSubtarget &Subtarget) {
3335 MVT VT = Op.getSimpleValueType();
3336 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3338 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3340 SDLoc DL(Op);
3341 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3343 MVT XLenVT = Subtarget.getXLenVT();
3344 unsigned NumElts = Op.getNumOperands();
3346 SDValue DominantValue;
3347 unsigned MostCommonCount = 0;
3348 DenseMap<SDValue, unsigned> ValueCounts;
3349 unsigned NumUndefElts =
3350 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3352 // Track the number of scalar loads we know we'd be inserting, estimated as
3353 // any non-zero floating-point constant. Other kinds of element are either
3354 // already in registers or are materialized on demand. The threshold at which
3355 // a vector load is more desirable than several scalar materializations and
3356 // vector-insertion instructions is not known.
3357 unsigned NumScalarLoads = 0;
3359 for (SDValue V : Op->op_values()) {
3360 if (V.isUndef())
3361 continue;
3363 ValueCounts.insert(std::make_pair(V, 0));
3364 unsigned &Count = ValueCounts[V];
3365 if (0 == Count)
3366 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3367 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3369 // Is this value dominant? In case of a tie, prefer the highest element as
3370 // it's cheaper to insert near the beginning of a vector than it is at the
3371 // end.
3372 if (++Count >= MostCommonCount) {
3373 DominantValue = V;
3374 MostCommonCount = Count;
3378 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3379 unsigned NumDefElts = NumElts - NumUndefElts;
3380 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3382 // Don't perform this optimization when optimizing for size, since
3383 // materializing elements and inserting them tends to cause code bloat.
3384 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3385 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3386 ((MostCommonCount > DominantValueCountThreshold) ||
3387 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3388 // Start by splatting the most common element.
3389 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3391 DenseSet<SDValue> Processed{DominantValue};
3393 // We can handle an insert into the last element (of a splat) via
3394 // v(f)slide1down. This is slightly better than the vslideup insert
3395 // lowering as it avoids the need for a vector group temporary. It
3396 // is also better than using vmerge.vx as it avoids the need to
3397 // materialize the mask in a vector register.
3398 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3399 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3400 LastOp != DominantValue) {
3401 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3402 auto OpCode =
3403 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3404 if (!VT.isFloatingPoint())
3405 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3406 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3407 LastOp, Mask, VL);
3408 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3409 Processed.insert(LastOp);
3412 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3413 for (const auto &OpIdx : enumerate(Op->ops())) {
3414 const SDValue &V = OpIdx.value();
3415 if (V.isUndef() || !Processed.insert(V).second)
3416 continue;
3417 if (ValueCounts[V] == 1) {
3418 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3419 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3420 } else {
3421 // Blend in all instances of this value using a VSELECT, using a
3422 // mask where each bit signals whether that element is the one
3423 // we're after.
3424 SmallVector<SDValue> Ops;
3425 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3426 return DAG.getConstant(V == V1, DL, XLenVT);
3428 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3429 DAG.getBuildVector(SelMaskTy, DL, Ops),
3430 DAG.getSplatBuildVector(VT, DL, V), Vec);
3434 return Vec;
3437 return SDValue();
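// Illustrative example of the dominant-value path above: for the constant
// build_vector <7, 7, 3, 7, 7, 7, 7, 7> the value 7 dominates (7 of 8
// elements), so the lowering splats 7 and emits a single insertelt of 3 at
// index 2. If the one-off element had instead been the last operand, the
// v(f)slide1down special case would be used on the splat instead.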
3440 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3441 const RISCVSubtarget &Subtarget) {
3442 MVT VT = Op.getSimpleValueType();
3443 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3445 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3447 SDLoc DL(Op);
3448 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3450 MVT XLenVT = Subtarget.getXLenVT();
3451 unsigned NumElts = Op.getNumOperands();
3453 if (VT.getVectorElementType() == MVT::i1) {
3454 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3455 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3456 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3459 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3460 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3461 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3464 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3465 // scalar integer chunks whose bit-width depends on the number of mask
3466 // bits and XLEN.
3467 // First, determine the most appropriate scalar integer type to use. This
3468 // is at most XLenVT, but may be shrunk to a smaller vector element type
3469 // according to the size of the final vector - use i8 chunks rather than
3470 // XLenVT if we're producing a v8i1. This results in more consistent
3471 // codegen across RV32 and RV64.
3472 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3473 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3474 // If we have to use more than one INSERT_VECTOR_ELT then this
3475 // optimization is likely to increase code size; avoid performing it in
3476 // such a case. We can use a load from a constant pool in this case.
3477 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3478 return SDValue();
3479 // Now we can create our integer vector type. Note that it may be larger
3480 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3481 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3482 MVT IntegerViaVecVT =
3483 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3484 IntegerViaVecElts);
3486 uint64_t Bits = 0;
3487 unsigned BitPos = 0, IntegerEltIdx = 0;
3488 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3490 for (unsigned I = 0; I < NumElts;) {
3491 SDValue V = Op.getOperand(I);
3492 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3493 Bits |= ((uint64_t)BitValue << BitPos);
3494 ++BitPos;
3495 ++I;
3497 // Once we accumulate enough bits to fill our scalar type or process the
3498 // last element, insert into our vector and clear our accumulated data.
3499 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3500 if (NumViaIntegerBits <= 32)
3501 Bits = SignExtend64<32>(Bits);
3502 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3503 Elts[IntegerEltIdx] = Elt;
3504 Bits = 0;
3505 BitPos = 0;
3506 IntegerEltIdx++;
3510 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3512 if (NumElts < NumViaIntegerBits) {
3513 // If we're producing a smaller vector than our minimum legal integer
3514 // type, bitcast to the equivalent (known-legal) mask type, and extract
3515 // our final mask.
3516 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3517 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3518 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3519 DAG.getConstant(0, DL, XLenVT));
3520 } else {
3521 // Else we must have produced an integer type with the same size as the
3522 // mask type; bitcast for the final result.
3523 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3524 Vec = DAG.getBitcast(VT, Vec);
3527 return Vec;
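// Worked example (illustrative) of the mask packing above: the constant v8i1
// <1,0,1,1, 0,0,0,1> is accumulated LSB-first (element 0 in bit 0) into the
// byte 0b10001101 = 0x8D, materialized as a v1i8 build_vector, and bitcast
// back to v8i1.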
3530 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3531 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3532 : RISCVISD::VMV_V_X_VL;
3533 if (!VT.isFloatingPoint())
3534 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3535 Splat =
3536 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3537 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3540 // Try and match index sequences, which we can lower to the vid instruction
3541 // with optional modifications. An all-undef vector is matched by
3542 // getSplatValue, above.
3543 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3544 int64_t StepNumerator = SimpleVID->StepNumerator;
3545 unsigned StepDenominator = SimpleVID->StepDenominator;
3546 int64_t Addend = SimpleVID->Addend;
3548 assert(StepNumerator != 0 && "Invalid step");
3549 bool Negate = false;
3550 int64_t SplatStepVal = StepNumerator;
3551 unsigned StepOpcode = ISD::MUL;
3552 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3553 // anyway as the shift of 63 won't fit in uimm5.
3554 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3555 isPowerOf2_64(std::abs(StepNumerator))) {
3556 Negate = StepNumerator < 0;
3557 StepOpcode = ISD::SHL;
3558 SplatStepVal = Log2_64(std::abs(StepNumerator));
3561 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3562 // threshold since it's the immediate value many RVV instructions accept.
3563 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3564 // in a single addi instruction.
3565 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3566 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3567 isPowerOf2_32(StepDenominator) &&
3568 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3569 MVT VIDVT =
3570 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3571 MVT VIDContainerVT =
3572 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3573 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3574 // Convert right out of the scalable type so we can use standard ISD
3575 // nodes for the rest of the computation. If we used scalable types with
3576 // these, we'd lose the fixed-length vector info and generate worse
3577 // vsetvli code.
3578 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3579 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3580 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3581 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3582 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3584 if (StepDenominator != 1) {
3585 SDValue SplatStep =
3586 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3587 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3589 if (Addend != 0 || Negate) {
3590 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3591 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3592 VID);
3594 if (VT.isFloatingPoint()) {
3595 // TODO: Use vfwcvt to reduce register pressure.
3596 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3598 return VID;
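// Illustrative end-to-end example of the VID path: the constant build_vector
// <3, 5, 7, 9> matches {StepNumerator=2, StepDenominator=1, Addend=3}, and
// since the step is a power of two it is emitted roughly as
//   vid.v    v8
//   vsll.vi  v8, v8, 1
//   vadd.vi  v8, v8, 3
// instead of a constant-pool load.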
3602 // For very small build_vectors, use a single scalar insert of a constant.
3603 // TODO: Base this on constant rematerialization cost, not size.
3604 const unsigned EltBitSize = VT.getScalarSizeInBits();
3605 if (VT.getSizeInBits() <= 32 &&
3606 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3607 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3608 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3609 "Unexpected sequence type");
3610 // If we can use the original VL with the modified element type, this
3611 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3612 // be moved into InsertVSETVLI?
3613 unsigned ViaVecLen =
3614 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3615 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3617 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3618 uint64_t SplatValue = 0;
3619 // Construct the amalgamated value at this larger vector type.
3620 for (const auto &OpIdx : enumerate(Op->op_values())) {
3621 const auto &SeqV = OpIdx.value();
3622 if (!SeqV.isUndef())
3623 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3624 << (OpIdx.index() * EltBitSize));
3627 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3628 // achieve better constant materialization.
3629 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3630 SplatValue = SignExtend64<32>(SplatValue);
3632 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3633 DAG.getUNDEF(ViaVecVT),
3634 DAG.getConstant(SplatValue, DL, XLenVT),
3635 DAG.getConstant(0, DL, XLenVT));
3636 if (ViaVecLen != 1)
3637 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3638 MVT::getVectorVT(ViaIntVT, 1), Vec,
3639 DAG.getConstant(0, DL, XLenVT));
3640 return DAG.getBitcast(VT, Vec);
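// Worked example (illustrative) of the single-scalar path above: the constant
// v4i8 <1, 2, 3, 4> fits in 32 bits, so the bytes are packed with element 0
// in the least-significant byte to form the scalar 0x04030201, inserted into
// element 0 of an i32 via-vector, extracted down to one i32 element if a
// wider via-vector was used, and finally bitcast back to v4i8.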
3644 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3645 // when re-interpreted as a vector with a larger element type. For example,
3646 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3647 // could be instead splat as
3648 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3649 // TODO: This optimization could also work on non-constant splats, but it
3650 // would require bit-manipulation instructions to construct the splat value.
3651 SmallVector<SDValue> Sequence;
3652 const auto *BV = cast<BuildVectorSDNode>(Op);
3653 if (VT.isInteger() && EltBitSize < 64 &&
3654 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3655 BV->getRepeatedSequence(Sequence) &&
3656 (Sequence.size() * EltBitSize) <= 64) {
3657 unsigned SeqLen = Sequence.size();
3658 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3659 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3660 ViaIntVT == MVT::i64) &&
3661 "Unexpected sequence type");
3663 // If we can use the original VL with the modified element type, this
3664 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3665 // be moved into InsertVSETVLI?
3666 const unsigned RequiredVL = NumElts / SeqLen;
3667 const unsigned ViaVecLen =
3668 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3669 NumElts : RequiredVL;
3670 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3672 unsigned EltIdx = 0;
3673 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3674 uint64_t SplatValue = 0;
3675 // Construct the amalgamated value which can be splatted as this larger
3676 // vector type.
3677 for (const auto &SeqV : Sequence) {
3678 if (!SeqV.isUndef())
3679 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3680 << (EltIdx * EltBitSize));
3681 EltIdx++;
3684 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3685 // achieve better constant materialization.
3686 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3687 SplatValue = SignExtend64<32>(SplatValue);
3689 // Since we can't introduce illegal i64 types at this stage, we can only
3690 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3691 // way we can use RVV instructions to splat.
3692 assert((ViaIntVT.bitsLE(XLenVT) ||
3693 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3694 "Unexpected bitcast sequence");
3695 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3696 SDValue ViaVL =
3697 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3698 MVT ViaContainerVT =
3699 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3700 SDValue Splat =
3701 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3702 DAG.getUNDEF(ViaContainerVT),
3703 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3704 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3705 if (ViaVecLen != RequiredVL)
3706 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3707 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3708 DAG.getConstant(0, DL, XLenVT));
3709 return DAG.getBitcast(VT, Splat);
3713 // If the number of signbits allows, see if we can lower as a <N x i8>.
3714 // Our main goal here is to reduce LMUL (and thus work) required to
3715 // build the constant, but we will also narrow if the resulting
3716 // narrow vector is known to materialize cheaply.
3717 // TODO: We really should be costing the smaller vector. There are
3718 // profitable cases this misses.
3719 if (EltBitSize > 8 && VT.isInteger() &&
3720 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3721 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3722 if (EltBitSize - SignBits < 8) {
3723 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3724 DL, Op->ops());
3725 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3726 Source, DAG, Subtarget);
3727 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3728 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3732 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3733 return Res;
3735 // For constant vectors, use generic constant pool lowering. Otherwise,
3736 // we'd have to materialize constants in GPRs just to move them into the
3737 // vector.
3738 return SDValue();
3741 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3742 const RISCVSubtarget &Subtarget) {
3743 MVT VT = Op.getSimpleValueType();
3744 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3746 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3747 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3748 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3750 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3752 SDLoc DL(Op);
3753 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3755 MVT XLenVT = Subtarget.getXLenVT();
3757 if (VT.getVectorElementType() == MVT::i1) {
3758 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3759 // vector type, we have a legal equivalently-sized i8 type, so we can use
3760 // that.
3761 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3762 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3764 SDValue WideVec;
3765 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3766 // For a splat, perform a scalar truncate before creating the wider
3767 // vector.
3768 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3769 DAG.getConstant(1, DL, Splat.getValueType()));
3770 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3771 } else {
3772 SmallVector<SDValue, 8> Ops(Op->op_values());
3773 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3774 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3775 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3778 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3781 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3782 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3783 return Gather;
3784 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3785 : RISCVISD::VMV_V_X_VL;
3786 if (!VT.isFloatingPoint())
3787 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3788 Splat =
3789 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3790 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3793 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3794 return Res;
3796 // If we're compiling for an exact VLEN value, we can split our work per
3797 // register in the register group.
3798 const unsigned MinVLen = Subtarget.getRealMinVLen();
3799 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
3800 if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
3801 MVT ElemVT = VT.getVectorElementType();
3802 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
3803 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3804 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3805 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3806 assert(M1VT == getLMUL1VT(M1VT));
3808 // The following semantically builds up a fixed length concat_vector
3809 // of the component build_vectors. We eagerly lower to scalable and
3810 // insert_subvector here to avoid DAG combining it back to a large
3811 // build_vector.
3812 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3813 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3814 SDValue Vec = DAG.getUNDEF(ContainerVT);
3815 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3816 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3817 SDValue SubBV =
3818 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3819 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3820 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3821 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3822 DAG.getVectorIdxConstant(InsertIdx, DL));
3824 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3827 // Cap the cost at a value linear in the number of elements in the vector.
3828 // The default lowering is to use the stack. The vector store + scalar loads
3829 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
3830 // being (at least) linear in LMUL. As a result, using the vslidedown
3831 // lowering for every element ends up being VL*LMUL.
3832 // TODO: Should we be directly costing the stack alternative? Doing so might
3833 // give us a more accurate upper bound.
3834 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3836 // TODO: unify with TTI getSlideCost.
3837 InstructionCost PerSlideCost = 1;
3838 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3839 default: break;
3840 case RISCVII::VLMUL::LMUL_2:
3841 PerSlideCost = 2;
3842 break;
3843 case RISCVII::VLMUL::LMUL_4:
3844 PerSlideCost = 4;
3845 break;
3846 case RISCVII::VLMUL::LMUL_8:
3847 PerSlideCost = 8;
3848 break;
3851 // TODO: Should we be using the build instseq then cost + evaluate scheme
3852 // we use for integer constants here?
3853 unsigned UndefCount = 0;
3854 for (const SDValue &V : Op->ops()) {
3855 if (V.isUndef()) {
3856 UndefCount++;
3857 continue;
3859 if (UndefCount) {
3860 LinearBudget -= PerSlideCost;
3861 UndefCount = 0;
3863 LinearBudget -= PerSlideCost;
3865 if (UndefCount) {
3866 LinearBudget -= PerSlideCost;
3869 if (LinearBudget < 0)
3870 return SDValue();
3872 assert((!VT.isFloatingPoint() ||
3873 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3874 "Illegal type which will result in reserved encoding");
3876 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3878 SDValue Vec;
3879 UndefCount = 0;
3880 for (SDValue V : Op->ops()) {
3881 if (V.isUndef()) {
3882 UndefCount++;
3883 continue;
3886 // Start our sequence with a TA splat in the hopes that hardware is able to
3887 // recognize there's no dependency on the prior value of our temporary
3888 // register.
3889 if (!Vec) {
3890 Vec = DAG.getSplatVector(VT, DL, V);
3891 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3892 UndefCount = 0;
3893 continue;
3896 if (UndefCount) {
3897 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3898 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3899 Vec, Offset, Mask, VL, Policy);
3900 UndefCount = 0;
3902 auto OpCode =
3903 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3904 if (!VT.isFloatingPoint())
3905 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
3906 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3907 V, Mask, VL);
3909 if (UndefCount) {
3910 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3911 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3912 Vec, Offset, Mask, VL, Policy);
3914 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
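// Illustrative example of the slide1down path above: a non-constant v4f32
// build_vector <a, b, c, d> becomes roughly (register names hypothetical)
//   vfmv.v.f        v8, fa0        ; tail-agnostic splat of a
//   vfslide1down.vf v8, v8, fa1
//   vfslide1down.vf v8, v8, fa2
//   vfslide1down.vf v8, v8, fa3
// with any run of undef operands collapsed into one vslidedown by the length
// of the run.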
3917 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3918 SDValue Lo, SDValue Hi, SDValue VL,
3919 SelectionDAG &DAG) {
3920 if (!Passthru)
3921 Passthru = DAG.getUNDEF(VT);
3922 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3923 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3924 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3925 // If the Hi constant is just Lo's sign bit replicated, lower this as a custom
3926 // node in order to try and match RVV vector/scalar instructions.
3927 if ((LoC >> 31) == HiC)
3928 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3930 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
3931 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
3932 // vlmax vsetvli or vsetivli to change the VL.
3933 // FIXME: Support larger constants?
3934 // FIXME: Support non-constant VLs by saturating?
3935 if (LoC == HiC) {
3936 SDValue NewVL;
3937 if (isAllOnesConstant(VL) ||
3938 (isa<RegisterSDNode>(VL) &&
3939 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
3940 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
3941 else if (isa<ConstantSDNode>(VL) &&
3942 isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
3943 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
3945 if (NewVL) {
3946 MVT InterVT =
3947 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3948 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
3949 DAG.getUNDEF(InterVT), Lo,
3950 DAG.getRegister(RISCV::X0, MVT::i32));
3951 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3956 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3957 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3958 isa<ConstantSDNode>(Hi.getOperand(1)) &&
3959 Hi.getConstantOperandVal(1) == 31)
3960 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3962 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
3963 // even if it might be sign extended.
3964 if (Hi.isUndef())
3965 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3967 // Fall back to a stack store and stride x0 vector load.
3968 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3969 Hi, VL);
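// Illustrative examples of the constant cases above: splatting the i64 value
// -1 on RV32 gives Lo = Hi = -1, (LoC >> 31) == HiC holds, and a plain
// vmv.v.x of Lo suffices. Splatting 0x0000000500000005 gives Lo == Hi == 5
// where Hi is not Lo's sign-extension, so when VL permits it the splat is
// done as an EEW=32 vmv.v.x over twice the element count and bitcast back;
// otherwise this constant falls back to SPLAT_VECTOR_SPLIT_I64_VL. A plain-C++
// sketch (hypothetical helper, not part of this file) of the first check:
#if 0
#include <cstdint>
static bool loSignExtendEqualsSplat(int64_t SplatVal) {
  int32_t Lo = (int32_t)SplatVal;
  int32_t Hi = (int32_t)(SplatVal >> 32);
  return (Lo >> 31) == Hi; // Hi is just Lo's sign bit replicated
}
#endif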
3972 // Called by type legalization to handle splat of i64 on RV32.
3973 // FIXME: We can optimize this when the type has sign or zero bits in one
3974 // of the halves.
3975 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3976 SDValue Scalar, SDValue VL,
3977 SelectionDAG &DAG) {
3978 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3979 SDValue Lo, Hi;
3980 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3981 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3984 // This function lowers a splat of a scalar operand Scalar with the vector
3985 // length VL. It ensures the final sequence is type legal, which is useful when
3986 // lowering a splat after type legalization.
3987 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3988 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3989 const RISCVSubtarget &Subtarget) {
3990 bool HasPassthru = Passthru && !Passthru.isUndef();
3991 if (!HasPassthru && !Passthru)
3992 Passthru = DAG.getUNDEF(VT);
3993 if (VT.isFloatingPoint())
3994 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3996 MVT XLenVT = Subtarget.getXLenVT();
3998 // Simplest case is that the operand needs to be promoted to XLenVT.
3999 if (Scalar.getValueType().bitsLE(XLenVT)) {
4000 // If the operand is a constant, sign extend to increase our chances
4001 // of being able to use a .vi instruction. ANY_EXTEND would become a
4002 // zero extend and the simm5 check in isel would fail.
4003 // FIXME: Should we ignore the upper bits in isel instead?
4004 unsigned ExtOpc =
4005 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4006 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4007 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4010 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4011 "Unexpected scalar for splat lowering!");
4013 if (isOneConstant(VL) && isNullConstant(Scalar))
4014 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4015 DAG.getConstant(0, DL, XLenVT), VL);
4017 // Otherwise use the more complicated splatting algorithm.
4018 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4021 // This function lowers an insert of a scalar operand Scalar into lane
4022 // 0 of the vector regardless of the value of VL. The contents of the
4023 // remaining lanes of the result vector are unspecified. VL is assumed
4024 // to be non-zero.
4025 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4026 const SDLoc &DL, SelectionDAG &DAG,
4027 const RISCVSubtarget &Subtarget) {
4028 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4030 const MVT XLenVT = Subtarget.getXLenVT();
4031 SDValue Passthru = DAG.getUNDEF(VT);
4033 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4034 isNullConstant(Scalar.getOperand(1))) {
4035 SDValue ExtractedVal = Scalar.getOperand(0);
4036 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4037 MVT ExtractedContainerVT = ExtractedVT;
4038 if (ExtractedContainerVT.isFixedLengthVector()) {
4039 ExtractedContainerVT = getContainerForFixedLengthVector(
4040 DAG, ExtractedContainerVT, Subtarget);
4041 ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
4042 DAG, Subtarget);
4044 if (ExtractedContainerVT.bitsLE(VT))
4045 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
4046 DAG.getConstant(0, DL, XLenVT));
4047 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4048 DAG.getConstant(0, DL, XLenVT));
4052 if (VT.isFloatingPoint())
4053 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4054 DAG.getUNDEF(VT), Scalar, VL);
4056 // Avoid the tricky legalization cases by falling back to using the
4057 // splat code which already handles it gracefully.
4058 if (!Scalar.getValueType().bitsLE(XLenVT))
4059 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4060 DAG.getConstant(1, DL, XLenVT),
4061 VT, DL, DAG, Subtarget);
4063 // If the operand is a constant, sign extend to increase our chances
4064 // of being able to use a .vi instruction. ANY_EXTEND would become a
4065 // zero extend and the simm5 check in isel would fail.
4066 // FIXME: Should we ignore the upper bits in isel instead?
4067 unsigned ExtOpc =
4068 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4069 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4070 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4071 DAG.getUNDEF(VT), Scalar, VL);
4074 // Is this a shuffle that extracts either the even or the odd elements of a vector?
4075 // That is, specifically, either (a) or (b) below.
4076 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
4077 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
4078 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4079 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4080 // Returns {Src Vector, Even Elements} on success.
4081 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4082 SDValue V2, ArrayRef<int> Mask,
4083 const RISCVSubtarget &Subtarget) {
4084 // Need to be able to widen the vector.
4085 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4086 return false;
4088 // Both inputs must be extracts.
4089 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4090 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4091 return false;
4093 // Extracting from the same source.
4094 SDValue Src = V1.getOperand(0);
4095 if (Src != V2.getOperand(0))
4096 return false;
4098 // Src needs to have twice the number of elements.
4099 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4100 return false;
4102 // The extracts must extract the two halves of the source.
4103 if (V1.getConstantOperandVal(1) != 0 ||
4104 V2.getConstantOperandVal(1) != Mask.size())
4105 return false;
4107 // First index must be the first even or odd element from V1.
4108 if (Mask[0] != 0 && Mask[0] != 1)
4109 return false;
4111 // The others must increase by 2 each time.
4112 // TODO: Support undef elements?
4113 for (unsigned i = 1; i != Mask.size(); ++i)
4114 if (Mask[i] != Mask[i - 1] + 2)
4115 return false;
4117 return true;
4120 /// Is this shuffle interleaving contiguous elements from one vector into the
4121 /// even elements and contiguous elements from another vector into the odd
4122 /// elements. \p EvenSrc will contain the element that should be in the first
4123 /// even element. \p OddSrc will contain the element that should be in the first
4124 /// odd element. These can be the first element in a source or the element half
4125 /// way through the source.
4126 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4127 int &OddSrc, const RISCVSubtarget &Subtarget) {
4128 // We need to be able to widen elements to the next larger integer type.
4129 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4130 return false;
4132 int Size = Mask.size();
4133 int NumElts = VT.getVectorNumElements();
4134 assert(Size == (int)NumElts && "Unexpected mask size");
4136 SmallVector<unsigned, 2> StartIndexes;
4137 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4138 return false;
4140 EvenSrc = StartIndexes[0];
4141 OddSrc = StartIndexes[1];
4143 // One source should be the low half of the first vector.
4144 if (EvenSrc != 0 && OddSrc != 0)
4145 return false;
4147 // Subvectors will be extracted from either the start of the two input
4148 // vectors, or from the start and the middle of the first vector if it's a
4149 // unary interleave.
4150 // In both cases, HalfNumElts will be extracted.
4151 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4152 // we'll create an illegal extract_subvector.
4153 // FIXME: We could support other values using a slidedown first.
4154 int HalfNumElts = NumElts / 2;
4155 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4158 /// Match shuffles that concatenate two vectors, rotate the concatenation,
4159 /// and then extract the original number of elements from the rotated result.
4160 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4161 /// returned rotation amount is for a rotate right, where elements move from
4162 /// higher elements to lower elements. \p LoSrc indicates the first source
4163 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4164 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4165 /// 0 or 1 if a rotation is found.
4167 /// NOTE: We talk about rotate to the right which matches how bit shift and
4168 /// rotate instructions are described where LSBs are on the right, but LLVM IR
4169 /// and the table below write vectors with the lowest elements on the left.
4170 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4171 int Size = Mask.size();
4173 // We need to detect various ways of spelling a rotation:
4174 // [11, 12, 13, 14, 15, 0, 1, 2]
4175 // [-1, 12, 13, 14, -1, -1, 1, -1]
4176 // [-1, -1, -1, -1, -1, -1, 1, 2]
4177 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4178 // [-1, 4, 5, 6, -1, -1, 9, -1]
4179 // [-1, 4, 5, 6, -1, -1, -1, -1]
4180 int Rotation = 0;
4181 LoSrc = -1;
4182 HiSrc = -1;
4183 for (int i = 0; i != Size; ++i) {
4184 int M = Mask[i];
4185 if (M < 0)
4186 continue;
4188 // Determine where a rotate vector would have started.
4189 int StartIdx = i - (M % Size);
4190 // The identity rotation isn't interesting, stop.
4191 if (StartIdx == 0)
4192 return -1;
4194 // If we found the tail of a vector the rotation must be the missing
4195 // front. If we found the head of a vector, it must be how much of the
4196 // head.
4197 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4199 if (Rotation == 0)
4200 Rotation = CandidateRotation;
4201 else if (Rotation != CandidateRotation)
4202 // The rotations don't match, so we can't match this mask.
4203 return -1;
4205 // Compute which value this mask is pointing at.
4206 int MaskSrc = M < Size ? 0 : 1;
4208 // Compute which of the two target values this index should be assigned to.
4209 // This reflects whether the high elements are remaining or the low elements
4210 // are remaining.
4211 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4213 // Either set up this value if we've not encountered it before, or check
4214 // that it remains consistent.
4215 if (TargetSrc < 0)
4216 TargetSrc = MaskSrc;
4217 else if (TargetSrc != MaskSrc)
4218 // This may be a rotation, but it pulls from the inputs in some
4219 // unsupported interleaving.
4220 return -1;
4223 // Check that we successfully analyzed the mask, and normalize the results.
4224 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4225 assert((LoSrc >= 0 || HiSrc >= 0) &&
4226 "Failed to find a rotated input vector!");
4228 return Rotation;
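// Worked example (illustrative) for the matcher above: with Size = 8 and
// Mask = [11,12,13,14,15, 0, 1, 2], element 0 gives
// StartIdx = 0 - (11 % 8) = -3, so CandidateRotation = 3 and HiSrc = 1;
// element 5 gives StartIdx = 5, the same CandidateRotation = 8 - 5 = 3, and
// LoSrc = 0. The mask is therefore a rotate of 3 with LoSrc = 0, HiSrc = 1.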
4231 // Lower a deinterleave shuffle to vnsrl.
4232 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4233 // -> [p, q, r, s] (EvenElts == false)
4234 // VT is the type of the vector to return, <[vscale x ]n x ty>
4235 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4236 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4237 bool EvenElts,
4238 const RISCVSubtarget &Subtarget,
4239 SelectionDAG &DAG) {
4240 // The result is a vector of type <m x n x ty>
4241 MVT ContainerVT = VT;
4242 // Convert fixed vectors to scalable if needed
4243 if (ContainerVT.isFixedLengthVector()) {
4244 assert(Src.getSimpleValueType().isFixedLengthVector());
4245 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4247 // The source is a vector of type <m x n*2 x ty>
4248 MVT SrcContainerVT =
4249 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4250 ContainerVT.getVectorElementCount() * 2);
4251 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4254 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4256 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4257 // This also converts FP to int.
4258 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4259 MVT WideSrcContainerVT = MVT::getVectorVT(
4260 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4261 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4263 // The integer version of the container type.
4264 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4266 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4267 // the original element size.
4268 unsigned Shift = EvenElts ? 0 : EltBits;
4269 SDValue SplatShift = DAG.getNode(
4270 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4271 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4272 SDValue Res =
4273 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4274 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4275 // Cast back to FP if needed.
4276 Res = DAG.getBitcast(ContainerVT, Res);
4278 if (VT.isFixedLengthVector())
4279 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4280 return Res;
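// Illustrative example of the vnsrl lowering above: to take the odd elements
// of a v8i16 source <s0, ..., s7>, the source is viewed as four 32-bit lanes,
// each lane is narrowing-shifted right by 16 (vnsrl), and the result is the
// v4i16 <s1, s3, s5, s7>; for the even elements the shift amount is 0.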
4283 // Lower the following shuffle to vslidedown.
4284 // a)
4285 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
4286 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
4287 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t109
4288 // b)
4289 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
4290 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
4291 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
4292 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
4293 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
4294 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4295 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4296 SDValue V1, SDValue V2,
4297 ArrayRef<int> Mask,
4298 const RISCVSubtarget &Subtarget,
4299 SelectionDAG &DAG) {
4300 auto findNonEXTRACT_SUBVECTORParent =
4301 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4302 uint64_t Offset = 0;
4303 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4304 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4305 // a scalable vector. But we don't want to match that case.
4306 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4307 Offset += Parent.getConstantOperandVal(1);
4308 Parent = Parent.getOperand(0);
4310 return std::make_pair(Parent, Offset);
4313 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4314 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4316 // Extracting from the same source.
4317 SDValue Src = V1Src;
4318 if (Src != V2Src)
4319 return SDValue();
4321 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4322 SmallVector<int, 16> NewMask(Mask);
4323 for (size_t i = 0; i != NewMask.size(); ++i) {
4324 if (NewMask[i] == -1)
4325 continue;
4327 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4328 NewMask[i] = NewMask[i] + V1IndexOffset;
4329 } else {
4330 // Minus NewMask.size() is needed. Otherwise, the b case would be
4331 // <5,6,7,12> instead of <5,6,7,8>.
4332 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4336 // First index must be known and non-zero. It will be used as the slidedown
4337 // amount.
4338 if (NewMask[0] <= 0)
4339 return SDValue();
4341 // NewMask must also be contiguous.
4342 for (unsigned i = 1; i != NewMask.size(); ++i)
4343 if (NewMask[i - 1] + 1 != NewMask[i])
4344 return SDValue();
4346 MVT XLenVT = Subtarget.getXLenVT();
4347 MVT SrcVT = Src.getSimpleValueType();
4348 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4349 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4350 SDValue Slidedown =
4351 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4352 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4353 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4354 return DAG.getNode(
4355 ISD::EXTRACT_SUBVECTOR, DL, VT,
4356 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4357 DAG.getConstant(0, DL, XLenVT));
4360 // Because vslideup leaves the destination elements at the start intact, we can
4361 // use it to perform shuffles that insert subvectors:
4363 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4364 // ->
4365 // vsetvli zero, 8, e8, mf2, ta, ma
4366 // vslideup.vi v8, v9, 4
4368 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4369 // ->
4370 // vsetvli zero, 5, e8, mf2, tu, ma
4371 // vslideup.vi v8, v9, 2
4372 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4373 SDValue V1, SDValue V2,
4374 ArrayRef<int> Mask,
4375 const RISCVSubtarget &Subtarget,
4376 SelectionDAG &DAG) {
4377 unsigned NumElts = VT.getVectorNumElements();
4378 int NumSubElts, Index;
4379 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4380 Index))
4381 return SDValue();
4383 bool OpsSwapped = Mask[Index] < (int)NumElts;
4384 SDValue InPlace = OpsSwapped ? V2 : V1;
4385 SDValue ToInsert = OpsSwapped ? V1 : V2;
4387 MVT XLenVT = Subtarget.getXLenVT();
4388 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4389 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4390 // We slide up by the index that the subvector is being inserted at, and set
4391 // VL to the index + the number of elements being inserted.
4392 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
4393 // If we're adding a suffix to the in-place vector, i.e. inserting right
4394 // up to the very end of it, then we don't actually care about the tail.
4395 if (NumSubElts + Index >= (int)NumElts)
4396 Policy |= RISCVII::TAIL_AGNOSTIC;
4398 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4399 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4400 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4402 SDValue Res;
4403 // If we're inserting into the lowest elements, use a tail undisturbed
4404 // vmv.v.v.
4405 if (Index == 0)
4406 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4407 VL);
4408 else
4409 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4410 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4411 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4414 /// Match v(f)slide1up/down idioms. These operations involve sliding
4415 /// N-1 elements to make room for an inserted scalar at one end.
4416 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4417 SDValue V1, SDValue V2,
4418 ArrayRef<int> Mask,
4419 const RISCVSubtarget &Subtarget,
4420 SelectionDAG &DAG) {
4421 bool OpsSwapped = false;
4422 if (!isa<BuildVectorSDNode>(V1)) {
4423 if (!isa<BuildVectorSDNode>(V2))
4424 return SDValue();
4425 std::swap(V1, V2);
4426 OpsSwapped = true;
4428 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4429 if (!Splat)
4430 return SDValue();
4432 // Return true if the mask could describe a slide of Mask.size() - 1
4433 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
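// For example, with Mask.size() == 4 and Base == 4 (the non-splat source is
// V2), the mask <5, 6, 7, X> matches Offset == 1 (a vslide1down inserting the
// scalar into the last lane), while <X, 4, 5, 6> matches Offset == -1 (a
// vslide1up inserting the scalar into the first lane); X is the lane taken
// from the splatted build_vector and is checked separately below.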
4434 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4435 const unsigned S = (Offset > 0) ? 0 : -Offset;
4436 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4437 for (unsigned i = S; i != E; ++i)
4438 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4439 return false;
4440 return true;
4443 const unsigned NumElts = VT.getVectorNumElements();
4444 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4445 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4446 return SDValue();
4448 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4449 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4450 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4451 return SDValue();
4453 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4454 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4455 auto OpCode = IsVSlidedown ?
4456 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4457 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4458 if (!VT.isFloatingPoint())
4459 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4460 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4461 DAG.getUNDEF(ContainerVT),
4462 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4463 Splat, TrueMask, VL);
4464 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4467 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4468 // to create an interleaved vector of <[vscale x] n*2 x ty>.
4469 // This requires that the size of ty is less than the subtarget's maximum ELEN.
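// For example, EvenV = <a0, a1> and OddV = <b0, b1> produce <a0, b0, a1, b1>:
// each double-width element is computed as (b_i << SEW) + a_i, so after the
// bitcast back to the narrow element type a_i lands in the low (even) lane and
// b_i in the high (odd) lane.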
4470 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4471 const SDLoc &DL, SelectionDAG &DAG,
4472 const RISCVSubtarget &Subtarget) {
4473 MVT VecVT = EvenV.getSimpleValueType();
4474 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4475 // Convert fixed vectors to scalable if needed
4476 if (VecContainerVT.isFixedLengthVector()) {
4477 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4478 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4479 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4482 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4484 // We're working with a vector of the same size as the resulting
4485 // interleaved vector, but with half the number of elements and
4486 // twice the SEW (Hence the restriction on not using the maximum
4487 // ELEN)
4488 MVT WideVT =
4489 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4490 VecVT.getVectorElementCount());
4491 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4492 if (WideContainerVT.isFixedLengthVector())
4493 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4495 // Bitcast the input vectors to integers in case they are FP
4496 VecContainerVT = VecContainerVT.changeTypeToInteger();
4497 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4498 OddV = DAG.getBitcast(VecContainerVT, OddV);
4500 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4501 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4503 SDValue Interleaved;
4504 if (Subtarget.hasStdExtZvbb()) {
4505 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4506 SDValue OffsetVec =
4507 DAG.getSplatVector(VecContainerVT, DL,
4508 DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
4509 Subtarget.getXLenVT()));
4510 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4511 OffsetVec, Passthru, Mask, VL);
4512 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4513 Interleaved, EvenV, Passthru, Mask, VL);
4514 } else {
4515 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4516 // vwaddu.vv
4517 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4518 OddV, Passthru, Mask, VL);
4520 // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV times an all-ones value.
4521 SDValue AllOnesVec = DAG.getSplatVector(
4522 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4523 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4524 OddV, AllOnesVec, Passthru, Mask, VL);
4526 // Add the two together so we get
4527 // (OddV * 0xff...ff) + (OddV + EvenV)
4528 // = (OddV * 0x100...00) + EvenV
4529 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4530 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4531 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4532 Interleaved, OddsMul, Passthru, Mask, VL);
4535 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4536 MVT ResultContainerVT = MVT::getVectorVT(
4537 VecVT.getVectorElementType(), // Make sure to use original type
4538 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4539 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4541 // Convert back to a fixed vector if needed
4542 MVT ResultVT =
4543 MVT::getVectorVT(VecVT.getVectorElementType(),
4544 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4545 if (ResultVT.isFixedLengthVector())
4546 Interleaved =
4547 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4549 return Interleaved;
4552 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
4553 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4554 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4555 SelectionDAG &DAG,
4556 const RISCVSubtarget &Subtarget) {
4557 SDLoc DL(SVN);
4558 MVT VT = SVN->getSimpleValueType(0);
4559 SDValue V = SVN->getOperand(0);
4560 unsigned NumElts = VT.getVectorNumElements();
4562 assert(VT.getVectorElementType() == MVT::i1);
4564 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4565 SVN->getMask().size()) ||
4566 !SVN->getOperand(1).isUndef())
4567 return SDValue();
4569 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4570 EVT ViaVT = EVT::getVectorVT(
4571 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4572 EVT ViaBitVT =
4573 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4575 // If we don't have zvbb or the larger element type exceeds ELEN, the
4576 // operation will be illegal.
4577 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4578 ViaVT) ||
4579 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4580 return SDValue();
4582 // If the bit vector doesn't fit exactly into the larger element type, we need
4583 // to insert it into the larger vector and then shift the reversed bits back
4584 // down afterwards to get rid of the gap introduced.
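// For example, a v4i1 mask reversed via an i8 bitreverse: the four reversed
// bits end up in bits [7:4], so a logical shift right by ViaEltSize - NumElts
// (here 4) moves them back down to bits [3:0].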
4585 if (ViaEltSize > NumElts)
4586 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4587 V, DAG.getVectorIdxConstant(0, DL));
4589 SDValue Res =
4590 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4592 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4593 // element type.
4594 if (ViaEltSize > NumElts)
4595 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4596 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4598 Res = DAG.getBitcast(ViaBitVT, Res);
4600 if (ViaEltSize > NumElts)
4601 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4602 DAG.getVectorIdxConstant(0, DL));
4603 return Res;
4606 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4607 // reinterpret it as a v2i32 and rotate each element left by 8 instead. We can
4608 // lower this as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4609 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4610 SelectionDAG &DAG,
4611 const RISCVSubtarget &Subtarget) {
4612 SDLoc DL(SVN);
4614 EVT VT = SVN->getValueType(0);
4615 unsigned NumElts = VT.getVectorNumElements();
4616 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4617 unsigned NumSubElts, RotateAmt;
4618 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4619 NumElts, NumSubElts, RotateAmt))
4620 return SDValue();
4621 MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4622 NumElts / NumSubElts);
4624 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4625 if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
4626 return SDValue();
4628 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4630 SDValue Rotate;
4631 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4632 // so canonicalize to vrev8.
4633 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4634 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4635 else
4636 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4637 DAG.getConstant(RotateAmt, DL, RotateVT));
4639 return DAG.getBitcast(VT, Rotate);
4642 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4643 const RISCVSubtarget &Subtarget) {
4644 SDValue V1 = Op.getOperand(0);
4645 SDValue V2 = Op.getOperand(1);
4646 SDLoc DL(Op);
4647 MVT XLenVT = Subtarget.getXLenVT();
4648 MVT VT = Op.getSimpleValueType();
4649 unsigned NumElts = VT.getVectorNumElements();
4650 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4652 if (VT.getVectorElementType() == MVT::i1) {
4653 // Lower to a vror.vi of a larger element type if possible before we promote
4654 // i1s to i8s.
4655 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4656 return V;
4657 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4658 return V;
4660 // Promote i1 shuffle to i8 shuffle.
4661 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4662 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4663 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4664 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4665 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4666 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4667 ISD::SETNE);
4670 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4672 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4674 if (SVN->isSplat()) {
4675 const int Lane = SVN->getSplatIndex();
4676 if (Lane >= 0) {
4677 MVT SVT = VT.getVectorElementType();
4679 // Turn splatted vector load into a strided load with an X0 stride.
4680 SDValue V = V1;
4681 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4682 // with undef.
4683 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4684 int Offset = Lane;
4685 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4686 int OpElements =
4687 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4688 V = V.getOperand(Offset / OpElements);
4689 Offset %= OpElements;
4692 // We need to ensure the load isn't atomic or volatile.
4693 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4694 auto *Ld = cast<LoadSDNode>(V);
4695 Offset *= SVT.getStoreSize();
4696 SDValue NewAddr = DAG.getMemBasePlusOffset(
4697 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4699 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4700 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4701 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4702 SDValue IntID =
4703 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4704 SDValue Ops[] = {Ld->getChain(),
4705 IntID,
4706 DAG.getUNDEF(ContainerVT),
4707 NewAddr,
4708 DAG.getRegister(RISCV::X0, XLenVT),
4709 VL};
4710 SDValue NewLoad = DAG.getMemIntrinsicNode(
4711 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4712 DAG.getMachineFunction().getMachineMemOperand(
4713 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4714 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4715 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4718 // Otherwise use a scalar load and splat. This will give the best
4719 // opportunity to fold a splat into the operation. ISel can turn it into
4720 // the x0 strided load if we aren't able to fold away the select.
4721 if (SVT.isFloatingPoint())
4722 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4723 Ld->getPointerInfo().getWithOffset(Offset),
4724 Ld->getOriginalAlign(),
4725 Ld->getMemOperand()->getFlags());
4726 else
4727 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4728 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4729 Ld->getOriginalAlign(),
4730 Ld->getMemOperand()->getFlags());
4731 DAG.makeEquivalentMemoryOrdering(Ld, V);
4733 unsigned Opc =
4734 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4735 SDValue Splat =
4736 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4737 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4740 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4741 assert(Lane < (int)NumElts && "Unexpected lane!");
4742 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4743 V1, DAG.getConstant(Lane, DL, XLenVT),
4744 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4745 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4749 ArrayRef<int> Mask = SVN->getMask();
4751 if (SDValue V =
4752 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4753 return V;
4755 if (SDValue V =
4756 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4757 return V;
4759 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
4760 // available.
4761 if (Subtarget.hasStdExtZvkb())
4762 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4763 return V;
4765 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4766 // be undef which can be handled with a single SLIDEDOWN/UP.
4767 int LoSrc, HiSrc;
4768 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4769 if (Rotation > 0) {
4770 SDValue LoV, HiV;
4771 if (LoSrc >= 0) {
4772 LoV = LoSrc == 0 ? V1 : V2;
4773 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4775 if (HiSrc >= 0) {
4776 HiV = HiSrc == 0 ? V1 : V2;
4777 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4780 // We found a rotation. We need to slide HiV down by Rotation. Then we need
4781 // to slide LoV up by (NumElts - Rotation).
4782 unsigned InvRotate = NumElts - Rotation;
4784 SDValue Res = DAG.getUNDEF(ContainerVT);
4785 if (HiV) {
4786 // Even though we could use a smaller VL, don't do so, to avoid a vsetivli
4787 // toggle.
4788 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4789 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4791 if (LoV)
4792 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4793 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4794 RISCVII::TAIL_AGNOSTIC);
4796 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4799 // If this is a deinterleave and we can widen the vector, then we can use
4800 // vnsrl to deinterleave.
4801 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4802 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4803 Subtarget, DAG);
4806 if (SDValue V =
4807 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4808 return V;
4810 // Detect an interleave shuffle and lower to
4811 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
4812 int EvenSrc, OddSrc;
4813 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4814 // Extract the halves of the vectors.
4815 MVT HalfVT = VT.getHalfNumVectorElementsVT();
4817 int Size = Mask.size();
4818 SDValue EvenV, OddV;
4819 assert(EvenSrc >= 0 && "Undef source?");
4820 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4821 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4822 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4824 assert(OddSrc >= 0 && "Undef source?");
4825 OddV = (OddSrc / Size) == 0 ? V1 : V2;
4826 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4827 DAG.getConstant(OddSrc % Size, DL, XLenVT));
4829 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4832 // Detect shuffles which can be re-expressed as vector selects; these are
4833 // shuffles in which each element in the destination is taken from an element
4834 // at the corresponding index in either source vector.
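// For example, for two v4i32 sources the mask <0, 5, 2, 7> takes lane i from
// either V1 or V2 but never moves it, so it can be lowered as a vselect with
// the i1 mask <1, 0, 1, 0> (true selects from V1, unless the operands are
// swapped below).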
4835 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4836 int MaskIndex = MaskIdx.value();
4837 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4840 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4842 SmallVector<SDValue> MaskVals;
4843 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4844 // merged with a second vrgather.
4845 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4847 // By default we preserve the original operand order, and use a mask to
4848 // select LHS as true and RHS as false. However, since RVV vector selects may
4849 // feature splats but only on the LHS, we may choose to invert our mask and
4850 // instead select between RHS and LHS.
4851 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4852 bool InvertMask = IsSelect == SwapOps;
4854 // Keep track of which non-undef indices are used by each LHS/RHS shuffle
4855 // half.
4856 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4858 // Now construct the mask that will be used by the vselect or blended
4859 // vrgather operation. For vrgathers, construct the appropriate indices into
4860 // each vector.
4861 for (int MaskIndex : Mask) {
4862 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
4863 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4864 if (!IsSelect) {
4865 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4866 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4867 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4868 : DAG.getUNDEF(XLenVT));
4869 GatherIndicesRHS.push_back(
4870 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4871 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4872 if (IsLHSOrUndefIndex && MaskIndex >= 0)
4873 ++LHSIndexCounts[MaskIndex];
4874 if (!IsLHSOrUndefIndex)
4875 ++RHSIndexCounts[MaskIndex - NumElts];
4879 if (SwapOps) {
4880 std::swap(V1, V2);
4881 std::swap(GatherIndicesLHS, GatherIndicesRHS);
4884 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4885 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4886 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4888 if (IsSelect)
4889 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4891 // We might be able to express the shuffle as a bitrotate. But even if we
4892 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
4893 // shifts and a vor will have a higher throughput than a vrgather.
4894 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4895 return V;
4897 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4898 // On such a large vector we're unable to use i8 as the index type.
4899 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4900 // may involve vector splitting if we're already at LMUL=8, or our
4901 // user-supplied maximum fixed-length LMUL.
4902 return SDValue();
4905 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
4906 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
4907 MVT IndexVT = VT.changeTypeToInteger();
4908 // Since we can't introduce illegal index types at this stage, use i16 and
4909 // vrgatherei16 if the corresponding index type for plain vrgather is greater
4910 // than XLenVT.
4911 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
4912 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4913 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4916 // If the mask allows, we can do all the index computation in 16 bits. This
4917 // requires less work and less register pressure at high LMUL, and creates
4918 // smaller constants which may be cheaper to materialize.
4919 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
4920 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
4921 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4922 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4925 MVT IndexContainerVT =
4926 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
4928 SDValue Gather;
4929 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
4930 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
4931 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
4932 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
4933 Subtarget);
4934 } else {
4935 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4936 // If only one index is used, we can use a "splat" vrgather.
4937 // TODO: We can splat the most-common index and fix-up any stragglers, if
4938 // that's beneficial.
4939 if (LHSIndexCounts.size() == 1) {
4940 int SplatIndex = LHSIndexCounts.begin()->getFirst();
4941 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
4942 DAG.getConstant(SplatIndex, DL, XLenVT),
4943 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4944 } else {
4945 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
4946 LHSIndices =
4947 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
4949 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
4950 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4954 // If a second vector operand is used by this shuffle, blend it in with an
4955 // additional vrgather.
4956 if (!V2.isUndef()) {
4957 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4959 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4960 SelectMask =
4961 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
4963 // If only one index is used, we can use a "splat" vrgather.
4964 // TODO: We can splat the most-common index and fix-up any stragglers, if
4965 // that's beneficial.
4966 if (RHSIndexCounts.size() == 1) {
4967 int SplatIndex = RHSIndexCounts.begin()->getFirst();
4968 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
4969 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
4970 SelectMask, VL);
4971 } else {
4972 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
4973 RHSIndices =
4974 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
4975 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
4976 SelectMask, VL);
4980 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4983 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
4984 // Support splats for any type. These should type legalize well.
4985 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
4986 return true;
4988 // Only support legal VTs for other shuffles for now.
4989 if (!isTypeLegal(VT))
4990 return false;
4992 MVT SVT = VT.getSimpleVT();
4994 // Not for i1 vectors.
4995 if (SVT.getScalarType() == MVT::i1)
4996 return false;
4998 int Dummy1, Dummy2;
4999 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5000 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5003 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5004 // the exponent.
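// For example, for cttz_zero_undef(i16 12): 12 & -12 == 4, converting 4 to f32
// gives an exponent field of 127 + 2, and subtracting the bias (127) yields 2.
// For ctlz_zero_undef(i16 16) the exponent field is 127 + 4, and
// (127 + 15) - (127 + 4) == 11 leading zeros.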
5005 SDValue
5006 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5007 SelectionDAG &DAG) const {
5008 MVT VT = Op.getSimpleValueType();
5009 unsigned EltSize = VT.getScalarSizeInBits();
5010 SDValue Src = Op.getOperand(0);
5011 SDLoc DL(Op);
5012 MVT ContainerVT = VT;
5014 SDValue Mask, VL;
5015 if (Op->isVPOpcode()) {
5016 Mask = Op.getOperand(1);
5017 if (VT.isFixedLengthVector())
5018 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5019 Subtarget);
5020 VL = Op.getOperand(2);
5023 // We choose an FP type that can represent the value exactly if possible. Otherwise,
5024 // we use a round-towards-zero conversion so the exponent of the result is correct.
5025 // TODO: Use f16 for i8 when possible?
5026 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5027 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5028 FloatEltVT = MVT::f32;
5029 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5031 // Legal types should have been checked in the RISCVTargetLowering
5032 // constructor.
5033 // TODO: Splitting may make sense in some cases.
5034 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5035 "Expected legal float type!");
5037 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5038 // The trailing zero count is equal to log2 of this single bit value.
5039 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5040 SDValue Neg = DAG.getNegative(Src, DL, VT);
5041 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5042 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5043 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5044 Src, Mask, VL);
5045 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5048 // We have a legal FP type, convert to it.
5049 SDValue FloatVal;
5050 if (FloatVT.bitsGT(VT)) {
5051 if (Op->isVPOpcode())
5052 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5053 else
5054 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5055 } else {
5056 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5057 if (VT.isFixedLengthVector()) {
5058 ContainerVT = getContainerForFixedLengthVector(VT);
5059 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5061 if (!Op->isVPOpcode())
5062 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5063 SDValue RTZRM =
5064 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5065 MVT ContainerFloatVT =
5066 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5067 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5068 Src, Mask, RTZRM, VL);
5069 if (VT.isFixedLengthVector())
5070 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5072 // Bitcast to integer and shift the exponent to the LSB.
5073 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5074 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5075 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5077 SDValue Exp;
5078 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5079 if (Op->isVPOpcode()) {
5080 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5081 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5082 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5083 } else {
5084 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5085 DAG.getConstant(ShiftAmt, DL, IntVT));
5086 if (IntVT.bitsLT(VT))
5087 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5088 else if (IntVT.bitsGT(VT))
5089 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5092 // The exponent contains log2 of the value in biased form.
5093 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5094 // For trailing zeros, we just need to subtract the bias.
5095 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5096 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5097 DAG.getConstant(ExponentBias, DL, VT));
5098 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5099 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5100 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5102 // For leading zeros, we need to remove the bias and convert from log2 to
5103 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5104 unsigned Adjust = ExponentBias + (EltSize - 1);
5105 SDValue Res;
5106 if (Op->isVPOpcode())
5107 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5108 Mask, VL);
5109 else
5110 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5112 // With a zero input, the above result equals Adjust, which is greater than
5113 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5114 if (Op.getOpcode() == ISD::CTLZ)
5115 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5116 else if (Op.getOpcode() == ISD::VP_CTLZ)
5117 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5118 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5119 return Res;
5122 // While RVV has alignment restrictions, we should always be able to load as a
5123 // legal equivalently-sized byte-typed vector instead. This method is
5124 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5125 // the load is already correctly aligned, it returns SDValue().
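// For example, a byte-aligned load of v4i32 is re-expressed as a v16i8 load of
// the same address followed by a bitcast back to v4i32.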
5126 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5127 SelectionDAG &DAG) const {
5128 auto *Load = cast<LoadSDNode>(Op);
5129 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5131 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5132 Load->getMemoryVT(),
5133 *Load->getMemOperand()))
5134 return SDValue();
5136 SDLoc DL(Op);
5137 MVT VT = Op.getSimpleValueType();
5138 unsigned EltSizeBits = VT.getScalarSizeInBits();
5139 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5140 "Unexpected unaligned RVV load type");
5141 MVT NewVT =
5142 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5143 assert(NewVT.isValid() &&
5144 "Expecting equally-sized RVV vector types to be legal");
5145 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5146 Load->getPointerInfo(), Load->getOriginalAlign(),
5147 Load->getMemOperand()->getFlags());
5148 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5151 // While RVV has alignment restrictions, we should always be able to store as a
5152 // legal equivalently-sized byte-typed vector instead. This method is
5153 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5154 // returns SDValue() if the store is already correctly aligned.
5155 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5156 SelectionDAG &DAG) const {
5157 auto *Store = cast<StoreSDNode>(Op);
5158 assert(Store && Store->getValue().getValueType().isVector() &&
5159 "Expected vector store");
5161 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5162 Store->getMemoryVT(),
5163 *Store->getMemOperand()))
5164 return SDValue();
5166 SDLoc DL(Op);
5167 SDValue StoredVal = Store->getValue();
5168 MVT VT = StoredVal.getSimpleValueType();
5169 unsigned EltSizeBits = VT.getScalarSizeInBits();
5170 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5171 "Unexpected unaligned RVV store type");
5172 MVT NewVT =
5173 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5174 assert(NewVT.isValid() &&
5175 "Expecting equally-sized RVV vector types to be legal");
5176 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5177 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5178 Store->getPointerInfo(), Store->getOriginalAlign(),
5179 Store->getMemOperand()->getFlags());
5182 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5183 const RISCVSubtarget &Subtarget) {
5184 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5186 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5188 // All simm32 constants should be handled by isel.
5189 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5190 // this check redundant, but small immediates are common, so checking them
5191 // first here is better for compile time.
5192 if (isInt<32>(Imm))
5193 return Op;
5195 // We only need to cost the immediate if constant pool lowering is enabled.
5196 if (!Subtarget.useConstantPoolForLargeInts())
5197 return Op;
5199 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5200 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5201 return Op;
5203 // Optimizations below are disabled for opt size. If we're optimizing for
5204 // size, use a constant pool.
5205 if (DAG.shouldOptForSize())
5206 return SDValue();
5208 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5209 // do that if it will avoid a constant pool, though it will require an extra
5210 // temporary register.
5211 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where the
5212 // low and high 32 bits are the same and bits 31 and 63 are set.
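// For example, 0xF0F0F0F0F0F0F0F0: materialize X = 0xF0F0F0F0 (sign-extended),
// then ADD_UW zero-extends the low 32 bits of X and adds (SLLI X, 32) to
// rebuild the full 64-bit value without touching the constant pool.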
5213 unsigned ShiftAmt, AddOpc;
5214 RISCVMatInt::InstSeq SeqLo =
5215 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5216 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5217 return Op;
5219 return SDValue();
5222 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5223 const RISCVSubtarget &Subtarget) {
5224 SDLoc dl(Op);
5225 AtomicOrdering FenceOrdering =
5226 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5227 SyncScope::ID FenceSSID =
5228 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5230 if (Subtarget.hasStdExtZtso()) {
5231 // The only fence that needs an instruction is a sequentially-consistent
5232 // cross-thread fence.
5233 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5234 FenceSSID == SyncScope::System)
5235 return Op;
5237 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5238 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5241 // singlethread fences only synchronize with signal handlers on the same
5242 // thread and thus only need to preserve instruction order, not actually
5243 // enforce memory ordering.
5244 if (FenceSSID == SyncScope::SingleThread)
5245 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5246 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5248 return Op;
5251 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5252 SelectionDAG &DAG) const {
5253 SDLoc DL(Op);
5254 MVT VT = Op.getSimpleValueType();
5255 MVT XLenVT = Subtarget.getXLenVT();
5256 unsigned Check = Op.getConstantOperandVal(1);
5257 unsigned TDCMask = 0;
5258 if (Check & fcSNan)
5259 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5260 if (Check & fcQNan)
5261 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5262 if (Check & fcPosInf)
5263 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5264 if (Check & fcNegInf)
5265 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5266 if (Check & fcPosNormal)
5267 TDCMask |= RISCV::FPMASK_Positive_Normal;
5268 if (Check & fcNegNormal)
5269 TDCMask |= RISCV::FPMASK_Negative_Normal;
5270 if (Check & fcPosSubnormal)
5271 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5272 if (Check & fcNegSubnormal)
5273 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5274 if (Check & fcPosZero)
5275 TDCMask |= RISCV::FPMASK_Positive_Zero;
5276 if (Check & fcNegZero)
5277 TDCMask |= RISCV::FPMASK_Negative_Zero;
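// For example, an is_fpclass(x, nan) test sets both NaN bits, and the scalar
// form below lowers to (fclass(x) & TDCMask) != 0; when only a single class
// bit is requested, the vector path can use an equality compare instead.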
5279 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5281 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5283 if (VT.isVector()) {
5284 SDValue Op0 = Op.getOperand(0);
5285 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5287 if (VT.isScalableVector()) {
5288 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5289 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5290 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5291 Mask = Op.getOperand(2);
5292 VL = Op.getOperand(3);
5294 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5295 VL, Op->getFlags());
5296 if (IsOneBitMask)
5297 return DAG.getSetCC(DL, VT, FPCLASS,
5298 DAG.getConstant(TDCMask, DL, DstVT),
5299 ISD::CondCode::SETEQ);
5300 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5301 DAG.getConstant(TDCMask, DL, DstVT));
5302 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5303 ISD::SETNE);
5306 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5307 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5308 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5309 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5310 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5311 Mask = Op.getOperand(2);
5312 MVT MaskContainerVT =
5313 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5314 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5315 VL = Op.getOperand(3);
5317 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5319 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5320 Mask, VL, Op->getFlags());
5322 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5323 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5324 if (IsOneBitMask) {
5325 SDValue VMSEQ =
5326 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5327 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5328 DAG.getUNDEF(ContainerVT), Mask, VL});
5329 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5331 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5332 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5334 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5335 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5336 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5338 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5339 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5340 DAG.getUNDEF(ContainerVT), Mask, VL});
5341 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5344 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5345 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5346 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5347 ISD::CondCode::SETNE);
5348 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5351 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5352 // operations propagate nans.
5353 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5354 const RISCVSubtarget &Subtarget) {
5355 SDLoc DL(Op);
5356 MVT VT = Op.getSimpleValueType();
5358 SDValue X = Op.getOperand(0);
5359 SDValue Y = Op.getOperand(1);
5361 if (!VT.isVector()) {
5362 MVT XLenVT = Subtarget.getXLenVT();
5364 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5365 // ensures that when one input is a nan, the other will also be a nan
5366 // allowing the nan to propagate. If both inputs are nan, this will swap the
5367 // inputs which is harmless.
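// For example, fmaximum(NaN, 3.0): X compares unordered with itself, so Y is
// replaced with the NaN X; fmax then sees two NaNs and returns the canonical
// NaN, so the NaN is propagated as required.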
5369 SDValue NewY = Y;
5370 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5371 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5372 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5375 SDValue NewX = X;
5376 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5377 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5378 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5381 unsigned Opc =
5382 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5383 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5386 // Check for NaNs before converting the fixed vectors to scalable vectors.
5387 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5388 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5390 MVT ContainerVT = VT;
5391 if (VT.isFixedLengthVector()) {
5392 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5393 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5394 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5397 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5399 SDValue NewY = Y;
5400 if (!XIsNeverNan) {
5401 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5402 {X, X, DAG.getCondCode(ISD::SETOEQ),
5403 DAG.getUNDEF(ContainerVT), Mask, VL});
5404 NewY =
5405 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
5408 SDValue NewX = X;
5409 if (!YIsNeverNan) {
5410 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5411 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5412 DAG.getUNDEF(ContainerVT), Mask, VL});
5413 NewX =
5414 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
5417 unsigned Opc =
5418 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
5419 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5420 DAG.getUNDEF(ContainerVT), Mask, VL);
5421 if (VT.isFixedLengthVector())
5422 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5423 return Res;
5426 /// Get the RISC-V target-specific VL op for a given SDNode.
5427 static unsigned getRISCVVLOp(SDValue Op) {
5428 #define OP_CASE(NODE) \
5429 case ISD::NODE: \
5430 return RISCVISD::NODE##_VL;
5431 #define VP_CASE(NODE) \
5432 case ISD::VP_##NODE: \
5433 return RISCVISD::NODE##_VL;
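// For example, OP_CASE(ADD) expands to "case ISD::ADD: return RISCVISD::ADD_VL;"
// and VP_CASE(ADD) expands to "case ISD::VP_ADD: return RISCVISD::ADD_VL;".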
5434 // clang-format off
5435 switch (Op.getOpcode()) {
5436 default:
5437 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5438 OP_CASE(ADD)
5439 OP_CASE(SUB)
5440 OP_CASE(MUL)
5441 OP_CASE(MULHS)
5442 OP_CASE(MULHU)
5443 OP_CASE(SDIV)
5444 OP_CASE(SREM)
5445 OP_CASE(UDIV)
5446 OP_CASE(UREM)
5447 OP_CASE(SHL)
5448 OP_CASE(SRA)
5449 OP_CASE(SRL)
5450 OP_CASE(ROTL)
5451 OP_CASE(ROTR)
5452 OP_CASE(BSWAP)
5453 OP_CASE(CTTZ)
5454 OP_CASE(CTLZ)
5455 OP_CASE(CTPOP)
5456 OP_CASE(BITREVERSE)
5457 OP_CASE(SADDSAT)
5458 OP_CASE(UADDSAT)
5459 OP_CASE(SSUBSAT)
5460 OP_CASE(USUBSAT)
5461 OP_CASE(FADD)
5462 OP_CASE(FSUB)
5463 OP_CASE(FMUL)
5464 OP_CASE(FDIV)
5465 OP_CASE(FNEG)
5466 OP_CASE(FABS)
5467 OP_CASE(FSQRT)
5468 OP_CASE(SMIN)
5469 OP_CASE(SMAX)
5470 OP_CASE(UMIN)
5471 OP_CASE(UMAX)
5472 OP_CASE(STRICT_FADD)
5473 OP_CASE(STRICT_FSUB)
5474 OP_CASE(STRICT_FMUL)
5475 OP_CASE(STRICT_FDIV)
5476 OP_CASE(STRICT_FSQRT)
5477 VP_CASE(ADD) // VP_ADD
5478 VP_CASE(SUB) // VP_SUB
5479 VP_CASE(MUL) // VP_MUL
5480 VP_CASE(SDIV) // VP_SDIV
5481 VP_CASE(SREM) // VP_SREM
5482 VP_CASE(UDIV) // VP_UDIV
5483 VP_CASE(UREM) // VP_UREM
5484 VP_CASE(SHL) // VP_SHL
5485 VP_CASE(FADD) // VP_FADD
5486 VP_CASE(FSUB) // VP_FSUB
5487 VP_CASE(FMUL) // VP_FMUL
5488 VP_CASE(FDIV) // VP_FDIV
5489 VP_CASE(FNEG) // VP_FNEG
5490 VP_CASE(FABS) // VP_FABS
5491 VP_CASE(SMIN) // VP_SMIN
5492 VP_CASE(SMAX) // VP_SMAX
5493 VP_CASE(UMIN) // VP_UMIN
5494 VP_CASE(UMAX) // VP_UMAX
5495 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5496 VP_CASE(SETCC) // VP_SETCC
5497 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5498 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5499 VP_CASE(BITREVERSE) // VP_BITREVERSE
5500 VP_CASE(BSWAP) // VP_BSWAP
5501 VP_CASE(CTLZ) // VP_CTLZ
5502 VP_CASE(CTTZ) // VP_CTTZ
5503 VP_CASE(CTPOP) // VP_CTPOP
5504 case ISD::CTLZ_ZERO_UNDEF:
5505 case ISD::VP_CTLZ_ZERO_UNDEF:
5506 return RISCVISD::CTLZ_VL;
5507 case ISD::CTTZ_ZERO_UNDEF:
5508 case ISD::VP_CTTZ_ZERO_UNDEF:
5509 return RISCVISD::CTTZ_VL;
5510 case ISD::FMA:
5511 case ISD::VP_FMA:
5512 return RISCVISD::VFMADD_VL;
5513 case ISD::STRICT_FMA:
5514 return RISCVISD::STRICT_VFMADD_VL;
5515 case ISD::AND:
5516 case ISD::VP_AND:
5517 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5518 return RISCVISD::VMAND_VL;
5519 return RISCVISD::AND_VL;
5520 case ISD::OR:
5521 case ISD::VP_OR:
5522 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5523 return RISCVISD::VMOR_VL;
5524 return RISCVISD::OR_VL;
5525 case ISD::XOR:
5526 case ISD::VP_XOR:
5527 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5528 return RISCVISD::VMXOR_VL;
5529 return RISCVISD::XOR_VL;
5530 case ISD::VP_SELECT:
5531 return RISCVISD::VSELECT_VL;
5532 case ISD::VP_MERGE:
5533 return RISCVISD::VMERGE_VL;
5534 case ISD::VP_ASHR:
5535 return RISCVISD::SRA_VL;
5536 case ISD::VP_LSHR:
5537 return RISCVISD::SRL_VL;
5538 case ISD::VP_SQRT:
5539 return RISCVISD::FSQRT_VL;
5540 case ISD::VP_SIGN_EXTEND:
5541 return RISCVISD::VSEXT_VL;
5542 case ISD::VP_ZERO_EXTEND:
5543 return RISCVISD::VZEXT_VL;
5544 case ISD::VP_FP_TO_SINT:
5545 return RISCVISD::VFCVT_RTZ_X_F_VL;
5546 case ISD::VP_FP_TO_UINT:
5547 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5548 case ISD::FMINNUM:
5549 case ISD::VP_FMINNUM:
5550 return RISCVISD::VFMIN_VL;
5551 case ISD::FMAXNUM:
5552 case ISD::VP_FMAXNUM:
5553 return RISCVISD::VFMAX_VL;
5555 // clang-format on
5556 #undef OP_CASE
5557 #undef VP_CASE
5560 /// Return true if a RISC-V target-specific op has a merge operand.
5561 static bool hasMergeOp(unsigned Opcode) {
5562 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5563 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5564 "not a RISC-V target specific op");
5565 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5566 125 &&
5567 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5568 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5569 21 &&
5570 "adding target specific op should update this function");
5571 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5572 return true;
5573 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5574 return true;
5575 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5576 return true;
5577 if (Opcode == RISCVISD::SETCC_VL)
5578 return true;
5579 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5580 return true;
5581 if (Opcode == RISCVISD::VMERGE_VL)
5582 return true;
5583 return false;
5586 /// Return true if a RISC-V target-specific op has a mask operand.
5587 static bool hasMaskOp(unsigned Opcode) {
5588 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5589 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5590 "not a RISC-V target specific op");
5591 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5592 125 &&
5593 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5594 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5595 21 &&
5596 "adding target specific op should update this function");
5597 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5598 return true;
5599 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5600 return true;
5601 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5602 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5603 return true;
5604 return false;
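// Split a vector operation into operations on the low and high halves of its
// vector operands, then concatenate the two results back into the original type.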
5607 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5608 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5609 SDLoc DL(Op);
5611 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5612 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5614 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5615 if (!Op.getOperand(j).getValueType().isVector()) {
5616 LoOperands[j] = Op.getOperand(j);
5617 HiOperands[j] = Op.getOperand(j);
5618 continue;
5620 std::tie(LoOperands[j], HiOperands[j]) =
5621 DAG.SplitVector(Op.getOperand(j), DL);
5624 SDValue LoRes =
5625 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5626 SDValue HiRes =
5627 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5629 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5632 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5633 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5634 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5635 SDLoc DL(Op);
5637 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5638 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5640 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5641 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5642 std::tie(LoOperands[j], HiOperands[j]) =
5643 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5644 continue;
5646 if (!Op.getOperand(j).getValueType().isVector()) {
5647 LoOperands[j] = Op.getOperand(j);
5648 HiOperands[j] = Op.getOperand(j);
5649 continue;
5651 std::tie(LoOperands[j], HiOperands[j]) =
5652 DAG.SplitVector(Op.getOperand(j), DL);
5655 SDValue LoRes =
5656 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5657 SDValue HiRes =
5658 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5660 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
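// Split a VP reduction into two half-width reductions, feeding the result of
// the low-half reduction in as the start value of the high-half reduction.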
5663 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5664 SDLoc DL(Op);
5666 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5667 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5668 auto [EVLLo, EVLHi] =
5669 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5671 SDValue ResLo =
5672 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5673 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5674 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5675 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5678 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
5680 assert(Op->isStrictFPOpcode());
5682 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5684 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
5685 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
5687 SDLoc DL(Op);
5689 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5690 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5692 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5693 if (!Op.getOperand(j).getValueType().isVector()) {
5694 LoOperands[j] = Op.getOperand(j);
5695 HiOperands[j] = Op.getOperand(j);
5696 continue;
5698 std::tie(LoOperands[j], HiOperands[j]) =
5699 DAG.SplitVector(Op.getOperand(j), DL);
5702 SDValue LoRes =
5703 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
5704 HiOperands[0] = LoRes.getValue(1);
5705 SDValue HiRes =
5706 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
5708 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
5709 LoRes.getValue(0), HiRes.getValue(0));
5710 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
5713 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
5714 SelectionDAG &DAG) const {
5715 switch (Op.getOpcode()) {
5716 default:
5717 report_fatal_error("unimplemented operand");
5718 case ISD::ATOMIC_FENCE:
5719 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
5720 case ISD::GlobalAddress:
5721 return lowerGlobalAddress(Op, DAG);
5722 case ISD::BlockAddress:
5723 return lowerBlockAddress(Op, DAG);
5724 case ISD::ConstantPool:
5725 return lowerConstantPool(Op, DAG);
5726 case ISD::JumpTable:
5727 return lowerJumpTable(Op, DAG);
5728 case ISD::GlobalTLSAddress:
5729 return lowerGlobalTLSAddress(Op, DAG);
5730 case ISD::Constant:
5731 return lowerConstant(Op, DAG, Subtarget);
5732 case ISD::SELECT:
5733 return lowerSELECT(Op, DAG);
5734 case ISD::BRCOND:
5735 return lowerBRCOND(Op, DAG);
5736 case ISD::VASTART:
5737 return lowerVASTART(Op, DAG);
5738 case ISD::FRAMEADDR:
5739 return lowerFRAMEADDR(Op, DAG);
5740 case ISD::RETURNADDR:
5741 return lowerRETURNADDR(Op, DAG);
5742 case ISD::SHL_PARTS:
5743 return lowerShiftLeftParts(Op, DAG);
5744 case ISD::SRA_PARTS:
5745 return lowerShiftRightParts(Op, DAG, true);
5746 case ISD::SRL_PARTS:
5747 return lowerShiftRightParts(Op, DAG, false);
5748 case ISD::ROTL:
5749 case ISD::ROTR:
5750 if (Op.getValueType().isFixedLengthVector()) {
5751 assert(Subtarget.hasStdExtZvkb());
5752 return lowerToScalableOp(Op, DAG);
5754 assert(Subtarget.hasVendorXTHeadBb() &&
5755 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
5756 "Unexpected custom legalization");
5757 // XTHeadBb only supports rotate by constant.
5758 if (!isa<ConstantSDNode>(Op.getOperand(1)))
5759 return SDValue();
5760 return Op;
5761 case ISD::BITCAST: {
5762 SDLoc DL(Op);
5763 EVT VT = Op.getValueType();
5764 SDValue Op0 = Op.getOperand(0);
5765 EVT Op0VT = Op0.getValueType();
5766 MVT XLenVT = Subtarget.getXLenVT();
5767 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
5768 Subtarget.hasStdExtZfhminOrZhinxmin()) {
5769 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5770 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
5771 return FPConv;
5773 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
5774 Subtarget.hasStdExtZfbfmin()) {
5775 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5776 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
5777 return FPConv;
5779 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
5780 Subtarget.hasStdExtFOrZfinx()) {
5781 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5782 SDValue FPConv =
5783 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
5784 return FPConv;
5786 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
5787 Subtarget.hasStdExtZfa()) {
5788 SDValue Lo, Hi;
5789 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
5790 SDValue RetReg =
5791 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5792 return RetReg;
5795 // Consider other scalar<->scalar casts as legal if the types are legal.
5796 // Otherwise expand them.
5797 if (!VT.isVector() && !Op0VT.isVector()) {
5798 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
5799 return Op;
5800 return SDValue();
5803 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
5804 "Unexpected types");
5806 if (VT.isFixedLengthVector()) {
5807 // We can handle fixed length vector bitcasts with a simple replacement
5808 // in isel.
5809 if (Op0VT.isFixedLengthVector())
5810 return Op;
5811 // When bitcasting from scalar to fixed-length vector, insert the scalar
5812 // into a one-element vector of the result type, and perform a vector
5813 // bitcast.
5814 if (!Op0VT.isVector()) {
5815 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
5816 if (!isTypeLegal(BVT))
5817 return SDValue();
5818 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
5819 DAG.getUNDEF(BVT), Op0,
5820 DAG.getConstant(0, DL, XLenVT)));
5822 return SDValue();
5824 // Custom-legalize bitcasts from fixed-length vector types to scalar types
5825 // thus: bitcast the vector to a one-element vector type whose element type
5826 // is the same as the result type, and extract the first element.
5827 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
5828 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
5829 if (!isTypeLegal(BVT))
5830 return SDValue();
5831 SDValue BVec = DAG.getBitcast(BVT, Op0);
5832 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
5833 DAG.getConstant(0, DL, XLenVT));
5835 return SDValue();
5837 case ISD::INTRINSIC_WO_CHAIN:
5838 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5839 case ISD::INTRINSIC_W_CHAIN:
5840 return LowerINTRINSIC_W_CHAIN(Op, DAG);
5841 case ISD::INTRINSIC_VOID:
5842 return LowerINTRINSIC_VOID(Op, DAG);
5843 case ISD::IS_FPCLASS:
5844 return LowerIS_FPCLASS(Op, DAG);
5845 case ISD::BITREVERSE: {
5846 MVT VT = Op.getSimpleValueType();
5847 if (VT.isFixedLengthVector()) {
5848 assert(Subtarget.hasStdExtZvbb());
5849 return lowerToScalableOp(Op, DAG);
5851 SDLoc DL(Op);
5852 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
5853 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
5854 // Expand bitreverse to a bswap(rev8) followed by brev8.
5855 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
5856 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
5858 case ISD::TRUNCATE:
5859 // Only custom-lower vector truncates
5860 if (!Op.getSimpleValueType().isVector())
5861 return Op;
5862 return lowerVectorTruncLike(Op, DAG);
5863 case ISD::ANY_EXTEND:
5864 case ISD::ZERO_EXTEND:
5865 if (Op.getOperand(0).getValueType().isVector() &&
5866 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5867 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
5868 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
5869 case ISD::SIGN_EXTEND:
5870 if (Op.getOperand(0).getValueType().isVector() &&
5871 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5872 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
5873 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
5874 case ISD::SPLAT_VECTOR_PARTS:
5875 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
5876 case ISD::INSERT_VECTOR_ELT:
5877 return lowerINSERT_VECTOR_ELT(Op, DAG);
5878 case ISD::EXTRACT_VECTOR_ELT:
5879 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5880 case ISD::SCALAR_TO_VECTOR: {
5881 MVT VT = Op.getSimpleValueType();
5882 SDLoc DL(Op);
5883 SDValue Scalar = Op.getOperand(0);
5884 if (VT.getVectorElementType() == MVT::i1) {
5885 MVT WideVT = VT.changeVectorElementType(MVT::i8);
5886 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
5887 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
5889 MVT ContainerVT = VT;
5890 if (VT.isFixedLengthVector())
5891 ContainerVT = getContainerForFixedLengthVector(VT);
5892 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5893 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
5894 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
5895 DAG.getUNDEF(ContainerVT), Scalar, VL);
5896 if (VT.isFixedLengthVector())
5897 V = convertFromScalableVector(VT, V, DAG, Subtarget);
5898 return V;
5900 case ISD::VSCALE: {
5901 MVT XLenVT = Subtarget.getXLenVT();
5902 MVT VT = Op.getSimpleValueType();
5903 SDLoc DL(Op);
5904 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
5905 // We define our scalable vector types for lmul=1 to use a 64-bit known
5906 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
5907 // vscale as VLENB / 8.
5908 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
5909 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
5910 report_fatal_error("Support for VLEN==32 is incomplete.");
5911 // We assume VLENB is a multiple of 8. We manually choose the best shift
5912 // here because SimplifyDemandedBits isn't always able to simplify it.
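// e.g. (vscale * 4)  -> (srl vlenb, 1)          since VLENB/8 * 4  == VLENB/2
//      (vscale * 16) -> (sll vlenb, 1)          since VLENB/8 * 16 == VLENB*2
//      (vscale * 24) -> (mul vlenb, 3)          since VLENB/8 * 24 == VLENB*3
//      (vscale * 6)  -> (mul (srl vlenb, 3), 6)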
5913 uint64_t Val = Op.getConstantOperandVal(0);
5914 if (isPowerOf2_64(Val)) {
5915 uint64_t Log2 = Log2_64(Val);
5916 if (Log2 < 3)
5917 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
5918 DAG.getConstant(3 - Log2, DL, XLenVT));
5919 else if (Log2 > 3)
5920 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
5921 DAG.getConstant(Log2 - 3, DL, XLenVT));
5922 } else if ((Val % 8) == 0) {
5923 // If the multiplier is a multiple of 8, scale it down to avoid needing
5924 // to shift the VLENB value.
5925 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
5926 DAG.getConstant(Val / 8, DL, XLenVT));
5927 } else {
5928 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
5929 DAG.getConstant(3, DL, XLenVT));
5930 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
5931 DAG.getConstant(Val, DL, XLenVT));
5933 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5935 case ISD::FPOWI: {
5936 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
5937 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
5938 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
5939 Op.getOperand(1).getValueType() == MVT::i32) {
5940 SDLoc DL(Op);
5941 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
5942 SDValue Powi =
5943 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
5944 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
5945 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5947 return SDValue();
5949 case ISD::FMAXIMUM:
5950 case ISD::FMINIMUM:
5951 if (Op.getValueType() == MVT::nxv32f16 &&
5952 (Subtarget.hasVInstructionsF16Minimal() &&
5953 !Subtarget.hasVInstructionsF16()))
5954 return SplitVectorOp(Op, DAG);
5955 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
5956 case ISD::FP_EXTEND: {
5957 SDLoc DL(Op);
5958 EVT VT = Op.getValueType();
5959 SDValue Op0 = Op.getOperand(0);
5960 EVT Op0VT = Op0.getValueType();
5961 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
5962 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5963 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
5964 SDValue FloatVal =
5965 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5966 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
5969 if (!Op.getValueType().isVector())
5970 return Op;
5971 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5973 case ISD::FP_ROUND: {
5974 SDLoc DL(Op);
5975 EVT VT = Op.getValueType();
5976 SDValue Op0 = Op.getOperand(0);
5977 EVT Op0VT = Op0.getValueType();
5978 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
5979 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
5980 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
5981 Subtarget.hasStdExtDOrZdinx()) {
5982 SDValue FloatVal =
5983 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
5984 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5985 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
5988 if (!Op.getValueType().isVector())
5989 return Op;
5990 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5992 case ISD::STRICT_FP_ROUND:
5993 case ISD::STRICT_FP_EXTEND:
5994 return lowerStrictFPExtendOrRoundLike(Op, DAG);
5995 case ISD::SINT_TO_FP:
5996 case ISD::UINT_TO_FP:
5997 if (Op.getValueType().isVector() &&
5998 Op.getValueType().getScalarType() == MVT::f16 &&
5999 (Subtarget.hasVInstructionsF16Minimal() &&
6000 !Subtarget.hasVInstructionsF16())) {
6001 if (Op.getValueType() == MVT::nxv32f16)
6002 return SplitVectorOp(Op, DAG);
6003 // int -> f32
6004 SDLoc DL(Op);
6005 MVT NVT =
6006 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6007 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6008 // f32 -> f16
6009 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6010 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6012 [[fallthrough]];
6013 case ISD::FP_TO_SINT:
6014 case ISD::FP_TO_UINT:
6015 if (SDValue Op1 = Op.getOperand(0);
6016 Op1.getValueType().isVector() &&
6017 Op1.getValueType().getScalarType() == MVT::f16 &&
6018 (Subtarget.hasVInstructionsF16Minimal() &&
6019 !Subtarget.hasVInstructionsF16())) {
6020 if (Op1.getValueType() == MVT::nxv32f16)
6021 return SplitVectorOp(Op, DAG);
6022 // f16 -> f32
6023 SDLoc DL(Op);
6024 MVT NVT = MVT::getVectorVT(MVT::f32,
6025 Op1.getValueType().getVectorElementCount());
6026 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6027 // f32 -> int
6028 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6030 [[fallthrough]];
6031 case ISD::STRICT_FP_TO_SINT:
6032 case ISD::STRICT_FP_TO_UINT:
6033 case ISD::STRICT_SINT_TO_FP:
6034 case ISD::STRICT_UINT_TO_FP: {
6035 // RVV can only do fp<->int conversions to types half/double the size of
6036 // the source. We custom-lower any conversions that do two hops into
6037 // sequences.
6038 MVT VT = Op.getSimpleValueType();
6039 if (!VT.isVector())
6040 return Op;
6041 SDLoc DL(Op);
6042 bool IsStrict = Op->isStrictFPOpcode();
6043 SDValue Src = Op.getOperand(0 + IsStrict);
6044 MVT EltVT = VT.getVectorElementType();
6045 MVT SrcVT = Src.getSimpleValueType();
6046 MVT SrcEltVT = SrcVT.getVectorElementType();
6047 unsigned EltSize = EltVT.getSizeInBits();
6048 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6049 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6050 "Unexpected vector element types");
6052 bool IsInt2FP = SrcEltVT.isInteger();
6053 // Widening conversions
6054 if (EltSize > (2 * SrcEltSize)) {
6055 if (IsInt2FP) {
6056 // Do a regular integer sign/zero extension then convert to float.
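// e.g. nxv2i8 -> nxv2f32 is lowered as nxv2i8 -> nxv2i16 -> nxv2f32.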
6057 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6058 VT.getVectorElementCount());
6059 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6060 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6061 ? ISD::ZERO_EXTEND
6062 : ISD::SIGN_EXTEND;
6063 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6064 if (IsStrict)
6065 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6066 Op.getOperand(0), Ext);
6067 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6069 // FP2Int
6070 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6071 // Do one doubling fp_extend then complete the operation by converting
6072 // to int.
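// e.g. nxv2f16 -> nxv2i64 is lowered as nxv2f16 -> nxv2f32 -> nxv2i64.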
6073 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6074 if (IsStrict) {
6075 auto [FExt, Chain] =
6076 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6077 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6079 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6080 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6083 // Narrowing conversions
6084 if (SrcEltSize > (2 * EltSize)) {
6085 if (IsInt2FP) {
6086 // One narrowing int_to_fp, then an fp_round.
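// e.g. nxv2i64 -> nxv2f16 is lowered as nxv2i64 -> nxv2f32 -> nxv2f16.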
6087 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6088 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6089 if (IsStrict) {
6090 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6091 DAG.getVTList(InterimFVT, MVT::Other),
6092 Op.getOperand(0), Src);
6093 SDValue Chain = Int2FP.getValue(1);
6094 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6096 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6097 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6099 // FP2Int
6100 // One narrowing fp_to_int, then truncate the integer. If the float value
6101 // isn't representable in the integer type, the result is poison.
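// e.g. nxv2f64 -> nxv2i8 is lowered as nxv2f64 -> nxv2i32 -> nxv2i8.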
6102 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6103 VT.getVectorElementCount());
6104 if (IsStrict) {
6105 SDValue FP2Int =
6106 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6107 Op.getOperand(0), Src);
6108 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6109 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6111 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6112 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6115 // Scalable vectors can exit here. Patterns will handle equally-sized
6116 // conversions as well as halving/doubling ones.
6117 if (!VT.isFixedLengthVector())
6118 return Op;
6120 // For fixed-length vectors we lower to a custom "VL" node.
6121 unsigned RVVOpc = 0;
6122 switch (Op.getOpcode()) {
6123 default:
6124 llvm_unreachable("Impossible opcode");
6125 case ISD::FP_TO_SINT:
6126 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6127 break;
6128 case ISD::FP_TO_UINT:
6129 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6130 break;
6131 case ISD::SINT_TO_FP:
6132 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6133 break;
6134 case ISD::UINT_TO_FP:
6135 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6136 break;
6137 case ISD::STRICT_FP_TO_SINT:
6138 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6139 break;
6140 case ISD::STRICT_FP_TO_UINT:
6141 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6142 break;
6143 case ISD::STRICT_SINT_TO_FP:
6144 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6145 break;
6146 case ISD::STRICT_UINT_TO_FP:
6147 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6148 break;
6151 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6152 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6153 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6154 "Expected same element count");
6156 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6158 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6159 if (IsStrict) {
6160 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6161 Op.getOperand(0), Src, Mask, VL);
6162 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6163 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6165 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6166 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6168 case ISD::FP_TO_SINT_SAT:
6169 case ISD::FP_TO_UINT_SAT:
6170 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6171 case ISD::FP_TO_BF16: {
6172 // Custom lower to ensure the libcall return is passed in an FPR on hard
6173 // float ABIs.
6174 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6175 SDLoc DL(Op);
6176 MakeLibCallOptions CallOptions;
6177 RTLIB::Libcall LC =
6178 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6179 SDValue Res =
6180 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6181 if (Subtarget.is64Bit() && !RV64LegalI32)
6182 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6183 return DAG.getBitcast(MVT::i32, Res);
6185 case ISD::BF16_TO_FP: {
6186 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6187 MVT VT = Op.getSimpleValueType();
6188 SDLoc DL(Op);
6189 Op = DAG.getNode(
6190 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6191 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6192 SDValue Res = Subtarget.is64Bit()
6193 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6194 : DAG.getBitcast(MVT::f32, Op);
6195 // fp_extend if the target VT is bigger than f32.
6196 if (VT != MVT::f32)
6197 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6198 return Res;
6200 case ISD::FP_TO_FP16: {
6201 // Custom lower to ensure the libcall return is passed in an FPR on hard
6202 // float ABIs.
6203 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6204 SDLoc DL(Op);
6205 MakeLibCallOptions CallOptions;
6206 RTLIB::Libcall LC =
6207 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6208 SDValue Res =
6209 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6210 if (Subtarget.is64Bit() && !RV64LegalI32)
6211 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6212 return DAG.getBitcast(MVT::i32, Res);
6214 case ISD::FP16_TO_FP: {
6215 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6216 // float ABIs.
6217 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6218 SDLoc DL(Op);
6219 MakeLibCallOptions CallOptions;
6220 SDValue Arg = Subtarget.is64Bit()
6221 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6222 Op.getOperand(0))
6223 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6224 SDValue Res =
6225 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6226 .first;
6227 return Res;
6229 case ISD::FTRUNC:
6230 case ISD::FCEIL:
6231 case ISD::FFLOOR:
6232 case ISD::FNEARBYINT:
6233 case ISD::FRINT:
6234 case ISD::FROUND:
6235 case ISD::FROUNDEVEN:
6236 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6237 case ISD::LRINT:
6238 case ISD::LLRINT:
6239 return lowerVectorXRINT(Op, DAG, Subtarget);
6240 case ISD::VECREDUCE_ADD:
6241 case ISD::VECREDUCE_UMAX:
6242 case ISD::VECREDUCE_SMAX:
6243 case ISD::VECREDUCE_UMIN:
6244 case ISD::VECREDUCE_SMIN:
6245 return lowerVECREDUCE(Op, DAG);
6246 case ISD::VECREDUCE_AND:
6247 case ISD::VECREDUCE_OR:
6248 case ISD::VECREDUCE_XOR:
6249 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6250 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6251 return lowerVECREDUCE(Op, DAG);
6252 case ISD::VECREDUCE_FADD:
6253 case ISD::VECREDUCE_SEQ_FADD:
6254 case ISD::VECREDUCE_FMIN:
6255 case ISD::VECREDUCE_FMAX:
6256 return lowerFPVECREDUCE(Op, DAG);
6257 case ISD::VP_REDUCE_ADD:
6258 case ISD::VP_REDUCE_UMAX:
6259 case ISD::VP_REDUCE_SMAX:
6260 case ISD::VP_REDUCE_UMIN:
6261 case ISD::VP_REDUCE_SMIN:
6262 case ISD::VP_REDUCE_FADD:
6263 case ISD::VP_REDUCE_SEQ_FADD:
6264 case ISD::VP_REDUCE_FMIN:
6265 case ISD::VP_REDUCE_FMAX:
6266 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6267 (Subtarget.hasVInstructionsF16Minimal() &&
6268 !Subtarget.hasVInstructionsF16()))
6269 return SplitVectorReductionOp(Op, DAG);
6270 return lowerVPREDUCE(Op, DAG);
6271 case ISD::VP_REDUCE_AND:
6272 case ISD::VP_REDUCE_OR:
6273 case ISD::VP_REDUCE_XOR:
6274 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6275 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6276 return lowerVPREDUCE(Op, DAG);
6277 case ISD::UNDEF: {
6278 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6279 return convertFromScalableVector(Op.getSimpleValueType(),
6280 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6282 case ISD::INSERT_SUBVECTOR:
6283 return lowerINSERT_SUBVECTOR(Op, DAG);
6284 case ISD::EXTRACT_SUBVECTOR:
6285 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6286 case ISD::VECTOR_DEINTERLEAVE:
6287 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6288 case ISD::VECTOR_INTERLEAVE:
6289 return lowerVECTOR_INTERLEAVE(Op, DAG);
6290 case ISD::STEP_VECTOR:
6291 return lowerSTEP_VECTOR(Op, DAG);
6292 case ISD::VECTOR_REVERSE:
6293 return lowerVECTOR_REVERSE(Op, DAG);
6294 case ISD::VECTOR_SPLICE:
6295 return lowerVECTOR_SPLICE(Op, DAG);
6296 case ISD::BUILD_VECTOR:
6297 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6298 case ISD::SPLAT_VECTOR:
6299 if (Op.getValueType().getScalarType() == MVT::f16 &&
6300 (Subtarget.hasVInstructionsF16Minimal() &&
6301 !Subtarget.hasVInstructionsF16())) {
6302 if (Op.getValueType() == MVT::nxv32f16)
6303 return SplitVectorOp(Op, DAG);
6304 SDLoc DL(Op);
6305 SDValue NewScalar =
6306 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6307 SDValue NewSplat = DAG.getNode(
6308 ISD::SPLAT_VECTOR, DL,
6309 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6310 NewScalar);
6311 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6312 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6314 if (Op.getValueType().getVectorElementType() == MVT::i1)
6315 return lowerVectorMaskSplat(Op, DAG);
6316 return SDValue();
6317 case ISD::VECTOR_SHUFFLE:
6318 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6319 case ISD::CONCAT_VECTORS: {
6320 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6321 // better than going through the stack, as the default expansion does.
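// e.g. (v8i32 (concat_vectors v4i32:a, v4i32:b))
//        -> (insert_subvector (insert_subvector undef:v8i32, a, 0), b, 4)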
6322 SDLoc DL(Op);
6323 MVT VT = Op.getSimpleValueType();
6324 unsigned NumOpElts =
6325 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6326 SDValue Vec = DAG.getUNDEF(VT);
6327 for (const auto &OpIdx : enumerate(Op->ops())) {
6328 SDValue SubVec = OpIdx.value();
6329 // Don't insert undef subvectors.
6330 if (SubVec.isUndef())
6331 continue;
6332 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6333 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
6335 return Vec;
6337 case ISD::LOAD:
6338 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6339 return V;
6340 if (Op.getValueType().isFixedLengthVector())
6341 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6342 return Op;
6343 case ISD::STORE:
6344 if (auto V = expandUnalignedRVVStore(Op, DAG))
6345 return V;
6346 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6347 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6348 return Op;
6349 case ISD::MLOAD:
6350 case ISD::VP_LOAD:
6351 return lowerMaskedLoad(Op, DAG);
6352 case ISD::MSTORE:
6353 case ISD::VP_STORE:
6354 return lowerMaskedStore(Op, DAG);
6355 case ISD::SELECT_CC: {
6356 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6357 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6358 // into separate SETCC+SELECT just like LegalizeDAG.
6359 SDValue Tmp1 = Op.getOperand(0);
6360 SDValue Tmp2 = Op.getOperand(1);
6361 SDValue True = Op.getOperand(2);
6362 SDValue False = Op.getOperand(3);
6363 EVT VT = Op.getValueType();
6364 SDValue CC = Op.getOperand(4);
6365 EVT CmpVT = Tmp1.getValueType();
6366 EVT CCVT =
6367 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6368 SDLoc DL(Op);
6369 SDValue Cond =
6370 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6371 return DAG.getSelect(DL, VT, Cond, True, False);
6373 case ISD::SETCC: {
6374 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6375 if (OpVT.isScalarInteger()) {
6376 MVT VT = Op.getSimpleValueType();
6377 SDValue LHS = Op.getOperand(0);
6378 SDValue RHS = Op.getOperand(1);
6379 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6380 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6381 "Unexpected CondCode");
6383 SDLoc DL(Op);
6385 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6386 // convert this to the equivalent of (set(u)ge X, C+1) by using
6387 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6388 // in a register.
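// e.g. (setgt X, 5) becomes (xori (slti X, 6), 1), and
//      (setugt X, 5) becomes (xori (sltiu X, 6), 1).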
6389 if (isa<ConstantSDNode>(RHS)) {
6390 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6391 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6392 // If this is an unsigned compare and the constant is -1, incrementing
6393 // the constant would change behavior. The result should be false.
6394 if (CCVal == ISD::SETUGT && Imm == -1)
6395 return DAG.getConstant(0, DL, VT);
6396 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6397 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6398 SDValue SetCC = DAG.getSetCC(
6399 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6400 return DAG.getLogicalNOT(DL, SetCC, VT);
6404 // Not a constant we could handle, swap the operands and condition code to
6405 // SETLT/SETULT.
6406 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6407 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6410 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6411 (Subtarget.hasVInstructionsF16Minimal() &&
6412 !Subtarget.hasVInstructionsF16()))
6413 return SplitVectorOp(Op, DAG);
6415 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6417 case ISD::ADD:
6418 case ISD::SUB:
6419 case ISD::MUL:
6420 case ISD::MULHS:
6421 case ISD::MULHU:
6422 case ISD::AND:
6423 case ISD::OR:
6424 case ISD::XOR:
6425 case ISD::SDIV:
6426 case ISD::SREM:
6427 case ISD::UDIV:
6428 case ISD::UREM:
6429 case ISD::BSWAP:
6430 case ISD::CTPOP:
6431 return lowerToScalableOp(Op, DAG);
6432 case ISD::SHL:
6433 case ISD::SRA:
6434 case ISD::SRL:
6435 if (Op.getSimpleValueType().isFixedLengthVector())
6436 return lowerToScalableOp(Op, DAG);
6437 // This can be called for an i32 shift amount that needs to be promoted.
6438 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6439 "Unexpected custom legalisation");
6440 return SDValue();
6441 case ISD::FADD:
6442 case ISD::FSUB:
6443 case ISD::FMUL:
6444 case ISD::FDIV:
6445 case ISD::FNEG:
6446 case ISD::FABS:
6447 case ISD::FSQRT:
6448 case ISD::FMA:
6449 case ISD::FMINNUM:
6450 case ISD::FMAXNUM:
6451 if (Op.getValueType() == MVT::nxv32f16 &&
6452 (Subtarget.hasVInstructionsF16Minimal() &&
6453 !Subtarget.hasVInstructionsF16()))
6454 return SplitVectorOp(Op, DAG);
6455 [[fallthrough]];
6456 case ISD::SADDSAT:
6457 case ISD::UADDSAT:
6458 case ISD::SSUBSAT:
6459 case ISD::USUBSAT:
6460 case ISD::SMIN:
6461 case ISD::SMAX:
6462 case ISD::UMIN:
6463 case ISD::UMAX:
6464 return lowerToScalableOp(Op, DAG);
6465 case ISD::ABS:
6466 case ISD::VP_ABS:
6467 return lowerABS(Op, DAG);
6468 case ISD::CTLZ:
6469 case ISD::CTLZ_ZERO_UNDEF:
6470 case ISD::CTTZ:
6471 case ISD::CTTZ_ZERO_UNDEF:
6472 if (Subtarget.hasStdExtZvbb())
6473 return lowerToScalableOp(Op, DAG);
6474 assert(Op.getOpcode() != ISD::CTTZ);
6475 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6476 case ISD::VSELECT:
6477 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6478 case ISD::FCOPYSIGN:
6479 if (Op.getValueType() == MVT::nxv32f16 &&
6480 (Subtarget.hasVInstructionsF16Minimal() &&
6481 !Subtarget.hasVInstructionsF16()))
6482 return SplitVectorOp(Op, DAG);
6483 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6484 case ISD::STRICT_FADD:
6485 case ISD::STRICT_FSUB:
6486 case ISD::STRICT_FMUL:
6487 case ISD::STRICT_FDIV:
6488 case ISD::STRICT_FSQRT:
6489 case ISD::STRICT_FMA:
6490 if (Op.getValueType() == MVT::nxv32f16 &&
6491 (Subtarget.hasVInstructionsF16Minimal() &&
6492 !Subtarget.hasVInstructionsF16()))
6493 return SplitStrictFPVectorOp(Op, DAG);
6494 return lowerToScalableOp(Op, DAG);
6495 case ISD::STRICT_FSETCC:
6496 case ISD::STRICT_FSETCCS:
6497 return lowerVectorStrictFSetcc(Op, DAG);
6498 case ISD::STRICT_FCEIL:
6499 case ISD::STRICT_FRINT:
6500 case ISD::STRICT_FFLOOR:
6501 case ISD::STRICT_FTRUNC:
6502 case ISD::STRICT_FNEARBYINT:
6503 case ISD::STRICT_FROUND:
6504 case ISD::STRICT_FROUNDEVEN:
6505 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6506 case ISD::MGATHER:
6507 case ISD::VP_GATHER:
6508 return lowerMaskedGather(Op, DAG);
6509 case ISD::MSCATTER:
6510 case ISD::VP_SCATTER:
6511 return lowerMaskedScatter(Op, DAG);
6512 case ISD::GET_ROUNDING:
6513 return lowerGET_ROUNDING(Op, DAG);
6514 case ISD::SET_ROUNDING:
6515 return lowerSET_ROUNDING(Op, DAG);
6516 case ISD::EH_DWARF_CFA:
6517 return lowerEH_DWARF_CFA(Op, DAG);
6518 case ISD::VP_SELECT:
6519 case ISD::VP_MERGE:
6520 case ISD::VP_ADD:
6521 case ISD::VP_SUB:
6522 case ISD::VP_MUL:
6523 case ISD::VP_SDIV:
6524 case ISD::VP_UDIV:
6525 case ISD::VP_SREM:
6526 case ISD::VP_UREM:
6527 return lowerVPOp(Op, DAG);
6528 case ISD::VP_AND:
6529 case ISD::VP_OR:
6530 case ISD::VP_XOR:
6531 return lowerLogicVPOp(Op, DAG);
6532 case ISD::VP_FADD:
6533 case ISD::VP_FSUB:
6534 case ISD::VP_FMUL:
6535 case ISD::VP_FDIV:
6536 case ISD::VP_FNEG:
6537 case ISD::VP_FABS:
6538 case ISD::VP_SQRT:
6539 case ISD::VP_FMA:
6540 case ISD::VP_FMINNUM:
6541 case ISD::VP_FMAXNUM:
6542 case ISD::VP_FCOPYSIGN:
6543 if (Op.getValueType() == MVT::nxv32f16 &&
6544 (Subtarget.hasVInstructionsF16Minimal() &&
6545 !Subtarget.hasVInstructionsF16()))
6546 return SplitVPOp(Op, DAG);
6547 [[fallthrough]];
6548 case ISD::VP_ASHR:
6549 case ISD::VP_LSHR:
6550 case ISD::VP_SHL:
6551 return lowerVPOp(Op, DAG);
6552 case ISD::VP_IS_FPCLASS:
6553 return LowerIS_FPCLASS(Op, DAG);
6554 case ISD::VP_SIGN_EXTEND:
6555 case ISD::VP_ZERO_EXTEND:
6556 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6557 return lowerVPExtMaskOp(Op, DAG);
6558 return lowerVPOp(Op, DAG);
6559 case ISD::VP_TRUNCATE:
6560 return lowerVectorTruncLike(Op, DAG);
6561 case ISD::VP_FP_EXTEND:
6562 case ISD::VP_FP_ROUND:
6563 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6564 case ISD::VP_SINT_TO_FP:
6565 case ISD::VP_UINT_TO_FP:
6566 if (Op.getValueType().isVector() &&
6567 Op.getValueType().getScalarType() == MVT::f16 &&
6568 (Subtarget.hasVInstructionsF16Minimal() &&
6569 !Subtarget.hasVInstructionsF16())) {
6570 if (Op.getValueType() == MVT::nxv32f16)
6571 return SplitVPOp(Op, DAG);
6572 // int -> f32
6573 SDLoc DL(Op);
6574 MVT NVT =
6575 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6576 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6577 // f32 -> f16
6578 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6579 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6581 [[fallthrough]];
6582 case ISD::VP_FP_TO_SINT:
6583 case ISD::VP_FP_TO_UINT:
6584 if (SDValue Op1 = Op.getOperand(0);
6585 Op1.getValueType().isVector() &&
6586 Op1.getValueType().getScalarType() == MVT::f16 &&
6587 (Subtarget.hasVInstructionsF16Minimal() &&
6588 !Subtarget.hasVInstructionsF16())) {
6589 if (Op1.getValueType() == MVT::nxv32f16)
6590 return SplitVPOp(Op, DAG);
6591 // f16 -> f32
6592 SDLoc DL(Op);
6593 MVT NVT = MVT::getVectorVT(MVT::f32,
6594 Op1.getValueType().getVectorElementCount());
6595 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6596 // f32 -> int
6597 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6598 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6600 return lowerVPFPIntConvOp(Op, DAG);
6601 case ISD::VP_SETCC:
6602 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6603 (Subtarget.hasVInstructionsF16Minimal() &&
6604 !Subtarget.hasVInstructionsF16()))
6605 return SplitVPOp(Op, DAG);
6606 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6607 return lowerVPSetCCMaskOp(Op, DAG);
6608 [[fallthrough]];
6609 case ISD::VP_SMIN:
6610 case ISD::VP_SMAX:
6611 case ISD::VP_UMIN:
6612 case ISD::VP_UMAX:
6613 case ISD::VP_BITREVERSE:
6614 case ISD::VP_BSWAP:
6615 return lowerVPOp(Op, DAG);
6616 case ISD::VP_CTLZ:
6617 case ISD::VP_CTLZ_ZERO_UNDEF:
6618 if (Subtarget.hasStdExtZvbb())
6619 return lowerVPOp(Op, DAG);
6620 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6621 case ISD::VP_CTTZ:
6622 case ISD::VP_CTTZ_ZERO_UNDEF:
6623 if (Subtarget.hasStdExtZvbb())
6624 return lowerVPOp(Op, DAG);
6625 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6626 case ISD::VP_CTPOP:
6627 return lowerVPOp(Op, DAG);
6628 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
6629 return lowerVPStridedLoad(Op, DAG);
6630 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
6631 return lowerVPStridedStore(Op, DAG);
6632 case ISD::VP_FCEIL:
6633 case ISD::VP_FFLOOR:
6634 case ISD::VP_FRINT:
6635 case ISD::VP_FNEARBYINT:
6636 case ISD::VP_FROUND:
6637 case ISD::VP_FROUNDEVEN:
6638 case ISD::VP_FROUNDTOZERO:
6639 if (Op.getValueType() == MVT::nxv32f16 &&
6640 (Subtarget.hasVInstructionsF16Minimal() &&
6641 !Subtarget.hasVInstructionsF16()))
6642 return SplitVPOp(Op, DAG);
6643 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6644 case ISD::EXPERIMENTAL_VP_SPLICE:
6645 return lowerVPSpliceExperimental(Op, DAG);
6646 case ISD::EXPERIMENTAL_VP_REVERSE:
6647 return lowerVPReverseExperimental(Op, DAG);
6651 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
6652 SelectionDAG &DAG, unsigned Flags) {
6653 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
6656 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
6657 SelectionDAG &DAG, unsigned Flags) {
6658 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
6659 Flags);
6662 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
6663 SelectionDAG &DAG, unsigned Flags) {
6664 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6665 N->getOffset(), Flags);
6668 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
6669 SelectionDAG &DAG, unsigned Flags) {
6670 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
6673 template <class NodeTy>
6674 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6675 bool IsLocal, bool IsExternWeak) const {
6676 SDLoc DL(N);
6677 EVT Ty = getPointerTy(DAG.getDataLayout());
6679 // When HWASAN is used and tagging of global variables is enabled,
6680 // they should be accessed via the GOT, since the tagged address of a global
6681 // is incompatible with existing code models. This also applies to non-pic
6682 // mode.
6683 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
6684 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6685 if (IsLocal && !Subtarget.allowTaggedGlobals())
6686 // Use PC-relative addressing to access the symbol. This generates the
6687 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
6688 // %pcrel_lo(auipc)).
6689 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6691 // Use PC-relative addressing to access the GOT for this symbol, then load
6692 // the address from the GOT. This generates the pattern (PseudoLGA sym),
6693 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6694 SDValue Load =
6695 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6696 MachineFunction &MF = DAG.getMachineFunction();
6697 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6698 MachinePointerInfo::getGOT(MF),
6699 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6700 MachineMemOperand::MOInvariant,
6701 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6702 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6703 return Load;
6706 switch (getTargetMachine().getCodeModel()) {
6707 default:
6708 report_fatal_error("Unsupported code model for lowering");
6709 case CodeModel::Small: {
6710 // Generate a sequence for accessing addresses within the first 2 GiB of
6711 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
6712 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
6713 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
6714 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6715 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
6717 case CodeModel::Medium: {
6718 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6719 if (IsExternWeak) {
6720 // An extern weak symbol may be undefined, i.e. have value 0, which may
6721 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
6722 // symbol. This generates the pattern (PseudoLGA sym), which expands to
6723 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6724 SDValue Load =
6725 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6726 MachineFunction &MF = DAG.getMachineFunction();
6727 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6728 MachinePointerInfo::getGOT(MF),
6729 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6730 MachineMemOperand::MOInvariant,
6731 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6732 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6733 return Load;
6736 // Generate a sequence for accessing addresses within any 2GiB range within
6737 // the address space. This generates the pattern (PseudoLLA sym), which
6738 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
6739 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6744 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
6745 SelectionDAG &DAG) const {
6746 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6747 assert(N->getOffset() == 0 && "unexpected offset in global node");
6748 const GlobalValue *GV = N->getGlobal();
6749 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
6752 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
6753 SelectionDAG &DAG) const {
6754 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
6756 return getAddr(N, DAG);
6759 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
6760 SelectionDAG &DAG) const {
6761 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
6763 return getAddr(N, DAG);
6766 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
6767 SelectionDAG &DAG) const {
6768 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
6770 return getAddr(N, DAG);
6773 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
6774 SelectionDAG &DAG,
6775 bool UseGOT) const {
6776 SDLoc DL(N);
6777 EVT Ty = getPointerTy(DAG.getDataLayout());
6778 const GlobalValue *GV = N->getGlobal();
6779 MVT XLenVT = Subtarget.getXLenVT();
6781 if (UseGOT) {
6782 // Use PC-relative addressing to access the GOT for this TLS symbol, then
6783 // load the address from the GOT and add the thread pointer. This generates
6784 // the pattern (PseudoLA_TLS_IE sym), which expands to
6785 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
6786 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6787 SDValue Load =
6788 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
6789 MachineFunction &MF = DAG.getMachineFunction();
6790 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6791 MachinePointerInfo::getGOT(MF),
6792 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6793 MachineMemOperand::MOInvariant,
6794 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6795 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6797 // Add the thread pointer.
6798 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6799 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
6802 // Generate a sequence for accessing the address relative to the thread
6803 // pointer, with the appropriate adjustment for the thread pointer offset.
6804 // This generates the pattern
6805 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
6806 SDValue AddrHi =
6807 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
6808 SDValue AddrAdd =
6809 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
6810 SDValue AddrLo =
6811 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
6813 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6814 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6815 SDValue MNAdd =
6816 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
6817 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
6820 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
6821 SelectionDAG &DAG) const {
6822 SDLoc DL(N);
6823 EVT Ty = getPointerTy(DAG.getDataLayout());
6824 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
6825 const GlobalValue *GV = N->getGlobal();
6827 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6828 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
6829 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
6830 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6831 SDValue Load =
6832 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
6834 // Prepare argument list to generate call.
6835 ArgListTy Args;
6836 ArgListEntry Entry;
6837 Entry.Node = Load;
6838 Entry.Ty = CallTy;
6839 Args.push_back(Entry);
6841 // Setup call to __tls_get_addr.
6842 TargetLowering::CallLoweringInfo CLI(DAG);
6843 CLI.setDebugLoc(DL)
6844 .setChain(DAG.getEntryNode())
6845 .setLibCallee(CallingConv::C, CallTy,
6846 DAG.getExternalSymbol("__tls_get_addr", Ty),
6847 std::move(Args));
6849 return LowerCallTo(CLI).first;
6852 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
6853 SelectionDAG &DAG) const {
6854 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6855 assert(N->getOffset() == 0 && "unexpected offset in global node");
6857 if (DAG.getTarget().useEmulatedTLS())
6858 return LowerToTLSEmulatedModel(N, DAG);
6860 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
6862 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
6863 CallingConv::GHC)
6864 report_fatal_error("In GHC calling convention TLS is not supported");
6866 SDValue Addr;
6867 switch (Model) {
6868 case TLSModel::LocalExec:
6869 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
6870 break;
6871 case TLSModel::InitialExec:
6872 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
6873 break;
6874 case TLSModel::LocalDynamic:
6875 case TLSModel::GeneralDynamic:
6876 Addr = getDynamicTLSAddr(N, DAG);
6877 break;
6880 return Addr;
6883 // Return true if Val is equal to (setcc LHS, RHS, CC).
6884 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
6885 // Otherwise, return std::nullopt.
6886 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
6887 ISD::CondCode CC, SDValue Val) {
6888 assert(Val->getOpcode() == ISD::SETCC);
6889 SDValue LHS2 = Val.getOperand(0);
6890 SDValue RHS2 = Val.getOperand(1);
6891 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
6893 if (LHS == LHS2 && RHS == RHS2) {
6894 if (CC == CC2)
6895 return true;
6896 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
6897 return false;
6898 } else if (LHS == RHS2 && RHS == LHS2) {
6899 CC2 = ISD::getSetCCSwappedOperands(CC2);
6900 if (CC == CC2)
6901 return true;
6902 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
6903 return false;
6906 return std::nullopt;
6909 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
6910 const RISCVSubtarget &Subtarget) {
6911 SDValue CondV = N->getOperand(0);
6912 SDValue TrueV = N->getOperand(1);
6913 SDValue FalseV = N->getOperand(2);
6914 MVT VT = N->getSimpleValueType(0);
6915 SDLoc DL(N);
6917 if (!Subtarget.hasShortForwardBranchOpt()) {
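// The condition is a 0/1 value (RISC-V uses 0/1 booleans), so -c is either 0
// or all-ones and (c-1) is either all-ones or 0, letting each select below
// become a single and/or.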
6918 // (select c, -1, y) -> -c | y
6919 if (isAllOnesConstant(TrueV)) {
6920 SDValue Neg = DAG.getNegative(CondV, DL, VT);
6921 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
6923 // (select c, y, -1) -> (c-1) | y
6924 if (isAllOnesConstant(FalseV)) {
6925 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
6926 DAG.getAllOnesConstant(DL, VT));
6927 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
6930 // (select c, 0, y) -> (c-1) & y
6931 if (isNullConstant(TrueV)) {
6932 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
6933 DAG.getAllOnesConstant(DL, VT));
6934 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
6936 // (select c, y, 0) -> -c & y
6937 if (isNullConstant(FalseV)) {
6938 SDValue Neg = DAG.getNegative(CondV, DL, VT);
6939 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
6943 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
6944 // when both truev and falsev are also setcc.
6945 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
6946 FalseV.getOpcode() == ISD::SETCC) {
6947 SDValue LHS = CondV.getOperand(0);
6948 SDValue RHS = CondV.getOperand(1);
6949 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6951 // (select x, x, y) -> x | y
6952 // (select !x, x, y) -> x & y
6953 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
6954 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
6955 FalseV);
6957 // (select x, y, x) -> x & y
6958 // (select !x, y, x) -> x | y
6959 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
6960 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
6961 FalseV);
6965 return SDValue();
6968 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
6969 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
6970 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
6971 // being `0` or `-1`. In such cases we can replace `select` with `and`.
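// e.g. (add (select cond, x, -1), 1) -> (select cond, (add x, 1), 0).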
6972 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
6973 // than `c0`?
6974 static SDValue
6975 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
6976 const RISCVSubtarget &Subtarget) {
6977 if (Subtarget.hasShortForwardBranchOpt())
6978 return SDValue();
6980 unsigned SelOpNo = 0;
6981 SDValue Sel = BO->getOperand(0);
6982 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
6983 SelOpNo = 1;
6984 Sel = BO->getOperand(1);
6987 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
6988 return SDValue();
6990 unsigned ConstSelOpNo = 1;
6991 unsigned OtherSelOpNo = 2;
6992 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
6993 ConstSelOpNo = 2;
6994 OtherSelOpNo = 1;
6996 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
6997 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
6998 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
6999 return SDValue();
7001 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7002 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7003 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7004 return SDValue();
7006 SDLoc DL(Sel);
7007 EVT VT = BO->getValueType(0);
7009 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7010 if (SelOpNo == 1)
7011 std::swap(NewConstOps[0], NewConstOps[1]);
7013 SDValue NewConstOp =
7014 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7015 if (!NewConstOp)
7016 return SDValue();
7018 const APInt &NewConstAPInt =
7019 cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
7020 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7021 return SDValue();
7023 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7024 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7025 if (SelOpNo == 1)
7026 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7027 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7029 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7030 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7031 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7034 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7035 SDValue CondV = Op.getOperand(0);
7036 SDValue TrueV = Op.getOperand(1);
7037 SDValue FalseV = Op.getOperand(2);
7038 SDLoc DL(Op);
7039 MVT VT = Op.getSimpleValueType();
7040 MVT XLenVT = Subtarget.getXLenVT();
7042 // Lower vector SELECTs to VSELECTs by splatting the condition.
7043 if (VT.isVector()) {
7044 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7045 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7046 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7049 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7050 // nodes to implement the SELECT. Performing the lowering here allows for
7051 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7052 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7053 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7054 VT.isScalarInteger()) {
7055 // (select c, t, 0) -> (czero_eqz t, c)
7056 if (isNullConstant(FalseV))
7057 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7058 // (select c, 0, f) -> (czero_nez f, c)
7059 if (isNullConstant(TrueV))
7060 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7062 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7063 if (TrueV.getOpcode() == ISD::AND &&
7064 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7065 return DAG.getNode(
7066 ISD::OR, DL, VT, TrueV,
7067 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7068 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7069 if (FalseV.getOpcode() == ISD::AND &&
7070 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7071 return DAG.getNode(
7072 ISD::OR, DL, VT, FalseV,
7073 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7075 // Try some other optimizations before falling back to generic lowering.
7076 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7077 return V;
7079 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7080 // Unless we have the short forward branch optimization.
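// (czero_eqz t, c) is t when c is nonzero and 0 otherwise; (czero_nez f, c)
// is 0 when c is nonzero and f otherwise, so exactly one side of the or
// survives.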
7081 if (!Subtarget.hasShortForwardBranchOpt())
7082 return DAG.getNode(
7083 ISD::OR, DL, VT,
7084 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7085 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7088 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7089 return V;
7091 if (Op.hasOneUse()) {
7092 unsigned UseOpc = Op->use_begin()->getOpcode();
7093 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7094 SDNode *BinOp = *Op->use_begin();
7095 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7096 DAG, Subtarget)) {
7097 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7098 return lowerSELECT(NewSel, DAG);
7103 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7104 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7105 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7106 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7107 if (FPTV && FPFV) {
7108 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7109 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7110 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7111 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7112 DAG.getConstant(1, DL, XLenVT));
7113 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7117 // If the condition is not an integer SETCC which operates on XLenVT, we need
7118 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7119 // (select condv, truev, falsev)
7120 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7121 if (CondV.getOpcode() != ISD::SETCC ||
7122 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7123 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7124 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7126 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7128 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7131 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7132 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7133 // advantage of the integer compare+branch instructions. i.e.:
7134 // (select (setcc lhs, rhs, cc), truev, falsev)
7135 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7136 SDValue LHS = CondV.getOperand(0);
7137 SDValue RHS = CondV.getOperand(1);
7138 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7140 // Special case for a select of 2 constants that have a difference of 1.
7141 // Normally this is done by DAGCombine, but if the select is introduced by
7142 // type legalization or op legalization, we miss it. Restricting to SETLT
7143 // case for now because that is what signed saturating add/sub need.
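// e.g. (select (setlt a, b), 4, 3) -> (add (setlt a, b), 3).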
7144 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7145 // but we would probably want to swap the true/false values if the condition
7146 // is SETGE/SETLE to avoid an XORI.
7147 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7148 CCVal == ISD::SETLT) {
7149 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
7150 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
7151 if (TrueVal - 1 == FalseVal)
7152 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7153 if (TrueVal + 1 == FalseVal)
7154 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7157 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7158 // 1 < x ? x : 1 -> 0 < x ? x : 1
7159 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7160 RHS == TrueV && LHS == FalseV) {
7161 LHS = DAG.getConstant(0, DL, VT);
7162 // 0 <u x is the same as x != 0.
7163 if (CCVal == ISD::SETULT) {
7164 std::swap(LHS, RHS);
7165 CCVal = ISD::SETNE;
7169 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7170 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7171 RHS == FalseV) {
7172 RHS = DAG.getConstant(0, DL, VT);
7175 SDValue TargetCC = DAG.getCondCode(CCVal);
7177 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7178 // (select (setcc lhs, rhs, CC), constant, falsev)
7179 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7180 std::swap(TrueV, FalseV);
7181 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7184 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7185 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7188 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7189 SDValue CondV = Op.getOperand(1);
7190 SDLoc DL(Op);
7191 MVT XLenVT = Subtarget.getXLenVT();
7193 if (CondV.getOpcode() == ISD::SETCC &&
7194 CondV.getOperand(0).getValueType() == XLenVT) {
7195 SDValue LHS = CondV.getOperand(0);
7196 SDValue RHS = CondV.getOperand(1);
7197 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7199 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7201 SDValue TargetCC = DAG.getCondCode(CCVal);
7202 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7203 LHS, RHS, TargetCC, Op.getOperand(2));
7206 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7207 CondV, DAG.getConstant(0, DL, XLenVT),
7208 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7211 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7212 MachineFunction &MF = DAG.getMachineFunction();
7213 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7215 SDLoc DL(Op);
7216 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7217 getPointerTy(MF.getDataLayout()));
7219 // vastart just stores the address of the VarArgsFrameIndex slot into the
7220 // memory location argument.
7221 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7222 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7223 MachinePointerInfo(SV));
7226 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7227 SelectionDAG &DAG) const {
7228 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7229 MachineFunction &MF = DAG.getMachineFunction();
7230 MachineFrameInfo &MFI = MF.getFrameInfo();
7231 MFI.setFrameAddressIsTaken(true);
7232 Register FrameReg = RI.getFrameRegister(MF);
7233 int XLenInBytes = Subtarget.getXLen() / 8;
7235 EVT VT = Op.getValueType();
7236 SDLoc DL(Op);
7237 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7238 unsigned Depth = Op.getConstantOperandVal(0);
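// Each step loads the caller's frame pointer, which is assumed to be spilled
// at offset -2*XLenInBytes from the current frame pointer (just below the
// saved return address).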
7239 while (Depth--) {
7240 int Offset = -(XLenInBytes * 2);
7241 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7242 DAG.getIntPtrConstant(Offset, DL));
7243 FrameAddr =
7244 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7246 return FrameAddr;
7249 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7250 SelectionDAG &DAG) const {
7251 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7252 MachineFunction &MF = DAG.getMachineFunction();
7253 MachineFrameInfo &MFI = MF.getFrameInfo();
7254 MFI.setReturnAddressIsTaken(true);
7255 MVT XLenVT = Subtarget.getXLenVT();
7256 int XLenInBytes = Subtarget.getXLen() / 8;
7258 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7259 return SDValue();
7261 EVT VT = Op.getValueType();
7262 SDLoc DL(Op);
7263 unsigned Depth = Op.getConstantOperandVal(0);
7264 if (Depth) {
7265 int Off = -XLenInBytes;
7266 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7267 SDValue Offset = DAG.getConstant(Off, DL, VT);
7268 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7269 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7270 MachinePointerInfo());
7273 // Return the value of the return address register, marking it an implicit
7274 // live-in.
7275 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7276 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7279 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7280 SelectionDAG &DAG) const {
7281 SDLoc DL(Op);
7282 SDValue Lo = Op.getOperand(0);
7283 SDValue Hi = Op.getOperand(1);
7284 SDValue Shamt = Op.getOperand(2);
7285 EVT VT = Lo.getValueType();
7287 // if Shamt-XLEN < 0: // Shamt < XLEN
7288 // Lo = Lo << Shamt
7289 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7290 // else:
7291 // Lo = 0
7292 // Hi = Lo << (Shamt-XLEN)
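// Lo's contribution to Hi is computed as (Lo >>u 1) >>u (XLEN-1 - Shamt)
// rather than Lo >>u (XLEN - Shamt) so the shift amount stays in
// [0, XLEN-1] even when Shamt is 0.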
7294 SDValue Zero = DAG.getConstant(0, DL, VT);
7295 SDValue One = DAG.getConstant(1, DL, VT);
7296 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7297 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7298 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7299 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7301 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7302 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7303 SDValue ShiftRightLo =
7304 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7305 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7306 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7307 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7309 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7311 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7312 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7314 SDValue Parts[2] = {Lo, Hi};
7315 return DAG.getMergeValues(Parts, DL);
7318 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7319 bool IsSRA) const {
7320 SDLoc DL(Op);
7321 SDValue Lo = Op.getOperand(0);
7322 SDValue Hi = Op.getOperand(1);
7323 SDValue Shamt = Op.getOperand(2);
7324 EVT VT = Lo.getValueType();
7326 // SRA expansion:
7327 // if Shamt-XLEN < 0: // Shamt < XLEN
7328 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7329 // Hi = Hi >>s Shamt
7330 // else:
7331 // Lo = Hi >>s (Shamt-XLEN);
7332 // Hi = Hi >>s (XLEN-1)
7334 // SRL expansion:
7335 // if Shamt-XLEN < 0: // Shamt < XLEN
7336 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7337 // Hi = Hi >>u Shamt
7338 // else:
7339 // Lo = Hi >>u (Shamt-XLEN);
7340 // Hi = 0;
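// As in the shift-left expansion, (Hi << 1) << (XLEN-1 - Shamt) keeps the
// shift amount within [0, XLEN-1] when Shamt is 0.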
7342 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7344 SDValue Zero = DAG.getConstant(0, DL, VT);
7345 SDValue One = DAG.getConstant(1, DL, VT);
7346 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7347 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7348 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7349 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7351 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7352 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7353 SDValue ShiftLeftHi =
7354 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7355 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7356 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7357 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7358 SDValue HiFalse =
7359 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7361 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7363 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7364 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7366 SDValue Parts[2] = {Lo, Hi};
7367 return DAG.getMergeValues(Parts, DL);
7370 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
7371 // legal equivalently-sized i8 type, so we can use that as a go-between.
7372 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7373 SelectionDAG &DAG) const {
7374 SDLoc DL(Op);
7375 MVT VT = Op.getSimpleValueType();
7376 SDValue SplatVal = Op.getOperand(0);
7377 // All-zeros or all-ones splats are handled specially.
7378 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7379 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7380 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7382 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7383 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7384 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7386 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7387 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7388 DAG.getConstant(1, DL, SplatVal.getValueType()));
7389 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7390 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7391 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7394 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7395 // illegal (currently only vXi64 RV32).
7396 // FIXME: We could also catch non-constant sign-extended i32 values and lower
7397 // them to VMV_V_X_VL.
7398 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7399 SelectionDAG &DAG) const {
7400 SDLoc DL(Op);
7401 MVT VecVT = Op.getSimpleValueType();
7402 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7403 "Unexpected SPLAT_VECTOR_PARTS lowering");
7405 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7406 SDValue Lo = Op.getOperand(0);
7407 SDValue Hi = Op.getOperand(1);
7409 MVT ContainerVT = VecVT;
7410 if (VecVT.isFixedLengthVector())
7411 ContainerVT = getContainerForFixedLengthVector(VecVT);
7413 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7415 SDValue Res =
7416 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7418 if (VecVT.isFixedLengthVector())
7419 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7421 return Res;
7424 // Custom-lower extensions from mask vectors by using a vselect either with 1
7425 // for zero/any-extension or -1 for sign-extension:
7426 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7427 // Note that any-extension is lowered identically to zero-extension.
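// For example, (nxv4i32 = zext nxv4i1 %m) becomes
// (vselect %m, splat(1), splat(0)), which typically selects to a
// vmv.v.i of 0 followed by a vmerge.vim of 1 under mask %m.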
7428 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7429 int64_t ExtTrueVal) const {
7430 SDLoc DL(Op);
7431 MVT VecVT = Op.getSimpleValueType();
7432 SDValue Src = Op.getOperand(0);
7433 // Only custom-lower extensions from mask types
7434 assert(Src.getValueType().isVector() &&
7435 Src.getValueType().getVectorElementType() == MVT::i1);
7437 if (VecVT.isScalableVector()) {
7438 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7439 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7440 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7443 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7444 MVT I1ContainerVT =
7445 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7447 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7449 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7451 MVT XLenVT = Subtarget.getXLenVT();
7452 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7453 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7455 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7456 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7457 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7458 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7459 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
7460 SplatTrueVal, SplatZero, VL);
7462 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7465 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7466 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7467 MVT ExtVT = Op.getSimpleValueType();
7468 // Only custom-lower extensions from fixed-length vector types.
7469 if (!ExtVT.isFixedLengthVector())
7470 return Op;
7471 MVT VT = Op.getOperand(0).getSimpleValueType();
7472 // Grab the canonical container type for the extended type. Infer the smaller
7473 // type from that to ensure the same number of vector elements, as we know
7474 // the LMUL will be sufficient to hold the smaller type.
7475 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7476 // Get the extended container type manually to ensure the same number of
7477 // vector elements between source and dest.
7478 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7479 ContainerExtVT.getVectorElementCount());
7481 SDValue Op1 =
7482 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7484 SDLoc DL(Op);
7485 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7487 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7489 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7492 // Custom-lower truncations from vectors to mask vectors by using a mask and a
7493 // setcc operation:
7494 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7495 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7496 SelectionDAG &DAG) const {
7497 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7498 SDLoc DL(Op);
7499 EVT MaskVT = Op.getValueType();
7500 // Only expect to custom-lower truncations to mask types
7501 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7502 "Unexpected type for vector mask lowering");
7503 SDValue Src = Op.getOperand(0);
7504 MVT VecVT = Src.getSimpleValueType();
7505 SDValue Mask, VL;
7506 if (IsVPTrunc) {
7507 Mask = Op.getOperand(1);
7508 VL = Op.getOperand(2);
7510 // If this is a fixed vector, we need to convert it to a scalable vector.
7511 MVT ContainerVT = VecVT;
7513 if (VecVT.isFixedLengthVector()) {
7514 ContainerVT = getContainerForFixedLengthVector(VecVT);
7515 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7516 if (IsVPTrunc) {
7517 MVT MaskContainerVT =
7518 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7519 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7523 if (!IsVPTrunc) {
7524 std::tie(Mask, VL) =
7525 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7528 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7529 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7531 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7532 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7533 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7534 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7536 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7537 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7538 DAG.getUNDEF(ContainerVT), Mask, VL);
7539 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7540 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7541 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7542 if (MaskVT.isFixedLengthVector())
7543 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7544 return Trunc;
7547 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7548 SelectionDAG &DAG) const {
7549 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7550 SDLoc DL(Op);
7552 MVT VT = Op.getSimpleValueType();
7553 // Only custom-lower vector truncates
7554 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7556 // Truncates to mask types are handled differently
7557 if (VT.getVectorElementType() == MVT::i1)
7558 return lowerVectorMaskTruncLike(Op, DAG);
7560 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7561 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7562 // truncate by one power of two at a time.
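// For example, truncating nxv2i64 to nxv2i8 emits three
// TRUNCATE_VECTOR_VL nodes (i64->i32, i32->i16, i16->i8), which
// typically select to a chain of three vnsrl instructions.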
7563 MVT DstEltVT = VT.getVectorElementType();
7565 SDValue Src = Op.getOperand(0);
7566 MVT SrcVT = Src.getSimpleValueType();
7567 MVT SrcEltVT = SrcVT.getVectorElementType();
7569 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7570 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7571 "Unexpected vector truncate lowering");
7573 MVT ContainerVT = SrcVT;
7574 SDValue Mask, VL;
7575 if (IsVPTrunc) {
7576 Mask = Op.getOperand(1);
7577 VL = Op.getOperand(2);
7579 if (SrcVT.isFixedLengthVector()) {
7580 ContainerVT = getContainerForFixedLengthVector(SrcVT);
7581 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7582 if (IsVPTrunc) {
7583 MVT MaskVT = getMaskTypeFor(ContainerVT);
7584 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7588 SDValue Result = Src;
7589 if (!IsVPTrunc) {
7590 std::tie(Mask, VL) =
7591 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7594 LLVMContext &Context = *DAG.getContext();
7595 const ElementCount Count = ContainerVT.getVectorElementCount();
7596 do {
7597 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
7598 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
7599 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
7600 Mask, VL);
7601 } while (SrcEltVT != DstEltVT);
7603 if (SrcVT.isFixedLengthVector())
7604 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
7606 return Result;
7609 SDValue
7610 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
7611 SelectionDAG &DAG) const {
7612 SDLoc DL(Op);
7613 SDValue Chain = Op.getOperand(0);
7614 SDValue Src = Op.getOperand(1);
7615 MVT VT = Op.getSimpleValueType();
7616 MVT SrcVT = Src.getSimpleValueType();
7617 MVT ContainerVT = VT;
7618 if (VT.isFixedLengthVector()) {
7619 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7620 ContainerVT =
7621 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7622 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7625 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7627   // RVV can only widen/truncate fp to types double/half the size of the source.
7628 if ((VT.getVectorElementType() == MVT::f64 &&
7629 SrcVT.getVectorElementType() == MVT::f16) ||
7630 (VT.getVectorElementType() == MVT::f16 &&
7631 SrcVT.getVectorElementType() == MVT::f64)) {
7632     // To avoid double rounding, the intermediate rounding should be round-to-odd.
7633 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7634 ? RISCVISD::STRICT_FP_EXTEND_VL
7635 : RISCVISD::STRICT_VFNCVT_ROD_VL;
7636 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7637 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
7638 Chain, Src, Mask, VL);
7639 Chain = Src.getValue(1);
7642 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7643 ? RISCVISD::STRICT_FP_EXTEND_VL
7644 : RISCVISD::STRICT_FP_ROUND_VL;
7645 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7646 Chain, Src, Mask, VL);
7647 if (VT.isFixedLengthVector()) {
7648 // StrictFP operations have two result values. Their lowered result should
7649     // have the same number of result values.
7650 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
7651 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
7653 return Res;
7656 SDValue
7657 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
7658 SelectionDAG &DAG) const {
7659 bool IsVP =
7660 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
7661 bool IsExtend =
7662 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
7663   // RVV can only truncate fp to types half the size of the source. We
7664 // custom-lower f64->f16 rounds via RVV's round-to-odd float
7665 // conversion instruction.
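// For example, an FP_ROUND from nxv2f64 to nxv2f16 is emitted as a
// round-to-odd narrowing conversion to nxv2f32 (vfncvt.rod.f.f.w)
// followed by an ordinary narrowing conversion to f16 (vfncvt.f.f.w).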
7666 SDLoc DL(Op);
7667 MVT VT = Op.getSimpleValueType();
7669 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7671 SDValue Src = Op.getOperand(0);
7672 MVT SrcVT = Src.getSimpleValueType();
7674 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
7675 SrcVT.getVectorElementType() != MVT::f16);
7676 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
7677 SrcVT.getVectorElementType() != MVT::f64);
7679 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
7681 // Prepare any fixed-length vector operands.
7682 MVT ContainerVT = VT;
7683 SDValue Mask, VL;
7684 if (IsVP) {
7685 Mask = Op.getOperand(1);
7686 VL = Op.getOperand(2);
7688 if (VT.isFixedLengthVector()) {
7689 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7690 ContainerVT =
7691 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7692 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7693 if (IsVP) {
7694 MVT MaskVT = getMaskTypeFor(ContainerVT);
7695 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7699 if (!IsVP)
7700 std::tie(Mask, VL) =
7701 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7703 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
7705 if (IsDirectConv) {
7706 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
7707 if (VT.isFixedLengthVector())
7708 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
7709 return Src;
7712 unsigned InterConvOpc =
7713 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
7715 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7716 SDValue IntermediateConv =
7717 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
7718 SDValue Result =
7719 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
7720 if (VT.isFixedLengthVector())
7721 return convertFromScalableVector(VT, Result, DAG, Subtarget);
7722 return Result;
7725 // Given a scalable vector type and an index into it, returns the type for the
7726 // smallest subvector that the index fits in. This can be used to reduce LMUL
7727 // for operations like vslidedown.
7729 // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
7730 static std::optional<MVT>
7731 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
7732 const RISCVSubtarget &Subtarget) {
7733 assert(VecVT.isScalableVector());
7734 const unsigned EltSize = VecVT.getScalarSizeInBits();
7735 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
7736 const unsigned MinVLMAX = VectorBitsMin / EltSize;
7737 MVT SmallerVT;
7738 if (MaxIdx < MinVLMAX)
7739 SmallerVT = getLMUL1VT(VecVT);
7740 else if (MaxIdx < MinVLMAX * 2)
7741 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
7742 else if (MaxIdx < MinVLMAX * 4)
7743 SmallerVT = getLMUL1VT(VecVT)
7744 .getDoubleNumVectorElementsVT()
7745 .getDoubleNumVectorElementsVT();
7746 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
7747 return std::nullopt;
7748 return SmallerVT;
7751 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
7752 // first position of a vector, and that vector is slid up to the insert index.
7753 // By limiting the active vector length to index+1 and merging with the
7754 // original vector (with an undisturbed tail policy for elements >= VL), we
7755 // achieve the desired result of leaving all elements untouched except the one
7756 // at VL-1, which is replaced with the desired value.
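// For example, inserting into element 2 of a v4i32 places the scalar at
// element 0 of a temporary (vmv.s.x), sets VL to 3, and slides that
// temporary up by 2 with a tail-undisturbed policy, so elements 3 and
// above keep their original values.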
7757 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
7758 SelectionDAG &DAG) const {
7759 SDLoc DL(Op);
7760 MVT VecVT = Op.getSimpleValueType();
7761 SDValue Vec = Op.getOperand(0);
7762 SDValue Val = Op.getOperand(1);
7763 SDValue Idx = Op.getOperand(2);
7765 if (VecVT.getVectorElementType() == MVT::i1) {
7766 // FIXME: For now we just promote to an i8 vector and insert into that,
7767 // but this is probably not optimal.
7768 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7769 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7770 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
7771 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
7774 MVT ContainerVT = VecVT;
7775 // If the operand is a fixed-length vector, convert to a scalable one.
7776 if (VecVT.isFixedLengthVector()) {
7777 ContainerVT = getContainerForFixedLengthVector(VecVT);
7778 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7781 // If we know the index we're going to insert at, we can shrink Vec so that
7782 // we're performing the scalar inserts and slideup on a smaller LMUL.
7783 MVT OrigContainerVT = ContainerVT;
7784 SDValue OrigVec = Vec;
7785 SDValue AlignedIdx;
7786 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
7787 const unsigned OrigIdx = IdxC->getZExtValue();
7788 // Do we know an upper bound on LMUL?
7789 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
7790 DL, DAG, Subtarget)) {
7791 ContainerVT = *ShrunkVT;
7792 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
7795 // If we're compiling for an exact VLEN value, we can always perform
7796 // the insert in m1 as we can determine the register corresponding to
7797 // the index in the register group.
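// For illustration, assuming VLEN is exactly 128: an insert at index 5
// of a v8i64 (an m4 register group) has ElemsPerVReg=2, so the insert
// is performed in m1 on the third register of the group (subvector
// index 2) at element 5%2 = 1 within it.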
7798 const unsigned MinVLen = Subtarget.getRealMinVLen();
7799 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
7800 const MVT M1VT = getLMUL1VT(ContainerVT);
7801 if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) {
7802 EVT ElemVT = VecVT.getVectorElementType();
7803 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
7804 unsigned RemIdx = OrigIdx % ElemsPerVReg;
7805 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
7806 unsigned ExtractIdx =
7807 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
7808 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
7809 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
7810 ContainerVT = M1VT;
7813 if (AlignedIdx)
7814 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
7815 AlignedIdx);
7818 MVT XLenVT = Subtarget.getXLenVT();
7820 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
7821 // Even i64-element vectors on RV32 can be lowered without scalar
7822 // legalization if the most-significant 32 bits of the value are not affected
7823 // by the sign-extension of the lower 32 bits.
7824 // TODO: We could also catch sign extensions of a 32-bit value.
7825 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
7826 const auto *CVal = cast<ConstantSDNode>(Val);
7827 if (isInt<32>(CVal->getSExtValue())) {
7828 IsLegalInsert = true;
7829 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
7833 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7835 SDValue ValInVec;
7837 if (IsLegalInsert) {
7838 unsigned Opc =
7839 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
7840 if (isNullConstant(Idx)) {
7841 if (!VecVT.isFloatingPoint())
7842 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
7843 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
7845 if (AlignedIdx)
7846 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7847 Vec, AlignedIdx);
7848 if (!VecVT.isFixedLengthVector())
7849 return Vec;
7850 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
7852 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
7853 } else {
7854 // On RV32, i64-element vectors must be specially handled to place the
7855 // value at element 0, by using two vslide1down instructions in sequence on
7856 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
7857 // this.
7858 SDValue ValLo, ValHi;
7859 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
7860 MVT I32ContainerVT =
7861 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
7862 SDValue I32Mask =
7863 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
7864 // Limit the active VL to two.
7865 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
7866 // If the Idx is 0 we can insert directly into the vector.
7867 if (isNullConstant(Idx)) {
7868       // First slide in the lo value, then the hi value above it. We use slide1down
7869 // to avoid the register group overlap constraint of vslide1up.
7870 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7871 Vec, Vec, ValLo, I32Mask, InsertI64VL);
7872       // If the source vector is undef, don't pass along the tail elements from
7873 // the previous slide1down.
7874 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
7875 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7876 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
7877 // Bitcast back to the right container type.
7878 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
7880 if (AlignedIdx)
7881 ValInVec =
7882 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7883 ValInVec, AlignedIdx);
7884 if (!VecVT.isFixedLengthVector())
7885 return ValInVec;
7886 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
7889     // First slide in the lo value, then the hi value above it. We use slide1down
7890 // to avoid the register group overlap constraint of vslide1up.
7891 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7892 DAG.getUNDEF(I32ContainerVT),
7893 DAG.getUNDEF(I32ContainerVT), ValLo,
7894 I32Mask, InsertI64VL);
7895 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7896 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
7897 I32Mask, InsertI64VL);
7898 // Bitcast back to the right container type.
7899 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
7902 // Now that the value is in a vector, slide it into position.
7903 SDValue InsertVL =
7904 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
7906 // Use tail agnostic policy if Idx is the last index of Vec.
7907 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
7908 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
7909 cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
7910 VecVT.getVectorNumElements())
7911 Policy = RISCVII::TAIL_AGNOSTIC;
7912 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
7913 Idx, Mask, InsertVL, Policy);
7915 if (AlignedIdx)
7916 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7917 Slideup, AlignedIdx);
7918 if (!VecVT.isFixedLengthVector())
7919 return Slideup;
7920 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
7923 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
7924 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
7925 // types this is done using VMV_X_S to allow us to glean information about the
7926 // sign bits of the result.
7927 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
7928 SelectionDAG &DAG) const {
7929 SDLoc DL(Op);
7930 SDValue Idx = Op.getOperand(1);
7931 SDValue Vec = Op.getOperand(0);
7932 EVT EltVT = Op.getValueType();
7933 MVT VecVT = Vec.getSimpleValueType();
7934 MVT XLenVT = Subtarget.getXLenVT();
7936 if (VecVT.getVectorElementType() == MVT::i1) {
7937 // Use vfirst.m to extract the first bit.
7938 if (isNullConstant(Idx)) {
7939 MVT ContainerVT = VecVT;
7940 if (VecVT.isFixedLengthVector()) {
7941 ContainerVT = getContainerForFixedLengthVector(VecVT);
7942 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7944 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7945 SDValue Vfirst =
7946 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
7947 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
7948 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
7949 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
7951 if (VecVT.isFixedLengthVector()) {
7952 unsigned NumElts = VecVT.getVectorNumElements();
7953 if (NumElts >= 8) {
7954 MVT WideEltVT;
7955 unsigned WidenVecLen;
7956 SDValue ExtractElementIdx;
7957 SDValue ExtractBitIdx;
7958 unsigned MaxEEW = Subtarget.getELen();
7959 MVT LargestEltVT = MVT::getIntegerVT(
7960 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
7961 if (NumElts <= LargestEltVT.getSizeInBits()) {
7962 assert(isPowerOf2_32(NumElts) &&
7963 "the number of elements should be power of 2");
7964 WideEltVT = MVT::getIntegerVT(NumElts);
7965 WidenVecLen = 1;
7966 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
7967 ExtractBitIdx = Idx;
7968 } else {
7969 WideEltVT = LargestEltVT;
7970 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
7971 // extract element index = index / element width
7972 ExtractElementIdx = DAG.getNode(
7973 ISD::SRL, DL, XLenVT, Idx,
7974 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
7975 // mask bit index = index % element width
7976 ExtractBitIdx = DAG.getNode(
7977 ISD::AND, DL, XLenVT, Idx,
7978 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
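      // For illustration: extracting bit 70 of a v128i1 on RV64 with ELEN=64
      // bitcasts to v2i64, reads element 70>>6 = 1, then shifts it right by
      // 70&63 = 6 and masks with 1.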
7980 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
7981 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
7982 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
7983 Vec, ExtractElementIdx);
7984 // Extract the bit from GPR.
7985 SDValue ShiftRight =
7986 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
7987 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
7988 DAG.getConstant(1, DL, XLenVT));
7989 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
7992 // Otherwise, promote to an i8 vector and extract from that.
7993 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7994 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7995 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
7998 // If this is a fixed vector, we need to convert it to a scalable vector.
7999 MVT ContainerVT = VecVT;
8000 if (VecVT.isFixedLengthVector()) {
8001 ContainerVT = getContainerForFixedLengthVector(VecVT);
8002 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8005 // If we're compiling for an exact VLEN value and we have a known
8006 // constant index, we can always perform the extract in m1 (or
8007 // smaller) as we can determine the register corresponding to
8008 // the index in the register group.
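// For illustration, assuming VLEN is exactly 128: extracting element 6
// of a v8i32 (an m2 register group) has ElemsPerVReg=4, so we extract
// the nxv2i32 subvector at index 2 (the group's second register) and
// read element 6%4 = 2 from it.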
8009 const unsigned MinVLen = Subtarget.getRealMinVLen();
8010 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
8011 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8012 IdxC && MinVLen == MaxVLen &&
8013 VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
8014 MVT M1VT = getLMUL1VT(ContainerVT);
8015 unsigned OrigIdx = IdxC->getZExtValue();
8016 EVT ElemVT = VecVT.getVectorElementType();
8017 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
8018 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8019 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8020 unsigned ExtractIdx =
8021 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8022 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8023 DAG.getVectorIdxConstant(ExtractIdx, DL));
8024 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8025 ContainerVT = M1VT;
8028 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8029 // contains our index.
8030 std::optional<uint64_t> MaxIdx;
8031 if (VecVT.isFixedLengthVector())
8032 MaxIdx = VecVT.getVectorNumElements() - 1;
8033 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8034 MaxIdx = IdxC->getZExtValue();
8035 if (MaxIdx) {
8036 if (auto SmallerVT =
8037 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8038 ContainerVT = *SmallerVT;
8039 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8040 DAG.getConstant(0, DL, XLenVT));
8044 // If after narrowing, the required slide is still greater than LMUL2,
8045   // fall back to generic expansion and go through the stack. This is done
8046 // for a subtle reason: extracting *all* elements out of a vector is
8047 // widely expected to be linear in vector size, but because vslidedown
8048 // is linear in LMUL, performing N extracts using vslidedown becomes
8049 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8050 // seems to have the same problem (the store is linear in LMUL), but the
8051 // generic expansion *memoizes* the store, and thus for many extracts of
8052 // the same vector we end up with one store and a bunch of loads.
8053 // TODO: We don't have the same code for insert_vector_elt because we
8054 // have BUILD_VECTOR and handle the degenerate case there. Should we
8055 // consider adding an inverse BUILD_VECTOR node?
8056 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8057 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8058 return SDValue();
8060 // If the index is 0, the vector is already in the right position.
8061 if (!isNullConstant(Idx)) {
8062 // Use a VL of 1 to avoid processing more elements than we need.
8063 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8064 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8065 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8068 if (!EltVT.isInteger()) {
8069 // Floating-point extracts are handled in TableGen.
8070 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8071 DAG.getConstant(0, DL, XLenVT));
8074 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8075 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8078 // Some RVV intrinsics may claim that they want an integer operand to be
8079 // promoted or expanded.
8080 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8081 const RISCVSubtarget &Subtarget) {
8082 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8083 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8084 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8085 "Unexpected opcode");
8087 if (!Subtarget.hasVInstructions())
8088 return SDValue();
8090 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8091 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8092 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8094 SDLoc DL(Op);
8096 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8097 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8098 if (!II || !II->hasScalarOperand())
8099 return SDValue();
8101 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8102 assert(SplatOp < Op.getNumOperands());
8104 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8105 SDValue &ScalarOp = Operands[SplatOp];
8106 MVT OpVT = ScalarOp.getSimpleValueType();
8107 MVT XLenVT = Subtarget.getXLenVT();
8109   // If this isn't a scalar, or its type is XLenVT, we're done.
8110 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8111 return SDValue();
8113 // Simplest case is that the operand needs to be promoted to XLenVT.
8114 if (OpVT.bitsLT(XLenVT)) {
8115 // If the operand is a constant, sign extend to increase our chances
8116     // of being able to use a .vi instruction. ANY_EXTEND would become
8117 // a zero extend and the simm5 check in isel would fail.
8118 // FIXME: Should we ignore the upper bits in isel instead?
8119 unsigned ExtOpc =
8120 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8121 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8122 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8125 // Use the previous operand to get the vXi64 VT. The result might be a mask
8126 // VT for compares. Using the previous operand assumes that the previous
8127 // operand will never have a smaller element size than a scalar operand and
8128 // that a widening operation never uses SEW=64.
8129 // NOTE: If this fails the below assert, we can probably just find the
8130 // element count from any operand or result and use it to construct the VT.
8131 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8132 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8134 // The more complex case is when the scalar is larger than XLenVT.
8135 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8136 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8138 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8139 // instruction to sign-extend since SEW>XLEN.
8140 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8141 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8142 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8145 switch (IntNo) {
8146 case Intrinsic::riscv_vslide1up:
8147 case Intrinsic::riscv_vslide1down:
8148 case Intrinsic::riscv_vslide1up_mask:
8149 case Intrinsic::riscv_vslide1down_mask: {
8150 // We need to special case these when the scalar is larger than XLen.
8151 unsigned NumOps = Op.getNumOperands();
8152 bool IsMasked = NumOps == 7;
8154 // Convert the vector source to the equivalent nxvXi32 vector.
8155 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8156 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8157 SDValue ScalarLo, ScalarHi;
8158 std::tie(ScalarLo, ScalarHi) =
8159 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8161 // Double the VL since we halved SEW.
8162 SDValue AVL = getVLOperand(Op);
8163 SDValue I32VL;
8165 // Optimize for constant AVL
8166 if (isa<ConstantSDNode>(AVL)) {
8167 const auto [MinVLMAX, MaxVLMAX] =
8168 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8170 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
8171 if (AVLInt <= MinVLMAX) {
8172 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8173 } else if (AVLInt >= 2 * MaxVLMAX) {
8174 // Just set vl to VLMAX in this situation
8175 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8176 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8177 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8178 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8179 SDValue SETVLMAX = DAG.getTargetConstant(
8180 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8181 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8182 LMUL);
8183 } else {
8184       // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8185       // depends on the hardware implementation,
8186       // so let the code below handle it.
8189 if (!I32VL) {
8190 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8191 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8192 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8193 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8194 SDValue SETVL =
8195 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8196       // Use a vsetvli instruction to get the actual vector length in use, which
8197       // depends on the hardware implementation.
8198 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8199 SEW, LMUL);
8200 I32VL =
8201 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8204 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8206 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8207 // instructions.
8208 SDValue Passthru;
8209 if (IsMasked)
8210 Passthru = DAG.getUNDEF(I32VT);
8211 else
8212 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8214 if (IntNo == Intrinsic::riscv_vslide1up ||
8215 IntNo == Intrinsic::riscv_vslide1up_mask) {
8216 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8217 ScalarHi, I32Mask, I32VL);
8218 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8219 ScalarLo, I32Mask, I32VL);
8220 } else {
8221 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8222 ScalarLo, I32Mask, I32VL);
8223 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8224 ScalarHi, I32Mask, I32VL);
8227 // Convert back to nxvXi64.
8228 Vec = DAG.getBitcast(VT, Vec);
8230 if (!IsMasked)
8231 return Vec;
8232 // Apply mask after the operation.
8233 SDValue Mask = Operands[NumOps - 3];
8234 SDValue MaskedOff = Operands[1];
8235 // Assume Policy operand is the last operand.
8236 uint64_t Policy =
8237 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
8238 // We don't need to select maskedoff if it's undef.
8239 if (MaskedOff.isUndef())
8240 return Vec;
8241 // TAMU
8242 if (Policy == RISCVII::TAIL_AGNOSTIC)
8243 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
8244 AVL);
8245     // TUMA or TUMU: Currently we always emit a tumu policy regardless of tuma.
8246     // That is fine because vmerge does not care about the mask policy.
8247 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8248 MaskedOff, AVL);
8252 // We need to convert the scalar to a splat vector.
8253 SDValue VL = getVLOperand(Op);
8254 assert(VL.getValueType() == XLenVT);
8255 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8256 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8259 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8260 // scalable vector llvm.get.vector.length for now.
8262 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
8263 // (vscale * VF). The vscale and VF are independent of element width. We use
8264 // SEW=8 for the vsetvli because it is the only element width that supports all
8265 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8266 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8267 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8268 // SEW and LMUL are better for the surrounding vector instructions.
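// For example, with VF=2 the LMUL is fractional: LMul1VF = 64/8 = 8, so
// LMulVal = 8/2 = 4 and the emitted vsetvli uses e8/mf4, whose VLMax is
// VLEN/32 = vscale * 2 as required.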
8269 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8270 const RISCVSubtarget &Subtarget) {
8271 MVT XLenVT = Subtarget.getXLenVT();
8273 // The smallest LMUL is only valid for the smallest element width.
8274 const unsigned ElementWidth = 8;
8276 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8277 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8278 // We don't support VF==1 with ELEN==32.
8279 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
8281 unsigned VF = N->getConstantOperandVal(2);
8282 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8283 "Unexpected VF");
8284 (void)MinVF;
8286 bool Fractional = VF < LMul1VF;
8287 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8288 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8289 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8291 SDLoc DL(N);
8293 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8294 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8296 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8298 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8299 SDValue Res =
8300 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8301 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8304 static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
8305 SmallVector<SDValue> &Ops) {
8306 SDLoc DL(Op);
8308 const RISCVSubtarget &Subtarget =
8309 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8310 for (const SDValue &V : Op->op_values()) {
8311 EVT ValType = V.getValueType();
8312 if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
8313 MVT InterimIVT =
8314 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8315 ValType.getVectorElementCount());
8316 Ops.push_back(DAG.getBitcast(InterimIVT, V));
8317 } else if (ValType.isFixedLengthVector()) {
8318 MVT OpContainerVT = getContainerForFixedLengthVector(
8319 DAG, V.getSimpleValueType(), Subtarget);
8320 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8321 } else
8322 Ops.push_back(V);
8326 // LMUL * VLEN should be greater than or equal to EGS * SEW
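// For example, with EGS=4 and an nxv1i32 operand (SEW=32) the check
// requires (VLEN * 32) / 64 >= 128, i.e. VLEN >= 256, whereas an
// nxv4i32 operand only requires VLEN >= 64.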
8327 static inline bool isValidEGW(int EGS, EVT VT,
8328 const RISCVSubtarget &Subtarget) {
8329 return (Subtarget.getRealMinVLen() *
8330 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8331 EGS * VT.getScalarSizeInBits();
8334 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8335 SelectionDAG &DAG) const {
8336 unsigned IntNo = Op.getConstantOperandVal(0);
8337 SDLoc DL(Op);
8338 MVT XLenVT = Subtarget.getXLenVT();
8340 switch (IntNo) {
8341 default:
8342 break; // Don't custom lower most intrinsics.
8343 case Intrinsic::thread_pointer: {
8344 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8345 return DAG.getRegister(RISCV::X4, PtrVT);
8347 case Intrinsic::riscv_orc_b:
8348 case Intrinsic::riscv_brev8:
8349 case Intrinsic::riscv_sha256sig0:
8350 case Intrinsic::riscv_sha256sig1:
8351 case Intrinsic::riscv_sha256sum0:
8352 case Intrinsic::riscv_sha256sum1:
8353 case Intrinsic::riscv_sm3p0:
8354 case Intrinsic::riscv_sm3p1: {
8355 unsigned Opc;
8356 switch (IntNo) {
8357 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8358 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8359 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8360 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8361 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8362 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8363 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8364 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8367 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8368 SDValue NewOp =
8369 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8370 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8371 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8374 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8376 case Intrinsic::riscv_sm4ks:
8377 case Intrinsic::riscv_sm4ed: {
8378 unsigned Opc =
8379 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8381 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8382 SDValue NewOp0 =
8383 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8384 SDValue NewOp1 =
8385 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8386 SDValue Res =
8387 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8388 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8391 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8392 Op.getOperand(3));
8394 case Intrinsic::riscv_zip:
8395 case Intrinsic::riscv_unzip: {
8396 unsigned Opc =
8397 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8398 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8400 case Intrinsic::riscv_clmul:
8401 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8402 SDValue NewOp0 =
8403 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8404 SDValue NewOp1 =
8405 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8406 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8407 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8409 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8410 Op.getOperand(2));
8411 case Intrinsic::riscv_clmulh:
8412 case Intrinsic::riscv_clmulr: {
8413 unsigned Opc =
8414 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8415 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8416 SDValue NewOp0 =
8417 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8418 SDValue NewOp1 =
8419 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8420 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8421 DAG.getConstant(32, DL, MVT::i64));
8422 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8423 DAG.getConstant(32, DL, MVT::i64));
8424 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8425 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8426 DAG.getConstant(32, DL, MVT::i64));
8427 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8430 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8432 case Intrinsic::experimental_get_vector_length:
8433 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8434 case Intrinsic::riscv_vmv_x_s: {
8435 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8436 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8438 case Intrinsic::riscv_vfmv_f_s:
8439 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8440 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
8441 case Intrinsic::riscv_vmv_v_x:
8442 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8443 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8444 Subtarget);
8445 case Intrinsic::riscv_vfmv_v_f:
8446 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8447 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8448 case Intrinsic::riscv_vmv_s_x: {
8449 SDValue Scalar = Op.getOperand(2);
8451 if (Scalar.getValueType().bitsLE(XLenVT)) {
8452 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8453 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8454 Op.getOperand(1), Scalar, Op.getOperand(3));
8457 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8459 // This is an i64 value that lives in two scalar registers. We have to
8460     // insert this in a convoluted way. First we build a vXi64 splat containing
8461 // the two values that we assemble using some bit math. Next we'll use
8462 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8463 // to merge element 0 from our splat into the source vector.
8464 // FIXME: This is probably not the best way to do this, but it is
8465 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8466 // point.
8467 // sw lo, (a0)
8468 // sw hi, 4(a0)
8469 // vlse vX, (a0)
8471 // vid.v vVid
8472 // vmseq.vx mMask, vVid, 0
8473 // vmerge.vvm vDest, vSrc, vVal, mMask
8474 MVT VT = Op.getSimpleValueType();
8475 SDValue Vec = Op.getOperand(1);
8476 SDValue VL = getVLOperand(Op);
8478 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8479 if (Op.getOperand(1).isUndef())
8480 return SplattedVal;
8481 SDValue SplattedIdx =
8482 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8483 DAG.getConstant(0, DL, MVT::i32), VL);
8485 MVT MaskVT = getMaskTypeFor(VT);
8486 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
8487 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8488 SDValue SelectCond =
8489 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8490 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
8491 DAG.getUNDEF(MaskVT), Mask, VL});
8492 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
8493 Vec, VL);
8495 // EGS * EEW >= 128 bits
8496 case Intrinsic::riscv_vaesdf_vv:
8497 case Intrinsic::riscv_vaesdf_vs:
8498 case Intrinsic::riscv_vaesdm_vv:
8499 case Intrinsic::riscv_vaesdm_vs:
8500 case Intrinsic::riscv_vaesef_vv:
8501 case Intrinsic::riscv_vaesef_vs:
8502 case Intrinsic::riscv_vaesem_vv:
8503 case Intrinsic::riscv_vaesem_vs:
8504 case Intrinsic::riscv_vaeskf1:
8505 case Intrinsic::riscv_vaeskf2:
8506 case Intrinsic::riscv_vaesz_vs:
8507 case Intrinsic::riscv_vsm4k:
8508 case Intrinsic::riscv_vsm4r_vv:
8509 case Intrinsic::riscv_vsm4r_vs: {
8510 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8511 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8512 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8513 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8514 return Op;
8516 // EGS * EEW >= 256 bits
8517 case Intrinsic::riscv_vsm3c:
8518 case Intrinsic::riscv_vsm3me: {
8519 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
8520 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
8521 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
8522 return Op;
8524 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
8525 case Intrinsic::riscv_vsha2ch:
8526 case Intrinsic::riscv_vsha2cl:
8527 case Intrinsic::riscv_vsha2ms: {
8528 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
8529 !Subtarget.hasStdExtZvknhb())
8530 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
8531 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8532 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8533 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8534 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8535 return Op;
8537 case Intrinsic::riscv_sf_vc_v_x:
8538 case Intrinsic::riscv_sf_vc_v_i:
8539 case Intrinsic::riscv_sf_vc_v_xv:
8540 case Intrinsic::riscv_sf_vc_v_iv:
8541 case Intrinsic::riscv_sf_vc_v_vv:
8542 case Intrinsic::riscv_sf_vc_v_fv:
8543 case Intrinsic::riscv_sf_vc_v_xvv:
8544 case Intrinsic::riscv_sf_vc_v_ivv:
8545 case Intrinsic::riscv_sf_vc_v_vvv:
8546 case Intrinsic::riscv_sf_vc_v_fvv:
8547 case Intrinsic::riscv_sf_vc_v_xvw:
8548 case Intrinsic::riscv_sf_vc_v_ivw:
8549 case Intrinsic::riscv_sf_vc_v_vvw:
8550 case Intrinsic::riscv_sf_vc_v_fvw: {
8551 MVT VT = Op.getSimpleValueType();
8553 SmallVector<SDValue> Ops;
8554 getVCIXOperands(Op, DAG, Ops);
8556 MVT RetVT = VT;
8557 if (VT.isFixedLengthVector())
8558 RetVT = getContainerForFixedLengthVector(VT);
8559 else if (VT.isFloatingPoint())
8560 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
8561 VT.getVectorElementCount());
8563 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
8565 if (VT.isFixedLengthVector())
8566 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
8567 else if (VT.isFloatingPoint())
8568 NewNode = DAG.getBitcast(VT, NewNode);
8570 if (Op == NewNode)
8571 break;
8573 return NewNode;
8577 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8580 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
8581 SelectionDAG &DAG) const {
8582 unsigned IntNo = Op.getConstantOperandVal(1);
8583 switch (IntNo) {
8584 default:
8585 break;
8586 case Intrinsic::riscv_masked_strided_load: {
8587 SDLoc DL(Op);
8588 MVT XLenVT = Subtarget.getXLenVT();
8590 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8591 // the selection of the masked intrinsics doesn't do this for us.
8592 SDValue Mask = Op.getOperand(5);
8593 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8595 MVT VT = Op->getSimpleValueType(0);
8596 MVT ContainerVT = VT;
8597 if (VT.isFixedLengthVector())
8598 ContainerVT = getContainerForFixedLengthVector(VT);
8600 SDValue PassThru = Op.getOperand(2);
8601 if (!IsUnmasked) {
8602 MVT MaskVT = getMaskTypeFor(ContainerVT);
8603 if (VT.isFixedLengthVector()) {
8604 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8605 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8609 auto *Load = cast<MemIntrinsicSDNode>(Op);
8610 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8611 SDValue Ptr = Op.getOperand(3);
8612 SDValue Stride = Op.getOperand(4);
8613 SDValue Result, Chain;
8615     // TODO: We currently restrict this to unmasked loads, in consideration of
8616     // the complexity of handling all-false masks.
8617 if (IsUnmasked && isNullConstant(Stride)) {
8618 MVT ScalarVT = ContainerVT.getVectorElementType();
8619 SDValue ScalarLoad =
8620 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
8621 ScalarVT, Load->getMemOperand());
8622 Chain = ScalarLoad.getValue(1);
8623 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
8624 Subtarget);
8625 } else {
8626 SDValue IntID = DAG.getTargetConstant(
8627 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
8628 XLenVT);
8630 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
8631 if (IsUnmasked)
8632 Ops.push_back(DAG.getUNDEF(ContainerVT));
8633 else
8634 Ops.push_back(PassThru);
8635 Ops.push_back(Ptr);
8636 Ops.push_back(Stride);
8637 if (!IsUnmasked)
8638 Ops.push_back(Mask);
8639 Ops.push_back(VL);
8640 if (!IsUnmasked) {
8641 SDValue Policy =
8642 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
8643 Ops.push_back(Policy);
8646 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8647 Result =
8648 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8649 Load->getMemoryVT(), Load->getMemOperand());
8650 Chain = Result.getValue(1);
8652 if (VT.isFixedLengthVector())
8653 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8654 return DAG.getMergeValues({Result, Chain}, DL);
8656 case Intrinsic::riscv_seg2_load:
8657 case Intrinsic::riscv_seg3_load:
8658 case Intrinsic::riscv_seg4_load:
8659 case Intrinsic::riscv_seg5_load:
8660 case Intrinsic::riscv_seg6_load:
8661 case Intrinsic::riscv_seg7_load:
8662 case Intrinsic::riscv_seg8_load: {
8663 SDLoc DL(Op);
8664 static const Intrinsic::ID VlsegInts[7] = {
8665 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
8666 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
8667 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
8668 Intrinsic::riscv_vlseg8};
8669 unsigned NF = Op->getNumValues() - 1;
8670 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8671 MVT XLenVT = Subtarget.getXLenVT();
8672 MVT VT = Op->getSimpleValueType(0);
8673 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8675 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
8676 Subtarget);
8677 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
8678 auto *Load = cast<MemIntrinsicSDNode>(Op);
8679 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
8680 ContainerVTs.push_back(MVT::Other);
8681 SDVTList VTs = DAG.getVTList(ContainerVTs);
8682 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
8683 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
8684 Ops.push_back(Op.getOperand(2));
8685 Ops.push_back(VL);
8686 SDValue Result =
8687 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8688 Load->getMemoryVT(), Load->getMemOperand());
8689 SmallVector<SDValue, 9> Results;
8690 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
8691 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
8692 DAG, Subtarget));
8693 Results.push_back(Result.getValue(NF));
8694 return DAG.getMergeValues(Results, DL);
8696 case Intrinsic::riscv_sf_vc_v_x_se:
8697 case Intrinsic::riscv_sf_vc_v_i_se:
8698 case Intrinsic::riscv_sf_vc_v_xv_se:
8699 case Intrinsic::riscv_sf_vc_v_iv_se:
8700 case Intrinsic::riscv_sf_vc_v_vv_se:
8701 case Intrinsic::riscv_sf_vc_v_fv_se:
8702 case Intrinsic::riscv_sf_vc_v_xvv_se:
8703 case Intrinsic::riscv_sf_vc_v_ivv_se:
8704 case Intrinsic::riscv_sf_vc_v_vvv_se:
8705 case Intrinsic::riscv_sf_vc_v_fvv_se:
8706 case Intrinsic::riscv_sf_vc_v_xvw_se:
8707 case Intrinsic::riscv_sf_vc_v_ivw_se:
8708 case Intrinsic::riscv_sf_vc_v_vvw_se:
8709 case Intrinsic::riscv_sf_vc_v_fvw_se: {
8710 MVT VT = Op.getSimpleValueType();
8711 SDLoc DL(Op);
8712 SmallVector<SDValue> Ops;
8713 getVCIXOperands(Op, DAG, Ops);
8715 MVT RetVT = VT;
8716 if (VT.isFixedLengthVector())
8717 RetVT = getContainerForFixedLengthVector(VT);
8718 else if (VT.isFloatingPoint())
8719 RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
8720 RetVT.getVectorElementCount());
8722 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
8723 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
8725 if (VT.isFixedLengthVector()) {
8726 SDValue FixedVector =
8727 convertFromScalableVector(VT, NewNode, DAG, Subtarget);
8728 NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
8729 } else if (VT.isFloatingPoint()) {
8730 SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
8731 NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
8734 if (Op == NewNode)
8735 break;
8737 return NewNode;
8741 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8744 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8745 SelectionDAG &DAG) const {
8746 unsigned IntNo = Op.getConstantOperandVal(1);
8747 switch (IntNo) {
8748 default:
8749 break;
8750 case Intrinsic::riscv_masked_strided_store: {
8751 SDLoc DL(Op);
8752 MVT XLenVT = Subtarget.getXLenVT();
8754 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8755 // the selection of the masked intrinsics doesn't do this for us.
8756 SDValue Mask = Op.getOperand(5);
8757 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8759 SDValue Val = Op.getOperand(2);
8760 MVT VT = Val.getSimpleValueType();
8761 MVT ContainerVT = VT;
8762 if (VT.isFixedLengthVector()) {
8763 ContainerVT = getContainerForFixedLengthVector(VT);
8764 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
8766 if (!IsUnmasked) {
8767 MVT MaskVT = getMaskTypeFor(ContainerVT);
8768 if (VT.isFixedLengthVector())
8769 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8772 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8774 SDValue IntID = DAG.getTargetConstant(
8775 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
8776 XLenVT);
8778 auto *Store = cast<MemIntrinsicSDNode>(Op);
8779 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
8780 Ops.push_back(Val);
8781 Ops.push_back(Op.getOperand(3)); // Ptr
8782 Ops.push_back(Op.getOperand(4)); // Stride
8783 if (!IsUnmasked)
8784 Ops.push_back(Mask);
8785 Ops.push_back(VL);
8787 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
8788 Ops, Store->getMemoryVT(),
8789 Store->getMemOperand());
8791 case Intrinsic::riscv_seg2_store:
8792 case Intrinsic::riscv_seg3_store:
8793 case Intrinsic::riscv_seg4_store:
8794 case Intrinsic::riscv_seg5_store:
8795 case Intrinsic::riscv_seg6_store:
8796 case Intrinsic::riscv_seg7_store:
8797 case Intrinsic::riscv_seg8_store: {
8798 SDLoc DL(Op);
8799 static const Intrinsic::ID VssegInts[] = {
8800 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
8801 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
8802 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
8803 Intrinsic::riscv_vsseg8};
8804 // Operands are (chain, int_id, vec*, ptr, vl)
8805 unsigned NF = Op->getNumOperands() - 4;
8806 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8807 MVT XLenVT = Subtarget.getXLenVT();
8808 MVT VT = Op->getOperand(2).getSimpleValueType();
8809 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8811 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
8812 Subtarget);
8813 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
8814 SDValue Ptr = Op->getOperand(NF + 2);
8816 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
8817 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
8818 for (unsigned i = 0; i < NF; i++)
8819 Ops.push_back(convertToScalableVector(
8820 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
8821 Ops.append({Ptr, VL});
8823 return DAG.getMemIntrinsicNode(
8824 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
8825 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
8827 case Intrinsic::riscv_sf_vc_x_se_e8mf8:
8828 case Intrinsic::riscv_sf_vc_x_se_e8mf4:
8829 case Intrinsic::riscv_sf_vc_x_se_e8mf2:
8830 case Intrinsic::riscv_sf_vc_x_se_e8m1:
8831 case Intrinsic::riscv_sf_vc_x_se_e8m2:
8832 case Intrinsic::riscv_sf_vc_x_se_e8m4:
8833 case Intrinsic::riscv_sf_vc_x_se_e8m8:
8834 case Intrinsic::riscv_sf_vc_x_se_e16mf4:
8835 case Intrinsic::riscv_sf_vc_x_se_e16mf2:
8836 case Intrinsic::riscv_sf_vc_x_se_e16m1:
8837 case Intrinsic::riscv_sf_vc_x_se_e16m2:
8838 case Intrinsic::riscv_sf_vc_x_se_e16m4:
8839 case Intrinsic::riscv_sf_vc_x_se_e16m8:
8840 case Intrinsic::riscv_sf_vc_x_se_e32mf2:
8841 case Intrinsic::riscv_sf_vc_x_se_e32m1:
8842 case Intrinsic::riscv_sf_vc_x_se_e32m2:
8843 case Intrinsic::riscv_sf_vc_x_se_e32m4:
8844 case Intrinsic::riscv_sf_vc_x_se_e32m8:
8845 case Intrinsic::riscv_sf_vc_x_se_e64m1:
8846 case Intrinsic::riscv_sf_vc_x_se_e64m2:
8847 case Intrinsic::riscv_sf_vc_x_se_e64m4:
8848 case Intrinsic::riscv_sf_vc_x_se_e64m8:
8849 case Intrinsic::riscv_sf_vc_i_se_e8mf8:
8850 case Intrinsic::riscv_sf_vc_i_se_e8mf4:
8851 case Intrinsic::riscv_sf_vc_i_se_e8mf2:
8852 case Intrinsic::riscv_sf_vc_i_se_e8m1:
8853 case Intrinsic::riscv_sf_vc_i_se_e8m2:
8854 case Intrinsic::riscv_sf_vc_i_se_e8m4:
8855 case Intrinsic::riscv_sf_vc_i_se_e8m8:
8856 case Intrinsic::riscv_sf_vc_i_se_e16mf4:
8857 case Intrinsic::riscv_sf_vc_i_se_e16mf2:
8858 case Intrinsic::riscv_sf_vc_i_se_e16m1:
8859 case Intrinsic::riscv_sf_vc_i_se_e16m2:
8860 case Intrinsic::riscv_sf_vc_i_se_e16m4:
8861 case Intrinsic::riscv_sf_vc_i_se_e16m8:
8862 case Intrinsic::riscv_sf_vc_i_se_e32mf2:
8863 case Intrinsic::riscv_sf_vc_i_se_e32m1:
8864 case Intrinsic::riscv_sf_vc_i_se_e32m2:
8865 case Intrinsic::riscv_sf_vc_i_se_e32m4:
8866 case Intrinsic::riscv_sf_vc_i_se_e32m8:
8867 case Intrinsic::riscv_sf_vc_i_se_e64m1:
8868 case Intrinsic::riscv_sf_vc_i_se_e64m2:
8869 case Intrinsic::riscv_sf_vc_i_se_e64m4:
8870 case Intrinsic::riscv_sf_vc_i_se_e64m8:
8871 case Intrinsic::riscv_sf_vc_xv_se:
8872 case Intrinsic::riscv_sf_vc_iv_se:
8873 case Intrinsic::riscv_sf_vc_vv_se:
8874 case Intrinsic::riscv_sf_vc_fv_se:
8875 case Intrinsic::riscv_sf_vc_xvv_se:
8876 case Intrinsic::riscv_sf_vc_ivv_se:
8877 case Intrinsic::riscv_sf_vc_vvv_se:
8878 case Intrinsic::riscv_sf_vc_fvv_se:
8879 case Intrinsic::riscv_sf_vc_xvw_se:
8880 case Intrinsic::riscv_sf_vc_ivw_se:
8881 case Intrinsic::riscv_sf_vc_vvw_se:
8882 case Intrinsic::riscv_sf_vc_fvw_se: {
8883 SmallVector<SDValue> Ops;
8884 getVCIXOperands(Op, DAG, Ops);
8886 SDValue NewNode =
8887 DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
8889 if (Op == NewNode)
8890 break;
8892 return NewNode;
8896 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
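// Map an ISD::VECREDUCE_* or ISD::VP_REDUCE_* opcode to the corresponding
// RISCVISD::VECREDUCE_*_VL opcode.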
8899 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
8900 switch (ISDOpcode) {
8901 default:
8902 llvm_unreachable("Unhandled reduction");
8903 case ISD::VP_REDUCE_ADD:
8904 case ISD::VECREDUCE_ADD:
8905 return RISCVISD::VECREDUCE_ADD_VL;
8906 case ISD::VP_REDUCE_UMAX:
8907 case ISD::VECREDUCE_UMAX:
8908 return RISCVISD::VECREDUCE_UMAX_VL;
8909 case ISD::VP_REDUCE_SMAX:
8910 case ISD::VECREDUCE_SMAX:
8911 return RISCVISD::VECREDUCE_SMAX_VL;
8912 case ISD::VP_REDUCE_UMIN:
8913 case ISD::VECREDUCE_UMIN:
8914 return RISCVISD::VECREDUCE_UMIN_VL;
8915 case ISD::VP_REDUCE_SMIN:
8916 case ISD::VECREDUCE_SMIN:
8917 return RISCVISD::VECREDUCE_SMIN_VL;
8918 case ISD::VP_REDUCE_AND:
8919 case ISD::VECREDUCE_AND:
8920 return RISCVISD::VECREDUCE_AND_VL;
8921 case ISD::VP_REDUCE_OR:
8922 case ISD::VECREDUCE_OR:
8923 return RISCVISD::VECREDUCE_OR_VL;
8924 case ISD::VP_REDUCE_XOR:
8925 case ISD::VECREDUCE_XOR:
8926 return RISCVISD::VECREDUCE_XOR_VL;
8927 case ISD::VP_REDUCE_FADD:
8928 return RISCVISD::VECREDUCE_FADD_VL;
8929 case ISD::VP_REDUCE_SEQ_FADD:
8930 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
8931 case ISD::VP_REDUCE_FMAX:
8932 return RISCVISD::VECREDUCE_FMAX_VL;
8933 case ISD::VP_REDUCE_FMIN:
8934 return RISCVISD::VECREDUCE_FMIN_VL;
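// Lower reductions of i1 vectors (AND/OR/XOR and their VP forms) using
// vcpop.m: AND becomes (vcpop(~x) == 0), OR becomes (vcpop(x) != 0), and XOR
// becomes ((vcpop(x) & 1) != 0).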
8939 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
8940 SelectionDAG &DAG,
8941 bool IsVP) const {
8942 SDLoc DL(Op);
8943 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
8944 MVT VecVT = Vec.getSimpleValueType();
8945 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
8946 Op.getOpcode() == ISD::VECREDUCE_OR ||
8947 Op.getOpcode() == ISD::VECREDUCE_XOR ||
8948 Op.getOpcode() == ISD::VP_REDUCE_AND ||
8949 Op.getOpcode() == ISD::VP_REDUCE_OR ||
8950 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
8951 "Unexpected reduction lowering");
8953 MVT XLenVT = Subtarget.getXLenVT();
8955 MVT ContainerVT = VecVT;
8956 if (VecVT.isFixedLengthVector()) {
8957 ContainerVT = getContainerForFixedLengthVector(VecVT);
8958 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8961 SDValue Mask, VL;
8962 if (IsVP) {
8963 Mask = Op.getOperand(2);
8964 VL = Op.getOperand(3);
8965 } else {
8966 std::tie(Mask, VL) =
8967 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8970 unsigned BaseOpc;
8971 ISD::CondCode CC;
8972 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8974 switch (Op.getOpcode()) {
8975 default:
8976 llvm_unreachable("Unhandled reduction");
8977 case ISD::VECREDUCE_AND:
8978 case ISD::VP_REDUCE_AND: {
8979 // vcpop ~x == 0
8980 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
8981 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
8982 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8983 CC = ISD::SETEQ;
8984 BaseOpc = ISD::AND;
8985 break;
8987 case ISD::VECREDUCE_OR:
8988 case ISD::VP_REDUCE_OR:
8989 // vcpop x != 0
8990 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8991 CC = ISD::SETNE;
8992 BaseOpc = ISD::OR;
8993 break;
8994 case ISD::VECREDUCE_XOR:
8995 case ISD::VP_REDUCE_XOR: {
8996 // ((vcpop x) & 1) != 0
8997 SDValue One = DAG.getConstant(1, DL, XLenVT);
8998 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8999 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9000 CC = ISD::SETNE;
9001 BaseOpc = ISD::XOR;
9002 break;
9006 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9007 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9009 if (!IsVP)
9010 return SetCC;
9012 // Now include the start value in the operation.
9013 // Note that we must return the start value when no elements are operated
9014 // upon. The vcpop instructions we've emitted in each case above will return
9015 // 0 for an inactive vector, and so we've already received the neutral value:
9016 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9017 // can simply include the start value.
9018 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
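// Return true if the AVL is known to be non-zero: either the X0 register
// (i.e. VLMAX) or a constant immediate of at least one.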
9021 static bool isNonZeroAVL(SDValue AVL) {
9022 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9023 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9024 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9025 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9028 /// Helper to lower a reduction sequence of the form:
9029 /// scalar = reduce_op vec, scalar_start
9030 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9031 SDValue StartValue, SDValue Vec, SDValue Mask,
9032 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9033 const RISCVSubtarget &Subtarget) {
9034 const MVT VecVT = Vec.getSimpleValueType();
9035 const MVT M1VT = getLMUL1VT(VecVT);
9036 const MVT XLenVT = Subtarget.getXLenVT();
9037 const bool NonZeroAVL = isNonZeroAVL(VL);
9039 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9040 // or the original VT if fractional.
9041 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9042 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9043 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9044 // be the result of the reduction operation.
9045 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9046 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9047 DAG, Subtarget);
9048 if (M1VT != InnerVT)
9049 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
9050 DAG.getUNDEF(M1VT),
9051 InitialValue, DAG.getConstant(0, DL, XLenVT));
9052 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9053 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9054 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9055 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9056 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9057 DAG.getConstant(0, DL, XLenVT));
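// Lower integer ISD::VECREDUCE_* nodes. The start value is the neutral
// element of the base operation, except for AND/OR and the MIN/MAX variants,
// where element 0 of the source vector is used instead.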
9060 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9061 SelectionDAG &DAG) const {
9062 SDLoc DL(Op);
9063 SDValue Vec = Op.getOperand(0);
9064 EVT VecEVT = Vec.getValueType();
9066 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9068 // Due to ordering in legalize types we may have a vector type that needs to
9069 // be split. Do that manually so we can get down to a legal type.
9070 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9071 TargetLowering::TypeSplitVector) {
9072 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9073 VecEVT = Lo.getValueType();
9074 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9077 // TODO: The type may need to be widened rather than split. Or widened before
9078 // it can be split.
9079 if (!isTypeLegal(VecEVT))
9080 return SDValue();
9082 MVT VecVT = VecEVT.getSimpleVT();
9083 MVT VecEltVT = VecVT.getVectorElementType();
9084 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9086 MVT ContainerVT = VecVT;
9087 if (VecVT.isFixedLengthVector()) {
9088 ContainerVT = getContainerForFixedLengthVector(VecVT);
9089 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9092 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9094 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9095 switch (BaseOpc) {
9096 case ISD::AND:
9097 case ISD::OR:
9098 case ISD::UMAX:
9099 case ISD::UMIN:
9100 case ISD::SMAX:
9101 case ISD::SMIN:
9102 MVT XLenVT = Subtarget.getXLenVT();
9103 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9104 DAG.getConstant(0, DL, XLenVT));
9106 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9107 Mask, VL, DL, DAG, Subtarget);
9110 // Given a reduction op, this function returns the matching reduction opcode,
9111 // the vector SDValue and the scalar SDValue required to lower this to a
9112 // RISCVISD node.
9113 static std::tuple<unsigned, SDValue, SDValue>
9114 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9115 const RISCVSubtarget &Subtarget) {
9116 SDLoc DL(Op);
9117 auto Flags = Op->getFlags();
9118 unsigned Opcode = Op.getOpcode();
9119 switch (Opcode) {
9120 default:
9121 llvm_unreachable("Unhandled reduction");
9122 case ISD::VECREDUCE_FADD: {
9123 // Use positive zero if we can. It is cheaper to materialize.
9124 SDValue Zero =
9125 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9126 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9128 case ISD::VECREDUCE_SEQ_FADD:
9129 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9130 Op.getOperand(0));
9131 case ISD::VECREDUCE_FMIN:
9132 case ISD::VECREDUCE_FMAX: {
9133 MVT XLenVT = Subtarget.getXLenVT();
9134 SDValue Front =
9135 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9136 DAG.getConstant(0, DL, XLenVT));
9137 unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
9138 ? RISCVISD::VECREDUCE_FMIN_VL
9139 : RISCVISD::VECREDUCE_FMAX_VL;
9140 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
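// Lower floating-point VECREDUCE_* nodes using the RVV opcode and scalar
// start value chosen by getRVVFPReductionOpAndOperands.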
9145 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9146 SelectionDAG &DAG) const {
9147 SDLoc DL(Op);
9148 MVT VecEltVT = Op.getSimpleValueType();
9150 unsigned RVVOpcode;
9151 SDValue VectorVal, ScalarVal;
9152 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9153 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9154 MVT VecVT = VectorVal.getSimpleValueType();
9156 MVT ContainerVT = VecVT;
9157 if (VecVT.isFixedLengthVector()) {
9158 ContainerVT = getContainerForFixedLengthVector(VecVT);
9159 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9162 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9163 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
9164 VectorVal, Mask, VL, DL, DAG, Subtarget);
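// Lower ISD::VP_REDUCE_* nodes; the start value, mask and EVL come straight
// from the VP operands.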
9167 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9168 SelectionDAG &DAG) const {
9169 SDLoc DL(Op);
9170 SDValue Vec = Op.getOperand(1);
9171 EVT VecEVT = Vec.getValueType();
9173 // TODO: The type may need to be widened rather than split. Or widened before
9174 // it can be split.
9175 if (!isTypeLegal(VecEVT))
9176 return SDValue();
9178 MVT VecVT = VecEVT.getSimpleVT();
9179 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9181 if (VecVT.isFixedLengthVector()) {
9182 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9183 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9186 SDValue VL = Op.getOperand(3);
9187 SDValue Mask = Op.getOperand(2);
9188 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9189 Vec, Mask, VL, DL, DAG, Subtarget);
9192 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9193 SelectionDAG &DAG) const {
9194 SDValue Vec = Op.getOperand(0);
9195 SDValue SubVec = Op.getOperand(1);
9196 MVT VecVT = Vec.getSimpleValueType();
9197 MVT SubVecVT = SubVec.getSimpleValueType();
9199 SDLoc DL(Op);
9200 MVT XLenVT = Subtarget.getXLenVT();
9201 unsigned OrigIdx = Op.getConstantOperandVal(2);
9202 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9204 // We don't have the ability to slide mask vectors up indexed by their i1
9205 // elements; the smallest we can do is i8. Often we are able to bitcast to
9206 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9207 // into a scalable one, we might not necessarily have enough scalable
9208 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9209 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9210 (OrigIdx != 0 || !Vec.isUndef())) {
9211 if (VecVT.getVectorMinNumElements() >= 8 &&
9212 SubVecVT.getVectorMinNumElements() >= 8) {
9213 assert(OrigIdx % 8 == 0 && "Invalid index");
9214 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9215 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9216 "Unexpected mask vector lowering");
9217 OrigIdx /= 8;
9218 SubVecVT =
9219 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9220 SubVecVT.isScalableVector());
9221 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9222 VecVT.isScalableVector());
9223 Vec = DAG.getBitcast(VecVT, Vec);
9224 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9225 } else {
9226 // We can't slide this mask vector up indexed by its i1 elements.
9227 // This poses a problem when we wish to insert a scalable vector which
9228 // can't be re-expressed as a larger type. Just choose the slow path and
9229 // extend to a larger type, then truncate back down.
9230 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9231 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9232 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9233 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9234 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9235 Op.getOperand(2));
9236 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9237 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9241 // If the subvector is a fixed-length type, we cannot use subregister
9242 // manipulation to simplify the codegen; we don't know which register of an
9243 // LMUL group contains the specific subvector, as we only know the minimum
9244 // register size. Therefore we must slide the vector group up the full
9245 // amount.
9246 if (SubVecVT.isFixedLengthVector()) {
9247 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9248 return Op;
9249 MVT ContainerVT = VecVT;
9250 if (VecVT.isFixedLengthVector()) {
9251 ContainerVT = getContainerForFixedLengthVector(VecVT);
9252 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9255 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9256 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9257 DAG.getUNDEF(ContainerVT), SubVec,
9258 DAG.getConstant(0, DL, XLenVT));
9259 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9260 return DAG.getBitcast(Op.getValueType(), SubVec);
9263 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9264 DAG.getUNDEF(ContainerVT), SubVec,
9265 DAG.getConstant(0, DL, XLenVT));
9266 SDValue Mask =
9267 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9268 // Set the vector length to only the number of elements we care about. Note
9269 // that for slideup this includes the offset.
9270 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9271 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9273 // Use tail agnostic policy if we're inserting over Vec's tail.
9274 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9275 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9276 Policy = RISCVII::TAIL_AGNOSTIC;
9278 // If we're inserting into the lowest elements, use a tail undisturbed
9279 // vmv.v.v.
9280 if (OrigIdx == 0) {
9281 SubVec =
9282 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9283 } else {
9284 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9285 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9286 SlideupAmt, Mask, VL, Policy);
9289 if (VecVT.isFixedLengthVector())
9290 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9291 return DAG.getBitcast(Op.getValueType(), SubVec);
9294 unsigned SubRegIdx, RemIdx;
9295 std::tie(SubRegIdx, RemIdx) =
9296 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9297 VecVT, SubVecVT, OrigIdx, TRI);
9299 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9300 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9301 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9302 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9304 // 1. If the Idx has been completely eliminated and this subvector's size is
9305 // a vector register or a multiple thereof, or the surrounding elements are
9306 // undef, then this is a subvector insert which naturally aligns to a vector
9307 // register. These can easily be handled using subregister manipulation.
9308 // 2. If the subvector is smaller than a vector register, then the insertion
9309 // must preserve the undisturbed elements of the register. We do this by
9310 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9311 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9312 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9313 // LMUL=1 type back into the larger vector (resolving to another subregister
9314 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9315 // to avoid allocating a large register group to hold our subvector.
9316 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9317 return Op;
9319 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, setting elements
9320 // OFFSET<=i<VL to the "subvector", and handling elements VL<=i<VLMAX per the tail policy
9321 // (in our case undisturbed). This means we can set up a subvector insertion
9322 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9323 // size of the subvector.
9324 MVT InterSubVT = VecVT;
9325 SDValue AlignedExtract = Vec;
9326 unsigned AlignedIdx = OrigIdx - RemIdx;
9327 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9328 InterSubVT = getLMUL1VT(VecVT);
9329 // Extract a subvector equal to the nearest full vector register type. This
9330 // should resolve to an EXTRACT_SUBREG instruction.
9331 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9332 DAG.getConstant(AlignedIdx, DL, XLenVT));
9335 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9336 DAG.getUNDEF(InterSubVT), SubVec,
9337 DAG.getConstant(0, DL, XLenVT));
9339 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9341 VL = computeVLMax(SubVecVT, DL, DAG);
9343 // If we're inserting into the lowest elements, use a tail undisturbed
9344 // vmv.v.v.
9345 if (RemIdx == 0) {
9346 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9347 SubVec, VL);
9348 } else {
9349 SDValue SlideupAmt =
9350 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9352 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9353 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9355 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9356 SlideupAmt, Mask, VL);
9359 // If required, insert this subvector back into the correct vector register.
9360 // This should resolve to an INSERT_SUBREG instruction.
9361 if (VecVT.bitsGT(InterSubVT))
9362 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9363 DAG.getConstant(AlignedIdx, DL, XLenVT));
9365 // We might have bitcast from a mask type: cast back to the original type if
9366 // required.
9367 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9370 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9371 SelectionDAG &DAG) const {
9372 SDValue Vec = Op.getOperand(0);
9373 MVT SubVecVT = Op.getSimpleValueType();
9374 MVT VecVT = Vec.getSimpleValueType();
9376 SDLoc DL(Op);
9377 MVT XLenVT = Subtarget.getXLenVT();
9378 unsigned OrigIdx = Op.getConstantOperandVal(1);
9379 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9381 // We don't have the ability to slide mask vectors down indexed by their i1
9382 // elements; the smallest we can do is i8. Often we are able to bitcast to
9383 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9384 // from a scalable one, we might not necessarily have enough scalable
9385 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9386 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9387 if (VecVT.getVectorMinNumElements() >= 8 &&
9388 SubVecVT.getVectorMinNumElements() >= 8) {
9389 assert(OrigIdx % 8 == 0 && "Invalid index");
9390 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9391 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9392 "Unexpected mask vector lowering");
9393 OrigIdx /= 8;
9394 SubVecVT =
9395 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9396 SubVecVT.isScalableVector());
9397 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9398 VecVT.isScalableVector());
9399 Vec = DAG.getBitcast(VecVT, Vec);
9400 } else {
9401 // We can't slide this mask vector down, indexed by its i1 elements.
9402 // This poses a problem when we wish to extract a scalable vector which
9403 // can't be re-expressed as a larger type. Just choose the slow path and
9404 // extend to a larger type, then truncate back down.
9405 // TODO: We could probably improve this when extracting certain fixed-length
9406 // vectors from fixed-length vectors, where we can extract as i8 and shift the
9407 // correct element right to reach the desired subvector.
9408 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9409 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9410 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9411 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9412 Op.getOperand(1));
9413 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9414 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9418 // With an index of 0 this is a cast-like subvector, which can be performed
9419 // with subregister operations.
9420 if (OrigIdx == 0)
9421 return Op;
9423 // If the subvector is a fixed-length type, we cannot use subregister
9424 // manipulation to simplify the codegen; we don't know which register of an
9425 // LMUL group contains the specific subvector, as we only know the minimum
9426 // register size. Therefore we must slide the vector group down the full
9427 // amount.
9428 if (SubVecVT.isFixedLengthVector()) {
9429 MVT ContainerVT = VecVT;
9430 if (VecVT.isFixedLengthVector()) {
9431 ContainerVT = getContainerForFixedLengthVector(VecVT);
9432 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9435 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9436 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9437 if (auto ShrunkVT =
9438 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9439 ContainerVT = *ShrunkVT;
9440 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9441 DAG.getVectorIdxConstant(0, DL));
9444 SDValue Mask =
9445 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9446 // Set the vector length to only the number of elements we care about. This
9447 // avoids sliding down elements we're going to discard straight away.
9448 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9449 Subtarget);
9450 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9451 SDValue Slidedown =
9452 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9453 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9454 // Now we can use a cast-like subvector extract to get the result.
9455 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9456 DAG.getConstant(0, DL, XLenVT));
9457 return DAG.getBitcast(Op.getValueType(), Slidedown);
9460 unsigned SubRegIdx, RemIdx;
9461 std::tie(SubRegIdx, RemIdx) =
9462 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9463 VecVT, SubVecVT, OrigIdx, TRI);
9465 // If the Idx has been completely eliminated then this is a subvector extract
9466 // which naturally aligns to a vector register. These can easily be handled
9467 // using subregister manipulation.
9468 if (RemIdx == 0)
9469 return Op;
9471 // Else SubVecVT is a fractional LMUL and may need to be slid down.
9472 assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);
9474 // If the vector type is an LMUL-group type, extract a subvector equal to the
9475 // nearest full vector register type.
9476 MVT InterSubVT = VecVT;
9477 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9478 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
9479 // we should have successfully decomposed the extract into a subregister.
9480 assert(SubRegIdx != RISCV::NoSubRegister);
9481 InterSubVT = getLMUL1VT(VecVT);
9482 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
9485 // Slide this vector register down by the desired number of elements in order
9486 // to place the desired subvector starting at element 0.
9487 SDValue SlidedownAmt =
9488 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9490 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
9491 SDValue Slidedown =
9492 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
9493 Vec, SlidedownAmt, Mask, VL);
9495 // Now the vector is in the right position; extract our final subvector. This
9496 // should resolve to a COPY.
9497 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9498 DAG.getConstant(0, DL, XLenVT));
9500 // We might have bitcast from a mask type: cast back to the original type if
9501 // required.
9502 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
9505 // Widen a vector's operands to i8, then truncate its results back to the
9506 // original type, typically i1. All operand and result types must be the same.
9507 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
9508 SelectionDAG &DAG) {
9509 MVT VT = N.getSimpleValueType();
9510 MVT WideVT = VT.changeVectorElementType(MVT::i8);
9511 SmallVector<SDValue, 4> WideOps;
9512 for (SDValue Op : N->ops()) {
9513 assert(Op.getSimpleValueType() == VT &&
9514 "Operands and result must be same type");
9515 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
9518 unsigned NumVals = N->getNumValues();
9520 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
9521 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
9522 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
9523 SmallVector<SDValue, 4> TruncVals;
9524 for (unsigned I = 0; I < NumVals; I++) {
9525 TruncVals.push_back(
9526 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
9527 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
9530 if (TruncVals.size() > 1)
9531 return DAG.getMergeValues(TruncVals, DL);
9532 return TruncVals.front();
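// Lower ISD::VECTOR_DEINTERLEAVE: widen i1 elements to i8, split LMUL=8
// inputs, and otherwise concatenate the two operands and extract the even and
// odd lanes (via vnsrl when SEW < ELEN, or a vrgather otherwise).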
9535 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
9536 SelectionDAG &DAG) const {
9537 SDLoc DL(Op);
9538 MVT VecVT = Op.getSimpleValueType();
9539 MVT XLenVT = Subtarget.getXLenVT();
9541 assert(VecVT.isScalableVector() &&
9542 "vector_interleave on non-scalable vector!");
9544 // 1 bit element vectors need to be widened to e8
9545 if (VecVT.getVectorElementType() == MVT::i1)
9546 return widenVectorOpsToi8(Op, DL, DAG);
9548 // If the VT is LMUL=8, we need to split and reassemble.
9549 if (VecVT.getSizeInBits().getKnownMinValue() ==
9550 (8 * RISCV::RVVBitsPerBlock)) {
9551 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9552 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9553 EVT SplitVT = Op0Lo.getValueType();
9555 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9556 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
9557 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9558 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
9560 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9561 ResLo.getValue(0), ResHi.getValue(0));
9562 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
9563 ResHi.getValue(1));
9564 return DAG.getMergeValues({Even, Odd}, DL);
9567 // Concatenate the two vectors as one vector to deinterleave
9568 MVT ConcatVT =
9569 MVT::getVectorVT(VecVT.getVectorElementType(),
9570 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9571 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9572 Op.getOperand(0), Op.getOperand(1));
9575 // We want to operate on all lanes, so get the mask and VL for the concatenated vector
9575 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
9576 SDValue Passthru = DAG.getUNDEF(ConcatVT);
9578 // We can deinterleave through vnsrl.wi if the element type is smaller than
9579 // ELEN
9580 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9581 SDValue Even =
9582 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
9583 SDValue Odd =
9584 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
9585 return DAG.getMergeValues({Even, Odd}, DL);
9588 // For the indices, use the same SEW to avoid an extra vsetvli
9589 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
9590 // Create a vector of even indices {0, 2, 4, ...}
9591 SDValue EvenIdx =
9592 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
9593 // Create a vector of odd indices {1, 3, 5, ... }
9594 SDValue OddIdx =
9595 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
9597 // Gather the even and odd elements into two separate vectors
9598 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9599 Concat, EvenIdx, Passthru, Mask, VL);
9600 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9601 Concat, OddIdx, Passthru, Mask, VL);
9603 // Extract the result half of the gather for even and odd
9604 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
9605 DAG.getConstant(0, DL, XLenVT));
9606 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
9607 DAG.getConstant(0, DL, XLenVT));
9609 return DAG.getMergeValues({Even, Odd}, DL);
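// Lower ISD::VECTOR_INTERLEAVE: widen i1 elements to i8, split LMUL=8 inputs,
// and otherwise interleave either with vwaddu.vv/vwmaccu.vx (SEW < ELEN) or a
// vrgatherei16.vv gather of the concatenated operands.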
9612 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
9613 SelectionDAG &DAG) const {
9614 SDLoc DL(Op);
9615 MVT VecVT = Op.getSimpleValueType();
9617 assert(VecVT.isScalableVector() &&
9618 "vector_interleave on non-scalable vector!");
9620 // i1 vectors need to be widened to i8
9621 if (VecVT.getVectorElementType() == MVT::i1)
9622 return widenVectorOpsToi8(Op, DL, DAG);
9624 MVT XLenVT = Subtarget.getXLenVT();
9625 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
9627 // If the VT is LMUL=8, we need to split and reassemble.
9628 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
9629 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9630 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9631 EVT SplitVT = Op0Lo.getValueType();
9633 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9634 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
9635 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9636 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
9638 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9639 ResLo.getValue(0), ResLo.getValue(1));
9640 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9641 ResHi.getValue(0), ResHi.getValue(1));
9642 return DAG.getMergeValues({Lo, Hi}, DL);
9645 SDValue Interleaved;
9647 // If the element type is smaller than ELEN, then we can interleave with
9648 // vwaddu.vv and vwmaccu.vx
9649 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9650 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
9651 DAG, Subtarget);
9652 } else {
9653 // Otherwise, fall back to using vrgatherei16.vv
9654 MVT ConcatVT =
9655 MVT::getVectorVT(VecVT.getVectorElementType(),
9656 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9657 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9658 Op.getOperand(0), Op.getOperand(1));
9660 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
9662 // 0 1 2 3 4 5 6 7 ...
9663 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
9665 // 1 1 1 1 1 1 1 1 ...
9666 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
9668 // 1 0 1 0 1 0 1 0 ...
9669 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
9670 OddMask = DAG.getSetCC(
9671 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
9672 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
9673 ISD::CondCode::SETNE);
9675 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
9677 // Build up the index vector for interleaving the concatenated vector
9678 // 0 0 1 1 2 2 3 3 ...
9679 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
9680 // 0 n 1 n+1 2 n+2 3 n+3 ...
9681 Idx =
9682 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
9684 // Then perform the interleave
9685 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
9686 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
9687 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
9688 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
9691 // Extract the two halves from the interleaved result
9692 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9693 DAG.getVectorIdxConstant(0, DL));
9694 SDValue Hi = DAG.getNode(
9695 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9696 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
9698 return DAG.getMergeValues({Lo, Hi}, DL);
9701 // Lower step_vector to the vid instruction. Any non-identity step value must
9702 // be accounted for by manual expansion.
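// For example, a step of 4 lowers to (shl (vid), 2) and a step of 3 lowers
// to (mul (vid), 3).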
9703 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
9704 SelectionDAG &DAG) const {
9705 SDLoc DL(Op);
9706 MVT VT = Op.getSimpleValueType();
9707 assert(VT.isScalableVector() && "Expected scalable vector");
9708 MVT XLenVT = Subtarget.getXLenVT();
9709 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
9710 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9711 uint64_t StepValImm = Op.getConstantOperandVal(0);
9712 if (StepValImm != 1) {
9713 if (isPowerOf2_64(StepValImm)) {
9714 SDValue StepVal =
9715 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9716 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
9717 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
9718 } else {
9719 SDValue StepVal = lowerScalarSplat(
9720 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
9721 VL, VT, DL, DAG, Subtarget);
9722 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
9725 return StepVec;
9728 // Implement vector_reverse using vrgather.vv with indices determined by
9729 // subtracting the id of each element from (VLMAX-1). This will convert
9730 // the indices like so:
9731 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
9732 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
9733 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
9734 SelectionDAG &DAG) const {
9735 SDLoc DL(Op);
9736 MVT VecVT = Op.getSimpleValueType();
9737 if (VecVT.getVectorElementType() == MVT::i1) {
9738 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9739 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
9740 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
9741 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
9743 unsigned EltSize = VecVT.getScalarSizeInBits();
9744 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
9745 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
9746 unsigned MaxVLMAX =
9747 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
9749 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
9750 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
9752 // If this is SEW=8 and VLMAX is potentially more than 256, we need
9753 // to use vrgatherei16.vv.
9754 // TODO: It's also possible to use vrgatherei16.vv for other types to
9755 // decrease register width for the index calculation.
9756 if (MaxVLMAX > 256 && EltSize == 8) {
9757 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
9758 // Reverse each half, then reassemble them in reverse order.
9759 // NOTE: It's also possible that, after splitting, VLMAX no longer
9760 // requires vrgatherei16.vv.
9761 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
9762 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9763 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
9764 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
9765 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
9766 // Reassemble the low and high pieces reversed.
9767 // FIXME: This is a CONCAT_VECTORS.
9768 SDValue Res =
9769 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
9770 DAG.getIntPtrConstant(0, DL));
9771 return DAG.getNode(
9772 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
9773 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
9776 // Just promote the int type to i16 which will double the LMUL.
9777 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
9778 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
9781 MVT XLenVT = Subtarget.getXLenVT();
9782 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9784 // Calculate VLMAX-1 for the desired SEW.
9785 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
9786 computeVLMax(VecVT, DL, DAG),
9787 DAG.getConstant(1, DL, XLenVT));
9789 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
9790 bool IsRV32E64 =
9791 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
9792 SDValue SplatVL;
9793 if (!IsRV32E64)
9794 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
9795 else
9796 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
9797 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
9799 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
9800 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
9801 DAG.getUNDEF(IntVT), Mask, VL);
9803 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
9804 DAG.getUNDEF(VecVT), Mask, VL);
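// Lower ISD::VECTOR_SPLICE as a vslidedown of the first operand followed by a
// vslideup that inserts the second operand into the vacated tail.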
9807 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
9808 SelectionDAG &DAG) const {
9809 SDLoc DL(Op);
9810 SDValue V1 = Op.getOperand(0);
9811 SDValue V2 = Op.getOperand(1);
9812 MVT XLenVT = Subtarget.getXLenVT();
9813 MVT VecVT = Op.getSimpleValueType();
9815 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
9817 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
9818 SDValue DownOffset, UpOffset;
9819 if (ImmValue >= 0) {
9820 // The operand is a TargetConstant; we need to rebuild it as a regular
9821 // constant.
9822 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
9823 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
9824 } else {
9825 // The operand is a TargetConstant; we need to rebuild it as a regular
9826 // constant rather than negating the original operand.
9827 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
9828 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
9831 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
9833 SDValue SlideDown =
9834 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
9835 DownOffset, TrueMask, UpOffset);
9836 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
9837 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
9838 RISCVII::TAIL_AGNOSTIC);
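// Lower a fixed-length vector load to RVV: use a plain whole-register load
// when the exact VLEN is known and the fixed vector fills its container,
// otherwise emit a vle/vlm intrinsic with an explicit VL.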
9841 SDValue
9842 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
9843 SelectionDAG &DAG) const {
9844 SDLoc DL(Op);
9845 auto *Load = cast<LoadSDNode>(Op);
9847 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9848 Load->getMemoryVT(),
9849 *Load->getMemOperand()) &&
9850 "Expecting a correctly-aligned load");
9852 MVT VT = Op.getSimpleValueType();
9853 MVT XLenVT = Subtarget.getXLenVT();
9854 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9856 // If we know the exact VLEN and our fixed length vector completely fills
9857 // the container, use a whole register load instead.
9858 const auto [MinVLMAX, MaxVLMAX] =
9859 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
9860 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
9861 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
9862 SDValue NewLoad =
9863 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
9864 Load->getMemOperand());
9865 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
9866 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
9869 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
9871 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
9872 SDValue IntID = DAG.getTargetConstant(
9873 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
9874 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
9875 if (!IsMaskOp)
9876 Ops.push_back(DAG.getUNDEF(ContainerVT));
9877 Ops.push_back(Load->getBasePtr());
9878 Ops.push_back(VL);
9879 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9880 SDValue NewLoad =
9881 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9882 Load->getMemoryVT(), Load->getMemOperand());
9884 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
9885 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
9888 SDValue
9889 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
9890 SelectionDAG &DAG) const {
9891 SDLoc DL(Op);
9892 auto *Store = cast<StoreSDNode>(Op);
9894 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9895 Store->getMemoryVT(),
9896 *Store->getMemOperand()) &&
9897 "Expecting a correctly-aligned store");
9899 SDValue StoreVal = Store->getValue();
9900 MVT VT = StoreVal.getSimpleValueType();
9901 MVT XLenVT = Subtarget.getXLenVT();
9903 // If the size is less than a byte, we need to pad with zeros to make a byte.
9904 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
9905 VT = MVT::v8i1;
9906 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
9907 DAG.getConstant(0, DL, VT), StoreVal,
9908 DAG.getIntPtrConstant(0, DL));
9911 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9913 SDValue NewValue =
9914 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
9917 // If we know the exact VLEN and our fixed length vector completely fills
9918 // the container, use a whole register store instead.
9919 const auto [MinVLMAX, MaxVLMAX] =
9920 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
9921 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
9922 getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
9923 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
9924 Store->getMemOperand());
9926 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9927 Subtarget);
9929 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
9930 SDValue IntID = DAG.getTargetConstant(
9931 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
9932 return DAG.getMemIntrinsicNode(
9933 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
9934 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
9935 Store->getMemoryVT(), Store->getMemOperand());
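// Lower ISD::MLOAD and ISD::VP_LOAD to the riscv_vle/riscv_vle_mask
// intrinsics, dropping the mask when it is known to be all ones.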
9938 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
9939 SelectionDAG &DAG) const {
9940 SDLoc DL(Op);
9941 MVT VT = Op.getSimpleValueType();
9943 const auto *MemSD = cast<MemSDNode>(Op);
9944 EVT MemVT = MemSD->getMemoryVT();
9945 MachineMemOperand *MMO = MemSD->getMemOperand();
9946 SDValue Chain = MemSD->getChain();
9947 SDValue BasePtr = MemSD->getBasePtr();
9949 SDValue Mask, PassThru, VL;
9950 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
9951 Mask = VPLoad->getMask();
9952 PassThru = DAG.getUNDEF(VT);
9953 VL = VPLoad->getVectorLength();
9954 } else {
9955 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
9956 Mask = MLoad->getMask();
9957 PassThru = MLoad->getPassThru();
9960 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9962 MVT XLenVT = Subtarget.getXLenVT();
9964 MVT ContainerVT = VT;
9965 if (VT.isFixedLengthVector()) {
9966 ContainerVT = getContainerForFixedLengthVector(VT);
9967 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9968 if (!IsUnmasked) {
9969 MVT MaskVT = getMaskTypeFor(ContainerVT);
9970 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9974 if (!VL)
9975 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9977 unsigned IntID =
9978 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
9979 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9980 if (IsUnmasked)
9981 Ops.push_back(DAG.getUNDEF(ContainerVT));
9982 else
9983 Ops.push_back(PassThru);
9984 Ops.push_back(BasePtr);
9985 if (!IsUnmasked)
9986 Ops.push_back(Mask);
9987 Ops.push_back(VL);
9988 if (!IsUnmasked)
9989 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
9991 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9993 SDValue Result =
9994 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
9995 Chain = Result.getValue(1);
9997 if (VT.isFixedLengthVector())
9998 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10000 return DAG.getMergeValues({Result, Chain}, DL);
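// Lower ISD::MSTORE and ISD::VP_STORE to the riscv_vse/riscv_vse_mask
// intrinsics, dropping the mask when it is known to be all ones.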
10003 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10004 SelectionDAG &DAG) const {
10005 SDLoc DL(Op);
10007 const auto *MemSD = cast<MemSDNode>(Op);
10008 EVT MemVT = MemSD->getMemoryVT();
10009 MachineMemOperand *MMO = MemSD->getMemOperand();
10010 SDValue Chain = MemSD->getChain();
10011 SDValue BasePtr = MemSD->getBasePtr();
10012 SDValue Val, Mask, VL;
10014 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10015 Val = VPStore->getValue();
10016 Mask = VPStore->getMask();
10017 VL = VPStore->getVectorLength();
10018 } else {
10019 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10020 Val = MStore->getValue();
10021 Mask = MStore->getMask();
10024 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10026 MVT VT = Val.getSimpleValueType();
10027 MVT XLenVT = Subtarget.getXLenVT();
10029 MVT ContainerVT = VT;
10030 if (VT.isFixedLengthVector()) {
10031 ContainerVT = getContainerForFixedLengthVector(VT);
10033 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10034 if (!IsUnmasked) {
10035 MVT MaskVT = getMaskTypeFor(ContainerVT);
10036 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10040 if (!VL)
10041 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10043 unsigned IntID =
10044 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10045 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10046 Ops.push_back(Val);
10047 Ops.push_back(BasePtr);
10048 if (!IsUnmasked)
10049 Ops.push_back(Mask);
10050 Ops.push_back(VL);
10052 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10053 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10056 SDValue
10057 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10058 SelectionDAG &DAG) const {
10059 MVT InVT = Op.getOperand(0).getSimpleValueType();
10060 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10062 MVT VT = Op.getSimpleValueType();
10064 SDValue Op1 =
10065 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10066 SDValue Op2 =
10067 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10069 SDLoc DL(Op);
10070 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10071 DAG, Subtarget);
10072 MVT MaskVT = getMaskTypeFor(ContainerVT);
10074 SDValue Cmp =
10075 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10076 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10078 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10081 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10082 SelectionDAG &DAG) const {
10083 unsigned Opc = Op.getOpcode();
10084 SDLoc DL(Op);
10085 SDValue Chain = Op.getOperand(0);
10086 SDValue Op1 = Op.getOperand(1);
10087 SDValue Op2 = Op.getOperand(2);
10088 SDValue CC = Op.getOperand(3);
10089 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10090 MVT VT = Op.getSimpleValueType();
10091 MVT InVT = Op1.getSimpleValueType();
10093 // RVV VMFEQ/VMFNE do not raise an exception for qNaN, so we expand
10094 // strict_fsetccs with the OEQ/UNE condition codes.
10095 if (Opc == ISD::STRICT_FSETCCS) {
10096 // Expand strict_fsetccs(x, oeq) to
10097 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10098 SDVTList VTList = Op->getVTList();
10099 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10100 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10101 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10102 Op2, OLECCVal);
10103 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10104 Op1, OLECCVal);
10105 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10106 Tmp1.getValue(1), Tmp2.getValue(1));
10107 // Tmp1 and Tmp2 might be the same node.
10108 if (Tmp1 != Tmp2)
10109 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10110 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10113 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10114 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10115 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10116 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10117 Op2, OEQCCVal);
10118 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10119 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10123 MVT ContainerInVT = InVT;
10124 if (InVT.isFixedLengthVector()) {
10125 ContainerInVT = getContainerForFixedLengthVector(InVT);
10126 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10127 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10129 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10131 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10133 SDValue Res;
10134 if (Opc == ISD::STRICT_FSETCC &&
10135 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10136 CCVal == ISD::SETOLE)) {
10137 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10138 // is only active when both input elements are ordered.
10139 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10140 SDValue OrderMask1 = DAG.getNode(
10141 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10142 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10143 True, VL});
10144 SDValue OrderMask2 = DAG.getNode(
10145 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10146 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10147 True, VL});
10148 Mask =
10149 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10150 // Use Mask as the merge operand to let the result be 0 if either of the
10151 // inputs is unordered.
10152 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10153 DAG.getVTList(MaskVT, MVT::Other),
10154 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10155 } else {
10156 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10157 : RISCVISD::STRICT_FSETCCS_VL;
10158 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10159 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10162 if (VT.isFixedLengthVector()) {
10163 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10164 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10166 return Res;
10169 // Lower vector ABS to smax(X, sub(0, X)).
10170 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10171 SDLoc DL(Op);
10172 MVT VT = Op.getSimpleValueType();
10173 SDValue X = Op.getOperand(0);
10175 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10176 "Unexpected type for ISD::ABS");
10178 MVT ContainerVT = VT;
10179 if (VT.isFixedLengthVector()) {
10180 ContainerVT = getContainerForFixedLengthVector(VT);
10181 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10184 SDValue Mask, VL;
10185 if (Op->getOpcode() == ISD::VP_ABS) {
10186 Mask = Op->getOperand(1);
10187 if (VT.isFixedLengthVector())
10188 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10189 Subtarget);
10190 VL = Op->getOperand(2);
10191 } else
10192 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10194 SDValue SplatZero = DAG.getNode(
10195 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10196 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10197 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10198 DAG.getUNDEF(ContainerVT), Mask, VL);
10199 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10200 DAG.getUNDEF(ContainerVT), Mask, VL);
10202 if (VT.isFixedLengthVector())
10203 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10204 return Max;
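// Editorial sketch (not from this file): the smax-based identity used above,
// written out on a plain int. AbsViaSmax is an illustrative name; as with the
// vector form, INT_MIN is left alone (negating it is not exercised here).
namespace {
constexpr int AbsViaSmax(int X) { return X > -X ? X : -X; } // smax(X, 0 - X)
static_assert(AbsViaSmax(7) == 7 && AbsViaSmax(-7) == 7 && AbsViaSmax(0) == 0,
              "abs(x) == smax(x, 0 - x)");
} // namespace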
10207 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10208 SDValue Op, SelectionDAG &DAG) const {
10209 SDLoc DL(Op);
10210 MVT VT = Op.getSimpleValueType();
10211 SDValue Mag = Op.getOperand(0);
10212 SDValue Sign = Op.getOperand(1);
10213 assert(Mag.getValueType() == Sign.getValueType() &&
10214 "Can only handle COPYSIGN with matching types.");
10216 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10217 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10218 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10220 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10222 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10223 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10225 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10228 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10229 SDValue Op, SelectionDAG &DAG) const {
10230 MVT VT = Op.getSimpleValueType();
10231 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10233 MVT I1ContainerVT =
10234 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10236 SDValue CC =
10237 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10238 SDValue Op1 =
10239 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10240 SDValue Op2 =
10241 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10243 SDLoc DL(Op);
10244 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10246 SDValue Select =
10247 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
10249 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10252 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10253 SelectionDAG &DAG) const {
10254 unsigned NewOpc = getRISCVVLOp(Op);
10255 bool HasMergeOp = hasMergeOp(NewOpc);
10256 bool HasMask = hasMaskOp(NewOpc);
10258 MVT VT = Op.getSimpleValueType();
10259 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10261 // Create list of operands by converting existing ones to scalable types.
10262 SmallVector<SDValue, 6> Ops;
10263 for (const SDValue &V : Op->op_values()) {
10264 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10266 // Pass through non-vector operands.
10267 if (!V.getValueType().isVector()) {
10268 Ops.push_back(V);
10269 continue;
10272 // "cast" fixed length vector to a scalable vector.
10273 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10274 "Only fixed length vectors are supported!");
10275 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10278 SDLoc DL(Op);
10279 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10280 if (HasMergeOp)
10281 Ops.push_back(DAG.getUNDEF(ContainerVT));
10282 if (HasMask)
10283 Ops.push_back(Mask);
10284 Ops.push_back(VL);
10286 // StrictFP operations have two result values. Their lowered form should
10287 // have the same number of results.
10288 if (Op->isStrictFPOpcode()) {
10289 SDValue ScalableRes =
10290 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10291 Op->getFlags());
10292 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10293 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10296 SDValue ScalableRes =
10297 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10298 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10301 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10302 // * Operands of each node are assumed to be in the same order.
10303 // * The EVL operand is promoted from i32 to i64 on RV64.
10304 // * Fixed-length vectors are converted to their scalable-vector container
10305 // types.
10306 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10307 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10308 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10310 SDLoc DL(Op);
10311 MVT VT = Op.getSimpleValueType();
10312 SmallVector<SDValue, 4> Ops;
10314 MVT ContainerVT = VT;
10315 if (VT.isFixedLengthVector())
10316 ContainerVT = getContainerForFixedLengthVector(VT);
10318 for (const auto &OpIdx : enumerate(Op->ops())) {
10319 SDValue V = OpIdx.value();
10320 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10321 // Add a dummy merge value before the mask or, if there isn't a mask, before
10322 // the EVL.
10323 if (HasMergeOp) {
10324 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10325 if (MaskIdx) {
10326 if (*MaskIdx == OpIdx.index())
10327 Ops.push_back(DAG.getUNDEF(ContainerVT));
10328 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10329 OpIdx.index()) {
10330 // For VP_MERGE, copy the false operand instead of an undef value.
10331 assert(Op.getOpcode() == ISD::VP_MERGE);
10332 Ops.push_back(Ops.back());
10335 // Pass through operands which aren't fixed-length vectors.
10336 if (!V.getValueType().isFixedLengthVector()) {
10337 Ops.push_back(V);
10338 continue;
10340 // "cast" fixed length vector to a scalable vector.
10341 MVT OpVT = V.getSimpleValueType();
10342 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10343 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10344 "Only fixed length vectors are supported!");
10345 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10348 if (!VT.isFixedLengthVector())
10349 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10351 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10353 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10356 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10357 SelectionDAG &DAG) const {
10358 SDLoc DL(Op);
10359 MVT VT = Op.getSimpleValueType();
10361 SDValue Src = Op.getOperand(0);
10362 // NOTE: Mask is dropped.
10363 SDValue VL = Op.getOperand(2);
10365 MVT ContainerVT = VT;
10366 if (VT.isFixedLengthVector()) {
10367 ContainerVT = getContainerForFixedLengthVector(VT);
10368 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10369 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10372 MVT XLenVT = Subtarget.getXLenVT();
10373 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10374 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10375 DAG.getUNDEF(ContainerVT), Zero, VL);
10377 SDValue SplatValue = DAG.getConstant(
10378 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10379 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10380 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10382 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
10383 Splat, ZeroSplat, VL);
10384 if (!VT.isFixedLengthVector())
10385 return Result;
10386 return convertFromScalableVector(VT, Result, DAG, Subtarget);
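// Editorial sketch (not from this file): scalar model of the lowering above.
// Extending an i1 is just a select between 0 and either 1 (zero-extend) or
// -1 (sign-extend); ExtendMaskBit is an illustrative name.
namespace {
constexpr int ExtendMaskBit(bool B, bool IsZExt) {
  return B ? (IsZExt ? 1 : -1) : 0; // vselect(Src, Splat, ZeroSplat)
}
static_assert(ExtendMaskBit(true, /*IsZExt=*/true) == 1 &&
              ExtendMaskBit(true, /*IsZExt=*/false) == -1 &&
              ExtendMaskBit(false, /*IsZExt=*/true) == 0,
              "i1 zext/sext as a select of splats");
} // namespace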
10389 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10390 SelectionDAG &DAG) const {
10391 SDLoc DL(Op);
10392 MVT VT = Op.getSimpleValueType();
10394 SDValue Op1 = Op.getOperand(0);
10395 SDValue Op2 = Op.getOperand(1);
10396 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10397 // NOTE: Mask is dropped.
10398 SDValue VL = Op.getOperand(4);
10400 MVT ContainerVT = VT;
10401 if (VT.isFixedLengthVector()) {
10402 ContainerVT = getContainerForFixedLengthVector(VT);
10403 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10404 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10407 SDValue Result;
10408 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10410 switch (Condition) {
10411 default:
10412 break;
10413 // X != Y --> (X^Y)
10414 case ISD::SETNE:
10415 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10416 break;
10417 // X == Y --> ~(X^Y)
10418 case ISD::SETEQ: {
10419 SDValue Temp =
10420 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10421 Result =
10422 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
10423 break;
10425 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
10426 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
10427 case ISD::SETGT:
10428 case ISD::SETULT: {
10429 SDValue Temp =
10430 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10431 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
10432 break;
10434 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
10435 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
10436 case ISD::SETLT:
10437 case ISD::SETUGT: {
10438 SDValue Temp =
10439 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10440 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
10441 break;
10443 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
10444 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
10445 case ISD::SETGE:
10446 case ISD::SETULE: {
10447 SDValue Temp =
10448 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10449 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
10450 break;
10452 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
10453 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
10454 case ISD::SETLE:
10455 case ISD::SETUGE: {
10456 SDValue Temp =
10457 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10458 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
10459 break;
10463 if (!VT.isFixedLengthVector())
10464 return Result;
10465 return convertFromScalableVector(VT, Result, DAG, Subtarget);
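// Editorial sketch (not from this file): the i1 compare identities used above,
// checked on plain bools where a set bit stands for the i1 value -1 (so the
// signed order is -1 < 0). SGT and SGE are illustrative names.
namespace {
constexpr bool SGT(bool X, bool Y) { return !X && Y; } // X >s Y  == ~X & Y
constexpr bool SGE(bool X, bool Y) { return !X || Y; } // X >=s Y == ~X | Y
static_assert(SGT(false, true) && !SGT(false, false) && !SGT(true, true) &&
              !SGT(true, false), "X >s Y on i1 is ~X & Y");
static_assert(SGE(false, true) && SGE(false, false) && SGE(true, true) &&
              !SGE(true, false), "X >=s Y on i1 is ~X | Y");
} // namespace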
10468 // Lower Floating-Point/Integer Type-Convert VP SDNodes
10469 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
10470 SelectionDAG &DAG) const {
10471 SDLoc DL(Op);
10473 SDValue Src = Op.getOperand(0);
10474 SDValue Mask = Op.getOperand(1);
10475 SDValue VL = Op.getOperand(2);
10476 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10478 MVT DstVT = Op.getSimpleValueType();
10479 MVT SrcVT = Src.getSimpleValueType();
10480 if (DstVT.isFixedLengthVector()) {
10481 DstVT = getContainerForFixedLengthVector(DstVT);
10482 SrcVT = getContainerForFixedLengthVector(SrcVT);
10483 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10484 MVT MaskVT = getMaskTypeFor(DstVT);
10485 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10488 unsigned DstEltSize = DstVT.getScalarSizeInBits();
10489 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
10491 SDValue Result;
10492 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
10493 if (SrcVT.isInteger()) {
10494 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10496 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
10497 ? RISCVISD::VSEXT_VL
10498 : RISCVISD::VZEXT_VL;
10500 // Do we need to do any pre-widening before converting?
10501 if (SrcEltSize == 1) {
10502 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
10503 MVT XLenVT = Subtarget.getXLenVT();
10504 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10505 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10506 DAG.getUNDEF(IntVT), Zero, VL);
10507 SDValue One = DAG.getConstant(
10508 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
10509 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10510 DAG.getUNDEF(IntVT), One, VL);
10511 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
10512 ZeroSplat, VL);
10513 } else if (DstEltSize > (2 * SrcEltSize)) {
10514 // Widen before converting.
10515 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
10516 DstVT.getVectorElementCount());
10517 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
10520 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10521 } else {
10522 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10523 "Wrong input/output vector types");
10525 // Convert f16 to f32 then convert f32 to i64.
10526 if (DstEltSize > (2 * SrcEltSize)) {
10527 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10528 MVT InterimFVT =
10529 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10530 Src =
10531 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
10534 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10536 } else { // Narrowing + Conversion
10537 if (SrcVT.isInteger()) {
10538 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10539 // First do a narrowing conversion to an FP type half the size, then round
10540 // that FP type to a smaller FP type if needed.
10542 MVT InterimFVT = DstVT;
10543 if (SrcEltSize > (2 * DstEltSize)) {
10544 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
10545 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10546 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10549 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
10551 if (InterimFVT != DstVT) {
10552 Src = Result;
10553 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
10555 } else {
10556 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10557 "Wrong input/output vector types");
10558 // First do a narrowing conversion to an integer half the size, then
10559 // truncate if needed.
10561 if (DstEltSize == 1) {
10562 // First convert to the same size integer, then convert to mask using
10563 // setcc.
10564 assert(SrcEltSize >= 16 && "Unexpected FP type!");
10565 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
10566 DstVT.getVectorElementCount());
10567 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10569 // Compare the integer result to 0. The integer should be 0 or 1/-1,
10570 // otherwise the conversion was undefined.
10571 MVT XLenVT = Subtarget.getXLenVT();
10572 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10573 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
10574 DAG.getUNDEF(InterimIVT), SplatZero, VL);
10575 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
10576 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
10577 DAG.getUNDEF(DstVT), Mask, VL});
10578 } else {
10579 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10580 DstVT.getVectorElementCount());
10582 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10584 while (InterimIVT != DstVT) {
10585 SrcEltSize /= 2;
10586 Src = Result;
10587 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10588 DstVT.getVectorElementCount());
10589 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
10590 Src, Mask, VL);
10596 MVT VT = Op.getSimpleValueType();
10597 if (!VT.isFixedLengthVector())
10598 return Result;
10599 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10602 SDValue
10603 RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
10604 SelectionDAG &DAG) const {
10605 SDLoc DL(Op);
10607 SDValue Op1 = Op.getOperand(0);
10608 SDValue Op2 = Op.getOperand(1);
10609 SDValue Offset = Op.getOperand(2);
10610 SDValue Mask = Op.getOperand(3);
10611 SDValue EVL1 = Op.getOperand(4);
10612 SDValue EVL2 = Op.getOperand(5);
10614 const MVT XLenVT = Subtarget.getXLenVT();
10615 MVT VT = Op.getSimpleValueType();
10616 MVT ContainerVT = VT;
10617 if (VT.isFixedLengthVector()) {
10618 ContainerVT = getContainerForFixedLengthVector(VT);
10619 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10620 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10621 MVT MaskVT = getMaskTypeFor(ContainerVT);
10622 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10625 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
10626 if (IsMaskVector) {
10627 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
10629 // Expand input operands
10630 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10631 DAG.getUNDEF(ContainerVT),
10632 DAG.getConstant(1, DL, XLenVT), EVL1);
10633 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10634 DAG.getUNDEF(ContainerVT),
10635 DAG.getConstant(0, DL, XLenVT), EVL1);
10636 Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op1, SplatOneOp1,
10637 SplatZeroOp1, EVL1);
10639 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10640 DAG.getUNDEF(ContainerVT),
10641 DAG.getConstant(1, DL, XLenVT), EVL2);
10642 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10643 DAG.getUNDEF(ContainerVT),
10644 DAG.getConstant(0, DL, XLenVT), EVL2);
10645 Op2 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op2, SplatOneOp2,
10646 SplatZeroOp2, EVL2);
10649 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
10650 SDValue DownOffset, UpOffset;
10651 if (ImmValue >= 0) {
10652 // The operand is a TargetConstant, we need to rebuild it as a regular
10653 // constant.
10654 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10655 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
10656 } else {
10657 // The operand is a TargetConstant, we need to rebuild it as a regular
10658 // constant rather than negating the original operand.
10659 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10660 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
10663 SDValue SlideDown =
10664 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10665 Op1, DownOffset, Mask, UpOffset);
10666 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
10667 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
10669 if (IsMaskVector) {
10670 // Truncate Result back to a mask vector (Result has same EVL as Op2)
10671 Result = DAG.getNode(
10672 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
10673 {Result, DAG.getConstant(0, DL, ContainerVT),
10674 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
10675 Mask, EVL2});
10678 if (!VT.isFixedLengthVector())
10679 return Result;
10680 return convertFromScalableVector(VT, Result, DAG, Subtarget);
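// Editorial sketch (not from this file): index model of the two slides above
// for a non-negative offset. The first EVL1 - Offset result elements come from
// the tail of Op1, the rest from the head of Op2. FromOp1 and SrcIdx are
// illustrative names.
namespace {
constexpr bool FromOp1(unsigned I, unsigned Offset, unsigned EVL1) {
  return I < EVL1 - Offset; // covered by the slidedown
}
constexpr unsigned SrcIdx(unsigned I, unsigned Offset, unsigned EVL1) {
  return FromOp1(I, Offset, EVL1) ? I + Offset : I - (EVL1 - Offset);
}
static_assert(SrcIdx(0, 2, 4) == 2 && SrcIdx(1, 2, 4) == 3 &&
              SrcIdx(2, 2, 4) == 0 && !FromOp1(2, 2, 4),
              "splice = slidedown of Op1 by Offset, then slideup of Op2");
} // namespace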
10683 SDValue
10684 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
10685 SelectionDAG &DAG) const {
10686 SDLoc DL(Op);
10687 MVT VT = Op.getSimpleValueType();
10688 MVT XLenVT = Subtarget.getXLenVT();
10690 SDValue Op1 = Op.getOperand(0);
10691 SDValue Mask = Op.getOperand(1);
10692 SDValue EVL = Op.getOperand(2);
10694 MVT ContainerVT = VT;
10695 if (VT.isFixedLengthVector()) {
10696 ContainerVT = getContainerForFixedLengthVector(VT);
10697 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10698 MVT MaskVT = getMaskTypeFor(ContainerVT);
10699 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10702 MVT GatherVT = ContainerVT;
10703 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
10704 // Check if we are working with mask vectors
10705 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
10706 if (IsMaskVector) {
10707 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
10709 // Expand input operand
10710 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
10711 DAG.getUNDEF(IndicesVT),
10712 DAG.getConstant(1, DL, XLenVT), EVL);
10713 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
10714 DAG.getUNDEF(IndicesVT),
10715 DAG.getConstant(0, DL, XLenVT), EVL);
10716 Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne,
10717 SplatZero, EVL);
10720 unsigned EltSize = GatherVT.getScalarSizeInBits();
10721 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
10722 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10723 unsigned MaxVLMAX =
10724 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10726 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10727 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
10728 // to use vrgatherei16.vv.
10729 // TODO: It's also possible to use vrgatherei16.vv for other types to
10730 // decrease register width for the index calculation.
10731 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10732 if (MaxVLMAX > 256 && EltSize == 8) {
10733 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
10734 // Split the vector in half and reverse each half using a full register
10735 // reverse.
10736 // Swap the halves and concatenate them.
10737 // Slide the concatenated result by (VLMax - VL).
10738 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10739 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
10740 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
10742 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10743 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10745 // Reassemble the low and high pieces reversed.
10746 // NOTE: this Result is unmasked (because we do not need masks for
10747 // shuffles). If in the future this has to change, we can use a SELECT_VL
10748 // between Result and UNDEF using the mask originally passed to VP_REVERSE
10749 SDValue Result =
10750 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
10752 // Slide off any elements from past EVL that were reversed into the low
10753 // elements.
10754 unsigned MinElts = GatherVT.getVectorMinNumElements();
10755 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
10756 DAG.getConstant(MinElts, DL, XLenVT));
10757 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
10759 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
10760 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
10762 if (IsMaskVector) {
10763 // Truncate Result back to a mask vector
10764 Result =
10765 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
10766 {Result, DAG.getConstant(0, DL, GatherVT),
10767 DAG.getCondCode(ISD::SETNE),
10768 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
10771 if (!VT.isFixedLengthVector())
10772 return Result;
10773 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10776 // Just promote the int type to i16 which will double the LMUL.
10777 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
10778 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10781 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
10782 SDValue VecLen =
10783 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
10784 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
10785 DAG.getUNDEF(IndicesVT), VecLen, EVL);
10786 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
10787 DAG.getUNDEF(IndicesVT), Mask, EVL);
10788 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
10789 DAG.getUNDEF(GatherVT), Mask, EVL);
10791 if (IsMaskVector) {
10792 // Truncate Result back to a mask vector
10793 Result = DAG.getNode(
10794 RISCVISD::SETCC_VL, DL, ContainerVT,
10795 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
10796 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
10799 if (!VT.isFixedLengthVector())
10800 return Result;
10801 return convertFromScalableVector(VT, Result, DAG, Subtarget);
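// Editorial sketch (not from this file): the vrgather index computed above is
// simply (EVL - 1) - vid, i.e. element i of the result is element EVL - 1 - i
// of the source, and elements at or past EVL are untouched. The ei16 variant
// is needed because an 8-bit index cannot address more than 256 elements.
// ReverseIdx is an illustrative name.
namespace {
constexpr unsigned ReverseIdx(unsigned EVL, unsigned I) { return EVL - 1 - I; }
static_assert(ReverseIdx(4, 0) == 3 && ReverseIdx(4, 3) == 0 &&
              ReverseIdx(1, 0) == 0, "vp.reverse gather indices");
} // namespace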
10804 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
10805 SelectionDAG &DAG) const {
10806 MVT VT = Op.getSimpleValueType();
10807 if (VT.getVectorElementType() != MVT::i1)
10808 return lowerVPOp(Op, DAG);
10810 // It is safe to drop the mask parameter, as masked-off elements are undef.
10811 SDValue Op1 = Op->getOperand(0);
10812 SDValue Op2 = Op->getOperand(1);
10813 SDValue VL = Op->getOperand(3);
10815 MVT ContainerVT = VT;
10816 const bool IsFixed = VT.isFixedLengthVector();
10817 if (IsFixed) {
10818 ContainerVT = getContainerForFixedLengthVector(VT);
10819 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10820 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10823 SDLoc DL(Op);
10824 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
10825 if (!IsFixed)
10826 return Val;
10827 return convertFromScalableVector(VT, Val, DAG, Subtarget);
10830 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
10831 SelectionDAG &DAG) const {
10832 SDLoc DL(Op);
10833 MVT XLenVT = Subtarget.getXLenVT();
10834 MVT VT = Op.getSimpleValueType();
10835 MVT ContainerVT = VT;
10836 if (VT.isFixedLengthVector())
10837 ContainerVT = getContainerForFixedLengthVector(VT);
10839 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10841 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
10842 // Check if the mask is known to be all ones
10843 SDValue Mask = VPNode->getMask();
10844 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10846 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
10847 : Intrinsic::riscv_vlse_mask,
10848 DL, XLenVT);
10849 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
10850 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
10851 VPNode->getStride()};
10852 if (!IsUnmasked) {
10853 if (VT.isFixedLengthVector()) {
10854 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
10855 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10857 Ops.push_back(Mask);
10859 Ops.push_back(VPNode->getVectorLength());
10860 if (!IsUnmasked) {
10861 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10862 Ops.push_back(Policy);
10865 SDValue Result =
10866 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10867 VPNode->getMemoryVT(), VPNode->getMemOperand());
10868 SDValue Chain = Result.getValue(1);
10870 if (VT.isFixedLengthVector())
10871 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10873 return DAG.getMergeValues({Result, Chain}, DL);
10876 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
10877 SelectionDAG &DAG) const {
10878 SDLoc DL(Op);
10879 MVT XLenVT = Subtarget.getXLenVT();
10881 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
10882 SDValue StoreVal = VPNode->getValue();
10883 MVT VT = StoreVal.getSimpleValueType();
10884 MVT ContainerVT = VT;
10885 if (VT.isFixedLengthVector()) {
10886 ContainerVT = getContainerForFixedLengthVector(VT);
10887 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10890 // Check if the mask is known to be all ones
10891 SDValue Mask = VPNode->getMask();
10892 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10894 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
10895 : Intrinsic::riscv_vsse_mask,
10896 DL, XLenVT);
10897 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
10898 VPNode->getBasePtr(), VPNode->getStride()};
10899 if (!IsUnmasked) {
10900 if (VT.isFixedLengthVector()) {
10901 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
10902 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10904 Ops.push_back(Mask);
10906 Ops.push_back(VPNode->getVectorLength());
10908 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
10909 Ops, VPNode->getMemoryVT(),
10910 VPNode->getMemOperand());
10913 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
10914 // matched to an RVV indexed load. The RVV indexed load instructions only
10915 // support the "unsigned unscaled" addressing mode; indices are implicitly
10916 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
10917 // signed or scaled indexing is extended to the XLEN value type and scaled
10918 // accordingly.
10919 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
10920 SelectionDAG &DAG) const {
10921 SDLoc DL(Op);
10922 MVT VT = Op.getSimpleValueType();
10924 const auto *MemSD = cast<MemSDNode>(Op.getNode());
10925 EVT MemVT = MemSD->getMemoryVT();
10926 MachineMemOperand *MMO = MemSD->getMemOperand();
10927 SDValue Chain = MemSD->getChain();
10928 SDValue BasePtr = MemSD->getBasePtr();
10930 ISD::LoadExtType LoadExtType;
10931 SDValue Index, Mask, PassThru, VL;
10933 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
10934 Index = VPGN->getIndex();
10935 Mask = VPGN->getMask();
10936 PassThru = DAG.getUNDEF(VT);
10937 VL = VPGN->getVectorLength();
10938 // VP doesn't support extending loads.
10939 LoadExtType = ISD::NON_EXTLOAD;
10940 } else {
10941 // Else it must be a MGATHER.
10942 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
10943 Index = MGN->getIndex();
10944 Mask = MGN->getMask();
10945 PassThru = MGN->getPassThru();
10946 LoadExtType = MGN->getExtensionType();
10949 MVT IndexVT = Index.getSimpleValueType();
10950 MVT XLenVT = Subtarget.getXLenVT();
10952 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
10953 "Unexpected VTs!");
10954 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
10955 // Targets have to explicitly opt-in for extending vector loads.
10956 assert(LoadExtType == ISD::NON_EXTLOAD &&
10957 "Unexpected extending MGATHER/VP_GATHER");
10958 (void)LoadExtType;
10960 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
10961 // the selection of the masked intrinsics doesn't do this for us.
10962 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10964 MVT ContainerVT = VT;
10965 if (VT.isFixedLengthVector()) {
10966 ContainerVT = getContainerForFixedLengthVector(VT);
10967 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
10968 ContainerVT.getVectorElementCount());
10970 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
10972 if (!IsUnmasked) {
10973 MVT MaskVT = getMaskTypeFor(ContainerVT);
10974 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10975 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10979 if (!VL)
10980 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10982 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
10983 IndexVT = IndexVT.changeVectorElementType(XLenVT);
10984 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
10987 unsigned IntID =
10988 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
10989 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10990 if (IsUnmasked)
10991 Ops.push_back(DAG.getUNDEF(ContainerVT));
10992 else
10993 Ops.push_back(PassThru);
10994 Ops.push_back(BasePtr);
10995 Ops.push_back(Index);
10996 if (!IsUnmasked)
10997 Ops.push_back(Mask);
10998 Ops.push_back(VL);
10999 if (!IsUnmasked)
11000 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11002 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11003 SDValue Result =
11004 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11005 Chain = Result.getValue(1);
11007 if (VT.isFixedLengthVector())
11008 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11010 return DAG.getMergeValues({Result, Chain}, DL);
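// Editorial sketch (not from this file): the "unsigned unscaled" addressing
// model described above, i.e. each index is a zero-extended byte offset that
// is not multiplied by the element size. EltAddr is an illustrative name.
namespace {
constexpr unsigned long long EltAddr(unsigned long long Base, unsigned Idx) {
  return Base + Idx; // byte offset, zero-extended to XLEN, no scaling
}
static_assert(EltAddr(0x1000, 8) == 0x1008,
              "index 8 addresses 8 bytes, not 8 elements, past Base");
} // namespace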
11013 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11014 // matched to an RVV indexed store. The RVV indexed store instructions only
11015 // support the "unsigned unscaled" addressing mode; indices are implicitly
11016 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
11017 // signed or scaled indexing is extended to the XLEN value type and scaled
11018 // accordingly.
11019 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11020 SelectionDAG &DAG) const {
11021 SDLoc DL(Op);
11022 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11023 EVT MemVT = MemSD->getMemoryVT();
11024 MachineMemOperand *MMO = MemSD->getMemOperand();
11025 SDValue Chain = MemSD->getChain();
11026 SDValue BasePtr = MemSD->getBasePtr();
11028 bool IsTruncatingStore = false;
11029 SDValue Index, Mask, Val, VL;
11031 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11032 Index = VPSN->getIndex();
11033 Mask = VPSN->getMask();
11034 Val = VPSN->getValue();
11035 VL = VPSN->getVectorLength();
11036 // VP doesn't support truncating stores.
11037 IsTruncatingStore = false;
11038 } else {
11039 // Else it must be a MSCATTER.
11040 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11041 Index = MSN->getIndex();
11042 Mask = MSN->getMask();
11043 Val = MSN->getValue();
11044 IsTruncatingStore = MSN->isTruncatingStore();
11047 MVT VT = Val.getSimpleValueType();
11048 MVT IndexVT = Index.getSimpleValueType();
11049 MVT XLenVT = Subtarget.getXLenVT();
11051 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11052 "Unexpected VTs!");
11053 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11054 // Targets have to explicitly opt-in for extending vector loads and
11055 // truncating vector stores.
11056 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11057 (void)IsTruncatingStore;
11059 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11060 // the selection of the masked intrinsics doesn't do this for us.
11061 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11063 MVT ContainerVT = VT;
11064 if (VT.isFixedLengthVector()) {
11065 ContainerVT = getContainerForFixedLengthVector(VT);
11066 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11067 ContainerVT.getVectorElementCount());
11069 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11070 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11072 if (!IsUnmasked) {
11073 MVT MaskVT = getMaskTypeFor(ContainerVT);
11074 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11078 if (!VL)
11079 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11081 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11082 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11083 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11086 unsigned IntID =
11087 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11088 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11089 Ops.push_back(Val);
11090 Ops.push_back(BasePtr);
11091 Ops.push_back(Index);
11092 if (!IsUnmasked)
11093 Ops.push_back(Mask);
11094 Ops.push_back(VL);
11096 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11097 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11100 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11101 SelectionDAG &DAG) const {
11102 const MVT XLenVT = Subtarget.getXLenVT();
11103 SDLoc DL(Op);
11104 SDValue Chain = Op->getOperand(0);
11105 SDValue SysRegNo = DAG.getTargetConstant(
11106 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11107 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11108 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11110 // The encoding used for the rounding mode in RISC-V differs from that used in
11111 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into a
11112 // table, which consists of a sequence of 4-bit fields, each representing the
11113 // corresponding FLT_ROUNDS mode.
11114 static const int Table =
11115 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11116 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11117 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11118 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11119 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11121 SDValue Shift =
11122 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11123 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11124 DAG.getConstant(Table, DL, XLenVT), Shift);
11125 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11126 DAG.getConstant(7, DL, XLenVT));
11128 return DAG.getMergeValues({Masked, Chain}, DL);
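// Editorial sketch (not from this file): the table lookup above in plain C++,
// assuming the usual numeric encodings (FRM: RNE=0, RTZ=1, RDN=2, RUP=3,
// RMM=4; FLT_ROUNDS: 0=toward zero, 1=nearest-even, 2=toward +inf,
// 3=toward -inf, 4=nearest-away). FrmToFltRounds is an illustrative name.
namespace {
constexpr int FrmToFltRounds(unsigned Frm) {
  constexpr int Table = (1 << 4 * 0) | (0 << 4 * 1) | (3 << 4 * 2) |
                        (2 << 4 * 3) | (4 << 4 * 4); // == 0x42301
  return (Table >> (4 * Frm)) & 7; // the SHL-by-2, SRL, AND sequence above
}
static_assert(FrmToFltRounds(0) == 1 && FrmToFltRounds(1) == 0 &&
              FrmToFltRounds(2) == 3 && FrmToFltRounds(3) == 2 &&
              FrmToFltRounds(4) == 4, "FRM -> FLT_ROUNDS via the 4-bit table");
} // namespace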
11131 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11132 SelectionDAG &DAG) const {
11133 const MVT XLenVT = Subtarget.getXLenVT();
11134 SDLoc DL(Op);
11135 SDValue Chain = Op->getOperand(0);
11136 SDValue RMValue = Op->getOperand(1);
11137 SDValue SysRegNo = DAG.getTargetConstant(
11138 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11140 // The encoding used for the rounding mode in RISC-V differs from that used in
11141 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
11142 // a table, which consists of a sequence of 4-bit fields, each representing the
11143 // corresponding RISC-V mode.
11144 static const unsigned Table =
11145 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11146 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11147 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11148 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11149 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11151 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11153 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11154 DAG.getConstant(2, DL, XLenVT));
11155 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11156 DAG.getConstant(Table, DL, XLenVT), Shift);
11157 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11158 DAG.getConstant(0x7, DL, XLenVT));
11159 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11160 RMValue);
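// Editorial sketch (not from this file): the inverse lookup for the table just
// above, under the same assumed encodings. FltRoundsToFrm is an illustrative
// name.
namespace {
constexpr unsigned FltRoundsToFrm(unsigned Mode) {
  constexpr unsigned Table = (0u << 4 * 1) | (1u << 4 * 0) | (2u << 4 * 3) |
                             (3u << 4 * 2) | (4u << 4 * 4);
  return (Table >> (4 * Mode)) & 7;
}
static_assert(FltRoundsToFrm(0) == 1 && FltRoundsToFrm(1) == 0 &&
              FltRoundsToFrm(2) == 3 && FltRoundsToFrm(3) == 2 &&
              FltRoundsToFrm(4) == 4, "FLT_ROUNDS -> FRM via the 4-bit table");
} // namespace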
11163 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11164 SelectionDAG &DAG) const {
11165 MachineFunction &MF = DAG.getMachineFunction();
11167 bool isRISCV64 = Subtarget.is64Bit();
11168 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11170 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11171 return DAG.getFrameIndex(FI, PtrVT);
11174 // Returns the opcode of the target-specific SDNode that implements the 32-bit
11175 // form of the given Opcode.
11176 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11177 switch (Opcode) {
11178 default:
11179 llvm_unreachable("Unexpected opcode");
11180 case ISD::SHL:
11181 return RISCVISD::SLLW;
11182 case ISD::SRA:
11183 return RISCVISD::SRAW;
11184 case ISD::SRL:
11185 return RISCVISD::SRLW;
11186 case ISD::SDIV:
11187 return RISCVISD::DIVW;
11188 case ISD::UDIV:
11189 return RISCVISD::DIVUW;
11190 case ISD::UREM:
11191 return RISCVISD::REMUW;
11192 case ISD::ROTL:
11193 return RISCVISD::ROLW;
11194 case ISD::ROTR:
11195 return RISCVISD::RORW;
11199 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11200 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11201 // otherwise be promoted to i64, making it difficult to select the
11202 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
11203 // type i8/i16/i32 is lost.
11204 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11205 unsigned ExtOpc = ISD::ANY_EXTEND) {
11206 SDLoc DL(N);
11207 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11208 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11209 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11210 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11211 // ReplaceNodeResults requires we maintain the same type for the return value.
11212 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11215 // Converts the given 32-bit operation to an i64 operation with sign-extension
11216 // semantics to reduce the number of sign-extension instructions.
11217 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11218 SDLoc DL(N);
11219 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11220 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11221 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11222 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11223 DAG.getValueType(MVT::i32));
11224 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
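// Editorial sketch (not from this file): the W-form trick in scalar terms. Do
// the arithmetic in 64 bits, then SIGN_EXTEND_INREG from bit 31; the low 32
// bits match the original i32 operation and the value stays sign-extended in
// the register. SExtInreg32 is an illustrative name.
namespace {
constexpr long long SExtInreg32(long long V) {
  V &= 0xffffffff;
  return V >= 0x80000000LL ? V - 0x100000000LL : V;
}
static_assert(SExtInreg32(0x7fffffffLL + 1) == -0x80000000LL &&
              SExtInreg32(2000000000LL + 2000000000LL) == -294967296LL,
              "64-bit add plus sext.w gives the wrapped i32 result");
} // namespace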
11227 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11228 SmallVectorImpl<SDValue> &Results,
11229 SelectionDAG &DAG) const {
11230 SDLoc DL(N);
11231 switch (N->getOpcode()) {
11232 default:
11233 llvm_unreachable("Don't know how to custom type legalize this operation!");
11234 case ISD::STRICT_FP_TO_SINT:
11235 case ISD::STRICT_FP_TO_UINT:
11236 case ISD::FP_TO_SINT:
11237 case ISD::FP_TO_UINT: {
11238 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11239 "Unexpected custom legalisation");
11240 bool IsStrict = N->isStrictFPOpcode();
11241 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11242 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11243 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11244 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11245 TargetLowering::TypeSoftenFloat) {
11246 if (!isTypeLegal(Op0.getValueType()))
11247 return;
11248 if (IsStrict) {
11249 SDValue Chain = N->getOperand(0);
11250 // In the absence of Zfh, promote f16 to f32, then convert.
11251 if (Op0.getValueType() == MVT::f16 &&
11252 !Subtarget.hasStdExtZfhOrZhinx()) {
11253 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11254 {Chain, Op0});
11255 Chain = Op0.getValue(1);
11257 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11258 : RISCVISD::STRICT_FCVT_WU_RV64;
11259 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11260 SDValue Res = DAG.getNode(
11261 Opc, DL, VTs, Chain, Op0,
11262 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11263 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11264 Results.push_back(Res.getValue(1));
11265 return;
11267 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11268 // convert.
11269 if ((Op0.getValueType() == MVT::f16 &&
11270 !Subtarget.hasStdExtZfhOrZhinx()) ||
11271 Op0.getValueType() == MVT::bf16)
11272 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11274 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11275 SDValue Res =
11276 DAG.getNode(Opc, DL, MVT::i64, Op0,
11277 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11278 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11279 return;
11281 // If the FP type needs to be softened, emit a library call using the 'si'
11282 // version. If we left it to default legalization we'd end up with 'di'. If
11283 // the FP type doesn't need to be softened just let generic type
11284 // legalization promote the result type.
11285 RTLIB::Libcall LC;
11286 if (IsSigned)
11287 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11288 else
11289 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11290 MakeLibCallOptions CallOptions;
11291 EVT OpVT = Op0.getValueType();
11292 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11293 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11294 SDValue Result;
11295 std::tie(Result, Chain) =
11296 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11297 Results.push_back(Result);
11298 if (IsStrict)
11299 Results.push_back(Chain);
11300 break;
11302 case ISD::LROUND: {
11303 SDValue Op0 = N->getOperand(0);
11304 EVT Op0VT = Op0.getValueType();
11305 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11306 TargetLowering::TypeSoftenFloat) {
11307 if (!isTypeLegal(Op0VT))
11308 return;
11310 // In the absence of Zfh, promote f16 to f32, then convert.
11311 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11312 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11314 SDValue Res =
11315 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11316 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11317 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11318 return;
11320 // If the FP type needs to be softened, emit a library call to lround. We'll
11321 // need to truncate the result. We assume any value that doesn't fit in i32
11322 // is allowed to return an unspecified value.
11323 RTLIB::Libcall LC =
11324 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11325 MakeLibCallOptions CallOptions;
11326 EVT OpVT = Op0.getValueType();
11327 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11328 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11329 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11330 Results.push_back(Result);
11331 break;
11333 case ISD::READCYCLECOUNTER: {
11334 assert(!Subtarget.is64Bit() &&
11335 "READCYCLECOUNTER only has custom type legalization on riscv32");
11337 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11338 SDValue RCW =
11339 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
11341 Results.push_back(
11342 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11343 Results.push_back(RCW.getValue(2));
11344 break;
11346 case ISD::LOAD: {
11347 if (!ISD::isNON_EXTLoad(N))
11348 return;
11350 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11351 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11352 LoadSDNode *Ld = cast<LoadSDNode>(N);
11354 SDLoc dl(N);
11355 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11356 Ld->getBasePtr(), Ld->getMemoryVT(),
11357 Ld->getMemOperand());
11358 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11359 Results.push_back(Res.getValue(1));
11360 return;
11362 case ISD::MUL: {
11363 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11364 unsigned XLen = Subtarget.getXLen();
11365 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11366 if (Size > XLen) {
11367 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11368 SDValue LHS = N->getOperand(0);
11369 SDValue RHS = N->getOperand(1);
11370 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11372 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11373 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11374 // We need exactly one side to be unsigned.
11375 if (LHSIsU == RHSIsU)
11376 return;
11378 auto MakeMULPair = [&](SDValue S, SDValue U) {
11379 MVT XLenVT = Subtarget.getXLenVT();
11380 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11381 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11382 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11383 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11384 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11387 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11388 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
11390 // The other operand should be signed, but still prefer MULH when
11391 // possible.
11392 if (RHSIsU && LHSIsS && !RHSIsS)
11393 Results.push_back(MakeMULPair(LHS, RHS));
11394 else if (LHSIsU && RHSIsS && !LHSIsS)
11395 Results.push_back(MakeMULPair(RHS, LHS));
11397 return;
11399 [[fallthrough]];
11401 case ISD::ADD:
11402 case ISD::SUB:
11403 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11404 "Unexpected custom legalisation");
11405 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
11406 break;
11407 case ISD::SHL:
11408 case ISD::SRA:
11409 case ISD::SRL:
11410 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11411 "Unexpected custom legalisation");
11412 if (N->getOperand(1).getOpcode() != ISD::Constant) {
11413 // If we can use a BSET instruction, allow default promotion to apply.
11414 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
11415 isOneConstant(N->getOperand(0)))
11416 break;
11417 Results.push_back(customLegalizeToWOp(N, DAG));
11418 break;
11421 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
11422 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
11423 // shift amount.
11424 if (N->getOpcode() == ISD::SHL) {
11425 SDLoc DL(N);
11426 SDValue NewOp0 =
11427 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11428 SDValue NewOp1 =
11429 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
11430 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
11431 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11432 DAG.getValueType(MVT::i32));
11433 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11436 break;
11437 case ISD::ROTL:
11438 case ISD::ROTR:
11439 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11440 "Unexpected custom legalisation");
11441 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
11442 Subtarget.hasVendorXTHeadBb()) &&
11443 "Unexpected custom legalization");
11444 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
11445 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
11446 return;
11447 Results.push_back(customLegalizeToWOp(N, DAG));
11448 break;
11449 case ISD::CTTZ:
11450 case ISD::CTTZ_ZERO_UNDEF:
11451 case ISD::CTLZ:
11452 case ISD::CTLZ_ZERO_UNDEF: {
11453 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11454 "Unexpected custom legalisation");
11456 SDValue NewOp0 =
11457 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11458 bool IsCTZ =
11459 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
11460 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
11461 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
11462 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11463 return;
11465 case ISD::SDIV:
11466 case ISD::UDIV:
11467 case ISD::UREM: {
11468 MVT VT = N->getSimpleValueType(0);
11469 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
11470 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
11471 "Unexpected custom legalisation");
11472 // Don't promote division/remainder by a constant, since we should expand those
11473 // to a multiply by a magic constant.
11474 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
11475 if (N->getOperand(1).getOpcode() == ISD::Constant &&
11476 !isIntDivCheap(N->getValueType(0), Attr))
11477 return;
11479 // If the input is i32, use ANY_EXTEND since the W instructions don't read
11480 // the upper 32 bits. For other types we need to sign or zero extend
11481 // based on the opcode.
11482 unsigned ExtOpc = ISD::ANY_EXTEND;
11483 if (VT != MVT::i32)
11484 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
11485 : ISD::ZERO_EXTEND;
11487 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
11488 break;
11490 case ISD::SADDO: {
11491 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11492 "Unexpected custom legalisation");
11494 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
11495 // use the default legalization.
11496 if (!isa<ConstantSDNode>(N->getOperand(1)))
11497 return;
11499 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11500 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11501 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
11502 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11503 DAG.getValueType(MVT::i32));
11505 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
11507 // For an addition, the result should be less than one of the operands (LHS)
11508 // if and only if the other operand (RHS) is negative, otherwise there will
11509 // be overflow.
11510 // For a subtraction, the result should be less than one of the operands
11511 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11512 // otherwise there will be overflow.
11513 EVT OType = N->getValueType(1);
11514 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
11515 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
11517 SDValue Overflow =
11518 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
11519 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11520 Results.push_back(Overflow);
11521 return;
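// Editorial sketch (not from this file): the overflow rule above on plain
// 64-bit integers holding sign-extended i32 values. SExtI32 and SAddOverflows
// are illustrative names.
namespace {
constexpr long long SExtI32(long long V) {
  V &= 0xffffffff;
  return V >= 0x80000000LL ? V - 0x100000000LL : V;
}
constexpr bool SAddOverflows(long long LHS, long long RHS) {
  long long Res = SExtI32(LHS + RHS); // ADD then SIGN_EXTEND_INREG i32
  return (Res < LHS) != (RHS < 0);    // XOR of the two setccs above
}
static_assert(SAddOverflows(0x7fffffffLL, 1) && !SAddOverflows(0x7ffffffeLL, 1) &&
              SAddOverflows(-0x80000000LL, -1) && !SAddOverflows(-5, -7),
              "Res < LHS differs from RHS < 0 exactly on i32 overflow");
} // namespace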
11523 case ISD::UADDO:
11524 case ISD::USUBO: {
11525 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11526 "Unexpected custom legalisation");
11527 bool IsAdd = N->getOpcode() == ISD::UADDO;
11528 // Create an ADDW or SUBW.
11529 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11530 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11531 SDValue Res =
11532 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
11533 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11534 DAG.getValueType(MVT::i32));
11536 SDValue Overflow;
11537 if (IsAdd && isOneConstant(RHS)) {
11538 // Special case uaddo X, 1 overflowed if the addition result is 0.
11539 // The general case (X + C) < C is not necessarily beneficial. Although we
11540 // reduce the live range of X, we may introduce the materialization of
11541 // constant C, especially when the setcc result is used by a branch. We have
11542 // no compare-with-constant-and-branch instructions.
11543 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
11544 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
11545 } else if (IsAdd && isAllOnesConstant(RHS)) {
11546 // Special case uaddo X, -1 overflowed if X != 0.
11547 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
11548 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
11549 } else {
11550 // Sign extend the LHS and perform an unsigned compare with the ADDW
11551 // result. Since the inputs are sign extended from i32, this is equivalent
11552 // to comparing the lower 32 bits.
11553 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11554 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
11555 IsAdd ? ISD::SETULT : ISD::SETUGT);
11558 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11559 Results.push_back(Overflow);
11560 return;
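// Editorial sketch (not from this file): the unsigned-add rules above, modelled
// on 64-bit values that hold 32-bit operands. UAddOverflows is an illustrative
// name.
namespace {
constexpr bool UAddOverflows(unsigned long long X, unsigned long long Y) {
  return ((X + Y) & 0xffffffffULL) < X; // wrapped i32 result compared with LHS
}
static_assert(UAddOverflows(0xffffffffULL, 1) && !UAddOverflows(0xfffffffeULL, 1),
              "uaddo X, 1 overflows exactly when the wrapped result is 0");
static_assert(UAddOverflows(7, 0xffffffffULL) && !UAddOverflows(0, 0xffffffffULL),
              "uaddo X, -1 overflows exactly when X != 0");
} // namespace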
11562 case ISD::UADDSAT:
11563 case ISD::USUBSAT: {
11564 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11565 "Unexpected custom legalisation");
11566 if (Subtarget.hasStdExtZbb()) {
11567 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
11568 // a sign extend allows overflow of the lower 32 bits to be detected in
11569 // the promoted type.
11570 SDValue LHS =
11571 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11572 SDValue RHS =
11573 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11574 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
11575 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11576 return;
11579 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
11580 // promotion for UADDO/USUBO.
11581 Results.push_back(expandAddSubSat(N, DAG));
11582 return;
11584 case ISD::ABS: {
11585 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11586 "Unexpected custom legalisation");
11588 if (Subtarget.hasStdExtZbb()) {
11589 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
11590 // This allows us to remember that the result is sign extended. Expanding
11591 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
11592 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
11593 N->getOperand(0));
11594 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
11595 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
11596 return;
11599 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
11600 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11602 // Freeze the source so we can increase its use count.
11603 Src = DAG.getFreeze(Src);
11605 // Copy sign bit to all bits using the sraiw pattern.
11606 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
11607 DAG.getValueType(MVT::i32));
11608 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
11609 DAG.getConstant(31, DL, MVT::i64));
11611 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
11612 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
11614 // NOTE: The result is only required to be anyextended, but sext is
11615 // consistent with type legalization of sub.
11616 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
11617 DAG.getValueType(MVT::i32));
11618 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11619 return;
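// Editorial sketch (not from this file): the sraiw/xor/subw expansion above on
// a plain 32-bit int, assuming arithmetic right shift of a negative int (which
// C++20 guarantees). AbsViaSra is an illustrative name.
namespace {
constexpr int AbsViaSra(int X) {
  int Y = X >> 31;    // sign fill: all ones if X < 0, else all zeros
  return (X ^ Y) - Y; // xor flips the bits of a negative X, sub adds 1 back
}
static_assert(AbsViaSra(5) == 5 && AbsViaSra(-5) == 5 && AbsViaSra(0) == 0,
              "abs(x) == (x ^ (x >> 31)) - (x >> 31)");
} // namespace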
11621 case ISD::BITCAST: {
11622 EVT VT = N->getValueType(0);
11623 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
11624 SDValue Op0 = N->getOperand(0);
11625 EVT Op0VT = Op0.getValueType();
11626 MVT XLenVT = Subtarget.getXLenVT();
11627 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
11628 Subtarget.hasStdExtZfhminOrZhinxmin()) {
11629 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11630 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11631 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
11632 Subtarget.hasStdExtZfbfmin()) {
11633 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11634 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11635 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
11636 Subtarget.hasStdExtFOrZfinx()) {
11637 SDValue FPConv =
11638 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
11639 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
11640 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
11641 Subtarget.hasStdExtZfa()) {
11642 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
11643 DAG.getVTList(MVT::i32, MVT::i32), Op0);
11644 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
11645 NewReg.getValue(0), NewReg.getValue(1));
11646 Results.push_back(RetReg);
11647 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
11648 isTypeLegal(Op0VT)) {
11649 // Custom-legalize bitcasts from fixed-length vector types to illegal
11650 // scalar types in order to improve codegen. Bitcast the vector to a
11651 // one-element vector type whose element type is the same as the result
11652 // type, and extract the first element.
11653 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
11654 if (isTypeLegal(BVT)) {
11655 SDValue BVec = DAG.getBitcast(BVT, Op0);
11656 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
11657 DAG.getConstant(0, DL, XLenVT)));
11660 break;
11662 case RISCVISD::BREV8: {
11663 MVT VT = N->getSimpleValueType(0);
11664 MVT XLenVT = Subtarget.getXLenVT();
11665 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
11666 "Unexpected custom legalisation");
11667 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11668 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
11669 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
11670 // ReplaceNodeResults requires we maintain the same type for the return
11671 // value.
11672 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
11673 break;
11675 case ISD::EXTRACT_VECTOR_ELT: {
11676 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
11677 // type is illegal (currently only vXi64 RV32).
11678 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
11679 // transferred to the destination register. We issue two of these from the
11680 // upper- and lower- halves of the SEW-bit vector element, slid down to the
11681 // first element.
11682 SDValue Vec = N->getOperand(0);
11683 SDValue Idx = N->getOperand(1);
11685 // The vector type hasn't been legalized yet so we can't issue target
11686 // specific nodes if it needs legalization.
11687 // FIXME: We would manually legalize if it's important.
11688 if (!isTypeLegal(Vec.getValueType()))
11689 return;
11691 MVT VecVT = Vec.getSimpleValueType();
11693 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
11694 VecVT.getVectorElementType() == MVT::i64 &&
11695 "Unexpected EXTRACT_VECTOR_ELT legalization");
11697 // If this is a fixed vector, we need to convert it to a scalable vector.
11698 MVT ContainerVT = VecVT;
11699 if (VecVT.isFixedLengthVector()) {
11700 ContainerVT = getContainerForFixedLengthVector(VecVT);
11701 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11704 MVT XLenVT = Subtarget.getXLenVT();
11706 // Use a VL of 1 to avoid processing more elements than we need.
11707 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
11709 // Unless the index is known to be 0, we must slide the vector down to get
11710 // the desired element into index 0.
11711 if (!isNullConstant(Idx)) {
11712 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11713 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
11716 // Extract the lower XLEN bits of the correct vector element.
11717 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11719 // To extract the upper XLEN bits of the vector element, shift the first
11720 // element right by 32 bits and re-extract the lower XLEN bits.
11721 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11722 DAG.getUNDEF(ContainerVT),
11723 DAG.getConstant(32, DL, XLenVT), VL);
11724 SDValue LShr32 =
11725 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
11726 DAG.getUNDEF(ContainerVT), Mask, VL);
11728 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11730 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11731 break;
11733 case ISD::INTRINSIC_WO_CHAIN: {
11734 unsigned IntNo = N->getConstantOperandVal(0);
11735 switch (IntNo) {
11736 default:
11737 llvm_unreachable(
11738 "Don't know how to custom type legalize this intrinsic!");
11739 case Intrinsic::experimental_get_vector_length: {
11740 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
11741 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11742 return;
11744 case Intrinsic::riscv_orc_b:
11745 case Intrinsic::riscv_brev8:
11746 case Intrinsic::riscv_sha256sig0:
11747 case Intrinsic::riscv_sha256sig1:
11748 case Intrinsic::riscv_sha256sum0:
11749 case Intrinsic::riscv_sha256sum1:
11750 case Intrinsic::riscv_sm3p0:
11751 case Intrinsic::riscv_sm3p1: {
11752 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11753 return;
11754 unsigned Opc;
11755 switch (IntNo) {
11756 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11757 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11758 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11759 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11760 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11761 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11762 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11763 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11766 SDValue NewOp =
11767 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11768 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
11769 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11770 return;
11772 case Intrinsic::riscv_sm4ks:
11773 case Intrinsic::riscv_sm4ed: {
11774 unsigned Opc =
11775 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11776 SDValue NewOp0 =
11777 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11778 SDValue NewOp1 =
11779 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11780 SDValue Res =
11781 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
11782 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11783 return;
11785 case Intrinsic::riscv_clmul: {
11786 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11787 return;
11789 SDValue NewOp0 =
11790 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11791 SDValue NewOp1 =
11792 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11793 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
11794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11795 return;
11797 case Intrinsic::riscv_clmulh:
11798 case Intrinsic::riscv_clmulr: {
11799 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11800 return;
11802 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
11803 // to the full 128-bit clmul result of multiplying two xlen values.
11804 // Perform clmulr or clmulh on the shifted values. Finally, extract the
11805 // upper 32 bits.
11807 // The alternative is to mask the inputs to 32 bits and use clmul, but
11808 // that requires two shifts to mask each input without zext.w.
11809 // FIXME: If the inputs are known zero extended or could be freely
11810 // zero extended, the mask form would be better.
11811 SDValue NewOp0 =
11812 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11813 SDValue NewOp1 =
11814 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11815 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
11816 DAG.getConstant(32, DL, MVT::i64));
11817 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
11818 DAG.getConstant(32, DL, MVT::i64));
11819 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
11820 : RISCVISD::CLMULR;
11821 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
11822 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
11823 DAG.getConstant(32, DL, MVT::i64));
11824 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11825 return;
11827 case Intrinsic::riscv_vmv_x_s: {
11828 EVT VT = N->getValueType(0);
11829 MVT XLenVT = Subtarget.getXLenVT();
11830 if (VT.bitsLT(XLenVT)) {
11831 // Simple case just extract using vmv.x.s and truncate.
11832 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
11833 Subtarget.getXLenVT(), N->getOperand(1));
11834 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
11835 return;
11838 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
11839 "Unexpected custom legalization");
11841 // We need to do the move in two steps.
11842 SDValue Vec = N->getOperand(1);
11843 MVT VecVT = Vec.getSimpleValueType();
11845 // First extract the lower XLEN bits of the element.
11846 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11848 // To extract the upper XLEN bits of the vector element, shift the first
11849 // element right by 32 bits and re-extract the lower XLEN bits.
11850 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
11852 SDValue ThirtyTwoV =
11853 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
11854 DAG.getConstant(32, DL, XLenVT), VL);
11855 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
11856 DAG.getUNDEF(VecVT), Mask, VL);
11857 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11859 Results.push_back(
11860 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11861 break;
11864 break;
11866 case ISD::VECREDUCE_ADD:
11867 case ISD::VECREDUCE_AND:
11868 case ISD::VECREDUCE_OR:
11869 case ISD::VECREDUCE_XOR:
11870 case ISD::VECREDUCE_SMAX:
11871 case ISD::VECREDUCE_UMAX:
11872 case ISD::VECREDUCE_SMIN:
11873 case ISD::VECREDUCE_UMIN:
11874 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
11875 Results.push_back(V);
11876 break;
11877 case ISD::VP_REDUCE_ADD:
11878 case ISD::VP_REDUCE_AND:
11879 case ISD::VP_REDUCE_OR:
11880 case ISD::VP_REDUCE_XOR:
11881 case ISD::VP_REDUCE_SMAX:
11882 case ISD::VP_REDUCE_UMAX:
11883 case ISD::VP_REDUCE_SMIN:
11884 case ISD::VP_REDUCE_UMIN:
11885 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
11886 Results.push_back(V);
11887 break;
11888 case ISD::GET_ROUNDING: {
11889 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
11890 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
11891 Results.push_back(Res.getValue(0));
11892 Results.push_back(Res.getValue(1));
11893 break;
11898 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
11899 /// which corresponds to it.
11900 static unsigned getVecReduceOpcode(unsigned Opc) {
11901 switch (Opc) {
11902 default:
11903 llvm_unreachable("Unhandled binary to transform reduction");
11904 case ISD::ADD:
11905 return ISD::VECREDUCE_ADD;
11906 case ISD::UMAX:
11907 return ISD::VECREDUCE_UMAX;
11908 case ISD::SMAX:
11909 return ISD::VECREDUCE_SMAX;
11910 case ISD::UMIN:
11911 return ISD::VECREDUCE_UMIN;
11912 case ISD::SMIN:
11913 return ISD::VECREDUCE_SMIN;
11914 case ISD::AND:
11915 return ISD::VECREDUCE_AND;
11916 case ISD::OR:
11917 return ISD::VECREDUCE_OR;
11918 case ISD::XOR:
11919 return ISD::VECREDUCE_XOR;
11920 case ISD::FADD:
11921 // Note: This is the associative form of the generic reduction opcode.
11922 return ISD::VECREDUCE_FADD;
11926 /// Perform two related transforms whose purpose is to incrementally recognize
11927 /// an explode_vector followed by scalar reduction as a vector reduction node.
11928 /// This exists to recover from a deficiency in SLP which can't handle
11929 /// forests with multiple roots sharing common nodes. In some cases, one
11930 /// of the trees will be vectorized, and the other will remain (unprofitably)
11931 /// scalarized.
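///
/// For example, (add (extractelt V, 0), (extractelt V, 1)) becomes a
/// VECREDUCE_ADD over the first two elements of V; a later add of
/// (extractelt V, 2) then grows that reduction to cover three elements.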
11932 static SDValue
11933 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
11934 const RISCVSubtarget &Subtarget) {
11936 // This transform needs to run before all integer types have been legalized
11937 // to i64 (so that the vector element type matches the add type), and while
11938 // it's safe to introduce odd sized vector types.
11939 if (DAG.NewNodesMustHaveLegalTypes)
11940 return SDValue();
11942 // Without V, this transform isn't useful. We could form the (illegal)
11943 // operations and let them be scalarized again, but there's really no point.
11944 if (!Subtarget.hasVInstructions())
11945 return SDValue();
11947 const SDLoc DL(N);
11948 const EVT VT = N->getValueType(0);
11949 const unsigned Opc = N->getOpcode();
11951 // For FADD, we only handle the case with reassociation allowed. We
11952 // could handle strict reduction order, but at the moment, there's no
11953 // known reason to, and the complexity isn't worth it.
11954 // TODO: Handle fminnum and fmaxnum here
11955 if (!VT.isInteger() &&
11956 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
11957 return SDValue();
11959 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
11960 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
11961 "Inconsistent mappings");
11962 SDValue LHS = N->getOperand(0);
11963 SDValue RHS = N->getOperand(1);
11965 if (!LHS.hasOneUse() || !RHS.hasOneUse())
11966 return SDValue();
11968 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11969 std::swap(LHS, RHS);
11971 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11972 !isa<ConstantSDNode>(RHS.getOperand(1)))
11973 return SDValue();
11975 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
11976 SDValue SrcVec = RHS.getOperand(0);
11977 EVT SrcVecVT = SrcVec.getValueType();
11978 assert(SrcVecVT.getVectorElementType() == VT);
11979 if (SrcVecVT.isScalableVector())
11980 return SDValue();
11982 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
11983 return SDValue();
11985 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
11986 // reduce_op (extract_subvector [2 x VT] from V). This will form the
11987 // root of our reduction tree. TODO: We could extend this to any two
11988 // adjacent aligned constant indices if desired.
11989 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11990 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
11991 uint64_t LHSIdx =
11992 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
11993 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
11994 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
11995 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
11996 DAG.getVectorIdxConstant(0, DL));
11997 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12001 // Match (binop (reduce (extract_subvector V, 0)),
12002 //   (extract_vector_elt V, sizeof(SubVec)))
12003 // into a reduction of one more element from the original vector V.
12004 if (LHS.getOpcode() != ReduceOpc)
12005 return SDValue();
12007 SDValue ReduceVec = LHS.getOperand(0);
12008 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12009 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12010 isNullConstant(ReduceVec.getOperand(1)) &&
12011 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12012 // For illegal types (e.g. 3xi32), most will be combined again into a
12013 // wider (hopefully legal) type. If this is a terminal state, we are
12014 // relying on type legalization here to produce something reasonable
12015 // and this lowering quality could probably be improved. (TODO)
12016 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12017 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12018 DAG.getVectorIdxConstant(0, DL));
12019 auto Flags = ReduceVec->getFlags();
12020 Flags.intersectWith(N->getFlags());
12021 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12024 return SDValue();
12028 // Try to fold (<bop> x, (reduction.<bop> vec, start))
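// For example, conceptually: (add X, (reduce_add vec, start 0)) can use X as
// the start value of the reduction instead, removing the outer add, provided
// the existing start value is the neutral element of the operation.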
12029 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12030 const RISCVSubtarget &Subtarget) {
12031 auto BinOpToRVVReduce = [](unsigned Opc) {
12032 switch (Opc) {
12033 default:
12034 llvm_unreachable("Unhandled binary to transform reduction");
12035 case ISD::ADD:
12036 return RISCVISD::VECREDUCE_ADD_VL;
12037 case ISD::UMAX:
12038 return RISCVISD::VECREDUCE_UMAX_VL;
12039 case ISD::SMAX:
12040 return RISCVISD::VECREDUCE_SMAX_VL;
12041 case ISD::UMIN:
12042 return RISCVISD::VECREDUCE_UMIN_VL;
12043 case ISD::SMIN:
12044 return RISCVISD::VECREDUCE_SMIN_VL;
12045 case ISD::AND:
12046 return RISCVISD::VECREDUCE_AND_VL;
12047 case ISD::OR:
12048 return RISCVISD::VECREDUCE_OR_VL;
12049 case ISD::XOR:
12050 return RISCVISD::VECREDUCE_XOR_VL;
12051 case ISD::FADD:
12052 return RISCVISD::VECREDUCE_FADD_VL;
12053 case ISD::FMAXNUM:
12054 return RISCVISD::VECREDUCE_FMAX_VL;
12055 case ISD::FMINNUM:
12056 return RISCVISD::VECREDUCE_FMIN_VL;
12060 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12061 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12062 isNullConstant(V.getOperand(1)) &&
12063 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12066 unsigned Opc = N->getOpcode();
12067 unsigned ReduceIdx;
12068 if (IsReduction(N->getOperand(0), Opc))
12069 ReduceIdx = 0;
12070 else if (IsReduction(N->getOperand(1), Opc))
12071 ReduceIdx = 1;
12072 else
12073 return SDValue();
12075 // Skip if FADD disallows reassociation but the combiner needs it.
12076 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12077 return SDValue();
12079 SDValue Extract = N->getOperand(ReduceIdx);
12080 SDValue Reduce = Extract.getOperand(0);
12081 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12082 return SDValue();
12084 SDValue ScalarV = Reduce.getOperand(2);
12085 EVT ScalarVT = ScalarV.getValueType();
12086 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12087 ScalarV.getOperand(0)->isUndef() &&
12088 isNullConstant(ScalarV.getOperand(2)))
12089 ScalarV = ScalarV.getOperand(1);
12091 // Make sure that ScalarV is a splat with VL=1.
12092 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12093 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12094 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12095 return SDValue();
12097 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12098 return SDValue();
12100 // Check that the scalar of ScalarV is the neutral element.
12101 // TODO: Deal with value other than neutral element.
12102 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12104 return SDValue();
12106 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12107 // FIXME: We might be able to improve this if operand 0 is undef.
12108 if (!isNonZeroAVL(Reduce.getOperand(5)))
12109 return SDValue();
12111 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12113 SDLoc DL(N);
12114 SDValue NewScalarV =
12115 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12116 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12118 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12119 if (ScalarVT != ScalarV.getValueType())
12120 NewScalarV =
12121 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12122 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
12124 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12125 NewScalarV, Reduce.getOperand(3),
12126 Reduce.getOperand(4), Reduce.getOperand(5)};
12127 SDValue NewReduce =
12128 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12129 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12130 Extract.getOperand(1));
12133 // Optimize (add (shl x, c0), (shl y, c1)) ->
12134 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
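// For example, with Zba: (add (shl x, 5), (shl y, 6))
//   -> (shl (add (shl y, 1), x), 5), which selects to slli (sh1add y, x), 5.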
12135 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12136 const RISCVSubtarget &Subtarget) {
12137 // Perform this optimization only in the zba extension.
12138 if (!Subtarget.hasStdExtZba())
12139 return SDValue();
12141 // Skip for vector types and larger types.
12142 EVT VT = N->getValueType(0);
12143 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12144 return SDValue();
12146 // The two operand nodes must be SHL and have no other use.
12147 SDValue N0 = N->getOperand(0);
12148 SDValue N1 = N->getOperand(1);
12149 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12150 !N0->hasOneUse() || !N1->hasOneUse())
12151 return SDValue();
12153 // Check c0 and c1.
12154 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12155 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12156 if (!N0C || !N1C)
12157 return SDValue();
12158 int64_t C0 = N0C->getSExtValue();
12159 int64_t C1 = N1C->getSExtValue();
12160 if (C0 <= 0 || C1 <= 0)
12161 return SDValue();
12163 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12164 int64_t Bits = std::min(C0, C1);
12165 int64_t Diff = std::abs(C0 - C1);
12166 if (Diff != 1 && Diff != 2 && Diff != 3)
12167 return SDValue();
12169 // Build nodes.
12170 SDLoc DL(N);
12171 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12172 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12173 SDValue NA0 =
12174 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12175 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12176 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12179 // Combine a constant select operand into its use:
12181 // (and (select cond, -1, c), x)
12182 // -> (select cond, x, (and x, c)) [AllOnes=1]
12183 // (or (select cond, 0, c), x)
12184 // -> (select cond, x, (or x, c)) [AllOnes=0]
12185 // (xor (select cond, 0, c), x)
12186 // -> (select cond, x, (xor x, c)) [AllOnes=0]
12187 // (add (select cond, 0, c), x)
12188 // -> (select cond, x, (add x, c)) [AllOnes=0]
12189 // (sub x, (select cond, 0, c))
12190 // -> (select cond, x, (sub x, c)) [AllOnes=0]
12191 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12192 SelectionDAG &DAG, bool AllOnes,
12193 const RISCVSubtarget &Subtarget) {
12194 EVT VT = N->getValueType(0);
12196 // Skip vectors.
12197 if (VT.isVector())
12198 return SDValue();
12200 if (!Subtarget.hasShortForwardBranchOpt()) {
12201 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12202 if ((!Subtarget.hasStdExtZicond() &&
12203 !Subtarget.hasVendorXVentanaCondOps()) ||
12204 N->getOpcode() != ISD::AND)
12205 return SDValue();
12207 // May be harmful when the condition code has multiple uses.
12208 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12209 return SDValue();
12211 // May be harmful when VT is wider than XLen.
12212 if (VT.getSizeInBits() > Subtarget.getXLen())
12213 return SDValue();
12216 if ((Slct.getOpcode() != ISD::SELECT &&
12217 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12218 !Slct.hasOneUse())
12219 return SDValue();
12221 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12222 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12225 bool SwapSelectOps;
12226 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12227 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12228 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12229 SDValue NonConstantVal;
12230 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12231 SwapSelectOps = false;
12232 NonConstantVal = FalseVal;
12233 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12234 SwapSelectOps = true;
12235 NonConstantVal = TrueVal;
12236 } else
12237 return SDValue();
12239 // Slct is now known to be the desired identity constant when CC is true.
12240 TrueVal = OtherOp;
12241 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12242 // Unless SwapSelectOps says the condition should be false.
12243 if (SwapSelectOps)
12244 std::swap(TrueVal, FalseVal);
12246 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12247 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12248 {Slct.getOperand(0), Slct.getOperand(1),
12249 Slct.getOperand(2), TrueVal, FalseVal});
12251 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12252 {Slct.getOperand(0), TrueVal, FalseVal});
12255 // Attempt combineSelectAndUse on each operand of a commutative operator N.
12256 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12257 bool AllOnes,
12258 const RISCVSubtarget &Subtarget) {
12259 SDValue N0 = N->getOperand(0);
12260 SDValue N1 = N->getOperand(1);
12261 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12262 return Result;
12263 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12264 return Result;
12265 return SDValue();
12268 // Transform (add (mul x, c0), c1) ->
12269 // (add (mul (add x, c1/c0), c0), c1%c0).
12270 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12271 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
12272 // to an infinite loop in DAGCombine if transformed.
12273 // Or transform (add (mul x, c0), c1) ->
12274 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12275 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12276 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12277 // lead to an infinite loop in DAGCombine if transformed.
12278 // Or transform (add (mul x, c0), c1) ->
12279 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12280 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12281 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12282 // lead to an infinite loop in DAGCombine if transformed.
12283 // Or transform (add (mul x, c0), c1) ->
12284 // (mul (add x, c1/c0), c0).
12285 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
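// For example, with c0 = 100 and c1 = 4096 (not simm12): c1/c0 = 40 and
// c1%c0 = 96 are both simm12 and c0*(c1/c0) = 4000 is not, so
// (add (mul x, 100), 4096) -> (add (mul (add x, 40), 100), 96).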
12286 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12287 const RISCVSubtarget &Subtarget) {
12288 // Skip for vector types and larger types.
12289 EVT VT = N->getValueType(0);
12290 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12291 return SDValue();
12292 // The first operand node must be a MUL and have no other use.
12293 SDValue N0 = N->getOperand(0);
12294 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12295 return SDValue();
12296 // Check if c0 and c1 match above conditions.
12297 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12298 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12299 if (!N0C || !N1C)
12300 return SDValue();
12301 // If N0C has multiple uses it's possible one of the cases in
12302 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12303 // in an infinite loop.
12304 if (!N0C->hasOneUse())
12305 return SDValue();
12306 int64_t C0 = N0C->getSExtValue();
12307 int64_t C1 = N1C->getSExtValue();
12308 int64_t CA, CB;
12309 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12310 return SDValue();
12311 // Search for proper CA (non-zero) and CB that both are simm12.
12312 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12313 !isInt<12>(C0 * (C1 / C0))) {
12314 CA = C1 / C0;
12315 CB = C1 % C0;
12316 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12317 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12318 CA = C1 / C0 + 1;
12319 CB = C1 % C0 - C0;
12320 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12321 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12322 CA = C1 / C0 - 1;
12323 CB = C1 % C0 + C0;
12324 } else
12325 return SDValue();
12326 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12327 SDLoc DL(N);
12328 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12329 DAG.getConstant(CA, DL, VT));
12330 SDValue New1 =
12331 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12332 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12335 // Try to turn (add (xor bool, 1), -1) into (neg bool).
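// For example, bool = 1 gives (1 ^ 1) + (-1) = -1 = -(1) and bool = 0 gives
// (0 ^ 1) + (-1) = 0 = -(0), so the xor/add pair is just a negation.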
12336 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
12337 SDValue N0 = N->getOperand(0);
12338 SDValue N1 = N->getOperand(1);
12339 EVT VT = N->getValueType(0);
12340 SDLoc DL(N);
12342 // RHS should be -1.
12343 if (!isAllOnesConstant(N1))
12344 return SDValue();
12346 // Look for (xor X, 1).
12347 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
12348 return SDValue();
12350 // First xor input should be 0 or 1.
12351 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12352 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
12353 return SDValue();
12355 // Emit a negate of the setcc.
12356 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
12357 N0.getOperand(0));
12360 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
12361 const RISCVSubtarget &Subtarget) {
12362 if (SDValue V = combineAddOfBooleanXor(N, DAG))
12363 return V;
12364 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
12365 return V;
12366 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
12367 return V;
12368 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12369 return V;
12370 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12371 return V;
12373 // fold (add (select lhs, rhs, cc, 0, y), x) ->
12374 // (select lhs, rhs, cc, x, (add x, y))
12375 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12378 // Try to turn a sub with a boolean RHS and constant LHS into an addi.
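// For example, (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4):
// inverting the boolean and adding C-1 yields the same result for both
// outcomes of the compare.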
12379 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
12380 SDValue N0 = N->getOperand(0);
12381 SDValue N1 = N->getOperand(1);
12382 EVT VT = N->getValueType(0);
12383 SDLoc DL(N);
12385 // Require a constant LHS.
12386 auto *N0C = dyn_cast<ConstantSDNode>(N0);
12387 if (!N0C)
12388 return SDValue();
12390 // All our optimizations involve subtracting 1 from the immediate and forming
12391 // an ADDI. Make sure the new immediate is valid for an ADDI.
12392 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
12393 if (!ImmValMinus1.isSignedIntN(12))
12394 return SDValue();
12396 SDValue NewLHS;
12397 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
12398 // (sub constant, (setcc x, y, eq/neq)) ->
12399 // (add (setcc x, y, neq/eq), constant - 1)
12400 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
12401 EVT SetCCOpVT = N1.getOperand(0).getValueType();
12402 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
12403 return SDValue();
12404 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
12405 NewLHS =
12406 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
12407 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
12408 N1.getOperand(0).getOpcode() == ISD::SETCC) {
12409 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
12410 // Since setcc returns a bool the xor is equivalent to 1-setcc.
12411 NewLHS = N1.getOperand(0);
12412 } else
12413 return SDValue();
12415 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
12416 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
12419 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
12420 const RISCVSubtarget &Subtarget) {
12421 if (SDValue V = combineSubOfBoolean(N, DAG))
12422 return V;
12424 SDValue N0 = N->getOperand(0);
12425 SDValue N1 = N->getOperand(1);
12426 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
12427 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
12428 isNullConstant(N1.getOperand(1))) {
12429 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
12430 if (CCVal == ISD::SETLT) {
12431 EVT VT = N->getValueType(0);
12432 SDLoc DL(N);
12433 unsigned ShAmt = N0.getValueSizeInBits() - 1;
12434 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
12435 DAG.getConstant(ShAmt, DL, VT));
12439 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
12440 // (select lhs, rhs, cc, x, (sub x, y))
12441 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
12444 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
12445 // Legalizing setcc can introduce xors like this. Doing this transform reduces
12446 // the number of xors and may allow the xor to fold into a branch condition.
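// For example, when X and Y are known to be 0/1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)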
12447 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
12448 SDValue N0 = N->getOperand(0);
12449 SDValue N1 = N->getOperand(1);
12450 bool IsAnd = N->getOpcode() == ISD::AND;
12452 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
12453 return SDValue();
12455 if (!N0.hasOneUse() || !N1.hasOneUse())
12456 return SDValue();
12458 SDValue N01 = N0.getOperand(1);
12459 SDValue N11 = N1.getOperand(1);
12461 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
12462 // (xor X, -1) based on the upper bits of the other operand being 0. If the
12463 // operation is And, allow one of the Xors to use -1.
12464 if (isOneConstant(N01)) {
12465 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
12466 return SDValue();
12467 } else if (isOneConstant(N11)) {
12468 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
12469 if (!(IsAnd && isAllOnesConstant(N01)))
12470 return SDValue();
12471 } else
12472 return SDValue();
12474 EVT VT = N->getValueType(0);
12476 SDValue N00 = N0.getOperand(0);
12477 SDValue N10 = N1.getOperand(0);
12479 // The LHS of the xors needs to be 0/1.
12480 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12481 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
12482 return SDValue();
12484 // Invert the opcode and insert a new xor.
12485 SDLoc DL(N);
12486 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12487 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
12488 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
12491 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
12492 const RISCVSubtarget &Subtarget) {
12493 SDValue N0 = N->getOperand(0);
12494 EVT VT = N->getValueType(0);
12496 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
12497 // extending X. This is safe since we only need the LSB after the shift and
12498 // shift amounts larger than 31 would produce poison. If we wait until
12499 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12500 // to use a BEXT instruction.
12501 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
12502 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
12503 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12504 SDLoc DL(N0);
12505 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12506 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12507 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12508 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
12511 return SDValue();
12514 // Combines two comparison operations and a logic operation into one selection
12515 // operation (min, max) and logic operation. Returns the newly constructed node
12516 // if the conditions for the optimization are satisfied.
12517 static SDValue performANDCombine(SDNode *N,
12518 TargetLowering::DAGCombinerInfo &DCI,
12519 const RISCVSubtarget &Subtarget) {
12520 SelectionDAG &DAG = DCI.DAG;
12522 SDValue N0 = N->getOperand(0);
12523 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
12524 // extending X. This is safe since we only need the LSB after the shift and
12525 // shift amounts larger than 31 would produce poison. If we wait until
12526 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12527 // to use a BEXT instruction.
12528 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12529 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
12530 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
12531 N0.hasOneUse()) {
12532 SDLoc DL(N);
12533 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12534 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12535 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12536 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
12537 DAG.getConstant(1, DL, MVT::i64));
12538 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12541 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12542 return V;
12543 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12544 return V;
12546 if (DCI.isAfterLegalizeDAG())
12547 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12548 return V;
12550 // fold (and (select lhs, rhs, cc, -1, y), x) ->
12551 // (select lhs, rhs, cc, x, (and x, y))
12552 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
12555 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
12556 // FIXME: Generalize to other binary operators with same operand.
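// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//     -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// so the xor with 1 is applied once, after the select idiom.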
12557 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
12558 SelectionDAG &DAG) {
12559 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
12561 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
12562 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
12563 !N0.hasOneUse() || !N1.hasOneUse())
12564 return SDValue();
12566 // Should have the same condition.
12567 SDValue Cond = N0.getOperand(1);
12568 if (Cond != N1.getOperand(1))
12569 return SDValue();
12571 SDValue TrueV = N0.getOperand(0);
12572 SDValue FalseV = N1.getOperand(0);
12574 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
12575 TrueV.getOperand(1) != FalseV.getOperand(1) ||
12576 !isOneConstant(TrueV.getOperand(1)) ||
12577 !TrueV.hasOneUse() || !FalseV.hasOneUse())
12578 return SDValue();
12580 EVT VT = N->getValueType(0);
12581 SDLoc DL(N);
12583 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
12584 Cond);
12585 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
12586 Cond);
12587 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
12588 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
12591 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
12592 const RISCVSubtarget &Subtarget) {
12593 SelectionDAG &DAG = DCI.DAG;
12595 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12596 return V;
12597 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12598 return V;
12600 if (DCI.isAfterLegalizeDAG())
12601 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12602 return V;
12604 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
12605 // We may be able to pull a common operation out of the true and false value.
12606 SDValue N0 = N->getOperand(0);
12607 SDValue N1 = N->getOperand(1);
12608 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
12609 return V;
12610 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
12611 return V;
12613 // fold (or (select cond, 0, y), x) ->
12614 // (select cond, x, (or x, y))
12615 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12618 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
12619 const RISCVSubtarget &Subtarget) {
12620 SDValue N0 = N->getOperand(0);
12621 SDValue N1 = N->getOperand(1);
12623 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
12624 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
12625 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
12626 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12627 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
12628 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
12629 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12630 SDLoc DL(N);
12631 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12632 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12633 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
12634 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
12635 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12638 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
12639 // NOTE: Assumes ROL being legal means ROLW is legal.
12640 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12641 if (N0.getOpcode() == RISCVISD::SLLW &&
12642 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
12643 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
12644 SDLoc DL(N);
12645 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
12646 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
12649 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
12650 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
12651 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
12652 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12653 if (ConstN00 && CC == ISD::SETLT) {
12654 EVT VT = N0.getValueType();
12655 SDLoc DL(N0);
12656 const APInt &Imm = ConstN00->getAPIntValue();
12657 if ((Imm + 1).isSignedIntN(12))
12658 return DAG.getSetCC(DL, VT, N0.getOperand(1),
12659 DAG.getConstant(Imm + 1, DL, VT), CC);
12663 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12664 return V;
12665 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12666 return V;
12668 // fold (xor (select cond, 0, y), x) ->
12669 // (select cond, x, (xor x, y))
12670 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12673 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
12674 EVT VT = N->getValueType(0);
12675 if (!VT.isVector())
12676 return SDValue();
12678 SDLoc DL(N);
12679 SDValue N0 = N->getOperand(0);
12680 SDValue N1 = N->getOperand(1);
12681 SDValue MulOper;
12682 unsigned AddSubOpc;
12684 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
12685 // (mul x, (add y, 1)) -> (add x, (mul x, y))
12686 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
12687 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
12688 auto IsAddSubWith1 = [&](SDValue V) -> bool {
12689 AddSubOpc = V->getOpcode();
12690 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
12691 SDValue Opnd = V->getOperand(1);
12692 MulOper = V->getOperand(0);
12693 if (AddSubOpc == ISD::SUB)
12694 std::swap(Opnd, MulOper);
12695 if (isOneOrOneSplat(Opnd))
12696 return true;
12698 return false;
12701 if (IsAddSubWith1(N0)) {
12702 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
12703 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
12706 if (IsAddSubWith1(N1)) {
12707 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
12708 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
12711 return SDValue();
12714 /// Because indexed load/store instructions zero-extend their indices, try to
12715 /// narrow the type of the index operand.
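/// For example, a constant build_vector of i64 indices that all fit in 8 bits
/// can be truncated to i8, and an index computed as (shl (zext from i8), 2)
/// needs only 10 bits, so it can be rebuilt with i16 elements instead of i64.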
12716 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
12717 if (isIndexTypeSigned(IndexType))
12718 return false;
12720 if (!N->hasOneUse())
12721 return false;
12723 EVT VT = N.getValueType();
12724 SDLoc DL(N);
12726 // In general, what we're doing here is seeing if we can sink a truncate to
12727 // a smaller element type into the expression tree building our index.
12728 // TODO: We can generalize this and handle a bunch more cases if useful.
12730 // Narrow a buildvector to the narrowest element type. This requires less
12731 // work and less register pressure at high LMUL, and creates smaller constants
12732 // which may be cheaper to materialize.
12733 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
12734 KnownBits Known = DAG.computeKnownBits(N);
12735 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
12736 LLVMContext &C = *DAG.getContext();
12737 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
12738 if (ResultVT.bitsLT(VT.getVectorElementType())) {
12739 N = DAG.getNode(ISD::TRUNCATE, DL,
12740 VT.changeVectorElementType(ResultVT), N);
12741 return true;
12745 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
12746 if (N.getOpcode() != ISD::SHL)
12747 return false;
12749 SDValue N0 = N.getOperand(0);
12750 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
12751 N0.getOpcode() != RISCVISD::VZEXT_VL)
12752 return false;
12753 if (!N0->hasOneUse())
12754 return false;
12756 APInt ShAmt;
12757 SDValue N1 = N.getOperand(1);
12758 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
12759 return false;
12761 SDValue Src = N0.getOperand(0);
12762 EVT SrcVT = Src.getValueType();
12763 unsigned SrcElen = SrcVT.getScalarSizeInBits();
12764 unsigned ShAmtV = ShAmt.getZExtValue();
12765 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
12766 NewElen = std::max(NewElen, 8U);
12768 // Skip if NewElen is not narrower than the original extended type.
12769 if (NewElen >= N0.getValueType().getScalarSizeInBits())
12770 return false;
12772 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
12773 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
12775 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
12776 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
12777 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
12778 return true;
12781 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
12782 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
12783 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
12784 // can become a sext.w instead of a shift pair.
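// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the new constant is a
// single LUI and the sext_inreg selects to a single sext.w.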
12785 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
12786 const RISCVSubtarget &Subtarget) {
12787 SDValue N0 = N->getOperand(0);
12788 SDValue N1 = N->getOperand(1);
12789 EVT VT = N->getValueType(0);
12790 EVT OpVT = N0.getValueType();
12792 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
12793 return SDValue();
12795 // RHS needs to be a constant.
12796 auto *N1C = dyn_cast<ConstantSDNode>(N1);
12797 if (!N1C)
12798 return SDValue();
12800 // LHS needs to be (and X, 0xffffffff).
12801 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
12802 !isa<ConstantSDNode>(N0.getOperand(1)) ||
12803 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
12804 return SDValue();
12806 // Looking for an equality compare.
12807 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12808 if (!isIntEqualitySetCC(Cond))
12809 return SDValue();
12811 // Don't do this if the sign bit is provably zero; it will be turned back into
12812 // an AND.
12813 APInt SignMask = APInt::getOneBitSet(64, 31);
12814 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
12815 return SDValue();
12817 const APInt &C1 = N1C->getAPIntValue();
12819 SDLoc dl(N);
12820 // If the constant is larger than 2^32 - 1 it is impossible for both sides
12821 // to be equal.
12822 if (C1.getActiveBits() > 32)
12823 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
12825 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
12826 N0.getOperand(0), DAG.getValueType(MVT::i32));
12827 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
12828 dl, OpVT), Cond);
12831 static SDValue
12832 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
12833 const RISCVSubtarget &Subtarget) {
12834 SDValue Src = N->getOperand(0);
12835 EVT VT = N->getValueType(0);
12837 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
12838 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12839 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
12840 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
12841 Src.getOperand(0));
12843 return SDValue();
12846 namespace {
12847 // Forward declaration of the structure holding the necessary information to
12848 // apply a combine.
12849 struct CombineResult;
12851 /// Helper class for folding sign/zero extensions.
12852 /// In particular, this class is used for the following combines:
12853 /// add_vl -> vwadd(u) | vwadd(u)_w
12854 /// sub_vl -> vwsub(u) | vwsub(u)_w
12855 /// mul_vl -> vwmul(u) | vwmul_su
12857 /// An object of this class represents an operand of the operation we want to
12858 /// combine.
12859 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
12860 /// NodeExtensionHelper for `a` and one for `b`.
12862 /// This class abstracts away how the extension is materialized and
12863 /// how its Mask, VL, and number of users affect the combines.
12865 /// In particular:
12866 /// - VWADD_W is conceptually == add(op0, sext(op1))
12867 /// - VWADDU_W == add(op0, zext(op1))
12868 /// - VWSUB_W == sub(op0, sext(op1))
12869 /// - VWSUBU_W == sub(op0, zext(op1))
12871 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
12872 /// zext|sext(smaller_value).
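///
/// For example, (add_vl (vsext_vl a), (vmv_v_x_vl 3)) can become
/// (vwadd_vl a, splat of 3 at the narrower element type), since a small
/// constant splat supports being treated as either sign or zero extended.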
12873 struct NodeExtensionHelper {
12874 /// Records if this operand is like being zero extended.
12875 bool SupportsZExt;
12876 /// Records if this operand is like being sign extended.
12877 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
12878 /// instance, a splat constant (e.g., 3) would support being both sign and
12879 /// zero extended.
12880 bool SupportsSExt;
12881 /// This boolean captures whether we care if this operand would still be
12882 /// around after the folding happens.
12883 bool EnforceOneUse;
12884 /// Records if this operand's mask needs to match the mask of the operation
12885 /// that it will fold into.
12886 bool CheckMask;
12887 /// Value of the Mask for this operand.
12888 /// It may be SDValue().
12889 SDValue Mask;
12890 /// Value of the vector length operand.
12891 /// It may be SDValue().
12892 SDValue VL;
12893 /// Original value that this NodeExtensionHelper represents.
12894 SDValue OrigOperand;
12896 /// Get the value feeding the extension or the value itself.
12897 /// E.g., for zext(a), this would return a.
12898 SDValue getSource() const {
12899 switch (OrigOperand.getOpcode()) {
12900 case RISCVISD::VSEXT_VL:
12901 case RISCVISD::VZEXT_VL:
12902 return OrigOperand.getOperand(0);
12903 default:
12904 return OrigOperand;
12908 /// Check if this instance represents a splat.
12909 bool isSplat() const {
12910 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
12913 /// Get or create a value that can feed \p Root with the given extension \p
12914 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
12915 /// \see ::getSource().
12916 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
12917 std::optional<bool> SExt) const {
12918 if (!SExt.has_value())
12919 return OrigOperand;
12921 MVT NarrowVT = getNarrowType(Root);
12923 SDValue Source = getSource();
12924 if (Source.getValueType() == NarrowVT)
12925 return Source;
12927 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
12929 // If we need an extension, we should be changing the type.
12930 SDLoc DL(Root);
12931 auto [Mask, VL] = getMaskAndVL(Root);
12932 switch (OrigOperand.getOpcode()) {
12933 case RISCVISD::VSEXT_VL:
12934 case RISCVISD::VZEXT_VL:
12935 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
12936 case RISCVISD::VMV_V_X_VL:
12937 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
12938 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
12939 default:
12940 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
12941 // and that operand should already have the right NarrowVT so no
12942 // extension should be required at this point.
12943 llvm_unreachable("Unsupported opcode");
12947 /// Helper function to get the narrow type for \p Root.
12948 /// The narrow type is the type of \p Root where we divided the size of each
12949 /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
12950 /// \pre The size of the type of the elements of Root must be a multiple of 2
12951 /// and be at least 16.
12952 static MVT getNarrowType(const SDNode *Root) {
12953 MVT VT = Root->getSimpleValueType(0);
12955 // Determine the narrow size.
12956 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
12957 assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
12958 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
12959 VT.getVectorElementCount());
12960 return NarrowVT;
12963 /// Return the opcode required to materialize the folding of the sign
12964 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
12965 /// both operands for \p Opcode.
12966 /// Put differently, get the opcode to materialize:
12967 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
12968 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
12969 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
12970 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
12971 switch (Opcode) {
12972 case RISCVISD::ADD_VL:
12973 case RISCVISD::VWADD_W_VL:
12974 case RISCVISD::VWADDU_W_VL:
12975 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
12976 case RISCVISD::MUL_VL:
12977 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
12978 case RISCVISD::SUB_VL:
12979 case RISCVISD::VWSUB_W_VL:
12980 case RISCVISD::VWSUBU_W_VL:
12981 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
12982 default:
12983 llvm_unreachable("Unexpected opcode");
12987 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
12988 /// newOpcode(a, b).
12989 static unsigned getSUOpcode(unsigned Opcode) {
12990 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
12991 return RISCVISD::VWMULSU_VL;
12994 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
12995 /// newOpcode(a, b).
12996 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
12997 switch (Opcode) {
12998 case RISCVISD::ADD_VL:
12999 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
13000 case RISCVISD::SUB_VL:
13001 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
13002 default:
13003 llvm_unreachable("Unexpected opcode");
13007 using CombineToTry = std::function<std::optional<CombineResult>(
13008 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13009 const NodeExtensionHelper & /*RHS*/)>;
13011 /// Check if this node needs to be fully folded or extended for all users.
13012 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13014 /// Helper method to set the various fields of this struct based on the
13015 /// type of \p Root.
13016 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
13017 SupportsZExt = false;
13018 SupportsSExt = false;
13019 EnforceOneUse = true;
13020 CheckMask = true;
13021 switch (OrigOperand.getOpcode()) {
13022 case RISCVISD::VZEXT_VL:
13023 SupportsZExt = true;
13024 Mask = OrigOperand.getOperand(1);
13025 VL = OrigOperand.getOperand(2);
13026 break;
13027 case RISCVISD::VSEXT_VL:
13028 SupportsSExt = true;
13029 Mask = OrigOperand.getOperand(1);
13030 VL = OrigOperand.getOperand(2);
13031 break;
13032 case RISCVISD::VMV_V_X_VL: {
13033 // Historically, we didn't care about splat values not disappearing during
13034 // combines.
13035 EnforceOneUse = false;
13036 CheckMask = false;
13037 VL = OrigOperand.getOperand(2);
13039 // The operand is a splat of a scalar.
13041 // The passthru must be undef for tail agnostic.
13042 if (!OrigOperand.getOperand(0).isUndef())
13043 break;
13045 // Get the scalar value.
13046 SDValue Op = OrigOperand.getOperand(1);
13048 // See if we have enough sign bits or zero bits in the scalar to use a
13049 // widening opcode by splatting to smaller element size.
13050 MVT VT = Root->getSimpleValueType(0);
13051 unsigned EltBits = VT.getScalarSizeInBits();
13052 unsigned ScalarBits = Op.getValueSizeInBits();
13053 // Make sure we're getting all element bits from the scalar register.
13054 // FIXME: Support implicit sign extension of vmv.v.x?
13055 if (ScalarBits < EltBits)
13056 break;
13058 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13059 // If the narrow type cannot be expressed with a legal VMV,
13060 // this is not a valid candidate.
13061 if (NarrowSize < 8)
13062 break;
13064 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13065 SupportsSExt = true;
13066 if (DAG.MaskedValueIsZero(Op,
13067 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13068 SupportsZExt = true;
13069 break;
13071 default:
13072 break;
13076 /// Check if \p Root supports any extension folding combines.
13077 static bool isSupportedRoot(const SDNode *Root) {
13078 switch (Root->getOpcode()) {
13079 case RISCVISD::ADD_VL:
13080 case RISCVISD::MUL_VL:
13081 case RISCVISD::VWADD_W_VL:
13082 case RISCVISD::VWADDU_W_VL:
13083 case RISCVISD::SUB_VL:
13084 case RISCVISD::VWSUB_W_VL:
13085 case RISCVISD::VWSUBU_W_VL:
13086 return true;
13087 default:
13088 return false;
13092 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
13093 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
13094 assert(isSupportedRoot(Root) && "Trying to build a helper with an "
13095 "unsupported root");
13096 assert(OperandIdx < 2 && "Requesting something other than LHS or RHS");
13097 OrigOperand = Root->getOperand(OperandIdx);
13099 unsigned Opc = Root->getOpcode();
13100 switch (Opc) {
13101 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
13102 // <ADD|SUB>(LHS, S|ZEXT(RHS))
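// For instance (editorial note): VWADDU_W_VL(a, b) behaves like
// ADD_VL(a, zext(b)), so only operand 1 carries an implicit extension.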
13103 case RISCVISD::VWADD_W_VL:
13104 case RISCVISD::VWADDU_W_VL:
13105 case RISCVISD::VWSUB_W_VL:
13106 case RISCVISD::VWSUBU_W_VL:
13107 if (OperandIdx == 1) {
13108 SupportsZExt =
13109 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
13110 SupportsSExt = !SupportsZExt;
13111 std::tie(Mask, VL) = getMaskAndVL(Root);
13112 CheckMask = true;
13113 // There's no existing extension here, so we don't have to worry about
13114 // making sure it gets removed.
13115 EnforceOneUse = false;
13116 break;
13118 [[fallthrough]];
13119 default:
13120 fillUpExtensionSupport(Root, DAG);
13121 break;
13125 /// Check if this operand is compatible with the given vector length \p VL.
13126 bool isVLCompatible(SDValue VL) const {
13127 return this->VL != SDValue() && this->VL == VL;
13130 /// Check if this operand is compatible with the given \p Mask.
13131 bool isMaskCompatible(SDValue Mask) const {
13132 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
13135 /// Helper function to get the Mask and VL from \p Root.
13136 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
13137 assert(isSupportedRoot(Root) && "Unexpected root");
13138 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
13141 /// Check if the Mask and VL of this operand are compatible with \p Root.
13142 bool areVLAndMaskCompatible(const SDNode *Root) const {
13143 auto [Mask, VL] = getMaskAndVL(Root);
13144 return isMaskCompatible(Mask) && isVLCompatible(VL);
13147 /// Helper function to check if \p N is commutative with respect to the
13148 /// foldings that are supported by this class.
13149 static bool isCommutative(const SDNode *N) {
13150 switch (N->getOpcode()) {
13151 case RISCVISD::ADD_VL:
13152 case RISCVISD::MUL_VL:
13153 case RISCVISD::VWADD_W_VL:
13154 case RISCVISD::VWADDU_W_VL:
13155 return true;
13156 case RISCVISD::SUB_VL:
13157 case RISCVISD::VWSUB_W_VL:
13158 case RISCVISD::VWSUBU_W_VL:
13159 return false;
13160 default:
13161 llvm_unreachable("Unexpected opcode");
13165 /// Get a list of combine to try for folding extensions in \p Root.
13166 /// Note that each returned CombineToTry function doesn't actually modify
13167 /// anything. Instead, it produces an optional CombineResult that, if not
13168 /// std::nullopt, needs to be materialized for the combine to be applied.
13169 /// \see CombineResult::materialize.
13170 /// If the related CombineToTry function returns std::nullopt, that means the
13171 /// combine didn't match.
13172 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
13175 /// Helper structure that holds all the necessary information to materialize a
13176 /// combine that does some extension folding.
13177 struct CombineResult {
13178 /// Opcode to be generated when materializing the combine.
13179 unsigned TargetOpcode;
13180 /// No value means no extension is needed. If extension is needed, the value
13181 /// indicates if it needs to be sign extended.
13182 std::optional<bool> SExtLHS;
13183 std::optional<bool> SExtRHS;
13184 /// Root of the combine.
13185 SDNode *Root;
13186 /// LHS of the TargetOpcode.
13187 NodeExtensionHelper LHS;
13188 /// RHS of the TargetOpcode.
13189 NodeExtensionHelper RHS;
13191 CombineResult(unsigned TargetOpcode, SDNode *Root,
13192 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
13193 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
13194 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
13195 Root(Root), LHS(LHS), RHS(RHS) {}
13197 /// Return a value that uses TargetOpcode and that can be used to replace
13198 /// Root.
13199 /// The actual replacement is *not* done in that method.
13200 SDValue materialize(SelectionDAG &DAG) const {
13201 SDValue Mask, VL, Merge;
13202 std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
13203 Merge = Root->getOperand(2);
13204 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
13205 LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
13206 RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
13207 Mask, VL);
13211 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
13212 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
13213 /// are zext) and LHS and RHS can be folded into Root.
13214 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
13216 /// \note If the pattern can match with both zext and sext, the returned
13217 /// CombineResult will feature the zext result.
13219 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13220 /// can be used to apply the pattern.
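/// Illustrative example (editorial): ADD_VL(zext(a), zext(b)) with matching
/// mask and VL yields VWADDU_VL(a, b); the zext form is preferred because it
/// is checked first.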
13221 static std::optional<CombineResult>
13222 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
13223 const NodeExtensionHelper &RHS, bool AllowSExt,
13224 bool AllowZExt) {
13225 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
13226 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
13227 return std::nullopt;
13228 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
13229 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
13230 Root->getOpcode(), /*IsSExt=*/false),
13231 Root, LHS, /*SExtLHS=*/false, RHS,
13232 /*SExtRHS=*/false);
13233 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
13234 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
13235 Root->getOpcode(), /*IsSExt=*/true),
13236 Root, LHS, /*SExtLHS=*/true, RHS,
13237 /*SExtRHS=*/true);
13238 return std::nullopt;
13241 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
13242 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
13243 /// are zext) and LHS and RHS can be folded into Root.
13245 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13246 /// can be used to apply the pattern.
13247 static std::optional<CombineResult>
13248 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
13249 const NodeExtensionHelper &RHS) {
13250 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
13251 /*AllowZExt=*/true);
13254 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
13256 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13257 /// can be used to apply the pattern.
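/// Illustrative example (editorial): ADD_VL(a, zext(b)) -> VWADDU_W_VL(a, b),
/// provided b is not a splat (or AllowSplatInVW_W is set) and b's mask/VL
/// match the root's.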
13258 static std::optional<CombineResult>
13259 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
13260 const NodeExtensionHelper &RHS) {
13261 if (!RHS.areVLAndMaskCompatible(Root))
13262 return std::nullopt;
13264 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
13265 // sext/zext?
13266 // Control this behavior behind an option (AllowSplatInVW_W) for testing
13267 // purposes.
13268 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
13269 return CombineResult(
13270 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
13271 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
13272 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
13273 return CombineResult(
13274 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
13275 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
13276 return std::nullopt;
13279 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
13281 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13282 /// can be used to apply the pattern.
13283 static std::optional<CombineResult>
13284 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
13285 const NodeExtensionHelper &RHS) {
13286 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
13287 /*AllowZExt=*/false);
13290 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
13292 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13293 /// can be used to apply the pattern.
13294 static std::optional<CombineResult>
13295 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
13296 const NodeExtensionHelper &RHS) {
13297 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
13298 /*AllowZExt=*/true);
13301 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
13303 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
13304 /// can be used to apply the pattern.
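/// Illustrative example (editorial): MUL_VL(sext(a), zext(b)) ->
/// VWMULSU_VL(a, b), provided both operands' mask and VL match the root's.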
13305 static std::optional<CombineResult>
13306 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
13307 const NodeExtensionHelper &RHS) {
13308 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
13309 return std::nullopt;
13310 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
13311 return std::nullopt;
13312 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
13313 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
13316 SmallVector<NodeExtensionHelper::CombineToTry>
13317 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
13318 SmallVector<CombineToTry> Strategies;
13319 switch (Root->getOpcode()) {
13320 case RISCVISD::ADD_VL:
13321 case RISCVISD::SUB_VL:
13322 // add|sub -> vwadd(u)|vwsub(u)
13323 Strategies.push_back(canFoldToVWWithSameExtension);
13324 // add|sub -> vwadd(u)_w|vwsub(u)_w
13325 Strategies.push_back(canFoldToVW_W);
13326 break;
13327 case RISCVISD::MUL_VL:
13328 // mul -> vwmul(u)
13329 Strategies.push_back(canFoldToVWWithSameExtension);
13330 // mul -> vwmulsu
13331 Strategies.push_back(canFoldToVW_SU);
13332 break;
13333 case RISCVISD::VWADD_W_VL:
13334 case RISCVISD::VWSUB_W_VL:
13335 // vwadd_w|vwsub_w -> vwadd|vwsub
13336 Strategies.push_back(canFoldToVWWithSEXT);
13337 break;
13338 case RISCVISD::VWADDU_W_VL:
13339 case RISCVISD::VWSUBU_W_VL:
13340 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
13341 Strategies.push_back(canFoldToVWWithZEXT);
13342 break;
13343 default:
13344 llvm_unreachable("Unexpected opcode");
13346 return Strategies;
13348 } // End anonymous namespace.
13350 /// Combine a binary operation to its equivalent VW or VW_W form.
13351 /// The supported combines are:
13352 /// add_vl -> vwadd(u) | vwadd(u)_w
13353 /// sub_vl -> vwsub(u) | vwsub(u)_w
13354 /// mul_vl -> vwmul(u) | vwmul_su
13355 /// vwadd(u)_w -> vwadd(u)
13356 /// vwsub(u)_w -> vwsub(u)
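/// For instance (editorial sketch): mul_vl (vsext_vl a), (vzext_vl b) with
/// matching mask and VL is rewritten to vwmulsu_vl a, b, leaving the original
/// extends to be removed once they become dead.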
13357 static SDValue
13358 combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
13359 SelectionDAG &DAG = DCI.DAG;
13361 assert(NodeExtensionHelper::isSupportedRoot(N) &&
13362 "Shouldn't have called this method");
13363 SmallVector<SDNode *> Worklist;
13364 SmallSet<SDNode *, 8> Inserted;
13365 Worklist.push_back(N);
13366 Inserted.insert(N);
13367 SmallVector<CombineResult> CombinesToApply;
13369 while (!Worklist.empty()) {
13370 SDNode *Root = Worklist.pop_back_val();
13371 if (!NodeExtensionHelper::isSupportedRoot(Root))
13372 return SDValue();
13374 NodeExtensionHelper LHS(N, 0, DAG);
13375 NodeExtensionHelper RHS(N, 1, DAG);
13376 auto AppendUsersIfNeeded = [&Worklist,
13377 &Inserted](const NodeExtensionHelper &Op) {
13378 if (Op.needToPromoteOtherUsers()) {
13379 for (SDNode *TheUse : Op.OrigOperand->uses()) {
13380 if (Inserted.insert(TheUse).second)
13381 Worklist.push_back(TheUse);
13386 // Control the compile time by limiting the number of nodes we look at in
13387 // total.
13388 if (Inserted.size() > ExtensionMaxWebSize)
13389 return SDValue();
13391 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
13392 NodeExtensionHelper::getSupportedFoldings(N);
13394 assert(!FoldingStrategies.empty() && "Nothing to be folded");
13395 bool Matched = false;
13396 for (int Attempt = 0;
13397 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
13398 ++Attempt) {
13400 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
13401 FoldingStrategies) {
13402 std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
13403 if (Res) {
13404 Matched = true;
13405 CombinesToApply.push_back(*Res);
13406 // All the inputs that are extended need to be folded, otherwise
13407 // we would be leaving the old input (since it may still be used),
13408 // and the new one.
13409 if (Res->SExtLHS.has_value())
13410 AppendUsersIfNeeded(LHS);
13411 if (Res->SExtRHS.has_value())
13412 AppendUsersIfNeeded(RHS);
13413 break;
13416 std::swap(LHS, RHS);
13418 // Right now we do an all-or-nothing approach.
13419 if (!Matched)
13420 return SDValue();
13422 // Store the value for the replacement of the input node separately.
13423 SDValue InputRootReplacement;
13424 // We do the RAUW after we materialize all the combines, because some replaced
13425 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
13426 // some of these nodes may appear in the NodeExtensionHelpers of some of the
13427 // yet-to-be-visited CombinesToApply roots.
13428 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
13429 ValuesToReplace.reserve(CombinesToApply.size());
13430 for (CombineResult Res : CombinesToApply) {
13431 SDValue NewValue = Res.materialize(DAG);
13432 if (!InputRootReplacement) {
13433 assert(Res.Root == N &&
13434 "First element is expected to be the current node");
13435 InputRootReplacement = NewValue;
13436 } else {
13437 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
13440 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
13441 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
13442 DCI.AddToWorklist(OldNewValues.second.getNode());
13444 return InputRootReplacement;
13447 // Helper function for performMemPairCombine.
13448 // Try to combine the memory loads/stores LSNode1 and LSNode2
13449 // into a single memory pair operation.
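// For example (editorial note): two simple i64 loads at Base and Base+8
// become a single TH_LDD intrinsic node with two XLenVT results plus a chain;
// i32 pairs use TH_LWD (or TH_LWUD for zext loads).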
13450 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
13451 LSBaseSDNode *LSNode2, SDValue BasePtr,
13452 uint64_t Imm) {
13453 SmallPtrSet<const SDNode *, 32> Visited;
13454 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
13456 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
13457 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
13458 return SDValue();
13460 MachineFunction &MF = DAG.getMachineFunction();
13461 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
13463 // The new operation has twice the width.
13464 MVT XLenVT = Subtarget.getXLenVT();
13465 EVT MemVT = LSNode1->getMemoryVT();
13466 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
13467 MachineMemOperand *MMO = LSNode1->getMemOperand();
13468 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
13469 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
13471 if (LSNode1->getOpcode() == ISD::LOAD) {
13472 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
13473 unsigned Opcode;
13474 if (MemVT == MVT::i32)
13475 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
13476 else
13477 Opcode = RISCVISD::TH_LDD;
13479 SDValue Res = DAG.getMemIntrinsicNode(
13480 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
13481 {LSNode1->getChain(), BasePtr,
13482 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13483 NewMemVT, NewMMO);
13485 SDValue Node1 =
13486 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
13487 SDValue Node2 =
13488 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
13490 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
13491 return Node1;
13492 } else {
13493 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
13495 SDValue Res = DAG.getMemIntrinsicNode(
13496 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
13497 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
13498 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13499 NewMemVT, NewMMO);
13501 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
13502 return Res;
13506 // Try to combine two adjacent loads/stores to a single pair instruction from
13507 // the XTHeadMemPair vendor extension.
13508 static SDValue performMemPairCombine(SDNode *N,
13509 TargetLowering::DAGCombinerInfo &DCI) {
13510 SelectionDAG &DAG = DCI.DAG;
13511 MachineFunction &MF = DAG.getMachineFunction();
13512 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
13514 // Target does not support load/store pair.
13515 if (!Subtarget.hasVendorXTHeadMemPair())
13516 return SDValue();
13518 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
13519 EVT MemVT = LSNode1->getMemoryVT();
13520 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
13522 // No volatile, indexed or atomic loads/stores.
13523 if (!LSNode1->isSimple() || LSNode1->isIndexed())
13524 return SDValue();
13526 // Function to get a base + constant representation from a memory value.
13527 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
13528 if (Ptr->getOpcode() == ISD::ADD)
13529 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
13530 return {Ptr->getOperand(0), C1->getZExtValue()};
13531 return {Ptr, 0};
13534 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
13536 SDValue Chain = N->getOperand(0);
13537 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
13538 UI != UE; ++UI) {
13539 SDUse &Use = UI.getUse();
13540 if (Use.getUser() != N && Use.getResNo() == 0 &&
13541 Use.getUser()->getOpcode() == N->getOpcode()) {
13542 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
13544 // No volatile, indexed or atomic loads/stores.
13545 if (!LSNode2->isSimple() || LSNode2->isIndexed())
13546 continue;
13548 // Check if LSNode1 and LSNode2 have the same type and extension.
13549 if (LSNode1->getOpcode() == ISD::LOAD)
13550 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
13551 cast<LoadSDNode>(LSNode1)->getExtensionType())
13552 continue;
13554 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
13555 continue;
13557 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
13559 // Check if the base pointer is the same for both instructions.
13560 if (Base1 != Base2)
13561 continue;
13563 // Check if the offsets match the XTHeadMemPair encoding constraints.
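// For example (editorial note): valid i64 pairs start at offsets 0, 16, 32 or
// 48 (uimm2 << 4), e.g. (0,8) or (48,56); valid i32 pairs start at 0, 8, 16
// or 24 (uimm2 << 3), e.g. (0,4) or (24,28).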
13564 bool Valid = false;
13565 if (MemVT == MVT::i32) {
13566 // Check for adjacent i32 values and a 2-bit index.
13567 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
13568 Valid = true;
13569 } else if (MemVT == MVT::i64) {
13570 // Check for adjacent i64 values and a 2-bit index.
13571 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
13572 Valid = true;
13575 if (!Valid)
13576 continue;
13578 // Try to combine.
13579 if (SDValue Res =
13580 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
13581 return Res;
13585 return SDValue();
13588 // Fold
13589 // (fp_to_int (froundeven X)) -> fcvt X, rne
13590 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
13591 // (fp_to_int (ffloor X)) -> fcvt X, rdn
13592 // (fp_to_int (fceil X)) -> fcvt X, rup
13593 // (fp_to_int (fround X)) -> fcvt X, rmm
13594 // (fp_to_int (frint X)) -> fcvt X
13595 static SDValue performFP_TO_INTCombine(SDNode *N,
13596 TargetLowering::DAGCombinerInfo &DCI,
13597 const RISCVSubtarget &Subtarget) {
13598 SelectionDAG &DAG = DCI.DAG;
13599 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13600 MVT XLenVT = Subtarget.getXLenVT();
13602 SDValue Src = N->getOperand(0);
13604 // Don't do this for strict-fp Src.
13605 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13606 return SDValue();
13608 // Ensure the FP type is legal.
13609 if (!TLI.isTypeLegal(Src.getValueType()))
13610 return SDValue();
13612 // Don't do this for f16 with Zfhmin and not Zfh.
13613 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13614 return SDValue();
13616 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13617 // If the result is invalid, we didn't find a foldable instruction.
13618 if (FRM == RISCVFPRndMode::Invalid)
13619 return SDValue();
13621 SDLoc DL(N);
13622 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13623 EVT VT = N->getValueType(0);
13625 if (VT.isVector() && TLI.isTypeLegal(VT)) {
13626 MVT SrcVT = Src.getSimpleValueType();
13627 MVT SrcContainerVT = SrcVT;
13628 MVT ContainerVT = VT.getSimpleVT();
13629 SDValue XVal = Src.getOperand(0);
13631 // For widening and narrowing conversions we just combine it into a
13632 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
13633 // end up getting lowered to their appropriate pseudo instructions based on
13634 // their operand types.
13635 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
13636 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
13637 return SDValue();
13639 // Make fixed-length vectors scalable first
13640 if (SrcVT.isFixedLengthVector()) {
13641 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
13642 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
13643 ContainerVT =
13644 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
13647 auto [Mask, VL] =
13648 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
13650 SDValue FpToInt;
13651 if (FRM == RISCVFPRndMode::RTZ) {
13652 // Use the dedicated trunc static rounding mode if we're truncating so we
13653 // don't need to generate calls to fsrmi/fsrm
13654 unsigned Opc =
13655 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
13656 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
13657 } else if (FRM == RISCVFPRndMode::DYN) {
13658 unsigned Opc =
13659 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
13660 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
13661 } else {
13662 unsigned Opc =
13663 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
13664 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
13665 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
13668 // If converted from fixed-length to scalable, convert back
13669 if (VT.isFixedLengthVector())
13670 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
13672 return FpToInt;
13675 // Only handle XLen or i32 types. Other types narrower than XLen will
13676 // eventually be legalized to XLenVT.
13677 if (VT != MVT::i32 && VT != XLenVT)
13678 return SDValue();
13680 unsigned Opc;
13681 if (VT == XLenVT)
13682 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13683 else
13684 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13686 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
13687 DAG.getTargetConstant(FRM, DL, XLenVT));
13688 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
13691 // Fold
13692 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
13693 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
13694 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
13695 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
13696 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
13697 // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
13698 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
13699 TargetLowering::DAGCombinerInfo &DCI,
13700 const RISCVSubtarget &Subtarget) {
13701 SelectionDAG &DAG = DCI.DAG;
13702 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13703 MVT XLenVT = Subtarget.getXLenVT();
13705 // Only handle XLen types. Other types narrower than XLen will eventually be
13706 // legalized to XLenVT.
13707 EVT DstVT = N->getValueType(0);
13708 if (DstVT != XLenVT)
13709 return SDValue();
13711 SDValue Src = N->getOperand(0);
13713 // Don't do this for strict-fp Src.
13714 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13715 return SDValue();
13717 // Ensure the FP type is also legal.
13718 if (!TLI.isTypeLegal(Src.getValueType()))
13719 return SDValue();
13721 // Don't do this for f16 with Zfhmin and not Zfh.
13722 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13723 return SDValue();
13725 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13727 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13728 if (FRM == RISCVFPRndMode::Invalid)
13729 return SDValue();
13731 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
13733 unsigned Opc;
13734 if (SatVT == DstVT)
13735 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13736 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
13737 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13738 else
13739 return SDValue();
13740 // FIXME: Support other SatVTs by clamping before or after the conversion.
13742 Src = Src.getOperand(0);
13744 SDLoc DL(N);
13745 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
13746 DAG.getTargetConstant(FRM, DL, XLenVT));
13748 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
13749 // extend.
13750 if (Opc == RISCVISD::FCVT_WU_RV64)
13751 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
13753 // RISC-V FP-to-int conversions saturate to the destination register size, but
13754 // don't produce 0 for nan.
13755 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
13756 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
13759 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
13760 // smaller than XLenVT.
13761 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
13762 const RISCVSubtarget &Subtarget) {
13763 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
13765 SDValue Src = N->getOperand(0);
13766 if (Src.getOpcode() != ISD::BSWAP)
13767 return SDValue();
13769 EVT VT = N->getValueType(0);
13770 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
13771 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
13772 return SDValue();
13774 SDLoc DL(N);
13775 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
13778 // Convert from one FMA opcode to another based on whether we are negating the
13779 // multiply result and/or the accumulator.
13780 // NOTE: Only supports RVV operations with VL.
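// For example (editorial note), starting from VFMADD_VL: negating the
// multiply result gives VFNMSUB_VL, negating the accumulator gives
// VFMSUB_VL, and negating both gives VFNMADD_VL.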
13781 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
13782 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
13783 if (NegMul) {
13784 // clang-format off
13785 switch (Opcode) {
13786 default: llvm_unreachable("Unexpected opcode");
13787 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
13788 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
13789 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
13790 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
13791 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
13792 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
13793 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
13794 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
13796 // clang-format on
13799 // Negating the accumulator changes ADD<->SUB.
13800 if (NegAcc) {
13801 // clang-format off
13802 switch (Opcode) {
13803 default: llvm_unreachable("Unexpected opcode");
13804 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
13805 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
13806 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
13807 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
13808 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
13809 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
13810 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
13811 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
13813 // clang-format on
13816 return Opcode;
13819 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
13820 // Fold FNEG_VL into FMA opcodes.
13821 // The first operand of strict-fp is chain.
13822 unsigned Offset = N->isTargetStrictFPOpcode();
13823 SDValue A = N->getOperand(0 + Offset);
13824 SDValue B = N->getOperand(1 + Offset);
13825 SDValue C = N->getOperand(2 + Offset);
13826 SDValue Mask = N->getOperand(3 + Offset);
13827 SDValue VL = N->getOperand(4 + Offset);
13829 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
13830 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
13831 V.getOperand(2) == VL) {
13832 // Return the negated input.
13833 V = V.getOperand(0);
13834 return true;
13837 return false;
13840 bool NegA = invertIfNegative(A);
13841 bool NegB = invertIfNegative(B);
13842 bool NegC = invertIfNegative(C);
13844 // If no operands are negated, we're done.
13845 if (!NegA && !NegB && !NegC)
13846 return SDValue();
13848 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
13849 if (N->isTargetStrictFPOpcode())
13850 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
13851 {N->getOperand(0), A, B, C, Mask, VL});
13852 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
13853 VL);
13856 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
13857 const RISCVSubtarget &Subtarget) {
13858 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
13859 return V;
13861 if (N->getValueType(0).isScalableVector() &&
13862 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13863 (Subtarget.hasVInstructionsF16Minimal() &&
13864 !Subtarget.hasVInstructionsF16())) {
13865 return SDValue();
13868 // FIXME: Ignore strict opcodes for now.
13869 if (N->isTargetStrictFPOpcode())
13870 return SDValue();
13872 // Try to form widening FMA.
13873 SDValue Op0 = N->getOperand(0);
13874 SDValue Op1 = N->getOperand(1);
13875 SDValue Mask = N->getOperand(3);
13876 SDValue VL = N->getOperand(4);
13878 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
13879 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
13880 return SDValue();
13882 // TODO: Refactor to handle more complex cases similar to
13883 // combineBinOp_VLToVWBinOp_VL.
13884 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
13885 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
13886 return SDValue();
13888 // Check the mask and VL are the same.
13889 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
13890 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
13891 return SDValue();
13893 unsigned NewOpc;
13894 switch (N->getOpcode()) {
13895 default:
13896 llvm_unreachable("Unexpected opcode");
13897 case RISCVISD::VFMADD_VL:
13898 NewOpc = RISCVISD::VFWMADD_VL;
13899 break;
13900 case RISCVISD::VFNMSUB_VL:
13901 NewOpc = RISCVISD::VFWNMSUB_VL;
13902 break;
13903 case RISCVISD::VFNMADD_VL:
13904 NewOpc = RISCVISD::VFWNMADD_VL;
13905 break;
13906 case RISCVISD::VFMSUB_VL:
13907 NewOpc = RISCVISD::VFWMSUB_VL;
13908 break;
13911 Op0 = Op0.getOperand(0);
13912 Op1 = Op1.getOperand(0);
13914 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
13915 N->getOperand(2), Mask, VL);
13918 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
13919 const RISCVSubtarget &Subtarget) {
13920 if (N->getValueType(0).isScalableVector() &&
13921 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13922 (Subtarget.hasVInstructionsF16Minimal() &&
13923 !Subtarget.hasVInstructionsF16())) {
13924 return SDValue();
13927 // FIXME: Ignore strict opcodes for now.
13928 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
13930 // Try to form widening multiply.
13931 SDValue Op0 = N->getOperand(0);
13932 SDValue Op1 = N->getOperand(1);
13933 SDValue Merge = N->getOperand(2);
13934 SDValue Mask = N->getOperand(3);
13935 SDValue VL = N->getOperand(4);
13937 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
13938 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
13939 return SDValue();
13941 // TODO: Refactor to handle more complex cases similar to
13942 // combineBinOp_VLToVWBinOp_VL.
13943 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
13944 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
13945 return SDValue();
13947 // Check the mask and VL are the same.
13948 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
13949 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
13950 return SDValue();
13952 Op0 = Op0.getOperand(0);
13953 Op1 = Op1.getOperand(0);
13955 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
13956 Op1, Merge, Mask, VL);
13959 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
13960 const RISCVSubtarget &Subtarget) {
13961 if (N->getValueType(0).isScalableVector() &&
13962 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13963 (Subtarget.hasVInstructionsF16Minimal() &&
13964 !Subtarget.hasVInstructionsF16())) {
13965 return SDValue();
13968 SDValue Op0 = N->getOperand(0);
13969 SDValue Op1 = N->getOperand(1);
13970 SDValue Merge = N->getOperand(2);
13971 SDValue Mask = N->getOperand(3);
13972 SDValue VL = N->getOperand(4);
13974 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
13976 // Look for foldable FP_EXTENDS.
13977 bool Op0IsExtend =
13978 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
13979 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
13980 bool Op1IsExtend =
13981 (Op0 == Op1 && Op0IsExtend) ||
13982 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
13984 // Check the mask and VL.
13985 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
13986 Op0IsExtend = false;
13987 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
13988 Op1IsExtend = false;
13990 // Canonicalize.
13991 if (!Op1IsExtend) {
13992 // Sub requires at least operand 1 to be an extend.
13993 if (!IsAdd)
13994 return SDValue();
13996 // Add is commutable, if the other operand is foldable, swap them.
13997 if (!Op0IsExtend)
13998 return SDValue();
14000 std::swap(Op0, Op1);
14001 std::swap(Op0IsExtend, Op1IsExtend);
14004 // Op1 is a foldable extend. Op0 might be foldable.
14005 Op1 = Op1.getOperand(0);
14006 if (Op0IsExtend)
14007 Op0 = Op0.getOperand(0);
14009 unsigned Opc;
14010 if (IsAdd)
14011 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
14012 else
14013 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
14015 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
14016 VL);
14019 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
14020 const RISCVSubtarget &Subtarget) {
14021 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
14023 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
14024 return SDValue();
14026 if (!isa<ConstantSDNode>(N->getOperand(1)))
14027 return SDValue();
14028 uint64_t ShAmt = N->getConstantOperandVal(1);
14029 if (ShAmt > 32)
14030 return SDValue();
14032 SDValue N0 = N->getOperand(0);
14034 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
14035 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
14036 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
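// Worked example (editorial): with C1 == 8 and C2 == 4,
// (sra (sext_inreg (shl X, 8), i32), 4) becomes (sra (shl X, 40), 36),
// which selects to slli+srai instead of slliw+sraiw.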
14037 if (ShAmt < 32 &&
14038 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
14039 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
14040 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
14041 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
14042 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
14043 if (LShAmt < 32) {
14044 SDLoc ShlDL(N0.getOperand(0));
14045 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
14046 N0.getOperand(0).getOperand(0),
14047 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
14048 SDLoc DL(N);
14049 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
14050 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
14054 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
14055 // FIXME: Should this be a generic combine? There's a similar combine on X86.
14057 // Also try these folds where an add or sub is in the middle.
14058 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
14059 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
14060 SDValue Shl;
14061 ConstantSDNode *AddC = nullptr;
14063 // We might have an ADD or SUB between the SRA and SHL.
14064 bool IsAdd = N0.getOpcode() == ISD::ADD;
14065 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
14066 // Other operand needs to be a constant we can modify.
14067 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
14068 if (!AddC)
14069 return SDValue();
14071 // AddC needs to have at least 32 trailing zeros.
14072 if (AddC->getAPIntValue().countr_zero() < 32)
14073 return SDValue();
14075 // All users should be a shift by constant less than or equal to 32. This
14076 // ensures we'll do this optimization for each of them to produce an
14077 // add/sub+sext_inreg they can all share.
14078 for (SDNode *U : N0->uses()) {
14079 if (U->getOpcode() != ISD::SRA ||
14080 !isa<ConstantSDNode>(U->getOperand(1)) ||
14081 U->getConstantOperandVal(1) > 32)
14082 return SDValue();
14085 Shl = N0.getOperand(IsAdd ? 0 : 1);
14086 } else {
14087 // Not an ADD or SUB.
14088 Shl = N0;
14091 // Look for a shift left by 32.
14092 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
14093 Shl.getConstantOperandVal(1) != 32)
14094 return SDValue();
14096 // If we didn't look through an add/sub, then the shl should have one use.
14097 // If we did look through an add/sub, the sext_inreg we create is free so
14098 // we're only creating 2 new instructions. It's enough to only remove the
14099 // original sra+add/sub.
14100 if (!AddC && !Shl.hasOneUse())
14101 return SDValue();
14103 SDLoc DL(N);
14104 SDValue In = Shl.getOperand(0);
14106 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
14107 // constant.
14108 if (AddC) {
14109 SDValue ShiftedAddC =
14110 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
14111 if (IsAdd)
14112 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
14113 else
14114 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
14117 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
14118 DAG.getValueType(MVT::i32));
14119 if (ShAmt == 32)
14120 return SExt;
14122 return DAG.getNode(
14123 ISD::SHL, DL, MVT::i64, SExt,
14124 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
14127 // Invert (and/or (setcc X, Y, cc), (xor Z, 1)) to (or/and (setcc X, Y, !cc), Z)
14128 // if the result is used as the condition of a br_cc or select_cc we can invert,
14129 // inverting the setcc is free, and Z is 0/1. Caller will invert the
14130 // br_cc/select_cc.
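// For example (editorial note): (and (setcc X, Y, eq), (xor Z, 1)) with Z
// known to be 0/1 becomes (or (setcc X, Y, ne), Z), and the caller then
// inverts the br_cc/select_cc condition that consumes it.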
14131 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
14132 bool IsAnd = Cond.getOpcode() == ISD::AND;
14133 if (!IsAnd && Cond.getOpcode() != ISD::OR)
14134 return SDValue();
14136 if (!Cond.hasOneUse())
14137 return SDValue();
14139 SDValue Setcc = Cond.getOperand(0);
14140 SDValue Xor = Cond.getOperand(1);
14141 // Canonicalize setcc to LHS.
14142 if (Setcc.getOpcode() != ISD::SETCC)
14143 std::swap(Setcc, Xor);
14144 // LHS should be a setcc and RHS should be an xor.
14145 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
14146 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
14147 return SDValue();
14149 // If the condition is an And, SimplifyDemandedBits may have changed
14150 // (xor Z, 1) to (not Z).
14151 SDValue Xor1 = Xor.getOperand(1);
14152 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
14153 return SDValue();
14155 EVT VT = Cond.getValueType();
14156 SDValue Xor0 = Xor.getOperand(0);
14158 // The LHS of the xor needs to be 0/1.
14159 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14160 if (!DAG.MaskedValueIsZero(Xor0, Mask))
14161 return SDValue();
14163 // We can only invert integer setccs.
14164 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
14165 if (!SetCCOpVT.isScalarInteger())
14166 return SDValue();
14168 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
14169 if (ISD::isIntEqualitySetCC(CCVal)) {
14170 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14171 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
14172 Setcc.getOperand(1), CCVal);
14173 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
14174 // Invert (setlt 0, X) by converting to (setlt X, 1).
14175 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
14176 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
14177 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
14178 // Invert (setlt X, 1) by converting to (setlt 0, X).
14179 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
14180 DAG.getConstant(0, SDLoc(Setcc), VT),
14181 Setcc.getOperand(0), CCVal);
14182 } else
14183 return SDValue();
14185 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14186 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
14189 // Perform common combines for BR_CC and SELECT_CC conditions.
14190 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
14191 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
14192 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
14194 // Since an arithmetic right shift always preserves the sign bit,
14195 // the shift can be omitted.
14196 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
14197 // setge (sra X, N), 0 -> setge X, 0
14198 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
14199 LHS.getOpcode() == ISD::SRA) {
14200 LHS = LHS.getOperand(0);
14201 return true;
14204 if (!ISD::isIntEqualitySetCC(CCVal))
14205 return false;
14207 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
14208 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
14209 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
14210 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
14211 // If we're looking for eq 0 instead of ne 0, we need to invert the
14212 // condition.
14213 bool Invert = CCVal == ISD::SETEQ;
14214 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
14215 if (Invert)
14216 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
14218 RHS = LHS.getOperand(1);
14219 LHS = LHS.getOperand(0);
14220 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
14222 CC = DAG.getCondCode(CCVal);
14223 return true;
14226 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
14227 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
14228 RHS = LHS.getOperand(1);
14229 LHS = LHS.getOperand(0);
14230 return true;
14233 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
14234 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
14235 LHS.getOperand(1).getOpcode() == ISD::Constant) {
14236 SDValue LHS0 = LHS.getOperand(0);
14237 if (LHS0.getOpcode() == ISD::AND &&
14238 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
14239 uint64_t Mask = LHS0.getConstantOperandVal(1);
14240 uint64_t ShAmt = LHS.getConstantOperandVal(1);
14241 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
14242 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
14243 CC = DAG.getCondCode(CCVal);
14245 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
14246 LHS = LHS0.getOperand(0);
14247 if (ShAmt != 0)
14248 LHS =
14249 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
14250 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
14251 return true;
14256 // Fold (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
14257 // This can occur when legalizing some floating point comparisons.
14258 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
14259 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
14260 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
14261 CC = DAG.getCondCode(CCVal);
14262 RHS = DAG.getConstant(0, DL, LHS.getValueType());
14263 return true;
14266 if (isNullConstant(RHS)) {
14267 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
14268 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
14269 CC = DAG.getCondCode(CCVal);
14270 LHS = NewCond;
14271 return true;
14275 return false;
14278 // Fold
14279 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
14280 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
14281 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
14282 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
14283 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
14284 SDValue TrueVal, SDValue FalseVal,
14285 bool Swapped) {
14286 bool Commutative = true;
14287 unsigned Opc = TrueVal.getOpcode();
14288 switch (Opc) {
14289 default:
14290 return SDValue();
14291 case ISD::SHL:
14292 case ISD::SRA:
14293 case ISD::SRL:
14294 case ISD::SUB:
14295 Commutative = false;
14296 break;
14297 case ISD::ADD:
14298 case ISD::OR:
14299 case ISD::XOR:
14300 break;
14303 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
14304 return SDValue();
14306 unsigned OpToFold;
14307 if (FalseVal == TrueVal.getOperand(0))
14308 OpToFold = 0;
14309 else if (Commutative && FalseVal == TrueVal.getOperand(1))
14310 OpToFold = 1;
14311 else
14312 return SDValue();
14314 EVT VT = N->getValueType(0);
14315 SDLoc DL(N);
14316 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
14317 EVT OtherOpVT = OtherOp->getValueType(0);
14318 SDValue IdentityOperand =
14319 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
14320 if (!Commutative)
14321 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
14322 assert(IdentityOperand && "No identity operand!");
14324 if (Swapped)
14325 std::swap(OtherOp, IdentityOperand);
14326 SDValue NewSel =
14327 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
14328 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
14331 // This tries to get rid of the `select` and `icmp` that are being used to
14332 // handle targets that do not support `cttz(0)`/`ctlz(0)`.
14333 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
14334 SDValue Cond = N->getOperand(0);
14336 // This represents either a CTTZ or a CTLZ instruction.
14337 SDValue CountZeroes;
14339 SDValue ValOnZero;
14341 if (Cond.getOpcode() != ISD::SETCC)
14342 return SDValue();
14344 if (!isNullConstant(Cond->getOperand(1)))
14345 return SDValue();
14347 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
14348 if (CCVal == ISD::CondCode::SETEQ) {
14349 CountZeroes = N->getOperand(2);
14350 ValOnZero = N->getOperand(1);
14351 } else if (CCVal == ISD::CondCode::SETNE) {
14352 CountZeroes = N->getOperand(1);
14353 ValOnZero = N->getOperand(2);
14354 } else {
14355 return SDValue();
14358 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
14359 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
14360 CountZeroes = CountZeroes.getOperand(0);
14362 if (CountZeroes.getOpcode() != ISD::CTTZ &&
14363 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
14364 CountZeroes.getOpcode() != ISD::CTLZ &&
14365 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
14366 return SDValue();
14368 if (!isNullConstant(ValOnZero))
14369 return SDValue();
14371 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
14372 if (Cond->getOperand(0) != CountZeroesArgument)
14373 return SDValue();
14375 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
14376 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
14377 CountZeroes.getValueType(), CountZeroesArgument);
14378 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
14379 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
14380 CountZeroes.getValueType(), CountZeroesArgument);
14383 unsigned BitWidth = CountZeroes.getValueSizeInBits();
14384 SDValue BitWidthMinusOne =
14385 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
14387 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
14388 CountZeroes, BitWidthMinusOne);
14389 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
14392 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
14393 const RISCVSubtarget &Subtarget) {
14394 SDValue Cond = N->getOperand(0);
14395 SDValue True = N->getOperand(1);
14396 SDValue False = N->getOperand(2);
14397 SDLoc DL(N);
14398 EVT VT = N->getValueType(0);
14399 EVT CondVT = Cond.getValueType();
14401 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
14402 return SDValue();
14404 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
14405 // BEXTI, where C is power of 2.
14406 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
14407 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
14408 SDValue LHS = Cond.getOperand(0);
14409 SDValue RHS = Cond.getOperand(1);
14410 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14411 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
14412 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
14413 uint64_t MaskVal = LHS.getConstantOperandVal(1);
14414 if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal))
14415 return DAG.getSelect(DL, VT,
14416 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
14417 False, True);
14420 return SDValue();
14423 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
14424 const RISCVSubtarget &Subtarget) {
14425 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
14426 return Folded;
14428 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
14429 return V;
14431 if (Subtarget.hasShortForwardBranchOpt())
14432 return SDValue();
14434 SDValue TrueVal = N->getOperand(1);
14435 SDValue FalseVal = N->getOperand(2);
14436 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
14437 return V;
14438 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
14441 /// If we have a build_vector where each lane is binop X, C, where C
14442 /// is a constant (but not necessarily the same constant on all lanes),
14443 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
14444 /// We assume that materializing a constant build vector will be no more
14445 /// expensive than performing O(n) binops.
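/// For example (editorial sketch):
///   build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
///     -> add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4)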
14446 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
14447 const RISCVSubtarget &Subtarget,
14448 const RISCVTargetLowering &TLI) {
14449 SDLoc DL(N);
14450 EVT VT = N->getValueType(0);
14452 assert(!VT.isScalableVector() && "unexpected build vector");
14454 if (VT.getVectorNumElements() == 1)
14455 return SDValue();
14457 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
14458 if (!TLI.isBinOp(Opcode))
14459 return SDValue();
14461 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
14462 return SDValue();
14464 SmallVector<SDValue> LHSOps;
14465 SmallVector<SDValue> RHSOps;
14466 for (SDValue Op : N->ops()) {
14467 if (Op.isUndef()) {
14468 // We can't form a divide or remainder from undef.
14469 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
14470 return SDValue();
14472 LHSOps.push_back(Op);
14473 RHSOps.push_back(Op);
14474 continue;
14477 // TODO: We can handle operations which have a neutral rhs value
14478 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
14479 // of profit in a more explicit manner.
14480 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
14481 return SDValue();
14483 LHSOps.push_back(Op.getOperand(0));
14484 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
14485 !isa<ConstantFPSDNode>(Op.getOperand(1)))
14486 return SDValue();
14487 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
14488 // have different LHS and RHS types.
14489 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
14490 return SDValue();
14491 RHSOps.push_back(Op.getOperand(1));
14494 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
14495 DAG.getBuildVector(VT, DL, RHSOps));
14498 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
14499 const RISCVSubtarget &Subtarget,
14500 const RISCVTargetLowering &TLI) {
14501 SDValue InVec = N->getOperand(0);
14502 SDValue InVal = N->getOperand(1);
14503 SDValue EltNo = N->getOperand(2);
14504 SDLoc DL(N);
14506 EVT VT = InVec.getValueType();
14507 if (VT.isScalableVector())
14508 return SDValue();
14510 if (!InVec.hasOneUse())
14511 return SDValue();
14513 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
14514 // move the insert_vector_elts into the arms of the binop. Note that
14515 // the new RHS must be a constant.
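// For example (editorial sketch):
//   insert_vector_elt (add V, VecC), (add s, C2), Idx
//     -> add (insert_vector_elt V, s, Idx), (insert_vector_elt VecC, C2, Idx)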
14516 const unsigned InVecOpcode = InVec->getOpcode();
14517 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
14518 InVal.hasOneUse()) {
14519 SDValue InVecLHS = InVec->getOperand(0);
14520 SDValue InVecRHS = InVec->getOperand(1);
14521 SDValue InValLHS = InVal->getOperand(0);
14522 SDValue InValRHS = InVal->getOperand(1);
14524 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
14525 return SDValue();
14526 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
14527 return SDValue();
14528 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
14529 // have different LHS and RHS types.
14530 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
14531 return SDValue();
14532 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14533 InVecLHS, InValLHS, EltNo);
14534 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14535 InVecRHS, InValRHS, EltNo);
14536 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
14539 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
14540 // move the insert_vector_elt to the source operand of the concat_vector.
14541 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
14542 return SDValue();
14544 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14545 if (!IndexC)
14546 return SDValue();
14547 unsigned Elt = IndexC->getZExtValue();
14549 EVT ConcatVT = InVec.getOperand(0).getValueType();
14550 if (ConcatVT.getVectorElementType() != InVal.getValueType())
14551 return SDValue();
14552 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
14553 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
14554 EltNo.getValueType());
14556 unsigned ConcatOpIdx = Elt / ConcatNumElts;
14557 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
14558 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
14559 ConcatOp, InVal, NewIdx);
14561 SmallVector<SDValue> ConcatOps;
14562 ConcatOps.append(InVec->op_begin(), InVec->op_end());
14563 ConcatOps[ConcatOpIdx] = ConcatOp;
14564 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14567 // If we're concatenating a series of vector loads like
14568 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
14569 // then we can turn this into a strided load by widening the vector elements:
14570 // vlse32 p, stride=n
14571 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
14572 const RISCVSubtarget &Subtarget,
14573 const RISCVTargetLowering &TLI) {
14574 SDLoc DL(N);
14575 EVT VT = N->getValueType(0);
14577 // Only perform this combine on legal MVTs.
14578 if (!TLI.isTypeLegal(VT))
14579 return SDValue();
14581 // TODO: Potentially extend this to scalable vectors
14582 if (VT.isScalableVector())
14583 return SDValue();
14585 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
14586 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
14587 !SDValue(BaseLd, 0).hasOneUse())
14588 return SDValue();
14590 EVT BaseLdVT = BaseLd->getValueType(0);
14592 // Go through the loads and check that they're strided
14593 SmallVector<LoadSDNode *> Lds;
14594 Lds.push_back(BaseLd);
14595 Align Align = BaseLd->getAlign();
14596 for (SDValue Op : N->ops().drop_front()) {
14597 auto *Ld = dyn_cast<LoadSDNode>(Op);
14598 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
14599 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
14600 Ld->getValueType(0) != BaseLdVT)
14601 return SDValue();
14603 Lds.push_back(Ld);
14605 // The common alignment is the most restrictive (smallest) of all the loads
14606 Align = std::min(Align, Ld->getAlign());
14609 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
14610 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
14611 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
14612 // If the load ptrs can be decomposed into a common (Base + Index) with a
14613 // common constant stride, then return the constant stride.
14614 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
14615 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
14616 if (BIO1.equalBaseIndex(BIO2, DAG))
14617 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
14619 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
14620 SDValue P1 = Ld1->getBasePtr();
14621 SDValue P2 = Ld2->getBasePtr();
14622 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
14623 return {{P2.getOperand(1), false}};
14624 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
14625 return {{P1.getOperand(1), true}};
14627 return std::nullopt;
14630 // Get the distance between the first and second loads
14631 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
14632 if (!BaseDiff)
14633 return SDValue();
14635 // Check all the loads are the same distance apart
14636 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
14637 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
14638 return SDValue();
14640 // TODO: At this point, we've successfully matched a generalized gather
14641 // load. Maybe we should emit that, and then move the specialized
14642 // matchers above and below into a DAG combine?
14644 // Get the widened scalar type, e.g. v4i8 -> i32
14645 unsigned WideScalarBitWidth =
14646 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
14647 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
14649 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
14650 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
14651 if (!TLI.isTypeLegal(WideVecVT))
14652 return SDValue();
14654 // Check that the operation is legal
14655 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
14656 return SDValue();
14658 auto [StrideVariant, MustNegateStride] = *BaseDiff;
14659 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
14660 ? std::get<SDValue>(StrideVariant)
14661 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
14662 Lds[0]->getOffset().getValueType());
14663 if (MustNegateStride)
14664 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
14666 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
14667 SDValue IntID =
14668 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
14669 Subtarget.getXLenVT());
14671 SDValue AllOneMask =
14672 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
14673 DAG.getConstant(1, DL, MVT::i1));
14675 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
14676 BaseLd->getBasePtr(), Stride, AllOneMask};
14678 uint64_t MemSize;
14679 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
14680 ConstStride && ConstStride->getSExtValue() >= 0)
14681 // total size = (elsize * n) + (stride - elsize) * (n-1)
14682 // = elsize + stride * (n-1)
14683 MemSize = WideScalarVT.getSizeInBits() +
14684 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
14685 else
14686 // If Stride isn't constant, then we can't know how much it will load
14687 MemSize = MemoryLocation::UnknownSize;
14689 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
14690 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
14691 Align);
14693 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
14694 Ops, WideVecVT, MMO);
14695 for (SDValue Ld : N->ops())
14696 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
14698 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
14701 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
14702 const RISCVSubtarget &Subtarget) {
14703 assert(N->getOpcode() == RISCVISD::ADD_VL);
14704 SDValue Addend = N->getOperand(0);
14705 SDValue MulOp = N->getOperand(1);
14706 SDValue AddMergeOp = N->getOperand(2);
14708 if (!AddMergeOp.isUndef())
14709 return SDValue();
14711 auto IsVWMulOpc = [](unsigned Opc) {
14712 switch (Opc) {
14713 case RISCVISD::VWMUL_VL:
14714 case RISCVISD::VWMULU_VL:
14715 case RISCVISD::VWMULSU_VL:
14716 return true;
14717 default:
14718 return false;
14722 if (!IsVWMulOpc(MulOp.getOpcode()))
14723 std::swap(Addend, MulOp);
14725 if (!IsVWMulOpc(MulOp.getOpcode()))
14726 return SDValue();
14728 SDValue MulMergeOp = MulOp.getOperand(2);
14730 if (!MulMergeOp.isUndef())
14731 return SDValue();
14733 SDValue AddMask = N->getOperand(3);
14734 SDValue AddVL = N->getOperand(4);
14735 SDValue MulMask = MulOp.getOperand(3);
14736 SDValue MulVL = MulOp.getOperand(4);
14738 if (AddMask != MulMask || AddVL != MulVL)
14739 return SDValue();
14741 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
14742 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
14743 "Unexpected opcode after VWMACC_VL");
14744 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
14745 "Unexpected opcode after VWMACC_VL!");
14746 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
14747 "Unexpected opcode after VWMUL_VL!");
14748 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
14749 "Unexpected opcode after VWMUL_VL!");
14751 SDLoc DL(N);
14752 EVT VT = N->getValueType(0);
14753 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
14754 AddVL};
14755 return DAG.getNode(Opc, DL, VT, Ops);
14758 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
14759 ISD::MemIndexType &IndexType,
14760 RISCVTargetLowering::DAGCombinerInfo &DCI) {
14761 if (!DCI.isBeforeLegalize())
14762 return false;
14764 SelectionDAG &DAG = DCI.DAG;
14765 const MVT XLenVT =
14766 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
14768 const EVT IndexVT = Index.getValueType();
14770 // RISC-V indexed loads only support the "unsigned unscaled" addressing
14771 // mode, so anything else must be manually legalized.
14772 if (!isIndexTypeSigned(IndexType))
14773 return false;
14775 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
14776 // Any index legalization should first promote to XLenVT, so we don't lose
14777 // bits when scaling. This may create an illegal index type so we let
14778 // LLVM's legalization take care of the splitting.
14779 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
14780 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
14781 IndexVT.changeVectorElementType(XLenVT), Index);
14783 IndexType = ISD::UNSIGNED_SCALED;
14784 return true;
14787 /// Match the index vector of a scatter or gather node as the shuffle mask
14788 /// which performs the rearrangement if possible. Will only match if
14789 /// all lanes are touched, and thus replacing the scatter or gather with
14790 /// a unit strided access and shuffle is legal.
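/// For example (sketch): a gather of v4i32 elements with constant byte
/// offsets <8, 0, 12, 4> touches every lane exactly once, so it can be done
/// as a unit-strided load followed by the shuffle mask <2, 0, 3, 1>.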
14791 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
14792 SmallVector<int> &ShuffleMask) {
14793 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
14794 return false;
14795 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
14796 return false;
14798 const unsigned ElementSize = VT.getScalarStoreSize();
14799 const unsigned NumElems = VT.getVectorNumElements();
14801 // Create the shuffle mask and check all bits active
14802 assert(ShuffleMask.empty());
14803 BitVector ActiveLanes(NumElems);
14804 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
14805 // TODO: An undef index in an active lane is UB, so we could be
14806 // more aggressive here if desired.
14807 if (Index->getOperand(i)->isUndef())
14808 return false;
14809 uint64_t C = Index->getConstantOperandVal(i);
14810 if (C % ElementSize != 0)
14811 return false;
14812 C = C / ElementSize;
14813 if (C >= NumElems)
14814 return false;
14815 ShuffleMask.push_back(C);
14816 ActiveLanes.set(C);
14818 return ActiveLanes.all();
14821 /// Match the index of a gather or scatter operation as an operation
14822 /// with twice the element width and half the number of elements. This is
14823 /// generally profitable (if legal) because these operations are linear
14824 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
14825 /// come out ahead.
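/// For example (sketch): a v8i16 gather with byte offsets
/// <0, 2, 8, 10, 16, 18, 24, 26> accesses adjacent i16 pairs and can instead
/// be treated as a v4i32 gather with byte offsets <0, 8, 16, 24>.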
14826 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
14827 Align BaseAlign, const RISCVSubtarget &ST) {
14828 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
14829 return false;
14830 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
14831 return false;
14833 // Attempt a doubling. If we can use an element type 4x or 8x in
14834 // size, this will happen via multiple iterations of the transform.
14835 const unsigned NumElems = VT.getVectorNumElements();
14836 if (NumElems % 2 != 0)
14837 return false;
14839 const unsigned ElementSize = VT.getScalarStoreSize();
14840 const unsigned WiderElementSize = ElementSize * 2;
14841 if (WiderElementSize > ST.getELen()/8)
14842 return false;
14844 if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
14845 return false;
14847 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
14848 // TODO: An undef index in an active lane is UB, so we could be
14849 // more aggressive here if desired.
14850 if (Index->getOperand(i)->isUndef())
14851 return false;
14852 // TODO: This offset check is too strict if we support fully
14853 // misaligned memory operations.
14854 uint64_t C = Index->getConstantOperandVal(i);
14855 if (i % 2 == 0) {
14856 if (C % WiderElementSize != 0)
14857 return false;
14858 continue;
14860 uint64_t Last = Index->getConstantOperandVal(i-1);
14861 if (C != Last + ElementSize)
14862 return false;
14864 return true;
14868 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
14869 DAGCombinerInfo &DCI) const {
14870 SelectionDAG &DAG = DCI.DAG;
14871 const MVT XLenVT = Subtarget.getXLenVT();
14872 SDLoc DL(N);
14874 // Helper to call SimplifyDemandedBits on an operand of N where only some low
14875 // bits are demanded. N will be added to the Worklist if it was not deleted.
14876 // Caller should return SDValue(N, 0) if this returns true.
14877 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
14878 SDValue Op = N->getOperand(OpNo);
14879 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
14880 if (!SimplifyDemandedBits(Op, Mask, DCI))
14881 return false;
14883 if (N->getOpcode() != ISD::DELETED_NODE)
14884 DCI.AddToWorklist(N);
14885 return true;
14888 switch (N->getOpcode()) {
14889 default:
14890 break;
14891 case RISCVISD::SplitF64: {
14892 SDValue Op0 = N->getOperand(0);
14893 // If the input to SplitF64 is just BuildPairF64 then the operation is
14894 // redundant. Instead, use BuildPairF64's operands directly.
14895 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
14896 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
14898 if (Op0->isUndef()) {
14899 SDValue Lo = DAG.getUNDEF(MVT::i32);
14900 SDValue Hi = DAG.getUNDEF(MVT::i32);
14901 return DCI.CombineTo(N, Lo, Hi);
14904 // It's cheaper to materialise two 32-bit integers than to load a double
14905 // from the constant pool and transfer it to integer registers through the
14906 // stack.
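// For example (sketch): splitting the f64 constant 1.0 (0x3FF0000000000000)
// yields Lo = 0 and Hi = 0x3FF00000, both cheap to materialise (zero and a
// single LUI).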
14907 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
14908 APInt V = C->getValueAPF().bitcastToAPInt();
14909 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
14910 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
14911 return DCI.CombineTo(N, Lo, Hi);
14914 // This is a target-specific version of a DAGCombine performed in
14915 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14916 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14917 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14918 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
14919 !Op0.getNode()->hasOneUse())
14920 break;
14921 SDValue NewSplitF64 =
14922 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
14923 Op0.getOperand(0));
14924 SDValue Lo = NewSplitF64.getValue(0);
14925 SDValue Hi = NewSplitF64.getValue(1);
14926 APInt SignBit = APInt::getSignMask(32);
14927 if (Op0.getOpcode() == ISD::FNEG) {
14928 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
14929 DAG.getConstant(SignBit, DL, MVT::i32));
14930 return DCI.CombineTo(N, Lo, NewHi);
14932 assert(Op0.getOpcode() == ISD::FABS);
14933 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
14934 DAG.getConstant(~SignBit, DL, MVT::i32));
14935 return DCI.CombineTo(N, Lo, NewHi);
14937 case RISCVISD::SLLW:
14938 case RISCVISD::SRAW:
14939 case RISCVISD::SRLW:
14940 case RISCVISD::RORW:
14941 case RISCVISD::ROLW: {
14942 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
14943 if (SimplifyDemandedLowBitsHelper(0, 32) ||
14944 SimplifyDemandedLowBitsHelper(1, 5))
14945 return SDValue(N, 0);
14947 break;
14949 case RISCVISD::CLZW:
14950 case RISCVISD::CTZW: {
14951 // Only the lower 32 bits of the first operand are read
14952 if (SimplifyDemandedLowBitsHelper(0, 32))
14953 return SDValue(N, 0);
14954 break;
14956 case RISCVISD::FMV_W_X_RV64: {
14957 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
14958 // conversion is unnecessary and can be replaced with the
14959 // FMV_X_ANYEXTW_RV64 operand.
14960 SDValue Op0 = N->getOperand(0);
14961 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
14962 return Op0.getOperand(0);
14963 break;
14965 case RISCVISD::FMV_X_ANYEXTH:
14966 case RISCVISD::FMV_X_ANYEXTW_RV64: {
14967 SDLoc DL(N);
14968 SDValue Op0 = N->getOperand(0);
14969 MVT VT = N->getSimpleValueType(0);
14970 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
14971 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
14972 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
14973 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
14974 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
14975 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14976 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
14977 assert(Op0.getOperand(0).getValueType() == VT &&
14978 "Unexpected value type!");
14979 return Op0.getOperand(0);
14982 // This is a target-specific version of a DAGCombine performed in
14983 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14984 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14985 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14986 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
14987 !Op0.getNode()->hasOneUse())
14988 break;
14989 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
14990 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
14991 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
14992 if (Op0.getOpcode() == ISD::FNEG)
14993 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
14994 DAG.getConstant(SignBit, DL, VT));
14996 assert(Op0.getOpcode() == ISD::FABS);
14997 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
14998 DAG.getConstant(~SignBit, DL, VT));
15000 case ISD::ADD:
15001 return performADDCombine(N, DAG, Subtarget);
15002 case ISD::SUB:
15003 return performSUBCombine(N, DAG, Subtarget);
15004 case ISD::AND:
15005 return performANDCombine(N, DCI, Subtarget);
15006 case ISD::OR:
15007 return performORCombine(N, DCI, Subtarget);
15008 case ISD::XOR:
15009 return performXORCombine(N, DAG, Subtarget);
15010 case ISD::MUL:
15011 return performMULCombine(N, DAG);
15012 case ISD::FADD:
15013 case ISD::UMAX:
15014 case ISD::UMIN:
15015 case ISD::SMAX:
15016 case ISD::SMIN:
15017 case ISD::FMAXNUM:
15018 case ISD::FMINNUM: {
15019 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15020 return V;
15021 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15022 return V;
15023 return SDValue();
15025 case ISD::SETCC:
15026 return performSETCCCombine(N, DAG, Subtarget);
15027 case ISD::SIGN_EXTEND_INREG:
15028 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
15029 case ISD::ZERO_EXTEND:
15030 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
15031 // type legalization. This is safe because fp_to_uint produces poison if
15032 // it overflows.
15033 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
15034 SDValue Src = N->getOperand(0);
15035 if (Src.getOpcode() == ISD::FP_TO_UINT &&
15036 isTypeLegal(Src.getOperand(0).getValueType()))
15037 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
15038 Src.getOperand(0));
15039 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
15040 isTypeLegal(Src.getOperand(1).getValueType())) {
15041 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
15042 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
15043 Src.getOperand(0), Src.getOperand(1));
15044 DCI.CombineTo(N, Res);
15045 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
15046 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
15047 return SDValue(N, 0); // Return N so it doesn't get rechecked.
15050 return SDValue();
15051 case RISCVISD::TRUNCATE_VECTOR_VL: {
15052 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
15053 // This is beneficial when X and Y are both low-precision vectors of the
15054 // same type. The truncate has already been lowered into n levels of
15055 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncation restriction by
15056 // the time we reach this point, and would otherwise be expanded into a
15057 // series of "vsetvli" and "vnsrl" instructions.
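// For example (a sketch with arbitrarily chosen element types):
//   trunc (sra (sext v4i8 X to v4i32), (zext v4i8 Y to v4i32)) to v4i8
//     -> sra X, (smin Y, 7)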
15058 auto IsTruncNode = [](SDValue V) {
15059 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
15060 return false;
15061 SDValue VL = V.getOperand(2);
15062 auto *C = dyn_cast<ConstantSDNode>(VL);
15063 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
15064 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
15065 (isa<RegisterSDNode>(VL) &&
15066 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
15067 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
15068 IsVLMAXForVMSET;
15071 SDValue Op = N->getOperand(0);
15073 // We first need to walk down to the innermost TRUNCATE_VECTOR_VL node
15074 // in order to recognize this pattern.
15075 while (IsTruncNode(Op)) {
15076 if (!Op.hasOneUse())
15077 return SDValue();
15078 Op = Op.getOperand(0);
15081 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
15082 SDValue N0 = Op.getOperand(0);
15083 SDValue N1 = Op.getOperand(1);
15084 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
15085 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
15086 SDValue N00 = N0.getOperand(0);
15087 SDValue N10 = N1.getOperand(0);
15088 if (N00.getValueType().isVector() &&
15089 N00.getValueType() == N10.getValueType() &&
15090 N->getValueType(0) == N10.getValueType()) {
15091 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
15092 SDValue SMin = DAG.getNode(
15093 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
15094 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
15095 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
15099 break;
15101 case ISD::TRUNCATE:
15102 return performTRUNCATECombine(N, DAG, Subtarget);
15103 case ISD::SELECT:
15104 return performSELECTCombine(N, DAG, Subtarget);
15105 case RISCVISD::CZERO_EQZ:
15106 case RISCVISD::CZERO_NEZ:
15107 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
15108 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
15109 if (N->getOperand(1).getOpcode() == ISD::XOR &&
15110 isOneConstant(N->getOperand(1).getOperand(1))) {
15111 SDValue Cond = N->getOperand(1).getOperand(0);
15112 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
15113 if (DAG.MaskedValueIsZero(Cond, Mask)) {
15114 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
15115 ? RISCVISD::CZERO_NEZ
15116 : RISCVISD::CZERO_EQZ;
15117 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
15118 N->getOperand(0), Cond);
15121 return SDValue();
15123 case RISCVISD::SELECT_CC: {
15124 // Transform
15125 SDValue LHS = N->getOperand(0);
15126 SDValue RHS = N->getOperand(1);
15127 SDValue CC = N->getOperand(2);
15128 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15129 SDValue TrueV = N->getOperand(3);
15130 SDValue FalseV = N->getOperand(4);
15131 SDLoc DL(N);
15132 EVT VT = N->getValueType(0);
15134 // If the True and False values are the same, we don't need a select_cc.
15135 if (TrueV == FalseV)
15136 return TrueV;
15138 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
15139 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
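// For instance (sketch): (select (x < 0), 3, 7) with XLEN=64 becomes
//   srai t, x, 63; andi t, t, -4; addi res, t, 7
// since 3 - 7 = -4 and both constants fit in simm12.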
15140 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
15141 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
15142 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
15143 if (CCVal == ISD::CondCode::SETGE)
15144 std::swap(TrueV, FalseV);
15146 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
15147 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
15148 // Only handle simm12; if a constant is outside this range, it is better
15149 // treated as a register operand.
15150 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
15151 isInt<12>(TrueSImm - FalseSImm)) {
15152 SDValue SRA =
15153 DAG.getNode(ISD::SRA, DL, VT, LHS,
15154 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
15155 SDValue AND =
15156 DAG.getNode(ISD::AND, DL, VT, SRA,
15157 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
15158 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
15161 if (CCVal == ISD::CondCode::SETGE)
15162 std::swap(TrueV, FalseV);
15165 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
15166 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
15167 {LHS, RHS, CC, TrueV, FalseV});
15169 if (!Subtarget.hasShortForwardBranchOpt()) {
15170 // (select c, -1, y) -> -c | y
15171 if (isAllOnesConstant(TrueV)) {
15172 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
15173 SDValue Neg = DAG.getNegative(C, DL, VT);
15174 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
15176 // (select c, y, -1) -> -!c | y
15177 if (isAllOnesConstant(FalseV)) {
15178 SDValue C =
15179 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
15180 SDValue Neg = DAG.getNegative(C, DL, VT);
15181 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
15184 // (select c, 0, y) -> -!c & y
15185 if (isNullConstant(TrueV)) {
15186 SDValue C =
15187 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
15188 SDValue Neg = DAG.getNegative(C, DL, VT);
15189 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
15191 // (select c, y, 0) -> -c & y
15192 if (isNullConstant(FalseV)) {
15193 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
15194 SDValue Neg = DAG.getNegative(C, DL, VT);
15195 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
15197 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
15198 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
15199 if (((isOneConstant(FalseV) && LHS == TrueV &&
15200 CCVal == ISD::CondCode::SETNE) ||
15201 (isOneConstant(TrueV) && LHS == FalseV &&
15202 CCVal == ISD::CondCode::SETEQ)) &&
15203 isNullConstant(RHS)) {
15204 // freeze it to be safe.
15205 LHS = DAG.getFreeze(LHS);
15206 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
15207 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
15211 // If both true/false are an xor with 1, pull through the select.
15212 // This can occur after op legalization if both operands are setccs that
15213 // require an xor to invert.
15214 // FIXME: Generalize to other binary ops with identical operand?
15215 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
15216 TrueV.getOperand(1) == FalseV.getOperand(1) &&
15217 isOneConstant(TrueV.getOperand(1)) &&
15218 TrueV.hasOneUse() && FalseV.hasOneUse()) {
15219 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
15220 TrueV.getOperand(0), FalseV.getOperand(0));
15221 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
15224 return SDValue();
15226 case RISCVISD::BR_CC: {
15227 SDValue LHS = N->getOperand(1);
15228 SDValue RHS = N->getOperand(2);
15229 SDValue CC = N->getOperand(3);
15230 SDLoc DL(N);
15232 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
15233 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
15234 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
15236 return SDValue();
15238 case ISD::BITREVERSE:
15239 return performBITREVERSECombine(N, DAG, Subtarget);
15240 case ISD::FP_TO_SINT:
15241 case ISD::FP_TO_UINT:
15242 return performFP_TO_INTCombine(N, DCI, Subtarget);
15243 case ISD::FP_TO_SINT_SAT:
15244 case ISD::FP_TO_UINT_SAT:
15245 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
15246 case ISD::FCOPYSIGN: {
15247 EVT VT = N->getValueType(0);
15248 if (!VT.isVector())
15249 break;
15250 // There is a form of VFSGNJ which injects the negated sign of its second
15251 // operand. Try and bubble any FNEG up after the extend/round to produce
15252 // this optimized pattern. Avoid modifying FP_ROUND nodes that have
15253 // TRUNC=1.
15254 SDValue In2 = N->getOperand(1);
15255 // Avoid cases where the extend/round has multiple uses, as duplicating
15256 // those is typically more expensive than removing a fneg.
15257 if (!In2.hasOneUse())
15258 break;
15259 if (In2.getOpcode() != ISD::FP_EXTEND &&
15260 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
15261 break;
15262 In2 = In2.getOperand(0);
15263 if (In2.getOpcode() != ISD::FNEG)
15264 break;
15265 SDLoc DL(N);
15266 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
15267 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
15268 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
15270 case ISD::MGATHER: {
15271 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
15272 const EVT VT = N->getValueType(0);
15273 SDValue Index = MGN->getIndex();
15274 SDValue ScaleOp = MGN->getScale();
15275 ISD::MemIndexType IndexType = MGN->getIndexType();
15276 assert(!MGN->isIndexScaled() &&
15277 "Scaled gather/scatter should not be formed");
15279 SDLoc DL(N);
15280 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15281 return DAG.getMaskedGather(
15282 N->getVTList(), MGN->getMemoryVT(), DL,
15283 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
15284 MGN->getBasePtr(), Index, ScaleOp},
15285 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
15287 if (narrowIndex(Index, IndexType, DAG))
15288 return DAG.getMaskedGather(
15289 N->getVTList(), MGN->getMemoryVT(), DL,
15290 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
15291 MGN->getBasePtr(), Index, ScaleOp},
15292 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
15294 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
15295 MGN->getExtensionType() == ISD::NON_EXTLOAD) {
15296 if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
15297 SimpleVID && SimpleVID->StepDenominator == 1) {
15298 const int64_t StepNumerator = SimpleVID->StepNumerator;
15299 const int64_t Addend = SimpleVID->Addend;
15301 // Note: We don't need to check alignment here since (by assumption
15302 // from the existence of the gather), our offsets must be sufficiently
15303 // aligned.
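// For example (sketch): a non-extending gather whose index operand is the
// VID-like sequence <0, 8, 16, 24> (step 8, addend 0) becomes a strided load
// from the gather's base pointer with a constant 8-byte stride.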
15305 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
15306 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
15307 assert(IndexType == ISD::UNSIGNED_SCALED);
15308 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
15309 DAG.getConstant(Addend, DL, PtrVT));
15311 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
15312 SDValue IntID =
15313 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15314 XLenVT);
15315 SDValue Ops[] =
15316 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
15317 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
15318 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15319 Ops, VT, MGN->getMemOperand());
15323 SmallVector<int> ShuffleMask;
15324 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
15325 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
15326 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
15327 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
15328 MGN->getMask(), DAG.getUNDEF(VT),
15329 MGN->getMemoryVT(), MGN->getMemOperand(),
15330 ISD::UNINDEXED, ISD::NON_EXTLOAD);
15331 SDValue Shuffle =
15332 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
15333 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
15336 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
15337 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
15338 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
15339 SmallVector<SDValue> NewIndices;
15340 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
15341 NewIndices.push_back(Index.getOperand(i));
15342 EVT IndexVT = Index.getValueType()
15343 .getHalfNumVectorElementsVT(*DAG.getContext());
15344 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
15346 unsigned ElementSize = VT.getScalarStoreSize();
15347 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
15348 auto EltCnt = VT.getVectorElementCount();
15349 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
15350 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
15351 EltCnt.divideCoefficientBy(2));
15352 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
15353 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
15354 EltCnt.divideCoefficientBy(2));
15355 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
15357 SDValue Gather =
15358 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
15359 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
15360 Index, ScaleOp},
15361 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
15362 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
15363 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
15365 break;
15367 case ISD::MSCATTER:{
15368 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
15369 SDValue Index = MSN->getIndex();
15370 SDValue ScaleOp = MSN->getScale();
15371 ISD::MemIndexType IndexType = MSN->getIndexType();
15372 assert(!MSN->isIndexScaled() &&
15373 "Scaled gather/scatter should not be formed");
15375 SDLoc DL(N);
15376 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15377 return DAG.getMaskedScatter(
15378 N->getVTList(), MSN->getMemoryVT(), DL,
15379 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
15380 Index, ScaleOp},
15381 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
15383 if (narrowIndex(Index, IndexType, DAG))
15384 return DAG.getMaskedScatter(
15385 N->getVTList(), MSN->getMemoryVT(), DL,
15386 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
15387 Index, ScaleOp},
15388 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
15390 EVT VT = MSN->getValue()->getValueType(0);
15391 SmallVector<int> ShuffleMask;
15392 if (!MSN->isTruncatingStore() &&
15393 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
15394 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
15395 DAG.getUNDEF(VT), ShuffleMask);
15396 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
15397 DAG.getUNDEF(XLenVT), MSN->getMask(),
15398 MSN->getMemoryVT(), MSN->getMemOperand(),
15399 ISD::UNINDEXED, false);
15401 break;
15403 case ISD::VP_GATHER: {
15404 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
15405 SDValue Index = VPGN->getIndex();
15406 SDValue ScaleOp = VPGN->getScale();
15407 ISD::MemIndexType IndexType = VPGN->getIndexType();
15408 assert(!VPGN->isIndexScaled() &&
15409 "Scaled gather/scatter should not be formed");
15411 SDLoc DL(N);
15412 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15413 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
15414 {VPGN->getChain(), VPGN->getBasePtr(), Index,
15415 ScaleOp, VPGN->getMask(),
15416 VPGN->getVectorLength()},
15417 VPGN->getMemOperand(), IndexType);
15419 if (narrowIndex(Index, IndexType, DAG))
15420 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
15421 {VPGN->getChain(), VPGN->getBasePtr(), Index,
15422 ScaleOp, VPGN->getMask(),
15423 VPGN->getVectorLength()},
15424 VPGN->getMemOperand(), IndexType);
15426 break;
15428 case ISD::VP_SCATTER: {
15429 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
15430 SDValue Index = VPSN->getIndex();
15431 SDValue ScaleOp = VPSN->getScale();
15432 ISD::MemIndexType IndexType = VPSN->getIndexType();
15433 assert(!VPSN->isIndexScaled() &&
15434 "Scaled gather/scatter should not be formed");
15436 SDLoc DL(N);
15437 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
15438 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
15439 {VPSN->getChain(), VPSN->getValue(),
15440 VPSN->getBasePtr(), Index, ScaleOp,
15441 VPSN->getMask(), VPSN->getVectorLength()},
15442 VPSN->getMemOperand(), IndexType);
15444 if (narrowIndex(Index, IndexType, DAG))
15445 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
15446 {VPSN->getChain(), VPSN->getValue(),
15447 VPSN->getBasePtr(), Index, ScaleOp,
15448 VPSN->getMask(), VPSN->getVectorLength()},
15449 VPSN->getMemOperand(), IndexType);
15450 break;
15452 case RISCVISD::SRA_VL:
15453 case RISCVISD::SRL_VL:
15454 case RISCVISD::SHL_VL: {
15455 SDValue ShAmt = N->getOperand(1);
15456 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
15457 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
15458 SDLoc DL(N);
15459 SDValue VL = N->getOperand(4);
15460 EVT VT = N->getValueType(0);
15461 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
15462 ShAmt.getOperand(1), VL);
15463 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
15464 N->getOperand(2), N->getOperand(3), N->getOperand(4));
15466 break;
15468 case ISD::SRA:
15469 if (SDValue V = performSRACombine(N, DAG, Subtarget))
15470 return V;
15471 [[fallthrough]];
15472 case ISD::SRL:
15473 case ISD::SHL: {
15474 SDValue ShAmt = N->getOperand(1);
15475 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
15476 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
15477 SDLoc DL(N);
15478 EVT VT = N->getValueType(0);
15479 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
15480 ShAmt.getOperand(1),
15481 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
15482 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
15484 break;
15486 case RISCVISD::ADD_VL:
15487 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
15488 return V;
15489 return combineToVWMACC(N, DAG, Subtarget);
15490 case RISCVISD::SUB_VL:
15491 case RISCVISD::VWADD_W_VL:
15492 case RISCVISD::VWADDU_W_VL:
15493 case RISCVISD::VWSUB_W_VL:
15494 case RISCVISD::VWSUBU_W_VL:
15495 case RISCVISD::MUL_VL:
15496 return combineBinOp_VLToVWBinOp_VL(N, DCI);
15497 case RISCVISD::VFMADD_VL:
15498 case RISCVISD::VFNMADD_VL:
15499 case RISCVISD::VFMSUB_VL:
15500 case RISCVISD::VFNMSUB_VL:
15501 case RISCVISD::STRICT_VFMADD_VL:
15502 case RISCVISD::STRICT_VFNMADD_VL:
15503 case RISCVISD::STRICT_VFMSUB_VL:
15504 case RISCVISD::STRICT_VFNMSUB_VL:
15505 return performVFMADD_VLCombine(N, DAG, Subtarget);
15506 case RISCVISD::FMUL_VL:
15507 return performVFMUL_VLCombine(N, DAG, Subtarget);
15508 case RISCVISD::FADD_VL:
15509 case RISCVISD::FSUB_VL:
15510 return performFADDSUB_VLCombine(N, DAG, Subtarget);
15511 case ISD::LOAD:
15512 case ISD::STORE: {
15513 if (DCI.isAfterLegalizeDAG())
15514 if (SDValue V = performMemPairCombine(N, DCI))
15515 return V;
15517 if (N->getOpcode() != ISD::STORE)
15518 break;
15520 auto *Store = cast<StoreSDNode>(N);
15521 SDValue Chain = Store->getChain();
15522 EVT MemVT = Store->getMemoryVT();
15523 SDValue Val = Store->getValue();
15524 SDLoc DL(N);
15526 bool IsScalarizable =
15527 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
15528 Store->isSimple() &&
15529 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
15530 isPowerOf2_64(MemVT.getSizeInBits()) &&
15531 MemVT.getSizeInBits() <= Subtarget.getXLen();
15533 // If sufficiently aligned we can scalarize stores of constant vectors of
15534 // any power-of-two size up to XLen bits, provided that they aren't too
15535 // expensive to materialize.
15536 // vsetivli zero, 2, e8, m1, ta, ma
15537 // vmv.v.i v8, 4
15538 // vse64.v v8, (a0)
15539 // ->
15540 // li a1, 1028
15541 // sh a1, 0(a0)
15542 if (DCI.isBeforeLegalize() && IsScalarizable &&
15543 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
15544 // Get the constant vector bits
15545 APInt NewC(Val.getValueSizeInBits(), 0);
15546 uint64_t EltSize = Val.getScalarValueSizeInBits();
15547 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
15548 if (Val.getOperand(i).isUndef())
15549 continue;
15550 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
15551 i * EltSize);
15553 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
15555 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
15556 true) <= 2 &&
15557 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
15558 NewVT, *Store->getMemOperand())) {
15559 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
15560 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
15561 Store->getPointerInfo(), Store->getOriginalAlign(),
15562 Store->getMemOperand()->getFlags());
15566 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
15567 // vsetivli zero, 2, e16, m1, ta, ma
15568 // vle16.v v8, (a0)
15569 // vse16.v v8, (a1)
15570 if (auto *L = dyn_cast<LoadSDNode>(Val);
15571 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
15572 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
15573 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
15574 L->getMemoryVT() == MemVT) {
15575 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
15576 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
15577 NewVT, *Store->getMemOperand()) &&
15578 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
15579 NewVT, *L->getMemOperand())) {
15580 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
15581 L->getPointerInfo(), L->getOriginalAlign(),
15582 L->getMemOperand()->getFlags());
15583 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
15584 Store->getPointerInfo(), Store->getOriginalAlign(),
15585 Store->getMemOperand()->getFlags());
15589 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
15590 // vfmv.f.s is represented as an extract_vector_elt from index 0. Match it
15591 // late to avoid any illegal types.
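// For example (sketch): a store of (vmv.x.s X:nxv1i32) to p with an i32
// memory type becomes a VP store of X to p with VL = 1, i.e. a
// single-element vse32.v.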
15592 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
15593 (DCI.isAfterLegalizeDAG() &&
15594 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15595 isNullConstant(Val.getOperand(1)))) {
15596 SDValue Src = Val.getOperand(0);
15597 MVT VecVT = Src.getSimpleValueType();
15598 // VecVT should be scalable and memory VT should match the element type.
15599 if (!Store->isIndexed() && VecVT.isScalableVector() &&
15600 MemVT == VecVT.getVectorElementType()) {
15601 SDLoc DL(N);
15602 MVT MaskVT = getMaskTypeFor(VecVT);
15603 return DAG.getStoreVP(
15604 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
15605 DAG.getConstant(1, DL, MaskVT),
15606 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
15607 Store->getMemOperand(), Store->getAddressingMode(),
15608 Store->isTruncatingStore(), /*IsCompress*/ false);
15612 break;
15614 case ISD::SPLAT_VECTOR: {
15615 EVT VT = N->getValueType(0);
15616 // Only perform this combine on legal MVT types.
15617 if (!isTypeLegal(VT))
15618 break;
15619 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
15620 DAG, Subtarget))
15621 return Gather;
15622 break;
15624 case ISD::BUILD_VECTOR:
15625 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
15626 return V;
15627 break;
15628 case ISD::CONCAT_VECTORS:
15629 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
15630 return V;
15631 break;
15632 case ISD::INSERT_VECTOR_ELT:
15633 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
15634 return V;
15635 break;
15636 case RISCVISD::VFMV_V_F_VL: {
15637 const MVT VT = N->getSimpleValueType(0);
15638 SDValue Passthru = N->getOperand(0);
15639 SDValue Scalar = N->getOperand(1);
15640 SDValue VL = N->getOperand(2);
15642 // If VL is 1, we can use vfmv.s.f.
15643 if (isOneConstant(VL))
15644 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
15645 break;
15647 case RISCVISD::VMV_V_X_VL: {
15648 const MVT VT = N->getSimpleValueType(0);
15649 SDValue Passthru = N->getOperand(0);
15650 SDValue Scalar = N->getOperand(1);
15651 SDValue VL = N->getOperand(2);
15653 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
15654 // scalar input.
15655 unsigned ScalarSize = Scalar.getValueSizeInBits();
15656 unsigned EltWidth = VT.getScalarSizeInBits();
15657 if (ScalarSize > EltWidth && Passthru.isUndef())
15658 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
15659 return SDValue(N, 0);
15661 // If VL is 1 and the scalar value won't benefit from a vector immediate
15662 // (vmv.v.i), we can use vmv.s.x.
15663 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15664 if (isOneConstant(VL) &&
15665 (!Const || Const->isZero() ||
15666 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
15667 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
15669 break;
15671 case RISCVISD::VFMV_S_F_VL: {
15672 SDValue Src = N->getOperand(1);
15673 // Try to remove vector->scalar->vector if the scalar->vector is inserting
15674 // into an undef vector.
15675 // TODO: Could use a vslide or vmv.v.v for non-undef.
15676 if (N->getOperand(0).isUndef() &&
15677 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15678 isNullConstant(Src.getOperand(1)) &&
15679 Src.getOperand(0).getValueType().isScalableVector()) {
15680 EVT VT = N->getValueType(0);
15681 EVT SrcVT = Src.getOperand(0).getValueType();
15682 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
15683 // Widths match, just return the original vector.
15684 if (SrcVT == VT)
15685 return Src.getOperand(0);
15686 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
15688 [[fallthrough]];
15690 case RISCVISD::VMV_S_X_VL: {
15691 const MVT VT = N->getSimpleValueType(0);
15692 SDValue Passthru = N->getOperand(0);
15693 SDValue Scalar = N->getOperand(1);
15694 SDValue VL = N->getOperand(2);
15696 // Use M1 or smaller to avoid over-constraining register allocation.
15697 const MVT M1VT = getLMUL1VT(VT);
15698 if (M1VT.bitsLT(VT)) {
15699 SDValue M1Passthru =
15700 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
15701 DAG.getVectorIdxConstant(0, DL));
15702 SDValue Result =
15703 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
15704 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
15705 DAG.getConstant(0, DL, XLenVT));
15706 return Result;
15709 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
15710 // higher would involve overly constraining the register allocator for
15711 // no purpose.
15712 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15713 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
15714 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
15715 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
15717 break;
15719 case ISD::INTRINSIC_VOID:
15720 case ISD::INTRINSIC_W_CHAIN:
15721 case ISD::INTRINSIC_WO_CHAIN: {
15722 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
15723 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
15724 switch (IntNo) {
15725 // By default we do not combine any intrinsic.
15726 default:
15727 return SDValue();
15728 case Intrinsic::riscv_masked_strided_load: {
15729 MVT VT = N->getSimpleValueType(0);
15730 auto *Load = cast<MemIntrinsicSDNode>(N);
15731 SDValue PassThru = N->getOperand(2);
15732 SDValue Base = N->getOperand(3);
15733 SDValue Stride = N->getOperand(4);
15734 SDValue Mask = N->getOperand(5);
15736 // If the stride is equal to the element size in bytes, we can use
15737 // a masked.load.
15738 const unsigned ElementSize = VT.getScalarStoreSize();
15739 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
15740 StrideC && StrideC->getZExtValue() == ElementSize)
15741 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
15742 DAG.getUNDEF(XLenVT), Mask, PassThru,
15743 Load->getMemoryVT(), Load->getMemOperand(),
15744 ISD::UNINDEXED, ISD::NON_EXTLOAD);
15745 return SDValue();
15747 case Intrinsic::riscv_masked_strided_store: {
15748 auto *Store = cast<MemIntrinsicSDNode>(N);
15749 SDValue Value = N->getOperand(2);
15750 SDValue Base = N->getOperand(3);
15751 SDValue Stride = N->getOperand(4);
15752 SDValue Mask = N->getOperand(5);
15754 // If the stride is equal to the element size in bytes, we can use
15755 // a masked.store.
15756 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
15757 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
15758 StrideC && StrideC->getZExtValue() == ElementSize)
15759 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
15760 DAG.getUNDEF(XLenVT), Mask,
15761 Store->getMemoryVT(), Store->getMemOperand(),
15762 ISD::UNINDEXED, false);
15763 return SDValue();
15765 case Intrinsic::riscv_vcpop:
15766 case Intrinsic::riscv_vcpop_mask:
15767 case Intrinsic::riscv_vfirst:
15768 case Intrinsic::riscv_vfirst_mask: {
15769 SDValue VL = N->getOperand(2);
15770 if (IntNo == Intrinsic::riscv_vcpop_mask ||
15771 IntNo == Intrinsic::riscv_vfirst_mask)
15772 VL = N->getOperand(3);
15773 if (!isNullConstant(VL))
15774 return SDValue();
15775 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
15776 SDLoc DL(N);
15777 EVT VT = N->getValueType(0);
15778 if (IntNo == Intrinsic::riscv_vfirst ||
15779 IntNo == Intrinsic::riscv_vfirst_mask)
15780 return DAG.getConstant(-1, DL, VT);
15781 return DAG.getConstant(0, DL, VT);
15785 case ISD::BITCAST: {
15786 assert(Subtarget.useRVVForFixedLengthVectors());
15787 SDValue N0 = N->getOperand(0);
15788 EVT VT = N->getValueType(0);
15789 EVT SrcVT = N0.getValueType();
15790 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
15791 // type, widen both sides to avoid a trip through memory.
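// For example (sketch): bitcast v2i1 -> i2 is handled by concatenating the
// source with three undef v2i1 vectors to form v8i1, bitcasting that to i8,
// and truncating the result to i2.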
15792 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
15793 VT.isScalarInteger()) {
15794 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
15795 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
15796 Ops[0] = N0;
15797 SDLoc DL(N);
15798 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
15799 N0 = DAG.getBitcast(MVT::i8, N0);
15800 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
15803 return SDValue();
15807 return SDValue();
15810 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
15811 EVT XVT, unsigned KeptBits) const {
15812 // For vectors, we don't have a preference.
15813 if (XVT.isVector())
15814 return false;
15816 if (XVT != MVT::i32 && XVT != MVT::i64)
15817 return false;
15819 // We can use sext.w for RV64 or an srai 31 on RV32.
15820 if (KeptBits == 32 || KeptBits == 64)
15821 return true;
15823 // With Zbb we can use sext.h/sext.b.
15824 return Subtarget.hasStdExtZbb() &&
15825 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
15826 KeptBits == 16);
15829 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
15830 const SDNode *N, CombineLevel Level) const {
15831 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
15832 N->getOpcode() == ISD::SRL) &&
15833 "Expected shift op");
15835 // The following folds are only desirable if `(OP _, c1 << c2)` can be
15836 // materialised in fewer instructions than `(OP _, c1)`:
15838 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
15839 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
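// For instance (sketch): for (shl (add x, 1024), 4), 1024 fits in an ADDI
// immediate but 1024 << 4 = 16384 does not, so the combine is rejected and
// the cheaper constant stays on the un-shifted add.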
15840 SDValue N0 = N->getOperand(0);
15841 EVT Ty = N0.getValueType();
15842 if (Ty.isScalarInteger() &&
15843 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
15844 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15845 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15846 if (C1 && C2) {
15847 const APInt &C1Int = C1->getAPIntValue();
15848 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
15850 // We can materialise `c1 << c2` into an add immediate, so it's "free",
15851 // and the combine should happen, to potentially allow further combines
15852 // later.
15853 if (ShiftedC1Int.getSignificantBits() <= 64 &&
15854 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
15855 return true;
15857 // We can materialise `c1` in an add immediate, so it's "free", and the
15858 // combine should be prevented.
15859 if (C1Int.getSignificantBits() <= 64 &&
15860 isLegalAddImmediate(C1Int.getSExtValue()))
15861 return false;
15863 // Neither constant will fit into an immediate, so find materialisation
15864 // costs.
15865 int C1Cost =
15866 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
15867 /*CompressionCost*/ true);
15868 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
15869 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
15870 /*CompressionCost*/ true);
15872 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
15873 // combine should be prevented.
15874 if (C1Cost < ShiftedC1Cost)
15875 return false;
15878 return true;
15881 bool RISCVTargetLowering::targetShrinkDemandedConstant(
15882 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
15883 TargetLoweringOpt &TLO) const {
15884 // Delay this optimization as late as possible.
15885 if (!TLO.LegalOps)
15886 return false;
15888 EVT VT = Op.getValueType();
15889 if (VT.isVector())
15890 return false;
15892 unsigned Opcode = Op.getOpcode();
15893 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
15894 return false;
15896 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15897 if (!C)
15898 return false;
15900 const APInt &Mask = C->getAPIntValue();
15902 // Clear all non-demanded bits initially.
15903 APInt ShrunkMask = Mask & DemandedBits;
15905 // Try to make a smaller immediate by setting undemanded bits.
15907 APInt ExpandedMask = Mask | ~DemandedBits;
15909 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
15910 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
15912 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
15913 if (NewMask == Mask)
15914 return true;
15915 SDLoc DL(Op);
15916 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
15917 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
15918 Op.getOperand(0), NewC);
15919 return TLO.CombineTo(Op, NewOp);
15922 // If the shrunk mask fits in sign extended 12 bits, let the target
15923 // independent code apply it.
15924 if (ShrunkMask.isSignedIntN(12))
15925 return false;
15927 // AND has a few special cases for zext.
15928 if (Opcode == ISD::AND) {
15929 // Preserve (and X, 0xffff); this selects to zext.h if available,
15930 // otherwise to SLLI + SRLI.
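// For example (sketch): for (and X, 0x1ffff) where only the low 16 bits are
// demanded, ShrunkMask is 0xffff; that is not a simm12, but it is covered by
// ExpandedMask, so the node is rewritten to (and X, 0xffff).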
15931 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
15932 if (IsLegalMask(NewMask))
15933 return UseMask(NewMask);
15935 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
15936 if (VT == MVT::i64) {
15937 APInt NewMask = APInt(64, 0xffffffff);
15938 if (IsLegalMask(NewMask))
15939 return UseMask(NewMask);
15943 // For the remaining optimizations, we need to be able to make a negative
15944 // number through a combination of mask and undemanded bits.
15945 if (!ExpandedMask.isNegative())
15946 return false;
15948 // Compute the smallest number of bits needed to represent the negative number.
15949 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
15951 // Try to make a 12 bit negative immediate. If that fails try to make a 32
15952 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
15953 // If we can't create a simm12, we shouldn't change opaque constants.
15954 APInt NewMask = ShrunkMask;
15955 if (MinSignedBits <= 12)
15956 NewMask.setBitsFrom(11);
15957 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
15958 NewMask.setBitsFrom(31);
15959 else
15960 return false;
15962 // Check that our new mask is a subset of the demanded mask.
15963 assert(IsLegalMask(NewMask));
15964 return UseMask(NewMask);
15967 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
15968 static const uint64_t GREVMasks[] = {
15969 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
15970 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
15972 for (unsigned Stage = 0; Stage != 6; ++Stage) {
15973 unsigned Shift = 1 << Stage;
15974 if (ShAmt & Shift) {
15975 uint64_t Mask = GREVMasks[Stage];
15976 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
15977 if (IsGORC)
15978 Res |= x;
15979 x = Res;
15983 return x;
15986 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15987 KnownBits &Known,
15988 const APInt &DemandedElts,
15989 const SelectionDAG &DAG,
15990 unsigned Depth) const {
15991 unsigned BitWidth = Known.getBitWidth();
15992 unsigned Opc = Op.getOpcode();
15993 assert((Opc >= ISD::BUILTIN_OP_END ||
15994 Opc == ISD::INTRINSIC_WO_CHAIN ||
15995 Opc == ISD::INTRINSIC_W_CHAIN ||
15996 Opc == ISD::INTRINSIC_VOID) &&
15997 "Should use MaskedValueIsZero if you don't know whether Op"
15998 " is a target node!");
16000 Known.resetAll();
16001 switch (Opc) {
16002 default: break;
16003 case RISCVISD::SELECT_CC: {
16004 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
16005 // If we don't know any bits, early out.
16006 if (Known.isUnknown())
16007 break;
16008 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
16010 // Only known if known in both the LHS and RHS.
16011 Known = Known.intersectWith(Known2);
16012 break;
16014 case RISCVISD::CZERO_EQZ:
16015 case RISCVISD::CZERO_NEZ:
16016 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16017 // Result is either all zero or operand 0. We can propagate zeros, but not
16018 // ones.
16019 Known.One.clearAllBits();
16020 break;
16021 case RISCVISD::REMUW: {
16022 KnownBits Known2;
16023 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16024 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16025 // We only care about the lower 32 bits.
16026 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
16027 // Restore the original width by sign extending.
16028 Known = Known.sext(BitWidth);
16029 break;
16031 case RISCVISD::DIVUW: {
16032 KnownBits Known2;
16033 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16034 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16035 // We only care about the lower 32 bits.
16036 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
16037 // Restore the original width by sign extending.
16038 Known = Known.sext(BitWidth);
16039 break;
16041 case RISCVISD::SLLW: {
16042 KnownBits Known2;
16043 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16044 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16045 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
16046 // Restore the original width by sign extending.
16047 Known = Known.sext(BitWidth);
16048 break;
16050 case RISCVISD::CTZW: {
16051 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16052 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
16053 unsigned LowBits = llvm::bit_width(PossibleTZ);
16054 Known.Zero.setBitsFrom(LowBits);
16055 break;
16057 case RISCVISD::CLZW: {
16058 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16059 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
16060 unsigned LowBits = llvm::bit_width(PossibleLZ);
16061 Known.Zero.setBitsFrom(LowBits);
16062 break;
16064 case RISCVISD::BREV8:
16065 case RISCVISD::ORC_B: {
16066 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
16067 // control value of 7 is equivalent to brev8 and orc.b.
16068 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16069 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
16070 // To compute zeros, we need to invert the value and invert it back after.
16071 Known.Zero =
16072 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
16073 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
16074 break;
16076 case RISCVISD::READ_VLENB: {
16077 // We can use the minimum and maximum VLEN values to bound VLENB. We
16078 // know VLEN must be a power of two.
16079 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
16080 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
16081 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
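// For example, with a guaranteed minimum VLEN of 128 (Zvl128b), MinVLenB is
// 16, so the low 4 bits of VLENB are known to be zero.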
16082 Known.Zero.setLowBits(Log2_32(MinVLenB));
16083 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
16084 if (MaxVLenB == MinVLenB)
16085 Known.One.setBit(Log2_32(MinVLenB));
16086 break;
16088 case RISCVISD::FCLASS: {
16089 // fclass will only set one of the low 10 bits.
16090 Known.Zero.setBitsFrom(10);
16091 break;
16093 case ISD::INTRINSIC_W_CHAIN:
16094 case ISD::INTRINSIC_WO_CHAIN: {
16095 unsigned IntNo =
16096 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
16097 switch (IntNo) {
16098 default:
16099 // We can't do anything for most intrinsics.
16100 break;
16101 case Intrinsic::riscv_vsetvli:
16102 case Intrinsic::riscv_vsetvlimax: {
16103 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
16104 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
16105 RISCVII::VLMUL VLMUL =
16106 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
16107 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
16108 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
16109 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
16110 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
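// Illustrative example (ignoring the AVL clamp below): with a maximum VLEN of
// 512, SEW=32 and LMUL=2, MaxVL = (512 / 32) * 2 = 32, so every result bit
// above bit 5 is known to be zero.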
16112         // The result of vsetvli must not be larger than AVL.
16113 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
16114 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
16116 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
16117 if (BitWidth > KnownZeroFirstBit)
16118 Known.Zero.setBitsFrom(KnownZeroFirstBit);
16119 break;
16122 break;
16127 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
16128 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
16129 unsigned Depth) const {
16130 switch (Op.getOpcode()) {
16131 default:
16132 break;
16133 case RISCVISD::SELECT_CC: {
16134 unsigned Tmp =
16135 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
16136 if (Tmp == 1) return 1; // Early out.
16137 unsigned Tmp2 =
16138 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
16139 return std::min(Tmp, Tmp2);
16141 case RISCVISD::CZERO_EQZ:
16142 case RISCVISD::CZERO_NEZ:
16143 // Output is either all zero or operand 0. We can propagate sign bit count
16144 // from operand 0.
16145 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
16146 case RISCVISD::ABSW: {
16147 // We expand this at isel to negw+max. The result will have 33 sign bits
16148 // if the input has at least 33 sign bits.
16149 unsigned Tmp =
16150 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
16151 if (Tmp < 33) return 1;
16152 return 33;
16154 case RISCVISD::SLLW:
16155 case RISCVISD::SRAW:
16156 case RISCVISD::SRLW:
16157 case RISCVISD::DIVW:
16158 case RISCVISD::DIVUW:
16159 case RISCVISD::REMUW:
16160 case RISCVISD::ROLW:
16161 case RISCVISD::RORW:
16162 case RISCVISD::FCVT_W_RV64:
16163 case RISCVISD::FCVT_WU_RV64:
16164 case RISCVISD::STRICT_FCVT_W_RV64:
16165 case RISCVISD::STRICT_FCVT_WU_RV64:
16166 // TODO: As the result is sign-extended, this is conservatively correct. A
16167 // more precise answer could be calculated for SRAW depending on known
16168 // bits in the shift amount.
16169 return 33;
16170 case RISCVISD::VMV_X_S: {
16171 // The number of sign bits of the scalar result is computed by obtaining the
16172 // element type of the input vector operand, subtracting its width from the
16173 // XLEN, and then adding one (sign bit within the element type). If the
16174 // element type is wider than XLen, the least-significant XLEN bits are
16175 // taken.
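// For example, extracting an element of an nxv4i16 vector on RV64 gives
// 64 - 16 + 1 = 49 known sign bits.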
16176 unsigned XLen = Subtarget.getXLen();
16177 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
16178 if (EltBits <= XLen)
16179 return XLen - EltBits + 1;
16180 break;
16182 case ISD::INTRINSIC_W_CHAIN: {
16183 unsigned IntNo = Op.getConstantOperandVal(1);
16184 switch (IntNo) {
16185 default:
16186 break;
16187 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
16188 case Intrinsic::riscv_masked_atomicrmw_add_i64:
16189 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
16190 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
16191 case Intrinsic::riscv_masked_atomicrmw_max_i64:
16192 case Intrinsic::riscv_masked_atomicrmw_min_i64:
16193 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
16194 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
16195 case Intrinsic::riscv_masked_cmpxchg_i64:
16196 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
16197 // narrow atomic operation. These are implemented using atomic
16198 // operations at the minimum supported atomicrmw/cmpxchg width whose
16199 // result is then sign extended to XLEN. With +A, the minimum width is
16201       // 32 for both RV64 and RV32.
16201 assert(Subtarget.getXLen() == 64);
16202 assert(getMinCmpXchgSizeInBits() == 32);
16203 assert(Subtarget.hasStdExtA());
16204 return 33;
16206 break;
16210 return 1;
16213 const Constant *
16214 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
16215 assert(Ld && "Unexpected null LoadSDNode");
16216 if (!ISD::isNormalLoad(Ld))
16217 return nullptr;
16219 SDValue Ptr = Ld->getBasePtr();
16221 // Only constant pools with no offset are supported.
16222 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
16223 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
16224 if (!CNode || CNode->isMachineConstantPoolEntry() ||
16225 CNode->getOffset() != 0)
16226 return nullptr;
16228 return CNode;
16231 // Simple case, LLA.
16232 if (Ptr.getOpcode() == RISCVISD::LLA) {
16233 auto *CNode = GetSupportedConstantPool(Ptr);
16234 if (!CNode || CNode->getTargetFlags() != 0)
16235 return nullptr;
16237 return CNode->getConstVal();
16240 // Look for a HI and ADD_LO pair.
16241 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
16242 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
16243 return nullptr;
16245 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
16246 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
16248 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
16249 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
16250 return nullptr;
16252 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
16253 return nullptr;
16255 return CNodeLo->getConstVal();
16258 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
16259 MachineBasicBlock *BB) {
16260 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
16262 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
16263 // Should the count have wrapped while it was being read, we need to try
16264 // again.
16265 // ...
16266 // read:
16267 // rdcycleh x3 # load high word of cycle
16268 // rdcycle x2 # load low word of cycle
16269 // rdcycleh x4 # load high word of cycle
16270 // bne x3, x4, read # check if high word reads match, otherwise try again
16271 // ...
16273 MachineFunction &MF = *BB->getParent();
16274 const BasicBlock *LLVM_BB = BB->getBasicBlock();
16275 MachineFunction::iterator It = ++BB->getIterator();
16277 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
16278 MF.insert(It, LoopMBB);
16280 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
16281 MF.insert(It, DoneMBB);
16283 // Transfer the remainder of BB and its successor edges to DoneMBB.
16284 DoneMBB->splice(DoneMBB->begin(), BB,
16285 std::next(MachineBasicBlock::iterator(MI)), BB->end());
16286 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
16288 BB->addSuccessor(LoopMBB);
16290 MachineRegisterInfo &RegInfo = MF.getRegInfo();
16291 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
16292 Register LoReg = MI.getOperand(0).getReg();
16293 Register HiReg = MI.getOperand(1).getReg();
16294 DebugLoc DL = MI.getDebugLoc();
16296 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
16297 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
16298 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
16299 .addReg(RISCV::X0);
16300 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
16301 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
16302 .addReg(RISCV::X0);
16303 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
16304 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
16305 .addReg(RISCV::X0);
16307 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
16308 .addReg(HiReg)
16309 .addReg(ReadAgainReg)
16310 .addMBB(LoopMBB);
16312 LoopMBB->addSuccessor(LoopMBB);
16313 LoopMBB->addSuccessor(DoneMBB);
16315 MI.eraseFromParent();
16317 return DoneMBB;
16320 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
16321 MachineBasicBlock *BB,
16322 const RISCVSubtarget &Subtarget) {
16323 assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
16324 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
16325 "Unexpected instruction");
16327 MachineFunction &MF = *BB->getParent();
16328 DebugLoc DL = MI.getDebugLoc();
16329 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
16330 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
16331 Register LoReg = MI.getOperand(0).getReg();
16332 Register HiReg = MI.getOperand(1).getReg();
16333 Register SrcReg = MI.getOperand(2).getReg();
16335 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
16336 ? &RISCV::GPRPF64RegClass
16337 : &RISCV::FPR64RegClass;
16338 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
16340 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
16341 RI, Register());
16342 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
16343 MachineMemOperand *MMOLo =
16344 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
16345 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
16346 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
16347 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
16348 .addFrameIndex(FI)
16349 .addImm(0)
16350 .addMemOperand(MMOLo);
16351 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
16352 .addFrameIndex(FI)
16353 .addImm(4)
16354 .addMemOperand(MMOHi);
16355 MI.eraseFromParent(); // The pseudo instruction is gone now.
16356 return BB;
16359 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
16360 MachineBasicBlock *BB,
16361 const RISCVSubtarget &Subtarget) {
16362 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
16363 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
16364 "Unexpected instruction");
16366 MachineFunction &MF = *BB->getParent();
16367 DebugLoc DL = MI.getDebugLoc();
16368 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
16369 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
16370 Register DstReg = MI.getOperand(0).getReg();
16371 Register LoReg = MI.getOperand(1).getReg();
16372 Register HiReg = MI.getOperand(2).getReg();
16374 const TargetRegisterClass *DstRC =
16375 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
16376 : &RISCV::FPR64RegClass;
16377 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
16379 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
16380 MachineMemOperand *MMOLo =
16381 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
16382 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
16383 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
16384 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
16385 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
16386 .addFrameIndex(FI)
16387 .addImm(0)
16388 .addMemOperand(MMOLo);
16389 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
16390 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
16391 .addFrameIndex(FI)
16392 .addImm(4)
16393 .addMemOperand(MMOHi);
16394 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
16395 MI.eraseFromParent(); // The pseudo instruction is gone now.
16396 return BB;
16399 static bool isSelectPseudo(MachineInstr &MI) {
16400 switch (MI.getOpcode()) {
16401 default:
16402 return false;
16403 case RISCV::Select_GPR_Using_CC_GPR:
16404 case RISCV::Select_FPR16_Using_CC_GPR:
16405 case RISCV::Select_FPR16INX_Using_CC_GPR:
16406 case RISCV::Select_FPR32_Using_CC_GPR:
16407 case RISCV::Select_FPR32INX_Using_CC_GPR:
16408 case RISCV::Select_FPR64_Using_CC_GPR:
16409 case RISCV::Select_FPR64INX_Using_CC_GPR:
16410 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
16411 return true;
16415 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
16416 unsigned RelOpcode, unsigned EqOpcode,
16417 const RISCVSubtarget &Subtarget) {
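// FLT/FLE are signaling comparisons and set the invalid flag even for quiet
// NaNs. To get quiet semantics we save FFLAGS, perform the relational
// compare, restore FFLAGS to drop any spurious invalid flag, and then issue
// FEQ (a quiet compare) so that signaling NaN operands still raise invalid.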
16418 DebugLoc DL = MI.getDebugLoc();
16419 Register DstReg = MI.getOperand(0).getReg();
16420 Register Src1Reg = MI.getOperand(1).getReg();
16421 Register Src2Reg = MI.getOperand(2).getReg();
16422 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16423 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16424 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16426 // Save the current FFLAGS.
16427 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
16429 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
16430 .addReg(Src1Reg)
16431 .addReg(Src2Reg);
16432 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16433 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16435 // Restore the FFLAGS.
16436 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
16437 .addReg(SavedFFlags, RegState::Kill);
16439   // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
16440 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
16441 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
16442 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
16443 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16444 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
16446 // Erase the pseudoinstruction.
16447 MI.eraseFromParent();
16448 return BB;
16451 static MachineBasicBlock *
16452 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
16453 MachineBasicBlock *ThisMBB,
16454 const RISCVSubtarget &Subtarget) {
16455   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
16456 // Without this, custom-inserter would have generated:
16458 // A
16459 // | \
16460 // | B
16461 // | /
16462 // C
16463 // | \
16464 // | D
16465 // | /
16466 // E
16468 // A: X = ...; Y = ...
16469 // B: empty
16470 // C: Z = PHI [X, A], [Y, B]
16471 // D: empty
16472 // E: PHI [X, C], [Z, D]
16474 // If we lower both Select_FPRX_ in a single step, we can instead generate:
16476 // A
16477 // | \
16478 // | C
16479 // | /|
16480 // |/ |
16481 // | |
16482 // | D
16483 // | /
16484 // E
16486 // A: X = ...; Y = ...
16487 // D: empty
16488 // E: PHI [X, A], [X, C], [Y, D]
16490 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16491 const DebugLoc &DL = First.getDebugLoc();
16492 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
16493 MachineFunction *F = ThisMBB->getParent();
16494 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
16495 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
16496 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
16497 MachineFunction::iterator It = ++ThisMBB->getIterator();
16498 F->insert(It, FirstMBB);
16499 F->insert(It, SecondMBB);
16500 F->insert(It, SinkMBB);
16502 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
16503 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
16504 std::next(MachineBasicBlock::iterator(First)),
16505 ThisMBB->end());
16506 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
16508 // Fallthrough block for ThisMBB.
16509 ThisMBB->addSuccessor(FirstMBB);
16510 // Fallthrough block for FirstMBB.
16511 FirstMBB->addSuccessor(SecondMBB);
16512 ThisMBB->addSuccessor(SinkMBB);
16513 FirstMBB->addSuccessor(SinkMBB);
16514 // This is fallthrough.
16515 SecondMBB->addSuccessor(SinkMBB);
16517 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
16518 Register FLHS = First.getOperand(1).getReg();
16519 Register FRHS = First.getOperand(2).getReg();
16520 // Insert appropriate branch.
16521 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
16522 .addReg(FLHS)
16523 .addReg(FRHS)
16524 .addMBB(SinkMBB);
16526 Register SLHS = Second.getOperand(1).getReg();
16527 Register SRHS = Second.getOperand(2).getReg();
16528 Register Op1Reg4 = First.getOperand(4).getReg();
16529 Register Op1Reg5 = First.getOperand(5).getReg();
16531 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
16532 // Insert appropriate branch.
16533 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
16534 .addReg(SLHS)
16535 .addReg(SRHS)
16536 .addMBB(SinkMBB);
16538 Register DestReg = Second.getOperand(0).getReg();
16539 Register Op2Reg4 = Second.getOperand(4).getReg();
16540 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
16541 .addReg(Op2Reg4)
16542 .addMBB(ThisMBB)
16543 .addReg(Op1Reg4)
16544 .addMBB(FirstMBB)
16545 .addReg(Op1Reg5)
16546 .addMBB(SecondMBB);
16548 // Now remove the Select_FPRX_s.
16549 First.eraseFromParent();
16550 Second.eraseFromParent();
16551 return SinkMBB;
16554 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
16555 MachineBasicBlock *BB,
16556 const RISCVSubtarget &Subtarget) {
16557 // To "insert" Select_* instructions, we actually have to insert the triangle
16558 // control-flow pattern. The incoming instructions know the destination vreg
16559 // to set, the condition code register to branch on, the true/false values to
16560 // select between, and the condcode to use to select the appropriate branch.
16562 // We produce the following control flow:
16563 // HeadMBB
16564 // | \
16565 // | IfFalseMBB
16566 // | /
16567 // TailMBB
16569 // When we find a sequence of selects we attempt to optimize their emission
16570 // by sharing the control flow. Currently we only handle cases where we have
16571 // multiple selects with the exact same condition (same LHS, RHS and CC).
16572 // The selects may be interleaved with other instructions if the other
16573 // instructions meet some requirements we deem safe:
16574 // - They are not pseudo instructions.
16575   //  - They are debug instructions, or otherwise they do not have side-effects,
16576   //    do not access memory, and their inputs do not depend on the results of
16577   //    the select pseudo-instructions.
16578 // The TrueV/FalseV operands of the selects cannot depend on the result of
16579 // previous selects in the sequence.
16580 // These conditions could be further relaxed. See the X86 target for a
16581 // related approach and more information.
16583 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
16584 // is checked here and handled by a separate function -
16585 // EmitLoweredCascadedSelect.
16586 Register LHS = MI.getOperand(1).getReg();
16587 Register RHS = MI.getOperand(2).getReg();
16588 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
16590 SmallVector<MachineInstr *, 4> SelectDebugValues;
16591 SmallSet<Register, 4> SelectDests;
16592 SelectDests.insert(MI.getOperand(0).getReg());
16594 MachineInstr *LastSelectPseudo = &MI;
16595 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
16596 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
16597 Next->getOpcode() == MI.getOpcode() &&
16598 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
16599 Next->getOperand(5).isKill()) {
16600 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
16603 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
16604 SequenceMBBI != E; ++SequenceMBBI) {
16605 if (SequenceMBBI->isDebugInstr())
16606 continue;
16607 if (isSelectPseudo(*SequenceMBBI)) {
16608 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
16609 SequenceMBBI->getOperand(2).getReg() != RHS ||
16610 SequenceMBBI->getOperand(3).getImm() != CC ||
16611 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
16612 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
16613 break;
16614 LastSelectPseudo = &*SequenceMBBI;
16615 SequenceMBBI->collectDebugValues(SelectDebugValues);
16616 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
16617 continue;
16619 if (SequenceMBBI->hasUnmodeledSideEffects() ||
16620 SequenceMBBI->mayLoadOrStore() ||
16621 SequenceMBBI->usesCustomInsertionHook())
16622 break;
16623 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
16624 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
16626 break;
16629 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16630 const BasicBlock *LLVM_BB = BB->getBasicBlock();
16631 DebugLoc DL = MI.getDebugLoc();
16632 MachineFunction::iterator I = ++BB->getIterator();
16634 MachineBasicBlock *HeadMBB = BB;
16635 MachineFunction *F = BB->getParent();
16636 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
16637 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
16639 F->insert(I, IfFalseMBB);
16640 F->insert(I, TailMBB);
16642 // Transfer debug instructions associated with the selects to TailMBB.
16643 for (MachineInstr *DebugInstr : SelectDebugValues) {
16644 TailMBB->push_back(DebugInstr->removeFromParent());
16647 // Move all instructions after the sequence to TailMBB.
16648 TailMBB->splice(TailMBB->end(), HeadMBB,
16649 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
16650 // Update machine-CFG edges by transferring all successors of the current
16651 // block to the new block which will contain the Phi nodes for the selects.
16652 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
16653 // Set the successors for HeadMBB.
16654 HeadMBB->addSuccessor(IfFalseMBB);
16655 HeadMBB->addSuccessor(TailMBB);
16657 // Insert appropriate branch.
16658 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
16659 .addReg(LHS)
16660 .addReg(RHS)
16661 .addMBB(TailMBB);
16663 // IfFalseMBB just falls through to TailMBB.
16664 IfFalseMBB->addSuccessor(TailMBB);
16666 // Create PHIs for all of the select pseudo-instructions.
16667 auto SelectMBBI = MI.getIterator();
16668 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
16669 auto InsertionPoint = TailMBB->begin();
16670 while (SelectMBBI != SelectEnd) {
16671 auto Next = std::next(SelectMBBI);
16672 if (isSelectPseudo(*SelectMBBI)) {
16673 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
16674 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
16675 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
16676 .addReg(SelectMBBI->getOperand(4).getReg())
16677 .addMBB(HeadMBB)
16678 .addReg(SelectMBBI->getOperand(5).getReg())
16679 .addMBB(IfFalseMBB);
16680 SelectMBBI->eraseFromParent();
16682 SelectMBBI = Next;
16685 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
16686 return TailMBB;
16689 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
16690 MachineBasicBlock *BB,
16691 unsigned CVTXOpc,
16692 unsigned CVTFOpc) {
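// Round by converting to an integer and back under the dynamic rounding mode,
// saving and restoring FFLAGS around the conversions so that this
// "no-exception" rounding pseudo leaves no exception flags set.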
16693 DebugLoc DL = MI.getDebugLoc();
16695 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16697 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16698 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16700 // Save the old value of FFLAGS.
16701 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
16703 assert(MI.getNumOperands() == 7);
16705 // Emit a VFCVT_X_F
16706 const TargetRegisterInfo *TRI =
16707 BB->getParent()->getSubtarget().getRegisterInfo();
16708 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
16709 Register Tmp = MRI.createVirtualRegister(RC);
16710 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
16711 .add(MI.getOperand(1))
16712 .add(MI.getOperand(2))
16713 .add(MI.getOperand(3))
16714 .add(MachineOperand::CreateImm(7)) // frm = DYN
16715 .add(MI.getOperand(4))
16716 .add(MI.getOperand(5))
16717 .add(MI.getOperand(6))
16718 .add(MachineOperand::CreateReg(RISCV::FRM,
16719 /*IsDef*/ false,
16720 /*IsImp*/ true));
16722 // Emit a VFCVT_F_X
16723 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
16724 .add(MI.getOperand(0))
16725 .add(MI.getOperand(1))
16726 .addReg(Tmp)
16727 .add(MI.getOperand(3))
16728 .add(MachineOperand::CreateImm(7)) // frm = DYN
16729 .add(MI.getOperand(4))
16730 .add(MI.getOperand(5))
16731 .add(MI.getOperand(6))
16732 .add(MachineOperand::CreateReg(RISCV::FRM,
16733 /*IsDef*/ false,
16734 /*IsImp*/ true));
16736 // Restore FFLAGS.
16737 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
16738 .addReg(SavedFFLAGS, RegState::Kill);
16740 // Erase the pseudoinstruction.
16741 MI.eraseFromParent();
16742 return BB;
16745 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
16746 const RISCVSubtarget &Subtarget) {
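// FROUND is lowered as a compare and branch: if |Src| is below the limit in
// MaxReg (at or above which values are already integral), convert to an
// integer and back with the requested rounding mode and reattach Src's sign;
// otherwise fall through and return Src unchanged.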
16747 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
16748 const TargetRegisterClass *RC;
16749 switch (MI.getOpcode()) {
16750 default:
16751 llvm_unreachable("Unexpected opcode");
16752 case RISCV::PseudoFROUND_H:
16753 CmpOpc = RISCV::FLT_H;
16754 F2IOpc = RISCV::FCVT_W_H;
16755 I2FOpc = RISCV::FCVT_H_W;
16756 FSGNJOpc = RISCV::FSGNJ_H;
16757 FSGNJXOpc = RISCV::FSGNJX_H;
16758 RC = &RISCV::FPR16RegClass;
16759 break;
16760 case RISCV::PseudoFROUND_H_INX:
16761 CmpOpc = RISCV::FLT_H_INX;
16762 F2IOpc = RISCV::FCVT_W_H_INX;
16763 I2FOpc = RISCV::FCVT_H_W_INX;
16764 FSGNJOpc = RISCV::FSGNJ_H_INX;
16765 FSGNJXOpc = RISCV::FSGNJX_H_INX;
16766 RC = &RISCV::GPRF16RegClass;
16767 break;
16768 case RISCV::PseudoFROUND_S:
16769 CmpOpc = RISCV::FLT_S;
16770 F2IOpc = RISCV::FCVT_W_S;
16771 I2FOpc = RISCV::FCVT_S_W;
16772 FSGNJOpc = RISCV::FSGNJ_S;
16773 FSGNJXOpc = RISCV::FSGNJX_S;
16774 RC = &RISCV::FPR32RegClass;
16775 break;
16776 case RISCV::PseudoFROUND_S_INX:
16777 CmpOpc = RISCV::FLT_S_INX;
16778 F2IOpc = RISCV::FCVT_W_S_INX;
16779 I2FOpc = RISCV::FCVT_S_W_INX;
16780 FSGNJOpc = RISCV::FSGNJ_S_INX;
16781 FSGNJXOpc = RISCV::FSGNJX_S_INX;
16782 RC = &RISCV::GPRF32RegClass;
16783 break;
16784 case RISCV::PseudoFROUND_D:
16785 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
16786 CmpOpc = RISCV::FLT_D;
16787 F2IOpc = RISCV::FCVT_L_D;
16788 I2FOpc = RISCV::FCVT_D_L;
16789 FSGNJOpc = RISCV::FSGNJ_D;
16790 FSGNJXOpc = RISCV::FSGNJX_D;
16791 RC = &RISCV::FPR64RegClass;
16792 break;
16793 case RISCV::PseudoFROUND_D_INX:
16794 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
16795 CmpOpc = RISCV::FLT_D_INX;
16796 F2IOpc = RISCV::FCVT_L_D_INX;
16797 I2FOpc = RISCV::FCVT_D_L_INX;
16798 FSGNJOpc = RISCV::FSGNJ_D_INX;
16799 FSGNJXOpc = RISCV::FSGNJX_D_INX;
16800 RC = &RISCV::GPRRegClass;
16801 break;
16804 const BasicBlock *BB = MBB->getBasicBlock();
16805 DebugLoc DL = MI.getDebugLoc();
16806 MachineFunction::iterator I = ++MBB->getIterator();
16808 MachineFunction *F = MBB->getParent();
16809 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
16810 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
16812 F->insert(I, CvtMBB);
16813 F->insert(I, DoneMBB);
16814 // Move all instructions after the sequence to DoneMBB.
16815 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
16816 MBB->end());
16817 // Update machine-CFG edges by transferring all successors of the current
16818 // block to the new block which will contain the Phi nodes for the selects.
16819 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
16820 // Set the successors for MBB.
16821 MBB->addSuccessor(CvtMBB);
16822 MBB->addSuccessor(DoneMBB);
16824 Register DstReg = MI.getOperand(0).getReg();
16825 Register SrcReg = MI.getOperand(1).getReg();
16826 Register MaxReg = MI.getOperand(2).getReg();
16827 int64_t FRM = MI.getOperand(3).getImm();
16829 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16830 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
16832 Register FabsReg = MRI.createVirtualRegister(RC);
16833 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
16835 // Compare the FP value to the max value.
16836 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16837 auto MIB =
16838 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
16839 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16840 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16842 // Insert branch.
16843 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
16844 .addReg(CmpReg)
16845 .addReg(RISCV::X0)
16846 .addMBB(DoneMBB);
16848 CvtMBB->addSuccessor(DoneMBB);
16850 // Convert to integer.
16851 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16852 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
16853 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16854 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16856 // Convert back to FP.
16857 Register I2FReg = MRI.createVirtualRegister(RC);
16858 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
16859 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16860 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16862 // Restore the sign bit.
16863 Register CvtReg = MRI.createVirtualRegister(RC);
16864 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
16866 // Merge the results.
16867 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
16868 .addReg(SrcReg)
16869 .addMBB(MBB)
16870 .addReg(CvtReg)
16871 .addMBB(CvtMBB);
16873 MI.eraseFromParent();
16874 return DoneMBB;
16877 MachineBasicBlock *
16878 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
16879 MachineBasicBlock *BB) const {
16880 switch (MI.getOpcode()) {
16881 default:
16882 llvm_unreachable("Unexpected instr type to insert");
16883 case RISCV::ReadCycleWide:
16884 assert(!Subtarget.is64Bit() &&
16885 "ReadCycleWrite is only to be used on riscv32");
16886 return emitReadCycleWidePseudo(MI, BB);
16887 case RISCV::Select_GPR_Using_CC_GPR:
16888 case RISCV::Select_FPR16_Using_CC_GPR:
16889 case RISCV::Select_FPR16INX_Using_CC_GPR:
16890 case RISCV::Select_FPR32_Using_CC_GPR:
16891 case RISCV::Select_FPR32INX_Using_CC_GPR:
16892 case RISCV::Select_FPR64_Using_CC_GPR:
16893 case RISCV::Select_FPR64INX_Using_CC_GPR:
16894 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
16895 return emitSelectPseudo(MI, BB, Subtarget);
16896 case RISCV::BuildPairF64Pseudo:
16897 case RISCV::BuildPairF64Pseudo_INX:
16898 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
16899 case RISCV::SplitF64Pseudo:
16900 case RISCV::SplitF64Pseudo_INX:
16901 return emitSplitF64Pseudo(MI, BB, Subtarget);
16902 case RISCV::PseudoQuietFLE_H:
16903 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
16904 case RISCV::PseudoQuietFLE_H_INX:
16905 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
16906 case RISCV::PseudoQuietFLT_H:
16907 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
16908 case RISCV::PseudoQuietFLT_H_INX:
16909 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
16910 case RISCV::PseudoQuietFLE_S:
16911 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
16912 case RISCV::PseudoQuietFLE_S_INX:
16913 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
16914 case RISCV::PseudoQuietFLT_S:
16915 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
16916 case RISCV::PseudoQuietFLT_S_INX:
16917 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
16918 case RISCV::PseudoQuietFLE_D:
16919 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
16920 case RISCV::PseudoQuietFLE_D_INX:
16921 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
16922 case RISCV::PseudoQuietFLE_D_IN32X:
16923 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
16924 Subtarget);
16925 case RISCV::PseudoQuietFLT_D:
16926 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
16927 case RISCV::PseudoQuietFLT_D_INX:
16928 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
16929 case RISCV::PseudoQuietFLT_D_IN32X:
16930 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
16931 Subtarget);
16933 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
16934 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
16935 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
16936 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
16937 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
16938 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
16939 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
16940 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
16941 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
16942 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
16943 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
16944 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
16945 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
16946 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
16947 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
16948 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
16949 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
16950 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
16951 case RISCV::PseudoFROUND_H:
16952 case RISCV::PseudoFROUND_H_INX:
16953 case RISCV::PseudoFROUND_S:
16954 case RISCV::PseudoFROUND_S_INX:
16955 case RISCV::PseudoFROUND_D:
16956 case RISCV::PseudoFROUND_D_INX:
16957 case RISCV::PseudoFROUND_D_IN32X:
16958 return emitFROUND(MI, BB, Subtarget);
16959 case TargetOpcode::STATEPOINT:
16960 case TargetOpcode::STACKMAP:
16961 case TargetOpcode::PATCHPOINT:
16962 if (!Subtarget.is64Bit())
16963 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
16964 "supported on 64-bit targets");
16965 return emitPatchPoint(MI, BB);
16969 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
16970 SDNode *Node) const {
16971 // Add FRM dependency to any instructions with dynamic rounding mode.
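// The implicit use of FRM added below models the read of the rounding-mode
// CSR so that later passes know not to reorder such instructions across
// writes to FRM.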
16972 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
16973 if (Idx < 0) {
16974 // Vector pseudos have FRM index indicated by TSFlags.
16975 Idx = RISCVII::getFRMOpNum(MI.getDesc());
16976 if (Idx < 0)
16977 return;
16979 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
16980 return;
16981 // If the instruction already reads FRM, don't add another read.
16982 if (MI.readsRegister(RISCV::FRM))
16983 return;
16984 MI.addOperand(
16985 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
16988 // Calling Convention Implementation.
16989 // The expectations for frontend ABI lowering vary from target to target.
16990 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
16991 // details, but this is a longer term goal. For now, we simply try to keep the
16992 // role of the frontend as simple and well-defined as possible. The rules can
16993 // be summarised as:
16994 // * Never split up large scalar arguments. We handle them here.
16995 // * If a hardfloat calling convention is being used, and the struct may be
16996 // passed in a pair of registers (fp+fp, int+fp), and both registers are
16997 // available, then pass as two separate arguments. If either the GPRs or FPRs
16998 // are exhausted, then pass according to the rule below.
16999 // * If a struct could never be passed in registers or directly in a stack
17000 // slot (as it is larger than 2*XLEN and the floating point rules don't
17001 // apply), then pass it using a pointer with the byval attribute.
17002 // * If a struct is less than 2*XLEN, then coerce to either a two-element
17003 // word-sized array or a 2*XLEN scalar (depending on alignment).
17004 // * The frontend can determine whether a struct is returned by reference or
17005 // not based on its size and fields. If it will be returned by reference, the
17006 // frontend must modify the prototype so a pointer with the sret annotation is
17007 // passed as the first argument. This is not necessary for large scalar
17008 // returns.
17009 // * Struct return values and varargs should be coerced to structs containing
17010 // register-size fields in the same situations they would be for fixed
17011 // arguments.
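// Illustrative examples of the rules above (assuming the LP64D hard-float ABI
// with argument registers still available; sketched here, not an exhaustive
// description of the frontend's behaviour):
// * struct { double a; float b; } may be passed as two separate FP arguments.
// * struct { int64_t a[4]; } is larger than 2*XLEN and so is passed by
//   pointer with the byval attribute.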
17013 static const MCPhysReg ArgFPR16s[] = {
17014 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
17015 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
17017 static const MCPhysReg ArgFPR32s[] = {
17018 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
17019 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
17021 static const MCPhysReg ArgFPR64s[] = {
17022 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
17023 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
17025 // This is an interim calling convention and it may be changed in the future.
17026 static const MCPhysReg ArgVRs[] = {
17027 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
17028 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
17029 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
17030 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
17031 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
17032 RISCV::V20M2, RISCV::V22M2};
17033 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
17034 RISCV::V20M4};
17035 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
17037 ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
17038 static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17039 RISCV::X13, RISCV::X14, RISCV::X15,
17040 RISCV::X16, RISCV::X17};
17042 return ArrayRef(ArgGPRs);
17045 // Pass a 2*XLEN argument that has been split into two XLEN values through
17046 // registers or the stack as necessary.
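// For example, on RV32 an i64 or f64 that has been split into two i32 halves
// ends up in two GPRs if two remain, in one GPR plus a stack slot if only one
// remains, or entirely on the stack (with 8-byte alignment if the original
// argument required it) when no argument GPRs are left.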
17047 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
17048 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
17049 MVT ValVT2, MVT LocVT2,
17050 ISD::ArgFlagsTy ArgFlags2) {
17051 unsigned XLenInBytes = XLen / 8;
17052 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
17053 if (Register Reg = State.AllocateReg(ArgGPRs)) {
17054 // At least one half can be passed via register.
17055 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
17056 VA1.getLocVT(), CCValAssign::Full));
17057 } else {
17058 // Both halves must be passed on the stack, with proper alignment.
17059 Align StackAlign =
17060 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
17061 State.addLoc(
17062 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
17063 State.AllocateStack(XLenInBytes, StackAlign),
17064 VA1.getLocVT(), CCValAssign::Full));
17065 State.addLoc(CCValAssign::getMem(
17066 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
17067 LocVT2, CCValAssign::Full));
17068 return false;
17071 if (Register Reg = State.AllocateReg(ArgGPRs)) {
17072 // The second half can also be passed via register.
17073 State.addLoc(
17074 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
17075 } else {
17076 // The second half is passed via the stack, without additional alignment.
17077 State.addLoc(CCValAssign::getMem(
17078 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
17079 LocVT2, CCValAssign::Full));
17082 return false;
17085 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
17086 std::optional<unsigned> FirstMaskArgument,
17087 CCState &State, const RISCVTargetLowering &TLI) {
17088 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
17089 if (RC == &RISCV::VRRegClass) {
17090 // Assign the first mask argument to V0.
17091 // This is an interim calling convention and it may be changed in the
17092 // future.
17093 if (FirstMaskArgument && ValNo == *FirstMaskArgument)
17094 return State.AllocateReg(RISCV::V0);
17095 return State.AllocateReg(ArgVRs);
17097 if (RC == &RISCV::VRM2RegClass)
17098 return State.AllocateReg(ArgVRM2s);
17099 if (RC == &RISCV::VRM4RegClass)
17100 return State.AllocateReg(ArgVRM4s);
17101 if (RC == &RISCV::VRM8RegClass)
17102 return State.AllocateReg(ArgVRM8s);
17103 llvm_unreachable("Unhandled register class for ValueType");
17106 // Implements the RISC-V calling convention. Returns true upon failure.
17107 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
17108 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
17109 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
17110 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
17111 std::optional<unsigned> FirstMaskArgument) {
17112 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
17113 assert(XLen == 32 || XLen == 64);
17114 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
17116 // Static chain parameter must not be passed in normal argument registers,
17117 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
17118 if (ArgFlags.isNest()) {
17119 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
17120 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17121 return false;
17125   // Any return value split into more than two values can't be returned
17126 // directly. Vectors are returned via the available vector registers.
17127 if (!LocVT.isVector() && IsRet && ValNo > 1)
17128 return true;
17130   // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing a
17131 // variadic argument, or if no F16/F32 argument registers are available.
17132 bool UseGPRForF16_F32 = true;
17133   // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
17134 // variadic argument, or if no F64 argument registers are available.
17135 bool UseGPRForF64 = true;
17137 switch (ABI) {
17138 default:
17139 llvm_unreachable("Unexpected ABI");
17140 case RISCVABI::ABI_ILP32:
17141 case RISCVABI::ABI_LP64:
17142 break;
17143 case RISCVABI::ABI_ILP32F:
17144 case RISCVABI::ABI_LP64F:
17145 UseGPRForF16_F32 = !IsFixed;
17146 break;
17147 case RISCVABI::ABI_ILP32D:
17148 case RISCVABI::ABI_LP64D:
17149 UseGPRForF16_F32 = !IsFixed;
17150 UseGPRForF64 = !IsFixed;
17151 break;
17154 // FPR16, FPR32, and FPR64 alias each other.
17155 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
17156 UseGPRForF16_F32 = true;
17157 UseGPRForF64 = true;
17160 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
17161 // similar local variables rather than directly checking against the target
17162 // ABI.
17164 if (UseGPRForF16_F32 &&
17165 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
17166 LocVT = XLenVT;
17167 LocInfo = CCValAssign::BCvt;
17168 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
17169 LocVT = MVT::i64;
17170 LocInfo = CCValAssign::BCvt;
17173 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
17175 // If this is a variadic argument, the RISC-V calling convention requires
17176 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
17177 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
17178 // be used regardless of whether the original argument was split during
17179 // legalisation or not. The argument will not be passed by registers if the
17180 // original type is larger than 2*XLEN, so the register alignment rule does
17181 // not apply.
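// For example, on RV32 a variadic double or i64 must start in an even
// argument register: if the next free GPR is a1, it is skipped and the a2/a3
// pair is used instead.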
17182 unsigned TwoXLenInBytes = (2 * XLen) / 8;
17183 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
17184 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
17185 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
17186 // Skip 'odd' register if necessary.
17187 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
17188 State.AllocateReg(ArgGPRs);
17191 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
17192 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
17193 State.getPendingArgFlags();
17195 assert(PendingLocs.size() == PendingArgFlags.size() &&
17196 "PendingLocs and PendingArgFlags out of sync");
17198 // Handle passing f64 on RV32D with a soft float ABI or when floating point
17199 // registers are exhausted.
17200 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
17201 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
17202 // Depending on available argument GPRS, f64 may be passed in a pair of
17203 // GPRs, split between a GPR and the stack, or passed completely on the
17204 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
17205 // cases.
17206 Register Reg = State.AllocateReg(ArgGPRs);
17207 if (!Reg) {
17208 unsigned StackOffset = State.AllocateStack(8, Align(8));
17209 State.addLoc(
17210 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17211 return false;
17213 LocVT = MVT::i32;
17214 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17215 Register HiReg = State.AllocateReg(ArgGPRs);
17216 if (HiReg) {
17217 State.addLoc(
17218 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
17219 } else {
17220 unsigned StackOffset = State.AllocateStack(4, Align(4));
17221 State.addLoc(
17222 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17224 return false;
17227 // Fixed-length vectors are located in the corresponding scalable-vector
17228 // container types.
17229 if (ValVT.isFixedLengthVector())
17230 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
17232 // Split arguments might be passed indirectly, so keep track of the pending
17233 // values. Split vectors are passed via a mix of registers and indirectly, so
17234 // treat them as we would any other argument.
17235 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
17236 LocVT = XLenVT;
17237 LocInfo = CCValAssign::Indirect;
17238 PendingLocs.push_back(
17239 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
17240 PendingArgFlags.push_back(ArgFlags);
17241 if (!ArgFlags.isSplitEnd()) {
17242 return false;
17246 // If the split argument only had two elements, it should be passed directly
17247 // in registers or on the stack.
17248 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
17249 PendingLocs.size() <= 2) {
17250 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
17251 // Apply the normal calling convention rules to the first half of the
17252 // split argument.
17253 CCValAssign VA = PendingLocs[0];
17254 ISD::ArgFlagsTy AF = PendingArgFlags[0];
17255 PendingLocs.clear();
17256 PendingArgFlags.clear();
17257 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
17258 ArgFlags);
17261 // Allocate to a register if possible, or else a stack slot.
17262 Register Reg;
17263 unsigned StoreSizeBytes = XLen / 8;
17264 Align StackAlign = Align(XLen / 8);
17266 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
17267 Reg = State.AllocateReg(ArgFPR16s);
17268 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
17269 Reg = State.AllocateReg(ArgFPR32s);
17270 else if (ValVT == MVT::f64 && !UseGPRForF64)
17271 Reg = State.AllocateReg(ArgFPR64s);
17272 else if (ValVT.isVector()) {
17273 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
17274 if (!Reg) {
17275 // For return values, the vector must be passed fully via registers or
17276 // via the stack.
17277 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
17278 // but we're using all of them.
17279 if (IsRet)
17280 return true;
17281 // Try using a GPR to pass the address
17282 if ((Reg = State.AllocateReg(ArgGPRs))) {
17283 LocVT = XLenVT;
17284 LocInfo = CCValAssign::Indirect;
17285 } else if (ValVT.isScalableVector()) {
17286 LocVT = XLenVT;
17287 LocInfo = CCValAssign::Indirect;
17288 } else {
17289 // Pass fixed-length vectors on the stack.
17290 LocVT = ValVT;
17291 StoreSizeBytes = ValVT.getStoreSize();
17292 // Align vectors to their element sizes, being careful for vXi1
17293 // vectors.
17294 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
17297 } else {
17298 Reg = State.AllocateReg(ArgGPRs);
17301 unsigned StackOffset =
17302 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
17304 // If we reach this point and PendingLocs is non-empty, we must be at the
17305 // end of a split argument that must be passed indirectly.
17306 if (!PendingLocs.empty()) {
17307 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
17308 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
17310 for (auto &It : PendingLocs) {
17311 if (Reg)
17312 It.convertToReg(Reg);
17313 else
17314 It.convertToMem(StackOffset);
17315 State.addLoc(It);
17317 PendingLocs.clear();
17318 PendingArgFlags.clear();
17319 return false;
17322 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
17323 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
17324 "Expected an XLenVT or vector types at this stage");
17326 if (Reg) {
17327 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17328 return false;
17331 // When a scalar floating-point value is passed on the stack, no
17332 // bit-conversion is needed.
17333 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
17334 assert(!ValVT.isVector());
17335 LocVT = ValVT;
17336 LocInfo = CCValAssign::Full;
17338 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17339 return false;
17342 template <typename ArgTy>
17343 static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
17344 for (const auto &ArgIdx : enumerate(Args)) {
17345 MVT ArgVT = ArgIdx.value().VT;
17346 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
17347 return ArgIdx.index();
17349 return std::nullopt;
17352 void RISCVTargetLowering::analyzeInputArgs(
17353 MachineFunction &MF, CCState &CCInfo,
17354 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
17355 RISCVCCAssignFn Fn) const {
17356 unsigned NumArgs = Ins.size();
17357 FunctionType *FType = MF.getFunction().getFunctionType();
17359 std::optional<unsigned> FirstMaskArgument;
17360 if (Subtarget.hasVInstructions())
17361 FirstMaskArgument = preAssignMask(Ins);
17363 for (unsigned i = 0; i != NumArgs; ++i) {
17364 MVT ArgVT = Ins[i].VT;
17365 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
17367 Type *ArgTy = nullptr;
17368 if (IsRet)
17369 ArgTy = FType->getReturnType();
17370 else if (Ins[i].isOrigArg())
17371 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
17373 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
17374 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
17375 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
17376 FirstMaskArgument)) {
17377 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
17378 << ArgVT << '\n');
17379 llvm_unreachable(nullptr);
17384 void RISCVTargetLowering::analyzeOutputArgs(
17385 MachineFunction &MF, CCState &CCInfo,
17386 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
17387 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
17388 unsigned NumArgs = Outs.size();
17390 std::optional<unsigned> FirstMaskArgument;
17391 if (Subtarget.hasVInstructions())
17392 FirstMaskArgument = preAssignMask(Outs);
17394 for (unsigned i = 0; i != NumArgs; i++) {
17395 MVT ArgVT = Outs[i].VT;
17396 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
17397 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
17399 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
17400 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
17401 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
17402 FirstMaskArgument)) {
17403 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
17404 << ArgVT << "\n");
17405 llvm_unreachable(nullptr);
17410 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
17411 // values.
17412 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
17413 const CCValAssign &VA, const SDLoc &DL,
17414 const RISCVSubtarget &Subtarget) {
17415 switch (VA.getLocInfo()) {
17416 default:
17417 llvm_unreachable("Unexpected CCValAssign::LocInfo");
17418 case CCValAssign::Full:
17419 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
17420 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
17421 break;
17422 case CCValAssign::BCvt:
17423 if (VA.getLocVT().isInteger() &&
17424 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
17425 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
17426 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
17427 if (RV64LegalI32) {
17428 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
17429 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
17430 } else {
17431 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
17433 } else {
17434 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
17436 break;
17438 return Val;
17441 // The caller is responsible for loading the full value if the argument is
17442 // passed with CCValAssign::Indirect.
17443 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
17444 const CCValAssign &VA, const SDLoc &DL,
17445 const ISD::InputArg &In,
17446 const RISCVTargetLowering &TLI) {
17447 MachineFunction &MF = DAG.getMachineFunction();
17448 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17449 EVT LocVT = VA.getLocVT();
17450 SDValue Val;
17451 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
17452 Register VReg = RegInfo.createVirtualRegister(RC);
17453 RegInfo.addLiveIn(VA.getLocReg(), VReg);
17454 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
17456 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
17457 if (In.isOrigArg()) {
17458 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
17459 if (OrigArg->getType()->isIntegerTy()) {
17460 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
17461 // An input zero extended from i31 can also be considered sign extended.
17462 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
17463 (BitWidth < 32 && In.Flags.isZExt())) {
17464 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
17465 RVFI->addSExt32Register(VReg);
17470 if (VA.getLocInfo() == CCValAssign::Indirect)
17471 return Val;
17473 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
17476 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
17477 const CCValAssign &VA, const SDLoc &DL,
17478 const RISCVSubtarget &Subtarget) {
17479 EVT LocVT = VA.getLocVT();
17481 switch (VA.getLocInfo()) {
17482 default:
17483 llvm_unreachable("Unexpected CCValAssign::LocInfo");
17484 case CCValAssign::Full:
17485 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
17486 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
17487 break;
17488 case CCValAssign::BCvt:
17489 if (LocVT.isInteger() &&
17490 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
17491 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
17492 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
17493 if (RV64LegalI32) {
17494 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
17495 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
17496 } else {
17497 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
17499 } else {
17500 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
17502 break;
17504 return Val;
17507 // The caller is responsible for loading the full value if the argument is
17508 // passed with CCValAssign::Indirect.
17509 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
17510 const CCValAssign &VA, const SDLoc &DL) {
17511 MachineFunction &MF = DAG.getMachineFunction();
17512 MachineFrameInfo &MFI = MF.getFrameInfo();
17513 EVT LocVT = VA.getLocVT();
17514 EVT ValVT = VA.getValVT();
17515 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
17516 if (ValVT.isScalableVector()) {
17517 // When the value is a scalable vector, the stack slot holds a pointer to
17518 // the scalable vector value rather than the value itself, so ValVT becomes
17519 // the pointer type instead of the scalable vector type.
17520 ValVT = LocVT;
17522 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
17523 /*IsImmutable=*/true);
17524 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
17525 SDValue Val;
17527 ISD::LoadExtType ExtType;
17528 switch (VA.getLocInfo()) {
17529 default:
17530 llvm_unreachable("Unexpected CCValAssign::LocInfo");
17531 case CCValAssign::Full:
17532 case CCValAssign::Indirect:
17533 case CCValAssign::BCvt:
17534 ExtType = ISD::NON_EXTLOAD;
17535 break;
17537 Val = DAG.getExtLoad(
17538 ExtType, DL, LocVT, Chain, FIN,
17539 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
17540 return Val;
17543 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
17544 const CCValAssign &VA,
17545 const CCValAssign &HiVA,
17546 const SDLoc &DL) {
17547 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
17548 "Unexpected VA");
17549 MachineFunction &MF = DAG.getMachineFunction();
17550 MachineFrameInfo &MFI = MF.getFrameInfo();
17551 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17553 assert(VA.isRegLoc() && "Expected register VA assignment");
17555 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17556 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
17557 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
17558 SDValue Hi;
17559 if (HiVA.isMemLoc()) {
17560 // Second half of f64 is passed on the stack.
17561 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
17562 /*IsImmutable=*/true);
17563 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
17564 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
17565 MachinePointerInfo::getFixedStack(MF, FI));
17566 } else {
17567 // Second half of f64 is passed in another GPR.
17568 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17569 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
17570 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
17572 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
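// Illustrative example: with an RV32 soft-float ABI an f64 argument is split
// into two i32 halves; the low half always arrives in a GPR, the high half
// arrives either in the next GPR or on the stack, and the halves are
// recombined above with RISCVISD::BuildPairF64.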
17575 // FastCC gives less than a 1% performance improvement on some particular
17576 // benchmarks, but it may theoretically benefit other cases.
17577 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
17578 unsigned ValNo, MVT ValVT, MVT LocVT,
17579 CCValAssign::LocInfo LocInfo,
17580 ISD::ArgFlagsTy ArgFlags, CCState &State,
17581 bool IsFixed, bool IsRet, Type *OrigTy,
17582 const RISCVTargetLowering &TLI,
17583 std::optional<unsigned> FirstMaskArgument) {
17585 // X5 and X6 might be used for save-restore libcalls.
17586 static const MCPhysReg GPRList[] = {
17587 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
17588 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
17589 RISCV::X29, RISCV::X30, RISCV::X31};
17591 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17592 if (unsigned Reg = State.AllocateReg(GPRList)) {
17593 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17594 return false;
17598 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
17600 if (LocVT == MVT::f16 &&
17601 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
17602 static const MCPhysReg FPR16List[] = {
17603 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
17604 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
17605 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
17606 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
17607 if (unsigned Reg = State.AllocateReg(FPR16List)) {
17608 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17609 return false;
17613 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17614 static const MCPhysReg FPR32List[] = {
17615 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
17616 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
17617 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
17618 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
17619 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17620 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17621 return false;
17625 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17626 static const MCPhysReg FPR64List[] = {
17627 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
17628 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
17629 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
17630 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
17631 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17632 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17633 return false;
17637 // Check if there is an available GPR before hitting the stack.
17638 if ((LocVT == MVT::f16 &&
17639 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
17640 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17641 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
17642 Subtarget.hasStdExtZdinx())) {
17643 if (unsigned Reg = State.AllocateReg(GPRList)) {
17644 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17645 return false;
17649 if (LocVT == MVT::f16) {
17650 unsigned Offset2 = State.AllocateStack(2, Align(2));
17651 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
17652 return false;
17655 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
17656 unsigned Offset4 = State.AllocateStack(4, Align(4));
17657 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
17658 return false;
17661 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
17662 unsigned Offset5 = State.AllocateStack(8, Align(8));
17663 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
17664 return false;
17667 if (LocVT.isVector()) {
17668 if (unsigned Reg =
17669 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
17670 // Fixed-length vectors are located in the corresponding scalable-vector
17671 // container types.
17672 if (ValVT.isFixedLengthVector())
17673 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
17674 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17675 } else {
17676 // Try to pass the address via a "fast" GPR.
17677 if (unsigned GPRReg = State.AllocateReg(GPRList)) {
17678 LocInfo = CCValAssign::Indirect;
17679 LocVT = TLI.getSubtarget().getXLenVT();
17680 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
17681 } else if (ValVT.isFixedLengthVector()) {
17682 auto StackAlign =
17683 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
17684 unsigned StackOffset =
17685 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
17686 State.addLoc(
17687 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17688 } else {
17689 // Can't pass scalable vectors on the stack.
17690 return true;
17694 return false;
17697 return true; // CC didn't match.
17700 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
17701 CCValAssign::LocInfo LocInfo,
17702 ISD::ArgFlagsTy ArgFlags, CCState &State) {
17703 if (ArgFlags.isNest()) {
17704 report_fatal_error(
17705 "Attribute 'nest' is not supported in GHC calling convention");
17708 static const MCPhysReg GPRList[] = {
17709 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
17710 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
17712 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17713 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
17714 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
17715 if (unsigned Reg = State.AllocateReg(GPRList)) {
17716 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17717 return false;
17721 const RISCVSubtarget &Subtarget =
17722 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
17724 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17725 // Pass in STG registers: F1, ..., F6
17726 // fs0 ... fs5
17727 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
17728 RISCV::F18_F, RISCV::F19_F,
17729 RISCV::F20_F, RISCV::F21_F};
17730 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17731 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17732 return false;
17736 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17737 // Pass in STG registers: D1, ..., D6
17738 // fs6 ... fs11
17739 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
17740 RISCV::F24_D, RISCV::F25_D,
17741 RISCV::F26_D, RISCV::F27_D};
17742 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17743 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17744 return false;
17748 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17749 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
17750 Subtarget.is64Bit())) {
17751 if (unsigned Reg = State.AllocateReg(GPRList)) {
17752 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17753 return false;
17757 report_fatal_error("No registers left in GHC calling convention");
17758 return true;
17761 // Transform physical registers into virtual registers.
17762 SDValue RISCVTargetLowering::LowerFormalArguments(
17763 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
17764 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
17765 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
17767 MachineFunction &MF = DAG.getMachineFunction();
17769 switch (CallConv) {
17770 default:
17771 report_fatal_error("Unsupported calling convention");
17772 case CallingConv::C:
17773 case CallingConv::Fast:
17774 case CallingConv::SPIR_KERNEL:
17775 case CallingConv::GRAAL:
17776 break;
17777 case CallingConv::GHC:
17778 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
17779 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
17780 "(Zdinx/D) instruction set extensions");
17783 const Function &Func = MF.getFunction();
17784 if (Func.hasFnAttribute("interrupt")) {
17785 if (!Func.arg_empty())
17786 report_fatal_error(
17787 "Functions with the interrupt attribute cannot have arguments!");
17789 StringRef Kind =
17790 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
17792 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
17793 report_fatal_error(
17794 "Function interrupt attribute argument not supported!");
17797 EVT PtrVT = getPointerTy(DAG.getDataLayout());
17798 MVT XLenVT = Subtarget.getXLenVT();
17799 unsigned XLenInBytes = Subtarget.getXLen() / 8;
17800 // Used with varargs to accumulate store chains.
17801 std::vector<SDValue> OutChains;
17803 // Assign locations to all of the incoming arguments.
17804 SmallVector<CCValAssign, 16> ArgLocs;
17805 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
17807 if (CallConv == CallingConv::GHC)
17808 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
17809 else
17810 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
17811 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
17812 : RISCV::CC_RISCV);
17814 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
17815 CCValAssign &VA = ArgLocs[i];
17816 SDValue ArgValue;
17817 // Passing f64 on RV32D with a soft float ABI must be handled as a special
17818 // case.
17819 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17820 assert(VA.needsCustom());
17821 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
17822 } else if (VA.isRegLoc())
17823 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
17824 else
17825 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
17827 if (VA.getLocInfo() == CCValAssign::Indirect) {
17828 // If the original argument was split and passed by reference (e.g. i128
17829 // on RV32), we need to load all parts of it here (using the same
17830 // address). Vectors may be partly split to registers and partly to the
17831 // stack, in which case the base address is partly offset and subsequent
17832 // stores are relative to that.
17833 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
17834 MachinePointerInfo()));
17835 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
17836 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
17837 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
17838 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
17839 CCValAssign &PartVA = ArgLocs[i + 1];
17840 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
17841 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
17842 if (PartVA.getValVT().isScalableVector())
17843 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
17844 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
17845 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
17846 MachinePointerInfo()));
17847 ++i;
17848 ++InsIdx;
17850 continue;
17852 InVals.push_back(ArgValue);
17855 if (any_of(ArgLocs,
17856 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
17857 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
17859 if (IsVarArg) {
17860 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
17861 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
17862 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
17863 MachineFrameInfo &MFI = MF.getFrameInfo();
17864 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17865 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
17867 // Size of the vararg save area. For now, the varargs save area is either
17868 // zero or large enough to hold a0-a7.
17869 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
17870 int FI;
17872 // If all registers are allocated, then all varargs must be passed on the
17873 // stack and we don't need to save any argregs.
17874 if (VarArgsSaveSize == 0) {
17875 int VaArgOffset = CCInfo.getStackSize();
17876 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
17877 } else {
17878 int VaArgOffset = -VarArgsSaveSize;
17879 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
17881 // If saving an odd number of registers, create an extra stack slot to
17882 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
17883 // offsets to even-numbered registers remain 2*XLEN-aligned.
17884 if (Idx % 2) {
17885 MFI.CreateFixedObject(
17886 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
17887 VarArgsSaveSize += XLenInBytes;
17890 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
17892 // Copy the integer registers that may have been used for passing varargs
17893 // to the vararg save area.
17894 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
17895 const Register Reg = RegInfo.createVirtualRegister(RC);
17896 RegInfo.addLiveIn(ArgRegs[I], Reg);
17897 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
17898 SDValue Store = DAG.getStore(
17899 Chain, DL, ArgValue, FIN,
17900 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
17901 OutChains.push_back(Store);
17902 FIN =
17903 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
17907 // Record the frame index of the first variable argument,
17908 // which is needed by VASTART.
17909 RVFI->setVarArgsFrameIndex(FI);
17910 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
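// Worked example: on RV64 (XLenInBytes == 8) with three fixed arguments in
// a0-a2, Idx == 3, so a3-a7 are saved and VarArgsSaveSize starts at 40 bytes.
// Because Idx is odd, an extra 8-byte slot is created and the save size grows
// to 48 bytes, keeping the frame pointer 2*XLEN (16-byte) aligned.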
17913 // All stores are grouped in one node to allow the matching between
17914 // the size of Ins and InVals. This only happens for vararg functions.
17915 if (!OutChains.empty()) {
17916 OutChains.push_back(Chain);
17917 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
17920 return Chain;
17923 /// isEligibleForTailCallOptimization - Check whether the call is eligible
17924 /// for tail call optimization.
17925 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
17926 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
17927 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
17928 const SmallVector<CCValAssign, 16> &ArgLocs) const {
17930 auto CalleeCC = CLI.CallConv;
17931 auto &Outs = CLI.Outs;
17932 auto &Caller = MF.getFunction();
17933 auto CallerCC = Caller.getCallingConv();
17935 // Exception-handling functions need a special set of instructions to
17936 // indicate a return to the hardware. Tail-calling another function would
17937 // probably break this.
17938 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
17939 // should be expanded as new function attributes are introduced.
17940 if (Caller.hasFnAttribute("interrupt"))
17941 return false;
17943 // Do not tail call opt if the stack is used to pass parameters.
17944 if (CCInfo.getStackSize() != 0)
17945 return false;
17947 // Do not tail call opt if any parameters need to be passed indirectly.
17948 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
17949 // passed indirectly: the address of the value is passed in a register, or,
17950 // if no register is available, the address is put on the stack. Passing
17951 // indirectly often requires allocating stack space to store the value, so
17952 // the CCInfo.getStackSize() != 0 check above is not enough and we also need
17953 // to check whether any of the CCValAssigns in ArgLocs are passed
17954 // CCValAssign::Indirect.
17955 for (auto &VA : ArgLocs)
17956 if (VA.getLocInfo() == CCValAssign::Indirect)
17957 return false;
17959 // Do not tail call opt if either caller or callee uses struct return
17960 // semantics.
17961 auto IsCallerStructRet = Caller.hasStructRetAttr();
17962 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
17963 if (IsCallerStructRet || IsCalleeStructRet)
17964 return false;
17966 // The callee has to preserve all registers the caller needs to preserve.
17967 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
17968 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
17969 if (CalleeCC != CallerCC) {
17970 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
17971 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
17972 return false;
17975 // Byval parameters hand the function a pointer directly into the stack area
17976 // we want to reuse during a tail call. Working around this *is* possible
17977 // but less efficient and uglier in LowerCall.
17978 for (auto &Arg : Outs)
17979 if (Arg.Flags.isByVal())
17980 return false;
17982 return true;
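// Illustrative examples of calls rejected above: a caller carrying the
// "interrupt" attribute, a call needing stack-passed or byval arguments, an
// i128 argument on RV32 (assigned CCValAssign::Indirect), or a callee whose
// calling convention does not preserve every register the caller's does.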
17985 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
17986 return DAG.getDataLayout().getPrefTypeAlign(
17987 VT.getTypeForEVT(*DAG.getContext()));
17990 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
17991 // and output parameter nodes.
17992 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
17993 SmallVectorImpl<SDValue> &InVals) const {
17994 SelectionDAG &DAG = CLI.DAG;
17995 SDLoc &DL = CLI.DL;
17996 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
17997 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
17998 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
17999 SDValue Chain = CLI.Chain;
18000 SDValue Callee = CLI.Callee;
18001 bool &IsTailCall = CLI.IsTailCall;
18002 CallingConv::ID CallConv = CLI.CallConv;
18003 bool IsVarArg = CLI.IsVarArg;
18004 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18005 MVT XLenVT = Subtarget.getXLenVT();
18007 MachineFunction &MF = DAG.getMachineFunction();
18009 // Analyze the operands of the call, assigning locations to each operand.
18010 SmallVector<CCValAssign, 16> ArgLocs;
18011 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18013 if (CallConv == CallingConv::GHC)
18014 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
18015 else
18016 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
18017 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18018 : RISCV::CC_RISCV);
18020 // Check if it's really possible to do a tail call.
18021 if (IsTailCall)
18022 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
18024 if (IsTailCall)
18025 ++NumTailCalls;
18026 else if (CLI.CB && CLI.CB->isMustTailCall())
18027 report_fatal_error("failed to perform tail call elimination on a call "
18028 "site marked musttail");
18030 // Get a count of how many bytes are to be pushed on the stack.
18031 unsigned NumBytes = ArgCCInfo.getStackSize();
18033 // Create local copies for byval args
18034 SmallVector<SDValue, 8> ByValArgs;
18035 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
18036 ISD::ArgFlagsTy Flags = Outs[i].Flags;
18037 if (!Flags.isByVal())
18038 continue;
18040 SDValue Arg = OutVals[i];
18041 unsigned Size = Flags.getByValSize();
18042 Align Alignment = Flags.getNonZeroByValAlign();
18044 int FI =
18045 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
18046 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
18047 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
18049 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
18050 /*IsVolatile=*/false,
18051 /*AlwaysInline=*/false, IsTailCall,
18052 MachinePointerInfo(), MachinePointerInfo());
18053 ByValArgs.push_back(FIPtr);
18056 if (!IsTailCall)
18057 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
18059 // Copy argument values to their designated locations.
18060 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
18061 SmallVector<SDValue, 8> MemOpChains;
18062 SDValue StackPtr;
18063 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
18064 ++i, ++OutIdx) {
18065 CCValAssign &VA = ArgLocs[i];
18066 SDValue ArgValue = OutVals[OutIdx];
18067 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
18069 // Handle passing f64 on RV32D with a soft float ABI as a special case.
18070 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18071 assert(VA.isRegLoc() && "Expected register VA assignment");
18072 assert(VA.needsCustom());
18073 SDValue SplitF64 = DAG.getNode(
18074 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
18075 SDValue Lo = SplitF64.getValue(0);
18076 SDValue Hi = SplitF64.getValue(1);
18078 Register RegLo = VA.getLocReg();
18079 RegsToPass.push_back(std::make_pair(RegLo, Lo));
18081 // Get the CCValAssign for the Hi part.
18082 CCValAssign &HiVA = ArgLocs[++i];
18084 if (HiVA.isMemLoc()) {
18085 // Second half of f64 is passed on the stack.
18086 if (!StackPtr.getNode())
18087 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
18088 SDValue Address =
18089 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
18090 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
18091 // Emit the store.
18092 MemOpChains.push_back(
18093 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
18094 } else {
18095 // Second half of f64 is passed in another GPR.
18096 Register RegHigh = HiVA.getLocReg();
18097 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
18099 continue;
18102 // Promote the value if needed.
18103 // For now, only handle fully promoted and indirect arguments.
18104 if (VA.getLocInfo() == CCValAssign::Indirect) {
18105 // Store the argument in a stack slot and pass its address.
18106 Align StackAlign =
18107 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
18108 getPrefTypeAlign(ArgValue.getValueType(), DAG));
18109 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
18110 // If the original argument was split (e.g. i128), we need
18111 // to store the required parts of it here (and pass just one address).
18112 // Vectors may be partly split to registers and partly to the stack, in
18113 // which case the base address is partly offset and subsequent stores are
18114 // relative to that.
18115 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
18116 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
18117 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18118 // Calculate the total size to store. The only way to know what will
18119 // actually be stored is to walk the remaining parts in the loop below and
18120 // collect that info.
18121 SmallVector<std::pair<SDValue, SDValue>> Parts;
18122 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
18123 SDValue PartValue = OutVals[OutIdx + 1];
18124 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
18125 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
18126 EVT PartVT = PartValue.getValueType();
18127 if (PartVT.isScalableVector())
18128 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
18129 StoredSize += PartVT.getStoreSize();
18130 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
18131 Parts.push_back(std::make_pair(PartValue, Offset));
18132 ++i;
18133 ++OutIdx;
18135 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
18136 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
18137 MemOpChains.push_back(
18138 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
18139 MachinePointerInfo::getFixedStack(MF, FI)));
18140 for (const auto &Part : Parts) {
18141 SDValue PartValue = Part.first;
18142 SDValue PartOffset = Part.second;
18143 SDValue Address =
18144 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
18145 MemOpChains.push_back(
18146 DAG.getStore(Chain, DL, PartValue, Address,
18147 MachinePointerInfo::getFixedStack(MF, FI)));
18149 ArgValue = SpillSlot;
18150 } else {
18151 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
18154 // Use local copy if it is a byval arg.
18155 if (Flags.isByVal())
18156 ArgValue = ByValArgs[j++];
18158 if (VA.isRegLoc()) {
18159 // Queue up the argument copies and emit them at the end.
18160 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
18161 } else {
18162 assert(VA.isMemLoc() && "Argument not register or memory");
18163 assert(!IsTailCall && "Tail call not allowed if stack is used "
18164 "for passing parameters");
18166 // Work out the address of the stack slot.
18167 if (!StackPtr.getNode())
18168 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
18169 SDValue Address =
18170 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
18171 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
18173 // Emit the store.
18174 MemOpChains.push_back(
18175 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
18179 // Join the stores, which are independent of one another.
18180 if (!MemOpChains.empty())
18181 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
18183 SDValue Glue;
18185 // Build a sequence of copy-to-reg nodes, chained and glued together.
18186 for (auto &Reg : RegsToPass) {
18187 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
18188 Glue = Chain.getValue(1);
18191 // Validate that none of the argument registers have been marked as
18192 // reserved; if any have, report an error. Do the same for the return address
18193 // if this is not a tail call.
18194 validateCCReservedRegs(RegsToPass, MF);
18195 if (!IsTailCall &&
18196 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
18197 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
18198 MF.getFunction(),
18199 "Return address register required, but has been reserved."});
18201 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
18202 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
18203 // split it, and so the direct call can be matched by PseudoCALL.
18204 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
18205 const GlobalValue *GV = S->getGlobal();
18207 unsigned OpFlags = RISCVII::MO_CALL;
18208 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
18209 OpFlags = RISCVII::MO_PLT;
18211 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
18212 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
18213 unsigned OpFlags = RISCVII::MO_CALL;
18215 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
18216 nullptr))
18217 OpFlags = RISCVII::MO_PLT;
18219 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
18222 // The first call operand is the chain and the second is the target address.
18223 SmallVector<SDValue, 8> Ops;
18224 Ops.push_back(Chain);
18225 Ops.push_back(Callee);
18227 // Add argument registers to the end of the list so that they are
18228 // known live into the call.
18229 for (auto &Reg : RegsToPass)
18230 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
18232 if (!IsTailCall) {
18233 // Add a register mask operand representing the call-preserved registers.
18234 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
18235 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
18236 assert(Mask && "Missing call preserved mask for calling convention");
18237 Ops.push_back(DAG.getRegisterMask(Mask));
18240 // Glue the call to the argument copies, if any.
18241 if (Glue.getNode())
18242 Ops.push_back(Glue);
18244 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
18245 "Unexpected CFI type for a direct call");
18247 // Emit the call.
18248 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
18250 if (IsTailCall) {
18251 MF.getFrameInfo().setHasTailCall();
18252 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
18253 if (CLI.CFIType)
18254 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
18255 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
18256 return Ret;
18259 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
18260 if (CLI.CFIType)
18261 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
18262 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
18263 Glue = Chain.getValue(1);
18265 // Mark the end of the call, which is glued to the call itself.
18266 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
18267 Glue = Chain.getValue(1);
18269 // Assign locations to each value returned by this call.
18270 SmallVector<CCValAssign, 16> RVLocs;
18271 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
18272 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
18274 // Copy all of the result registers out of their specified physreg.
18275 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
18276 auto &VA = RVLocs[i];
18277 // Copy the value out
18278 SDValue RetValue =
18279 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
18280 // Glue the RetValue to the end of the call sequence
18281 Chain = RetValue.getValue(1);
18282 Glue = RetValue.getValue(2);
18284 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18285 assert(VA.needsCustom());
18286 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
18287 MVT::i32, Glue);
18288 Chain = RetValue2.getValue(1);
18289 Glue = RetValue2.getValue(2);
18290 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
18291 RetValue2);
18294 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
18296 InVals.push_back(RetValue);
18299 return Chain;
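// The overall DAG produced above for a non-tail call is roughly:
//   byval copies -> callseq_start -> argument stores and CopyToReg nodes ->
//   RISCVISD::CALL -> callseq_end -> CopyFromReg of each returned value.
// For a tail call, RISCVISD::TAIL is emitted instead and the sequence ends
// there.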
18302 bool RISCVTargetLowering::CanLowerReturn(
18303 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
18304 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
18305 SmallVector<CCValAssign, 16> RVLocs;
18306 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
18308 std::optional<unsigned> FirstMaskArgument;
18309 if (Subtarget.hasVInstructions())
18310 FirstMaskArgument = preAssignMask(Outs);
18312 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
18313 MVT VT = Outs[i].VT;
18314 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18315 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18316 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
18317 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
18318 *this, FirstMaskArgument))
18319 return false;
18321 return true;
18324 SDValue
18325 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
18326 bool IsVarArg,
18327 const SmallVectorImpl<ISD::OutputArg> &Outs,
18328 const SmallVectorImpl<SDValue> &OutVals,
18329 const SDLoc &DL, SelectionDAG &DAG) const {
18330 MachineFunction &MF = DAG.getMachineFunction();
18331 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
18333 // Stores the assignment of the return value to a location.
18334 SmallVector<CCValAssign, 16> RVLocs;
18336 // Info about the registers and stack slot.
18337 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
18338 *DAG.getContext());
18340 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
18341 nullptr, RISCV::CC_RISCV);
18343 if (CallConv == CallingConv::GHC && !RVLocs.empty())
18344 report_fatal_error("GHC functions return void only");
18346 SDValue Glue;
18347 SmallVector<SDValue, 4> RetOps(1, Chain);
18349 // Copy the result values into the output registers.
18350 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
18351 SDValue Val = OutVals[OutIdx];
18352 CCValAssign &VA = RVLocs[i];
18353 assert(VA.isRegLoc() && "Can only return in registers!");
18355 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18356 // Handle returning f64 on RV32D with a soft float ABI.
18357 assert(VA.isRegLoc() && "Expected return via registers");
18358 assert(VA.needsCustom());
18359 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
18360 DAG.getVTList(MVT::i32, MVT::i32), Val);
18361 SDValue Lo = SplitF64.getValue(0);
18362 SDValue Hi = SplitF64.getValue(1);
18363 Register RegLo = VA.getLocReg();
18364 Register RegHi = RVLocs[++i].getLocReg();
18366 if (STI.isRegisterReservedByUser(RegLo) ||
18367 STI.isRegisterReservedByUser(RegHi))
18368 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
18369 MF.getFunction(),
18370 "Return value register required, but has been reserved."});
18372 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
18373 Glue = Chain.getValue(1);
18374 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
18375 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
18376 Glue = Chain.getValue(1);
18377 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
18378 } else {
18379 // Handle a 'normal' return.
18380 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
18381 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
18383 if (STI.isRegisterReservedByUser(VA.getLocReg()))
18384 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
18385 MF.getFunction(),
18386 "Return value register required, but has been reserved."});
18388 // Guarantee that all emitted copies are stuck together.
18389 Glue = Chain.getValue(1);
18390 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
18394 RetOps[0] = Chain; // Update chain.
18396 // Add the glue node if we have it.
18397 if (Glue.getNode()) {
18398 RetOps.push_back(Glue);
18401 if (any_of(RVLocs,
18402 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
18403 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
18405 unsigned RetOpc = RISCVISD::RET_GLUE;
18406 // Interrupt service routines use different return instructions.
18407 const Function &Func = DAG.getMachineFunction().getFunction();
18408 if (Func.hasFnAttribute("interrupt")) {
18409 if (!Func.getReturnType()->isVoidTy())
18410 report_fatal_error(
18411 "Functions with the interrupt attribute must have void return type!");
18413 MachineFunction &MF = DAG.getMachineFunction();
18414 StringRef Kind =
18415 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18417 if (Kind == "supervisor")
18418 RetOpc = RISCVISD::SRET_GLUE;
18419 else
18420 RetOpc = RISCVISD::MRET_GLUE;
18423 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
18426 void RISCVTargetLowering::validateCCReservedRegs(
18427 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
18428 MachineFunction &MF) const {
18429 const Function &F = MF.getFunction();
18430 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
18432 if (llvm::any_of(Regs, [&STI](auto Reg) {
18433 return STI.isRegisterReservedByUser(Reg.first);
18435 F.getContext().diagnose(DiagnosticInfoUnsupported{
18436 F, "Argument register required, but has been reserved."});
18439 // Check if the result of the node is only used as a return value, as
18440 // otherwise we can't perform a tail-call.
18441 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
18442 if (N->getNumValues() != 1)
18443 return false;
18444 if (!N->hasNUsesOfValue(1, 0))
18445 return false;
18447 SDNode *Copy = *N->use_begin();
18449 if (Copy->getOpcode() == ISD::BITCAST) {
18450 return isUsedByReturnOnly(Copy, Chain);
18453 // TODO: Handle additional opcodes in order to support tail-calling libcalls
18454 // with soft float ABIs.
18455 if (Copy->getOpcode() != ISD::CopyToReg) {
18456 return false;
18459 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
18460 // isn't safe to perform a tail call.
18461 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
18462 return false;
18464 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
18465 bool HasRet = false;
18466 for (SDNode *Node : Copy->uses()) {
18467 if (Node->getOpcode() != RISCVISD::RET_GLUE)
18468 return false;
18469 HasRet = true;
18471 if (!HasRet)
18472 return false;
18474 Chain = Copy->getOperand(0);
18475 return true;
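// The accepted pattern is therefore, possibly through a single BITCAST:
//   N -> CopyToReg (with no glue operand) -> used only by RISCVISD::RET_GLUE.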
18478 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18479 return CI->isTailCall();
18482 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
18483 #define NODE_NAME_CASE(NODE) \
18484 case RISCVISD::NODE: \
18485 return "RISCVISD::" #NODE;
18486 // clang-format off
18487 switch ((RISCVISD::NodeType)Opcode) {
18488 case RISCVISD::FIRST_NUMBER:
18489 break;
18490 NODE_NAME_CASE(RET_GLUE)
18491 NODE_NAME_CASE(SRET_GLUE)
18492 NODE_NAME_CASE(MRET_GLUE)
18493 NODE_NAME_CASE(CALL)
18494 NODE_NAME_CASE(SELECT_CC)
18495 NODE_NAME_CASE(BR_CC)
18496 NODE_NAME_CASE(BuildPairF64)
18497 NODE_NAME_CASE(SplitF64)
18498 NODE_NAME_CASE(TAIL)
18499 NODE_NAME_CASE(ADD_LO)
18500 NODE_NAME_CASE(HI)
18501 NODE_NAME_CASE(LLA)
18502 NODE_NAME_CASE(ADD_TPREL)
18503 NODE_NAME_CASE(MULHSU)
18504 NODE_NAME_CASE(SLLW)
18505 NODE_NAME_CASE(SRAW)
18506 NODE_NAME_CASE(SRLW)
18507 NODE_NAME_CASE(DIVW)
18508 NODE_NAME_CASE(DIVUW)
18509 NODE_NAME_CASE(REMUW)
18510 NODE_NAME_CASE(ROLW)
18511 NODE_NAME_CASE(RORW)
18512 NODE_NAME_CASE(CLZW)
18513 NODE_NAME_CASE(CTZW)
18514 NODE_NAME_CASE(ABSW)
18515 NODE_NAME_CASE(FMV_H_X)
18516 NODE_NAME_CASE(FMV_X_ANYEXTH)
18517 NODE_NAME_CASE(FMV_X_SIGNEXTH)
18518 NODE_NAME_CASE(FMV_W_X_RV64)
18519 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
18520 NODE_NAME_CASE(FCVT_X)
18521 NODE_NAME_CASE(FCVT_XU)
18522 NODE_NAME_CASE(FCVT_W_RV64)
18523 NODE_NAME_CASE(FCVT_WU_RV64)
18524 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
18525 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
18526 NODE_NAME_CASE(FP_ROUND_BF16)
18527 NODE_NAME_CASE(FP_EXTEND_BF16)
18528 NODE_NAME_CASE(FROUND)
18529 NODE_NAME_CASE(FCLASS)
18530 NODE_NAME_CASE(FMAX)
18531 NODE_NAME_CASE(FMIN)
18532 NODE_NAME_CASE(READ_CYCLE_WIDE)
18533 NODE_NAME_CASE(BREV8)
18534 NODE_NAME_CASE(ORC_B)
18535 NODE_NAME_CASE(ZIP)
18536 NODE_NAME_CASE(UNZIP)
18537 NODE_NAME_CASE(CLMUL)
18538 NODE_NAME_CASE(CLMULH)
18539 NODE_NAME_CASE(CLMULR)
18540 NODE_NAME_CASE(SHA256SIG0)
18541 NODE_NAME_CASE(SHA256SIG1)
18542 NODE_NAME_CASE(SHA256SUM0)
18543 NODE_NAME_CASE(SHA256SUM1)
18544 NODE_NAME_CASE(SM4KS)
18545 NODE_NAME_CASE(SM4ED)
18546 NODE_NAME_CASE(SM3P0)
18547 NODE_NAME_CASE(SM3P1)
18548 NODE_NAME_CASE(TH_LWD)
18549 NODE_NAME_CASE(TH_LWUD)
18550 NODE_NAME_CASE(TH_LDD)
18551 NODE_NAME_CASE(TH_SWD)
18552 NODE_NAME_CASE(TH_SDD)
18553 NODE_NAME_CASE(VMV_V_V_VL)
18554 NODE_NAME_CASE(VMV_V_X_VL)
18555 NODE_NAME_CASE(VFMV_V_F_VL)
18556 NODE_NAME_CASE(VMV_X_S)
18557 NODE_NAME_CASE(VMV_S_X_VL)
18558 NODE_NAME_CASE(VFMV_S_F_VL)
18559 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
18560 NODE_NAME_CASE(READ_VLENB)
18561 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
18562 NODE_NAME_CASE(VSLIDEUP_VL)
18563 NODE_NAME_CASE(VSLIDE1UP_VL)
18564 NODE_NAME_CASE(VSLIDEDOWN_VL)
18565 NODE_NAME_CASE(VSLIDE1DOWN_VL)
18566 NODE_NAME_CASE(VFSLIDE1UP_VL)
18567 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
18568 NODE_NAME_CASE(VID_VL)
18569 NODE_NAME_CASE(VFNCVT_ROD_VL)
18570 NODE_NAME_CASE(VECREDUCE_ADD_VL)
18571 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
18572 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
18573 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
18574 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
18575 NODE_NAME_CASE(VECREDUCE_AND_VL)
18576 NODE_NAME_CASE(VECREDUCE_OR_VL)
18577 NODE_NAME_CASE(VECREDUCE_XOR_VL)
18578 NODE_NAME_CASE(VECREDUCE_FADD_VL)
18579 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
18580 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
18581 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
18582 NODE_NAME_CASE(ADD_VL)
18583 NODE_NAME_CASE(AND_VL)
18584 NODE_NAME_CASE(MUL_VL)
18585 NODE_NAME_CASE(OR_VL)
18586 NODE_NAME_CASE(SDIV_VL)
18587 NODE_NAME_CASE(SHL_VL)
18588 NODE_NAME_CASE(SREM_VL)
18589 NODE_NAME_CASE(SRA_VL)
18590 NODE_NAME_CASE(SRL_VL)
18591 NODE_NAME_CASE(ROTL_VL)
18592 NODE_NAME_CASE(ROTR_VL)
18593 NODE_NAME_CASE(SUB_VL)
18594 NODE_NAME_CASE(UDIV_VL)
18595 NODE_NAME_CASE(UREM_VL)
18596 NODE_NAME_CASE(XOR_VL)
18597 NODE_NAME_CASE(SADDSAT_VL)
18598 NODE_NAME_CASE(UADDSAT_VL)
18599 NODE_NAME_CASE(SSUBSAT_VL)
18600 NODE_NAME_CASE(USUBSAT_VL)
18601 NODE_NAME_CASE(FADD_VL)
18602 NODE_NAME_CASE(FSUB_VL)
18603 NODE_NAME_CASE(FMUL_VL)
18604 NODE_NAME_CASE(FDIV_VL)
18605 NODE_NAME_CASE(FNEG_VL)
18606 NODE_NAME_CASE(FABS_VL)
18607 NODE_NAME_CASE(FSQRT_VL)
18608 NODE_NAME_CASE(FCLASS_VL)
18609 NODE_NAME_CASE(VFMADD_VL)
18610 NODE_NAME_CASE(VFNMADD_VL)
18611 NODE_NAME_CASE(VFMSUB_VL)
18612 NODE_NAME_CASE(VFNMSUB_VL)
18613 NODE_NAME_CASE(VFWMADD_VL)
18614 NODE_NAME_CASE(VFWNMADD_VL)
18615 NODE_NAME_CASE(VFWMSUB_VL)
18616 NODE_NAME_CASE(VFWNMSUB_VL)
18617 NODE_NAME_CASE(FCOPYSIGN_VL)
18618 NODE_NAME_CASE(SMIN_VL)
18619 NODE_NAME_CASE(SMAX_VL)
18620 NODE_NAME_CASE(UMIN_VL)
18621 NODE_NAME_CASE(UMAX_VL)
18622 NODE_NAME_CASE(BITREVERSE_VL)
18623 NODE_NAME_CASE(BSWAP_VL)
18624 NODE_NAME_CASE(CTLZ_VL)
18625 NODE_NAME_CASE(CTTZ_VL)
18626 NODE_NAME_CASE(CTPOP_VL)
18627 NODE_NAME_CASE(VFMIN_VL)
18628 NODE_NAME_CASE(VFMAX_VL)
18629 NODE_NAME_CASE(MULHS_VL)
18630 NODE_NAME_CASE(MULHU_VL)
18631 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
18632 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
18633 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
18634 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
18635 NODE_NAME_CASE(VFCVT_X_F_VL)
18636 NODE_NAME_CASE(VFCVT_XU_F_VL)
18637 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
18638 NODE_NAME_CASE(SINT_TO_FP_VL)
18639 NODE_NAME_CASE(UINT_TO_FP_VL)
18640 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
18641 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
18642 NODE_NAME_CASE(FP_EXTEND_VL)
18643 NODE_NAME_CASE(FP_ROUND_VL)
18644 NODE_NAME_CASE(STRICT_FADD_VL)
18645 NODE_NAME_CASE(STRICT_FSUB_VL)
18646 NODE_NAME_CASE(STRICT_FMUL_VL)
18647 NODE_NAME_CASE(STRICT_FDIV_VL)
18648 NODE_NAME_CASE(STRICT_FSQRT_VL)
18649 NODE_NAME_CASE(STRICT_VFMADD_VL)
18650 NODE_NAME_CASE(STRICT_VFNMADD_VL)
18651 NODE_NAME_CASE(STRICT_VFMSUB_VL)
18652 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
18653 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
18654 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
18655 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
18656 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
18657 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
18658 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
18659 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
18660 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
18661 NODE_NAME_CASE(STRICT_FSETCC_VL)
18662 NODE_NAME_CASE(STRICT_FSETCCS_VL)
18663 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
18664 NODE_NAME_CASE(VWMUL_VL)
18665 NODE_NAME_CASE(VWMULU_VL)
18666 NODE_NAME_CASE(VWMULSU_VL)
18667 NODE_NAME_CASE(VWADD_VL)
18668 NODE_NAME_CASE(VWADDU_VL)
18669 NODE_NAME_CASE(VWSUB_VL)
18670 NODE_NAME_CASE(VWSUBU_VL)
18671 NODE_NAME_CASE(VWADD_W_VL)
18672 NODE_NAME_CASE(VWADDU_W_VL)
18673 NODE_NAME_CASE(VWSUB_W_VL)
18674 NODE_NAME_CASE(VWSUBU_W_VL)
18675 NODE_NAME_CASE(VWSLL_VL)
18676 NODE_NAME_CASE(VFWMUL_VL)
18677 NODE_NAME_CASE(VFWADD_VL)
18678 NODE_NAME_CASE(VFWSUB_VL)
18679 NODE_NAME_CASE(VFWADD_W_VL)
18680 NODE_NAME_CASE(VFWSUB_W_VL)
18681 NODE_NAME_CASE(VWMACC_VL)
18682 NODE_NAME_CASE(VWMACCU_VL)
18683 NODE_NAME_CASE(VWMACCSU_VL)
18684 NODE_NAME_CASE(VNSRL_VL)
18685 NODE_NAME_CASE(SETCC_VL)
18686 NODE_NAME_CASE(VSELECT_VL)
18687 NODE_NAME_CASE(VMERGE_VL)
18688 NODE_NAME_CASE(VMAND_VL)
18689 NODE_NAME_CASE(VMOR_VL)
18690 NODE_NAME_CASE(VMXOR_VL)
18691 NODE_NAME_CASE(VMCLR_VL)
18692 NODE_NAME_CASE(VMSET_VL)
18693 NODE_NAME_CASE(VRGATHER_VX_VL)
18694 NODE_NAME_CASE(VRGATHER_VV_VL)
18695 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
18696 NODE_NAME_CASE(VSEXT_VL)
18697 NODE_NAME_CASE(VZEXT_VL)
18698 NODE_NAME_CASE(VCPOP_VL)
18699 NODE_NAME_CASE(VFIRST_VL)
18700 NODE_NAME_CASE(READ_CSR)
18701 NODE_NAME_CASE(WRITE_CSR)
18702 NODE_NAME_CASE(SWAP_CSR)
18703 NODE_NAME_CASE(CZERO_EQZ)
18704 NODE_NAME_CASE(CZERO_NEZ)
18706 // clang-format on
18707 return nullptr;
18708 #undef NODE_NAME_CASE
18711 /// getConstraintType - Given a constraint letter, return the type of
18712 /// constraint it is for this target.
18713 RISCVTargetLowering::ConstraintType
18714 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
18715 if (Constraint.size() == 1) {
18716 switch (Constraint[0]) {
18717 default:
18718 break;
18719 case 'f':
18720 return C_RegisterClass;
18721 case 'I':
18722 case 'J':
18723 case 'K':
18724 return C_Immediate;
18725 case 'A':
18726 return C_Memory;
18727 case 'S': // A symbolic address
18728 return C_Other;
18730 } else {
18731 if (Constraint == "vr" || Constraint == "vm")
18732 return C_RegisterClass;
18734 return TargetLowering::getConstraintType(Constraint);
18737 std::pair<unsigned, const TargetRegisterClass *>
18738 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18739 StringRef Constraint,
18740 MVT VT) const {
18741 // First, see if this is a constraint that directly corresponds to a RISC-V
18742 // register class.
18743 if (Constraint.size() == 1) {
18744 switch (Constraint[0]) {
18745 case 'r':
18746 // TODO: Support fixed vectors up to XLen for P extension?
18747 if (VT.isVector())
18748 break;
18749 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
18750 return std::make_pair(0U, &RISCV::GPRF16RegClass);
18751 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
18752 return std::make_pair(0U, &RISCV::GPRF32RegClass);
18753 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
18754 return std::make_pair(0U, &RISCV::GPRPF64RegClass);
18755 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
18756 case 'f':
18757 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
18758 return std::make_pair(0U, &RISCV::FPR16RegClass);
18759 if (Subtarget.hasStdExtF() && VT == MVT::f32)
18760 return std::make_pair(0U, &RISCV::FPR32RegClass);
18761 if (Subtarget.hasStdExtD() && VT == MVT::f64)
18762 return std::make_pair(0U, &RISCV::FPR64RegClass);
18763 break;
18764 default:
18765 break;
18767 } else if (Constraint == "vr") {
18768 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
18769 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
18770 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
18771 return std::make_pair(0U, RC);
18773 } else if (Constraint == "vm") {
18774 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
18775 return std::make_pair(0U, &RISCV::VMV0RegClass);
18778 // Clang will correctly decode the usage of register name aliases into their
18779 // official names. However, other frontends like `rustc` do not. This allows
18780 // users of these frontends to use the ABI names for registers in LLVM-style
18781 // register constraints.
18782 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
18783 .Case("{zero}", RISCV::X0)
18784 .Case("{ra}", RISCV::X1)
18785 .Case("{sp}", RISCV::X2)
18786 .Case("{gp}", RISCV::X3)
18787 .Case("{tp}", RISCV::X4)
18788 .Case("{t0}", RISCV::X5)
18789 .Case("{t1}", RISCV::X6)
18790 .Case("{t2}", RISCV::X7)
18791 .Cases("{s0}", "{fp}", RISCV::X8)
18792 .Case("{s1}", RISCV::X9)
18793 .Case("{a0}", RISCV::X10)
18794 .Case("{a1}", RISCV::X11)
18795 .Case("{a2}", RISCV::X12)
18796 .Case("{a3}", RISCV::X13)
18797 .Case("{a4}", RISCV::X14)
18798 .Case("{a5}", RISCV::X15)
18799 .Case("{a6}", RISCV::X16)
18800 .Case("{a7}", RISCV::X17)
18801 .Case("{s2}", RISCV::X18)
18802 .Case("{s3}", RISCV::X19)
18803 .Case("{s4}", RISCV::X20)
18804 .Case("{s5}", RISCV::X21)
18805 .Case("{s6}", RISCV::X22)
18806 .Case("{s7}", RISCV::X23)
18807 .Case("{s8}", RISCV::X24)
18808 .Case("{s9}", RISCV::X25)
18809 .Case("{s10}", RISCV::X26)
18810 .Case("{s11}", RISCV::X27)
18811 .Case("{t3}", RISCV::X28)
18812 .Case("{t4}", RISCV::X29)
18813 .Case("{t5}", RISCV::X30)
18814 .Case("{t6}", RISCV::X31)
18815 .Default(RISCV::NoRegister);
18816 if (XRegFromAlias != RISCV::NoRegister)
18817 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
18819 // TargetLowering::getRegForInlineAsmConstraint uses the name of the
18820 // TableGen record rather than the AsmName to choose registers for InlineAsm
18821 // constraints, and we want to match those names to the widest floating point
18822 // register type available, so manually select floating point registers here.
18824 // The second case is the ABI name of the register, so that frontends can also
18825 // use the ABI names in register constraint lists.
18826 if (Subtarget.hasStdExtF()) {
18827 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
18828 .Cases("{f0}", "{ft0}", RISCV::F0_F)
18829 .Cases("{f1}", "{ft1}", RISCV::F1_F)
18830 .Cases("{f2}", "{ft2}", RISCV::F2_F)
18831 .Cases("{f3}", "{ft3}", RISCV::F3_F)
18832 .Cases("{f4}", "{ft4}", RISCV::F4_F)
18833 .Cases("{f5}", "{ft5}", RISCV::F5_F)
18834 .Cases("{f6}", "{ft6}", RISCV::F6_F)
18835 .Cases("{f7}", "{ft7}", RISCV::F7_F)
18836 .Cases("{f8}", "{fs0}", RISCV::F8_F)
18837 .Cases("{f9}", "{fs1}", RISCV::F9_F)
18838 .Cases("{f10}", "{fa0}", RISCV::F10_F)
18839 .Cases("{f11}", "{fa1}", RISCV::F11_F)
18840 .Cases("{f12}", "{fa2}", RISCV::F12_F)
18841 .Cases("{f13}", "{fa3}", RISCV::F13_F)
18842 .Cases("{f14}", "{fa4}", RISCV::F14_F)
18843 .Cases("{f15}", "{fa5}", RISCV::F15_F)
18844 .Cases("{f16}", "{fa6}", RISCV::F16_F)
18845 .Cases("{f17}", "{fa7}", RISCV::F17_F)
18846 .Cases("{f18}", "{fs2}", RISCV::F18_F)
18847 .Cases("{f19}", "{fs3}", RISCV::F19_F)
18848 .Cases("{f20}", "{fs4}", RISCV::F20_F)
18849 .Cases("{f21}", "{fs5}", RISCV::F21_F)
18850 .Cases("{f22}", "{fs6}", RISCV::F22_F)
18851 .Cases("{f23}", "{fs7}", RISCV::F23_F)
18852 .Cases("{f24}", "{fs8}", RISCV::F24_F)
18853 .Cases("{f25}", "{fs9}", RISCV::F25_F)
18854 .Cases("{f26}", "{fs10}", RISCV::F26_F)
18855 .Cases("{f27}", "{fs11}", RISCV::F27_F)
18856 .Cases("{f28}", "{ft8}", RISCV::F28_F)
18857 .Cases("{f29}", "{ft9}", RISCV::F29_F)
18858 .Cases("{f30}", "{ft10}", RISCV::F30_F)
18859 .Cases("{f31}", "{ft11}", RISCV::F31_F)
18860 .Default(RISCV::NoRegister);
18861 if (FReg != RISCV::NoRegister) {
18862 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
18863 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
18864 unsigned RegNo = FReg - RISCV::F0_F;
18865 unsigned DReg = RISCV::F0_D + RegNo;
18866 return std::make_pair(DReg, &RISCV::FPR64RegClass);
18868 if (VT == MVT::f32 || VT == MVT::Other)
18869 return std::make_pair(FReg, &RISCV::FPR32RegClass);
18870 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
18871 unsigned RegNo = FReg - RISCV::F0_F;
18872 unsigned HReg = RISCV::F0_H + RegNo;
18873 return std::make_pair(HReg, &RISCV::FPR16RegClass);
18878 if (Subtarget.hasVInstructions()) {
18879 Register VReg = StringSwitch<Register>(Constraint.lower())
18880 .Case("{v0}", RISCV::V0)
18881 .Case("{v1}", RISCV::V1)
18882 .Case("{v2}", RISCV::V2)
18883 .Case("{v3}", RISCV::V3)
18884 .Case("{v4}", RISCV::V4)
18885 .Case("{v5}", RISCV::V5)
18886 .Case("{v6}", RISCV::V6)
18887 .Case("{v7}", RISCV::V7)
18888 .Case("{v8}", RISCV::V8)
18889 .Case("{v9}", RISCV::V9)
18890 .Case("{v10}", RISCV::V10)
18891 .Case("{v11}", RISCV::V11)
18892 .Case("{v12}", RISCV::V12)
18893 .Case("{v13}", RISCV::V13)
18894 .Case("{v14}", RISCV::V14)
18895 .Case("{v15}", RISCV::V15)
18896 .Case("{v16}", RISCV::V16)
18897 .Case("{v17}", RISCV::V17)
18898 .Case("{v18}", RISCV::V18)
18899 .Case("{v19}", RISCV::V19)
18900 .Case("{v20}", RISCV::V20)
18901 .Case("{v21}", RISCV::V21)
18902 .Case("{v22}", RISCV::V22)
18903 .Case("{v23}", RISCV::V23)
18904 .Case("{v24}", RISCV::V24)
18905 .Case("{v25}", RISCV::V25)
18906 .Case("{v26}", RISCV::V26)
18907 .Case("{v27}", RISCV::V27)
18908 .Case("{v28}", RISCV::V28)
18909 .Case("{v29}", RISCV::V29)
18910 .Case("{v30}", RISCV::V30)
18911 .Case("{v31}", RISCV::V31)
18912 .Default(RISCV::NoRegister);
18913 if (VReg != RISCV::NoRegister) {
18914 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
18915 return std::make_pair(VReg, &RISCV::VMRegClass);
18916 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
18917 return std::make_pair(VReg, &RISCV::VRRegClass);
18918 for (const auto *RC :
18919 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
18920 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
18921 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
18922 return std::make_pair(VReg, RC);
18928 std::pair<Register, const TargetRegisterClass *> Res =
18929 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18931 // If we picked one of the Zfinx register classes, remap it to the GPR class.
18932 // FIXME: When Zfinx is supported in CodeGen this will need to take the
18933 // Subtarget into account.
18934 if (Res.second == &RISCV::GPRF16RegClass ||
18935 Res.second == &RISCV::GPRF32RegClass ||
18936 Res.second == &RISCV::GPRPF64RegClass)
18937 return std::make_pair(Res.first, &RISCV::GPRRegClass);
18939 return Res;
18942 InlineAsm::ConstraintCode
18943 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
18944 // Currently only support length 1 constraints.
18945 if (ConstraintCode.size() == 1) {
18946 switch (ConstraintCode[0]) {
18947 case 'A':
18948 return InlineAsm::ConstraintCode::A;
18949 default:
18950 break;
18954 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
18957 void RISCVTargetLowering::LowerAsmOperandForConstraint(
18958 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
18959 SelectionDAG &DAG) const {
18960 // Currently only support length 1 constraints.
18961 if (Constraint.size() == 1) {
18962 switch (Constraint[0]) {
18963 case 'I':
18964 // Validate & create a 12-bit signed immediate operand.
18965 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
18966 uint64_t CVal = C->getSExtValue();
18967 if (isInt<12>(CVal))
18968 Ops.push_back(
18969 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
18971 return;
18972 case 'J':
18973 // Validate & create an integer zero operand.
18974 if (isNullConstant(Op))
18975 Ops.push_back(
18976 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
18977 return;
18978 case 'K':
18979 // Validate & create a 5-bit unsigned immediate operand.
18980 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
18981 uint64_t CVal = C->getZExtValue();
18982 if (isUInt<5>(CVal))
18983 Ops.push_back(
18984 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
18986 return;
18987 case 'S':
18988 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
18989 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
18990 GA->getValueType(0)));
18991 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
18992 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
18993 BA->getValueType(0)));
18995 return;
18996 default:
18997 break;
19000 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
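// Illustrative (hypothetical) use of these constraints from C source:
//   asm volatile("addi %0, %1, %2" : "=r"(res) : "r"(a), "I"(42));
// 'I' accepts 42 because it fits in a signed 12-bit immediate; 'K' would
// instead require an unsigned 5-bit immediate and 'J' only the constant zero.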
19003 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
19004 Instruction *Inst,
19005 AtomicOrdering Ord) const {
19006 if (Subtarget.hasStdExtZtso()) {
19007 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19008 return Builder.CreateFence(Ord);
19009 return nullptr;
19012 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19013 return Builder.CreateFence(Ord);
19014 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
19015 return Builder.CreateFence(AtomicOrdering::Release);
19016 return nullptr;
19019 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
19020 Instruction *Inst,
19021 AtomicOrdering Ord) const {
19022 if (Subtarget.hasStdExtZtso()) {
19023 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19024 return Builder.CreateFence(Ord);
19025 return nullptr;
19028 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
19029 return Builder.CreateFence(AtomicOrdering::Acquire);
19030 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
19031 Ord == AtomicOrdering::SequentiallyConsistent)
19032 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
19033 return nullptr;
19036 TargetLowering::AtomicExpansionKind
19037 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19038 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
19039 // point operations can't be used in an lr/sc sequence without breaking the
19040 // forward-progress guarantee.
19041 if (AI->isFloatingPointOperation() ||
19042 AI->getOperation() == AtomicRMWInst::UIncWrap ||
19043 AI->getOperation() == AtomicRMWInst::UDecWrap)
19044 return AtomicExpansionKind::CmpXChg;
19046 // Don't expand forced atomics; we want to have __sync libcalls instead.
19047 if (Subtarget.hasForcedAtomics())
19048 return AtomicExpansionKind::None;
19050 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19051 if (Size == 8 || Size == 16)
19052 return AtomicExpansionKind::MaskedIntrinsic;
19053 return AtomicExpansionKind::None;
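// Select the riscv_masked_atomicrmw_* intrinsic that implements the given
// AtomicRMW operation for the current XLEN.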
19056 static Intrinsic::ID
19057 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
19058 if (XLen == 32) {
19059 switch (BinOp) {
19060 default:
19061 llvm_unreachable("Unexpected AtomicRMW BinOp");
19062 case AtomicRMWInst::Xchg:
19063 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
19064 case AtomicRMWInst::Add:
19065 return Intrinsic::riscv_masked_atomicrmw_add_i32;
19066 case AtomicRMWInst::Sub:
19067 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
19068 case AtomicRMWInst::Nand:
19069 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
19070 case AtomicRMWInst::Max:
19071 return Intrinsic::riscv_masked_atomicrmw_max_i32;
19072 case AtomicRMWInst::Min:
19073 return Intrinsic::riscv_masked_atomicrmw_min_i32;
19074 case AtomicRMWInst::UMax:
19075 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
19076 case AtomicRMWInst::UMin:
19077 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
19081 if (XLen == 64) {
19082 switch (BinOp) {
19083 default:
19084 llvm_unreachable("Unexpected AtomicRMW BinOp");
19085 case AtomicRMWInst::Xchg:
19086 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
19087 case AtomicRMWInst::Add:
19088 return Intrinsic::riscv_masked_atomicrmw_add_i64;
19089 case AtomicRMWInst::Sub:
19090 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
19091 case AtomicRMWInst::Nand:
19092 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
19093 case AtomicRMWInst::Max:
19094 return Intrinsic::riscv_masked_atomicrmw_max_i64;
19095 case AtomicRMWInst::Min:
19096 return Intrinsic::riscv_masked_atomicrmw_min_i64;
19097 case AtomicRMWInst::UMax:
19098 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
19099 case AtomicRMWInst::UMin:
19100 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
19104 llvm_unreachable("Unexpected XLen\n");
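/// Emit the intrinsic call that AtomicExpand uses to implement a part-word
/// atomicrmw. Illustrative sketch only (the exact mangled intrinsic name may
/// differ): an i8 "atomicrmw add" on RV64 becomes roughly
///   %res = call i64 @llvm.riscv.masked.atomicrmw.add.i64.p0(
///              ptr %aligned.addr, i64 %incr, i64 %mask, i64 %ordering)
/// with Min/Max additionally passing a sign-extension shift amount.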
19107 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
19108 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19109 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19110 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
19111 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
19112 // mask, as this produces better code than the LR/SC loop emitted by
19113 // int_riscv_masked_atomicrmw_xchg.
19114 if (AI->getOperation() == AtomicRMWInst::Xchg &&
19115 isa<ConstantInt>(AI->getValOperand())) {
19116 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
19117 if (CVal->isZero())
19118 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
19119 Builder.CreateNot(Mask, "Inv_Mask"),
19120 AI->getAlign(), Ord);
19121 if (CVal->isMinusOne())
19122 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
19123 AI->getAlign(), Ord);
19126 unsigned XLen = Subtarget.getXLen();
19127 Value *Ordering =
19128 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
19129 Type *Tys[] = {AlignedAddr->getType()};
19130 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
19131 AI->getModule(),
19132 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
19134 if (XLen == 64) {
19135 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
19136 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
19137 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
19140 Value *Result;
19142 // Must pass the shift amount needed to sign extend the loaded value prior
19143 // to performing a signed comparison for min/max. ShiftAmt is the number of
19144 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
19145 // is the number of bits to left+right shift the value in order to
19146 // sign-extend.
19147 if (AI->getOperation() == AtomicRMWInst::Min ||
19148 AI->getOperation() == AtomicRMWInst::Max) {
19149 const DataLayout &DL = AI->getModule()->getDataLayout();
19150 unsigned ValWidth =
19151 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
19152 Value *SextShamt =
19153 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
19154 Result = Builder.CreateCall(LrwOpScwLoop,
19155 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
19156 } else {
19157 Result =
19158 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
19161 if (XLen == 64)
19162 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
19163 return Result;
19166 TargetLowering::AtomicExpansionKind
19167 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
19168 AtomicCmpXchgInst *CI) const {
19169 // Don't expand forced atomics; we want to have __sync libcalls instead.
19170 if (Subtarget.hasForcedAtomics())
19171 return AtomicExpansionKind::None;
19173 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
19174 if (Size == 8 || Size == 16)
19175 return AtomicExpansionKind::MaskedIntrinsic;
19176 return AtomicExpansionKind::None;
19179 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
19180 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19181 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19182 unsigned XLen = Subtarget.getXLen();
19183 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
19184 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
19185 if (XLen == 64) {
19186 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
19187 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
19188 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
19189 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
19191 Type *Tys[] = {AlignedAddr->getType()};
19192 Function *MaskedCmpXchg =
19193 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
19194 Value *Result = Builder.CreateCall(
19195 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
19196 if (XLen == 64)
19197 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
19198 return Result;
19201 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
19202 EVT DataVT) const {
19203 // We have indexed loads for all legal index types. Indices are always
19204 // zero-extended.
19205 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
19206 isTypeLegal(Extend.getValueType()) &&
19207 isTypeLegal(Extend.getOperand(0).getValueType());
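// Only form FP_TO_*_SAT when the source FP type is handled in hardware
// (Zfhmin/F/D); otherwise leave the conversion to the generic expansion.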
19210 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
19211 EVT VT) const {
19212 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
19213 return false;
19215 switch (FPVT.getSimpleVT().SimpleTy) {
19216 case MVT::f16:
19217 return Subtarget.hasStdExtZfhmin();
19218 case MVT::f32:
19219 return Subtarget.hasStdExtF();
19220 case MVT::f64:
19221 return Subtarget.hasStdExtD();
19222 default:
19223 return false;
19227 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
19228 // If we are using the small code model, we can reduce the size of each jump
19229 // table entry to 4 bytes.
19230 if (Subtarget.is64Bit() && !isPositionIndependent() &&
19231 getTargetMachine().getCodeModel() == CodeModel::Small) {
19232 return MachineJumpTableInfo::EK_Custom32;
19234 return TargetLowering::getJumpTableEncoding();
19237 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
19238 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
19239 unsigned uid, MCContext &Ctx) const {
19240 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
19241 getTargetMachine().getCodeModel() == CodeModel::Small);
19242 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
19245 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
19246 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
19247 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
19248 // a power of two as well.
19249 // FIXME: This doesn't work for zve32, but that's already broken
19250 // elsewhere for the same reason.
19251 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
19252 static_assert(RISCV::RVVBitsPerBlock == 64,
19253 "RVVBitsPerBlock changed, audit needed");
19254 return true;
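// Try to match an ADD/SUB of a pointer and a constant as a pre/post-indexed
// address for the XTHeadMemIdx instructions. For example, an offset of 24
// (3 << 3) is encodable as sign_extend(imm5) << imm2, while 23 is not.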
19257 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
19258 SDValue &Offset,
19259 ISD::MemIndexedMode &AM,
19260 bool &IsInc,
19261 SelectionDAG &DAG) const {
19262 // Target does not support indexed loads.
19263 if (!Subtarget.hasVendorXTHeadMemIdx())
19264 return false;
19266 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
19267 return false;
19269 Base = Op->getOperand(0);
19270 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
19271 int64_t RHSC = RHS->getSExtValue();
19272 if (Op->getOpcode() == ISD::SUB)
19273 RHSC = -(uint64_t)RHSC;
19275 // The constants that can be encoded in the THeadMemIdx instructions
19276 // are of the form (sign_extend(imm5) << imm2).
19277 bool isLegalIndexedOffset = false;
19278 for (unsigned i = 0; i < 4; i++)
19279 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
19280 isLegalIndexedOffset = true;
19281 break;
19284 if (!isLegalIndexedOffset)
19285 return false;
19287 IsInc = (Op->getOpcode() == ISD::ADD);
19288 Offset = Op->getOperand(1);
19289 return true;
19292 return false;
19295 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19296 SDValue &Offset,
19297 ISD::MemIndexedMode &AM,
19298 SelectionDAG &DAG) const {
19299 EVT VT;
19300 SDValue Ptr;
19301 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19302 VT = LD->getMemoryVT();
19303 Ptr = LD->getBasePtr();
19304 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19305 VT = ST->getMemoryVT();
19306 Ptr = ST->getBasePtr();
19307 } else
19308 return false;
19310 bool IsInc;
19311 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
19312 return false;
19314 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
19315 return true;
19318 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19319 SDValue &Base,
19320 SDValue &Offset,
19321 ISD::MemIndexedMode &AM,
19322 SelectionDAG &DAG) const {
19323 EVT VT;
19324 SDValue Ptr;
19325 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19326 VT = LD->getMemoryVT();
19327 Ptr = LD->getBasePtr();
19328 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19329 VT = ST->getMemoryVT();
19330 Ptr = ST->getBasePtr();
19331 } else
19332 return false;
19334 bool IsInc;
19335 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
19336 return false;
19337 // Post-indexing updates the base, so it's not a valid transform
19338 // if that's not the same as the load's pointer.
19339 if (Ptr != Base)
19340 return false;
19342 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
19343 return true;
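// Fused multiply-add is a single instruction (fmadd/fmsub and friends)
// whenever the scalar type has hardware FP support, so it is never slower
// than a separate fmul followed by fadd.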
19346 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19347 EVT VT) const {
19348 EVT SVT = VT.getScalarType();
19350 if (!SVT.isSimple())
19351 return false;
19353 switch (SVT.getSimpleVT().SimpleTy) {
19354 case MVT::f16:
19355 return VT.isVector() ? Subtarget.hasVInstructionsF16()
19356 : Subtarget.hasStdExtZfhOrZhinx();
19357 case MVT::f32:
19358 return Subtarget.hasStdExtFOrZfinx();
19359 case MVT::f64:
19360 return Subtarget.hasStdExtDOrZdinx();
19361 default:
19362 break;
19365 return false;
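// For exception handling, the personality routine receives the exception
// pointer in a0 (X10) and the selector value in a1 (X11).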
19368 Register RISCVTargetLowering::getExceptionPointerRegister(
19369 const Constant *PersonalityFn) const {
19370 return RISCV::X10;
19373 Register RISCVTargetLowering::getExceptionSelectorRegister(
19374 const Constant *PersonalityFn) const {
19375 return RISCV::X11;
19378 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
19379 // Return false to suppress unnecessary extensions if a libcall argument or
19380 // return value is a float narrower than XLEN on a soft-float ABI.
19381 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
19382 Type.getSizeInBits() < Subtarget.getXLen()))
19383 return false;
19385 return true;
19388 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
19389 if (Subtarget.is64Bit() && Type == MVT::i32)
19390 return true;
19392 return IsSigned;
19395 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
19396 SDValue C) const {
19397 // Check integral scalar types.
19398 const bool HasExtMOrZmmul =
19399 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
19400 if (!VT.isScalarInteger())
19401 return false;
19403 // Omit the optimization if the subtarget has the M extension and the data
19404 // size exceeds XLen.
19405 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
19406 return false;
19408 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19410 // Break the MUL into a SLLI and an ADD/SUB.
19410 const APInt &Imm = ConstNode->getAPIntValue();
19411 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
19412 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
19413 return true;
19415 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
19416 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
19417 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
19418 (Imm - 8).isPowerOf2()))
19419 return true;
19421 // Break the MUL into two SLLI instructions and an ADD/SUB if Imm would
19422 // otherwise need a pair of LUI/ADDI.
19423 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
19424 ConstNode->hasOneUse()) {
19425 APInt ImmS = Imm.ashr(Imm.countr_zero());
19426 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
19427 (1 - ImmS).isPowerOf2())
19428 return true;
19432 return false;
19435 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19436 SDValue ConstNode) const {
19437 // Let the DAGCombiner decide for vectors.
19438 EVT VT = AddNode.getValueType();
19439 if (VT.isVector())
19440 return true;
19442 // Let the DAGCombiner decide for larger types.
19443 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
19444 return true;
19446 // It is not profitable if c1 fits in simm12 while c1*c2 does not.
19447 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19448 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
19449 const APInt &C1 = C1Node->getAPIntValue();
19450 const APInt &C2 = C2Node->getAPIntValue();
19451 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
19452 return false;
19454 // Default to true and let the DAGCombiner decide.
19455 return true;
19458 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
19459 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
19460 unsigned *Fast) const {
19461 if (!VT.isVector()) {
19462 if (Fast)
19463 *Fast = Subtarget.hasFastUnalignedAccess();
19464 return Subtarget.hasFastUnalignedAccess();
19467 // All vector implementations must support element-aligned accesses.
19468 EVT ElemVT = VT.getVectorElementType();
19469 if (Alignment >= ElemVT.getStoreSize()) {
19470 if (Fast)
19471 *Fast = 1;
19472 return true;
19475 // Note: We lower an unmasked unaligned vector access to an equally sized
19476 // e8 element type access. Given this, we effectively support all unmasked
19477 // misaligned accesses. TODO: Work through the codegen implications of
19478 // allowing such accesses to be formed and of considering them fast.
19479 if (Fast)
19480 *Fast = Subtarget.hasFastUnalignedAccess();
19481 return Subtarget.hasFastUnalignedAccess();
19485 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
19486 const AttributeList &FuncAttributes) const {
19487 if (!Subtarget.hasVInstructions())
19488 return MVT::Other;
19490 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
19491 return MVT::Other;
19493 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
19494 // has an expansion threshold, and we want the number of hardware memory
19495 // operations to correspond roughly to that threshold. LMUL>1 operations
19496 // are typically expanded linearly internally, and thus correspond to more
19497 // than one actual memory operation. Note that store merging and load
19498 // combining will typically form larger LMUL operations from the LMUL1
19499 // operations emitted here, and that's okay because combining isn't
19500 // introducing new memory operations; it's just merging existing ones.
19501 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen() / 8;
19502 if (Op.size() < MinVLenInBytes)
19503 // TODO: Figure out short memops. For the moment, do the default thing
19504 // which ends up using scalar sequences.
19505 return MVT::Other;
19507 // Prefer i8 for non-zero memset as it allows us to avoid materializing
19508 // a large scalar constant and instead use vmv.v.x/i to do the
19509 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
19510 // maximize the chance we can encode the size in the vsetvli.
19511 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
19512 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
19514 // Do we have sufficient alignment for our preferred VT? If not, fall back
19515 // to the largest size allowed by our alignment criteria.
19516 if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
19517 Align RequiredAlign(PreferredVT.getStoreSize());
19518 if (Op.isFixedDstAlign())
19519 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
19520 if (Op.isMemcpy())
19521 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
19522 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
19524 return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
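// Target hook for copying values into ABI registers: [b]f16 values passed in
// f32 registers are NaN-boxed, and scalable vector values are widened and/or
// bitcast into the register class's vector type.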
19527 bool RISCVTargetLowering::splitValueIntoRegisterParts(
19528 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19529 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19530 bool IsABIRegCopy = CC.has_value();
19531 EVT ValueVT = Val.getValueType();
19532 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
19533 PartVT == MVT::f32) {
19534 // Cast the [b]f16 to i16, extend to i32, fill the upper 16 bits with ones to
19535 // NaN-box the value, and bitcast the result to f32.
19536 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
19537 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
19538 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
19539 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
19540 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
19541 Parts[0] = Val;
19542 return true;
19545 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
19546 LLVMContext &Context = *DAG.getContext();
19547 EVT ValueEltVT = ValueVT.getVectorElementType();
19548 EVT PartEltVT = PartVT.getVectorElementType();
19549 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
19550 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
19551 if (PartVTBitSize % ValueVTBitSize == 0) {
19552 assert(PartVTBitSize >= ValueVTBitSize);
19553 // If the element types are different, first build a vector that has the
19554 // same element type as the value but the same total size as PartVT, then
19555 // bitcast that to PartVT.
19556 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
19557 // we insert the <vscale x 1 x i8> into a <vscale x 8 x i8> via
19558 // INSERT_SUBVECTOR and then bitcast the result to <vscale x 4 x i16>.
19559 if (ValueEltVT != PartEltVT) {
19560 if (PartVTBitSize > ValueVTBitSize) {
19561 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
19562 assert(Count != 0 && "The number of elements should not be zero.");
19563 EVT SameEltTypeVT =
19564 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
19565 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
19566 DAG.getUNDEF(SameEltTypeVT), Val,
19567 DAG.getVectorIdxConstant(0, DL));
19569 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
19570 } else {
19571 Val =
19572 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
19573 Val, DAG.getVectorIdxConstant(0, DL));
19575 Parts[0] = Val;
19576 return true;
19579 return false;
19582 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
19583 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
19584 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
19585 bool IsABIRegCopy = CC.has_value();
19586 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
19587 PartVT == MVT::f32) {
19588 SDValue Val = Parts[0];
19590 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
19591 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
19592 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
19593 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
19594 return Val;
19597 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
19598 LLVMContext &Context = *DAG.getContext();
19599 SDValue Val = Parts[0];
19600 EVT ValueEltVT = ValueVT.getVectorElementType();
19601 EVT PartEltVT = PartVT.getVectorElementType();
19602 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
19603 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
19604 if (PartVTBitSize % ValueVTBitSize == 0) {
19605 assert(PartVTBitSize >= ValueVTBitSize);
19606 EVT SameEltTypeVT = ValueVT;
19607 // If the element types are different, first bitcast the part to a vector
19608 // that has the same element type as the value but the same total size as
19609 // PartVT.
19610 // For example, to recover a <vscale x 1 x i8> value from <vscale x 4 x i16>,
19611 // we bitcast the <vscale x 4 x i16> to <vscale x 8 x i8> first and then
19612 // extract the <vscale x 1 x i8> subvector from it.
19613 if (ValueEltVT != PartEltVT) {
19614 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
19615 assert(Count != 0 && "The number of elements should not be zero.");
19616 SameEltTypeVT =
19617 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
19618 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
19620 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
19621 DAG.getVectorIdxConstant(0, DL));
19622 return Val;
19625 return SDValue();
19628 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
19629 // When aggressively optimizing for code size, we prefer to use a div
19630 // instruction, as it is usually smaller than the alternative sequence.
19631 // TODO: Add vector division?
19632 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
19633 return OptSize && !VT.isVector();
19636 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
19637 // Scalarizing zero_ext and sign_ext might prevent them from matching a
19638 // widening instruction in some situations.
19639 unsigned Opc = N->getOpcode();
19640 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
19641 return false;
19642 return true;
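// Build a pointer at a fixed byte offset from the thread pointer (tp).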
19645 static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
19646 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
19647 Function *ThreadPointerFunc =
19648 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
19649 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
19650 IRB.CreateCall(ThreadPointerFunc), Offset);
19653 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
19654 // Fuchsia provides a fixed TLS slot for the stack cookie.
19655 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
19656 if (Subtarget.isTargetFuchsia())
19657 return useTpOffset(IRB, -0x10);
19659 return TargetLowering::getIRStackGuard(IRB);
19662 bool RISCVTargetLowering::isLegalInterleavedAccessType(
19663 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
19664 const DataLayout &DL) const {
19665 EVT VT = getValueType(DL, VTy);
19666 // Don't lower vlseg/vsseg for vector types that can't be split.
19667 if (!isTypeLegal(VT))
19668 return false;
19670 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
19671 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
19672 Alignment))
19673 return false;
19675 MVT ContainerVT = VT.getSimpleVT();
19677 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
19678 if (!Subtarget.useRVVForFixedLengthVectors())
19679 return false;
19680 // Sometimes the interleaved access pass picks up splats as interleaves of
19681 // one element. Don't lower these.
19682 if (FVTy->getNumElements() < 2)
19683 return false;
19685 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
19688 // Need to make sure that EMUL * NFIELDS ≤ 8
19689 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
19690 if (Fractional)
19691 return true;
19692 return Factor * LMUL <= 8;
19695 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
19696 Align Alignment) const {
19697 if (!Subtarget.hasVInstructions())
19698 return false;
19700 // Only support fixed vectors if we know the minimum vector size.
19701 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
19702 return false;
19704 EVT ScalarType = DataType.getScalarType();
19705 if (!isLegalElementTypeForRVV(ScalarType))
19706 return false;
19708 if (!Subtarget.hasFastUnalignedAccess() &&
19709 Alignment < ScalarType.getStoreSize())
19710 return false;
19712 return true;
19715 static const Intrinsic::ID FixedVlsegIntrIds[] = {
19716 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
19717 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
19718 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
19719 Intrinsic::riscv_seg8_load};
19721 /// Lower an interleaved load into a vlsegN intrinsic.
19723 /// E.g. Lower an interleaved load (Factor = 2):
19724 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
19725 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
19726 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
19728 /// Into:
19729 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
19730 /// %ptr, i64 4)
19731 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
19732 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
19733 bool RISCVTargetLowering::lowerInterleavedLoad(
19734 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
19735 ArrayRef<unsigned> Indices, unsigned Factor) const {
19736 IRBuilder<> Builder(LI);
19738 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
19739 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
19740 LI->getPointerAddressSpace(),
19741 LI->getModule()->getDataLayout()))
19742 return false;
19744 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
19746 Function *VlsegNFunc =
19747 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
19748 {VTy, LI->getPointerOperandType(), XLenTy});
19750 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
19752 CallInst *VlsegN =
19753 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
19755 for (unsigned i = 0; i < Shuffles.size(); i++) {
19756 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
19757 Shuffles[i]->replaceAllUsesWith(SubVec);
19760 return true;
19763 static const Intrinsic::ID FixedVssegIntrIds[] = {
19764 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
19765 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
19766 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
19767 Intrinsic::riscv_seg8_store};
19769 /// Lower an interleaved store into a vssegN intrinsic.
19771 /// E.g. Lower an interleaved store (Factor = 3):
19772 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
19773 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
19774 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
19776 /// Into:
19777 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
19778 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
19779 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
19780 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
19781 /// %ptr, i32 4)
19783 /// Note that the new shufflevectors will be removed and we'll only generate one
19784 /// vsseg3 instruction in CodeGen.
19785 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
19786 ShuffleVectorInst *SVI,
19787 unsigned Factor) const {
19788 IRBuilder<> Builder(SI);
19789 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
19790 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
19791 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
19792 ShuffleVTy->getNumElements() / Factor);
19793 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
19794 SI->getPointerAddressSpace(),
19795 SI->getModule()->getDataLayout()))
19796 return false;
19798 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
19800 Function *VssegNFunc =
19801 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
19802 {VTy, SI->getPointerOperandType(), XLenTy});
19804 auto Mask = SVI->getShuffleMask();
19805 SmallVector<Value *, 10> Ops;
19807 for (unsigned i = 0; i < Factor; i++) {
19808 Value *Shuffle = Builder.CreateShuffleVector(
19809 SVI->getOperand(0), SVI->getOperand(1),
19810 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
19811 Ops.push_back(Shuffle);
19813 // This VL should be OK (should be executable in one vsseg instruction,
19814 // potentially under larger LMULs) because we checked that the fixed vector
19815 // type fits in isLegalInterleavedAccessType.
19816 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
19817 Ops.append({SI->getPointerOperand(), VL});
19819 Builder.CreateCall(VssegNFunc, Ops);
19821 return true;
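/// Lower an llvm.experimental.vector.deinterleave2 fed by a simple load into
/// a segment load: a riscv_seg2_load for fixed vectors, or a riscv_vlseg2
/// call (with poison passthru operands and an all-ones VL, i.e. VLMAX) for
/// scalable vectors.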
19824 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
19825 LoadInst *LI) const {
19826 assert(LI->isSimple());
19827 IRBuilder<> Builder(LI);
19829 // Only deinterleave2 supported at present.
19830 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
19831 return false;
19833 unsigned Factor = 2;
19835 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
19836 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
19838 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
19839 LI->getPointerAddressSpace(),
19840 LI->getModule()->getDataLayout()))
19841 return false;
19843 Function *VlsegNFunc;
19844 Value *VL;
19845 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
19846 SmallVector<Value *, 10> Ops;
19848 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
19849 VlsegNFunc = Intrinsic::getDeclaration(
19850 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
19851 {ResVTy, LI->getPointerOperandType(), XLenTy});
19852 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
19853 } else {
19854 static const Intrinsic::ID IntrIds[] = {
19855 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
19856 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
19857 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
19858 Intrinsic::riscv_vlseg8};
19860 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
19861 {ResVTy, XLenTy});
19862 VL = Constant::getAllOnesValue(XLenTy);
19863 Ops.append(Factor, PoisonValue::get(ResVTy));
19866 Ops.append({LI->getPointerOperand(), VL});
19868 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
19869 DI->replaceAllUsesWith(Vlseg);
19871 return true;
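/// Lower an llvm.experimental.vector.interleave2 whose result is stored by a
/// simple store into a segment store: a riscv_seg2_store for fixed vectors,
/// or a riscv_vsseg2 call with an all-ones (VLMAX) VL for scalable vectors.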
19874 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
19875 StoreInst *SI) const {
19876 assert(SI->isSimple());
19877 IRBuilder<> Builder(SI);
19879 // Only interleave2 supported at present.
19880 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
19881 return false;
19883 unsigned Factor = 2;
19885 VectorType *VTy = cast<VectorType>(II->getType());
19886 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
19888 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
19889 SI->getPointerAddressSpace(),
19890 SI->getModule()->getDataLayout()))
19891 return false;
19893 Function *VssegNFunc;
19894 Value *VL;
19895 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
19897 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
19898 VssegNFunc = Intrinsic::getDeclaration(
19899 SI->getModule(), FixedVssegIntrIds[Factor - 2],
19900 {InVTy, SI->getPointerOperandType(), XLenTy});
19901 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
19902 } else {
19903 static const Intrinsic::ID IntrIds[] = {
19904 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
19905 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
19906 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
19907 Intrinsic::riscv_vsseg8};
19909 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
19910 {InVTy, XLenTy});
19911 VL = Constant::getAllOnesValue(XLenTy);
19914 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
19915 SI->getPointerOperand(), VL});
19917 return true;
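// For KCFI, emit a KCFI_CHECK pseudo immediately before the indirect
// call/tail call; the check consumes the call target register and the
// expected type hash taken from the call's CFI type.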
19920 MachineInstr *
19921 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
19922 MachineBasicBlock::instr_iterator &MBBI,
19923 const TargetInstrInfo *TII) const {
19924 assert(MBBI->isCall() && MBBI->getCFIType() &&
19925 "Invalid call instruction for a KCFI check");
19926 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
19927 MBBI->getOpcode()));
19929 MachineOperand &Target = MBBI->getOperand(0);
19930 Target.setIsRenamable(false);
19932 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
19933 .addReg(Target.getReg())
19934 .addImm(MBBI->getCFIType())
19935 .getInstr();
19938 #define GET_REGISTER_MATCHER
19939 #include "RISCVGenAsmMatcher.inc"
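// Resolve a register name for llvm.read_register / llvm.write_register.
// Only registers that are reserved (always, or by the user, e.g. via
// -ffixed-<reg>) may be accessed this way.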
19941 Register
19942 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
19943 const MachineFunction &MF) const {
19944 Register Reg = MatchRegisterAltName(RegName);
19945 if (Reg == RISCV::NoRegister)
19946 Reg = MatchRegisterName(RegName);
19947 if (Reg == RISCV::NoRegister)
19948 report_fatal_error(
19949 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
19950 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
19951 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
19952 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
19953 StringRef(RegName) + "\"."));
19954 return Reg;
19957 MachineMemOperand::Flags
19958 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
19959 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
19961 if (NontemporalInfo == nullptr)
19962 return MachineMemOperand::MONone;
19964 // 1 (the default value) works as __RISCV_NTLH_ALL
19965 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
19966 // 3 -> __RISCV_NTLH_ALL_PRIVATE
19967 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
19968 // 5 -> __RISCV_NTLH_ALL
19969 int NontemporalLevel = 5;
19970 const MDNode *RISCVNontemporalInfo =
19971 I.getMetadata("riscv-nontemporal-domain");
19972 if (RISCVNontemporalInfo != nullptr)
19973 NontemporalLevel =
19974 cast<ConstantInt>(
19975 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
19976 ->getValue())
19977 ->getZExtValue();
19979 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
19980 "RISC-V target doesn't support this non-temporal domain.");
19982 NontemporalLevel -= 2;
19983 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
19984 if (NontemporalLevel & 0b1)
19985 Flags |= MONontemporalBit0;
19986 if (NontemporalLevel & 0b10)
19987 Flags |= MONontemporalBit1;
19989 return Flags;
19992 MachineMemOperand::Flags
19993 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
19995 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
19996 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
19997 TargetFlags |= (NodeFlags & MONontemporalBit0);
19998 TargetFlags |= (NodeFlags & MONontemporalBit1);
20000 return TargetFlags;
20003 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
20004 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
20005 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
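// Population count is a single instruction once Zbb (cpop/cpopw) or Zvbb
// (vcpop.v) is available.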
20008 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
20009 if (VT.isScalableVector())
20010 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
20011 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
20012 return true;
20013 return Subtarget.hasStdExtZbb() &&
20014 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
20017 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
20018 ISD::CondCode Cond) const {
20019 return isCtpopFast(VT) ? 0 : 1;
20022 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
20023 // At the moment, the only scalable instruction GISel knows how to lower is
20024 // a ret with a scalable argument.
20026 if (Inst.getType()->isScalableTy())
20027 return true;
20029 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
20030 if (Inst.getOperand(i)->getType()->isScalableTy() &&
20031 !isa<ReturnInst>(&Inst))
20032 return true;
20034 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
20035 if (AI->getAllocatedType()->isScalableTy())
20036 return true;
20039 return false;
20042 SDValue
20043 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
20044 SelectionDAG &DAG,
20045 SmallVectorImpl<SDNode *> &Created) const {
20046 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
20047 if (isIntDivCheap(N->getValueType(0), Attr))
20048 return SDValue(N, 0); // Lower SDIV as SDIV
20050 // Only perform this transform if short forward branch opt is supported.
20051 if (!Subtarget.hasShortForwardBranchOpt())
20052 return SDValue();
20053 EVT VT = N->getValueType(0);
20054 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
20055 return SDValue();
20057 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
20058 if (Divisor.sgt(2048) || Divisor.slt(-2048))
20059 return SDValue();
20060 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
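// With Zicond or XVentanaCondOps the conditional select itself is cheap, so
// only fold to a single-bit test when Zbs is unavailable and the AND mask is
// too large to be handled cheaply as an immediate.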
20063 bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
20064 EVT VT, const APInt &AndMask) const {
20065 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
20066 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
20067 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
20070 unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
20071 return Subtarget.getMinimumJumpTableEntries();
20074 namespace llvm::RISCVVIntrinsicsTable {
20076 #define GET_RISCVVIntrinsicsTable_IMPL
20077 #include "RISCVGenSearchableTables.inc"
20079 } // namespace llvm::RISCVVIntrinsicsTable