1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
31 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/DiagnosticInfo.h"
34 #include "llvm/IR/DiagnosticPrinter.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/InstructionCost.h"
43 #include "llvm/Support/KnownBits.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include <optional>
48 using namespace llvm;
50 #define DEBUG_TYPE "riscv-lower"
52 STATISTIC(NumTailCalls, "Number of tail calls");
54 static cl::opt<unsigned> ExtensionMaxWebSize(
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
60 static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
66 static cl::opt<unsigned> NumRepeatedDivisors(
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
72 static cl::opt<int>
73 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
78 static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
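// For illustration (a sketch, not taken from this file): the options above are
// hidden command-line flags whose names are DEBUG_TYPE ("riscv-lower") plus
// the given suffix, except for the experimental RV64 flag, which is spelled
// out explicitly. An invocation overriding a couple of the defaults could look
// roughly like:
//
//   llc -mtriple=riscv64 -mattr=+v \
//       -riscv-lower-fpimm-cost=3 -riscv-lower-ext-max-web-size=24 input.ll
//
// The numeric values here are made up; only the flag names follow from the
// definitions above.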
82 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
86 if (Subtarget.isRVE())
87 report_fatal_error("Codegen not yet implemented for RVE");
89 RISCVABI::ABI ABI = Subtarget.getTargetABI();
90 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
92 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
93 !Subtarget.hasStdExtF()) {
94 errs() << "Hard-float 'f' ABI can't be used for a target that "
95 "doesn't support the F instruction set extension (ignoring "
96 "target-abi)\n";
97 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
98 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
99 !Subtarget.hasStdExtD()) {
100 errs() << "Hard-float 'd' ABI can't be used for a target that "
101 "doesn't support the D instruction set extension (ignoring "
102 "target-abi)\n";
103 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
106 switch (ABI) {
107 default:
108 report_fatal_error("Don't know how to lower this ABI");
109 case RISCVABI::ABI_ILP32:
110 case RISCVABI::ABI_ILP32F:
111 case RISCVABI::ABI_ILP32D:
112 case RISCVABI::ABI_LP64:
113 case RISCVABI::ABI_LP64F:
114 case RISCVABI::ABI_LP64D:
115 break;
118 MVT XLenVT = Subtarget.getXLenVT();
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
125 if (Subtarget.hasStdExtZfhOrZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxOrZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
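// A brief sketch of what the Zdinx register-class choices above mean (inferred
// from the class names, not stated in this file): f64 values live in the
// integer register file, so RV64 can use a single GPR, while RV32 uses the
// GPRPF64 class, which models a pair of GPRs holding the two halves of the
// double. Roughly, an instruction such as fadd.d a0, a0, a2 on RV32+Zdinx
// would then operate on the (a0,a1) and (a2,a3) pairs.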
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
185 addRegisterClass(VT, RC);
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
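// For illustration (assuming the upstream value RISCV::RVVBitsPerBlock == 64):
// addRegClassForRVV picks the class from the type's known-minimum size, which
// corresponds to LMUL<=1 -> VR, LMUL=2 -> VRM2, LMUL=4 -> VRM4, LMUL=8 -> VRM8.
// For example, MVT::nxv4i32 has a known-minimum size of 128 bits and lands in
// VRM2, while MVT::nxv1i8 (8 bits, a fractional LMUL) still fits in VR.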
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
230 // Compute derived properties from the register classes.
231 computeRegisterProperties(STI.getRegisterInfo());
233 setStackPointerRegisterToSaveRestore(RISCV::X2);
235 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
239 MVT::i1, Promote);
241 // TODO: add all necessary setOperationAction calls.
242 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
244 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
245 setOperationAction(ISD::BR_CC, XLenVT, Expand);
246 if (RV64LegalI32 && Subtarget.is64Bit())
247 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
248 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
249 setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
250 if (RV64LegalI32 && Subtarget.is64Bit())
251 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
253 setCondCodeAction(ISD::SETLE, XLenVT, Expand);
254 setCondCodeAction(ISD::SETGT, XLenVT, Custom);
255 setCondCodeAction(ISD::SETGE, XLenVT, Expand);
256 setCondCodeAction(ISD::SETULE, XLenVT, Expand);
257 setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
258 setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
260 if (RV64LegalI32 && Subtarget.is64Bit())
261 setOperationAction(ISD::SETCC, MVT::i32, Promote);
263 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
265 setOperationAction(ISD::VASTART, MVT::Other, Custom);
266 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
268 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
270 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
272 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
273 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
275 if (Subtarget.is64Bit()) {
276 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
278 if (!RV64LegalI32) {
279 setOperationAction(ISD::LOAD, MVT::i32, Custom);
280 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
281 MVT::i32, Custom);
282 setOperationAction(ISD::SADDO, MVT::i32, Custom);
283 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
284 MVT::i32, Custom);
286 } else {
287 setLibcallName(
288 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
289 nullptr);
290 setLibcallName(RTLIB::MULO_I64, nullptr);
293 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
294 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
295 if (RV64LegalI32 && Subtarget.is64Bit())
296 setOperationAction(ISD::MUL, MVT::i32, Promote);
297 } else if (Subtarget.is64Bit()) {
298 setOperationAction(ISD::MUL, MVT::i128, Custom);
299 if (!RV64LegalI32)
300 setOperationAction(ISD::MUL, MVT::i32, Custom);
301 } else {
302 setOperationAction(ISD::MUL, MVT::i64, Custom);
305 if (!Subtarget.hasStdExtM()) {
306 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
307 XLenVT, Expand);
308 if (RV64LegalI32 && Subtarget.is64Bit())
309 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
310 Promote);
311 } else if (Subtarget.is64Bit()) {
312 if (!RV64LegalI32)
313 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
314 {MVT::i8, MVT::i16, MVT::i32}, Custom);
317 if (RV64LegalI32 && Subtarget.is64Bit()) {
318 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
319 setOperationAction(
320 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
321 Expand);
324 setOperationAction(
325 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
326 Expand);
328 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
329 Custom);
331 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
332 if (!RV64LegalI32 && Subtarget.is64Bit())
333 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
334 } else if (Subtarget.hasVendorXTHeadBb()) {
335 if (Subtarget.is64Bit())
336 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
337 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
338 } else {
339 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
340 if (RV64LegalI32 && Subtarget.is64Bit())
341 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
344 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
345 // pattern match it directly in isel.
346 setOperationAction(ISD::BSWAP, XLenVT,
347 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
348 Subtarget.hasVendorXTHeadBb())
349 ? Legal
350 : Expand);
351 if (RV64LegalI32 && Subtarget.is64Bit())
352 setOperationAction(ISD::BSWAP, MVT::i32,
353 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
354 Subtarget.hasVendorXTHeadBb())
355 ? Promote
356 : Expand);
358 // Zbkb can use rev8+brev8 to implement bitreverse.
359 setOperationAction(ISD::BITREVERSE, XLenVT,
360 Subtarget.hasStdExtZbkb() ? Custom : Expand);
362 if (Subtarget.hasStdExtZbb()) {
363 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
364 Legal);
365 if (RV64LegalI32 && Subtarget.is64Bit())
366 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
367 Promote);
369 if (Subtarget.is64Bit()) {
370 if (RV64LegalI32)
371 setOperationAction(ISD::CTTZ, MVT::i32, Legal);
372 else
373 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
375 } else {
376 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
377 if (RV64LegalI32 && Subtarget.is64Bit())
378 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
381 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
382 // We need the custom lowering to make sure that the resulting sequence
383 // for the 32bit case is efficient on 64bit targets.
384 if (Subtarget.is64Bit()) {
385 if (RV64LegalI32) {
386 setOperationAction(ISD::CTLZ, MVT::i32,
387 Subtarget.hasStdExtZbb() ? Legal : Promote);
388 if (!Subtarget.hasStdExtZbb())
389 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
390 } else
391 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
393 } else {
394 setOperationAction(ISD::CTLZ, XLenVT, Expand);
395 if (RV64LegalI32 && Subtarget.is64Bit())
396 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
399 if (!RV64LegalI32 && Subtarget.is64Bit())
400 setOperationAction(ISD::ABS, MVT::i32, Custom);
402 if (!Subtarget.hasVendorXTHeadCondMov())
403 setOperationAction(ISD::SELECT, XLenVT, Custom);
405 if (RV64LegalI32 && Subtarget.is64Bit())
406 setOperationAction(ISD::SELECT, MVT::i32, Promote);
408 static const unsigned FPLegalNodeTypes[] = {
409 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
410 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
411 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
412 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
413 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
414 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
416 static const ISD::CondCode FPCCToExpand[] = {
417 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
418 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
419 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
421 static const unsigned FPOpToExpand[] = {
422 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
423 ISD::FREM};
425 static const unsigned FPRndMode[] = {
426 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
427 ISD::FROUNDEVEN};
429 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
430 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
432 static const unsigned ZfhminZfbfminPromoteOps[] = {
433 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
434 ISD::FSUB, ISD::FMUL, ISD::FMA,
435 ISD::FDIV, ISD::FSQRT, ISD::FABS,
436 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
437 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
438 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
439 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
440 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
441 ISD::FROUNDEVEN, ISD::SELECT};
443 if (Subtarget.hasStdExtZfbfmin()) {
444 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
445 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
446 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
447 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
448 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
449 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
450 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
451 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
452 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
453 setOperationAction(ISD::FREM, MVT::bf16, Promote);
454 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
455 // DAGCombiner::visitFP_ROUND probably needs improvements first.
456 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
459 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
460 if (Subtarget.hasStdExtZfhOrZhinx()) {
461 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
462 setOperationAction(FPRndMode, MVT::f16,
463 Subtarget.hasStdExtZfa() ? Legal : Custom);
464 setOperationAction(ISD::SELECT, MVT::f16, Custom);
465 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
466 } else {
467 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
468 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
469 ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
470 MVT::f16, Legal);
471 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
472 // DAGCombiner::visitFP_ROUND probably needs improvements first.
473 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
476 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
477 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
478 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
479 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
480 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
482 setOperationAction(ISD::FNEARBYINT, MVT::f16,
483 Subtarget.hasStdExtZfa() ? Legal : Promote);
484 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
485 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
486 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
487 ISD::FLOG10},
488 MVT::f16, Promote);
490 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
491 // complete support for all operations in LegalizeDAG.
492 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
493 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
494 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
495 ISD::STRICT_FTRUNC},
496 MVT::f16, Promote);
498 // We need to custom promote this.
499 if (Subtarget.is64Bit())
500 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
502 if (!Subtarget.hasStdExtZfa())
503 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
506 if (Subtarget.hasStdExtFOrZfinx()) {
507 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
508 setOperationAction(FPRndMode, MVT::f32,
509 Subtarget.hasStdExtZfa() ? Legal : Custom);
510 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
511 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
512 setOperationAction(ISD::SELECT, MVT::f32, Custom);
513 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
514 setOperationAction(FPOpToExpand, MVT::f32, Expand);
515 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
516 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
517 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
518 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
519 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
520 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
521 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
522 Subtarget.isSoftFPABI() ? LibCall : Custom);
523 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
524 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
526 if (Subtarget.hasStdExtZfa())
527 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
528 else
529 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
532 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
533 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
535 if (Subtarget.hasStdExtDOrZdinx()) {
536 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
538 if (Subtarget.hasStdExtZfa()) {
539 setOperationAction(FPRndMode, MVT::f64, Legal);
540 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
541 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
542 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
543 } else {
544 if (Subtarget.is64Bit())
545 setOperationAction(FPRndMode, MVT::f64, Custom);
547 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
550 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
551 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
552 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
553 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
554 setOperationAction(ISD::SELECT, MVT::f64, Custom);
555 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
556 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
557 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
558 setOperationAction(FPOpToExpand, MVT::f64, Expand);
559 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
560 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
561 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
562 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
563 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
564 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
565 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
566 Subtarget.isSoftFPABI() ? LibCall : Custom);
567 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
568 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
571 if (Subtarget.is64Bit()) {
572 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
573 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
574 MVT::i32, Custom);
575 setOperationAction(ISD::LROUND, MVT::i32, Custom);
578 if (Subtarget.hasStdExtFOrZfinx()) {
579 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
580 Custom);
582 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
583 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
584 XLenVT, Legal);
586 if (RV64LegalI32 && Subtarget.is64Bit())
587 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
588 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
589 MVT::i32, Legal);
591 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
592 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
595 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
596 ISD::JumpTable},
597 XLenVT, Custom);
599 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
601 if (Subtarget.is64Bit())
602 setOperationAction(ISD::Constant, MVT::i64, Custom);
604 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
605 // Unfortunately this can't be determined just from the ISA naming string.
606 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
607 Subtarget.is64Bit() ? Legal : Custom);
609 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
610 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
611 if (Subtarget.is64Bit())
612 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
614 if (Subtarget.hasStdExtZicbop()) {
615 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
618 if (Subtarget.hasStdExtA()) {
619 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
620 setMinCmpXchgSizeInBits(32);
621 } else if (Subtarget.hasForcedAtomics()) {
622 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
623 } else {
624 setMaxAtomicSizeInBitsSupported(0);
627 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
629 setBooleanContents(ZeroOrOneBooleanContent);
631 if (Subtarget.hasVInstructions()) {
632 setBooleanVectorContents(ZeroOrOneBooleanContent);
634 setOperationAction(ISD::VSCALE, XLenVT, Custom);
635 if (RV64LegalI32 && Subtarget.is64Bit())
636 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
638 // RVV intrinsics may have illegal operands.
639 // We also need to custom legalize vmv.x.s.
640 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
641 ISD::INTRINSIC_VOID},
642 {MVT::i8, MVT::i16}, Custom);
643 if (Subtarget.is64Bit())
644 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
645 MVT::i32, Custom);
646 else
647 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
648 MVT::i64, Custom);
650 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
651 MVT::Other, Custom);
653 static const unsigned IntegerVPOps[] = {
654 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
655 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
656 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
657 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
658 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
659 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
660 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
661 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
662 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
663 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
664 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
665 ISD::VP_ABS};
667 static const unsigned FloatingPointVPOps[] = {
668 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
669 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
670 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
671 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
672 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
673 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
674 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
675 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
676 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
677 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS};
679 static const unsigned IntegerVecReduceOps[] = {
680 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
681 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
682 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
684 static const unsigned FloatingPointVecReduceOps[] = {
685 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
686 ISD::VECREDUCE_FMAX};
688 if (!Subtarget.is64Bit()) {
689 // We must custom-lower certain vXi64 operations on RV32 due to the vector
690 // element type being illegal.
691 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
692 MVT::i64, Custom);
694 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
696 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
697 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
698 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
699 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
700 MVT::i64, Custom);
703 for (MVT VT : BoolVecVTs) {
704 if (!isTypeLegal(VT))
705 continue;
707 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
709 // Mask VTs are custom-expanded into a series of standard nodes
710 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
711 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
712 ISD::SCALAR_TO_VECTOR},
713 VT, Custom);
715 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
716 Custom);
718 setOperationAction(ISD::SELECT, VT, Custom);
719 setOperationAction(
720 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
721 Expand);
723 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
725 setOperationAction(
726 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
727 Custom);
729 setOperationAction(
730 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
731 Custom);
733 // RVV has native int->float & float->int conversions where the
734 // element type sizes are within one power-of-two of each other. Any
735 // wider distances between type sizes have to be lowered as sequences
736 // which progressively narrow the gap in stages.
737 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
738 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
739 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
740 ISD::STRICT_FP_TO_UINT},
741 VT, Custom);
742 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
743 Custom);
745 // Expand all extending loads to types larger than this, and truncating
746 // stores from types larger than this.
747 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
748 setTruncStoreAction(OtherVT, VT, Expand);
749 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
750 VT, Expand);
753 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
754 ISD::VP_TRUNCATE, ISD::VP_SETCC},
755 VT, Custom);
757 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
758 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
760 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
762 setOperationPromotedToType(
763 ISD::VECTOR_SPLICE, VT,
764 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
767 for (MVT VT : IntVecVTs) {
768 if (!isTypeLegal(VT))
769 continue;
771 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
772 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
774 // Vectors implement MULHS/MULHU.
775 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
777 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
778 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
779 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
781 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
782 Legal);
784 // Custom-lower extensions and truncations from/to mask types.
785 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
786 VT, Custom);
788 // RVV has native int->float & float->int conversions where the
789 // element type sizes are within one power-of-two of each other. Any
790 // wider distances between type sizes have to be lowered as sequences
791 // which progressively narrow the gap in stages.
792 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
793 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
794 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
795 ISD::STRICT_FP_TO_UINT},
796 VT, Custom);
797 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
798 Custom);
799 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
800 setOperationAction(
801 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
803 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
804 // nodes which truncate by one power of two at a time.
805 setOperationAction(ISD::TRUNCATE, VT, Custom);
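// As a concrete sketch of the comment above: a truncate such as
//   nxv4i32 -> nxv4i8
// is emitted as a chain of RISCVISD::TRUNCATE_VECTOR_VL nodes that each halve
// the element width, i.e. nxv4i32 -> nxv4i16 -> nxv4i8, rather than as a
// single step.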
807 // Custom-lower insert/extract operations to simplify patterns.
808 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
809 Custom);
811 // Custom-lower reduction operations to set up the corresponding custom
812 // nodes' operands.
813 setOperationAction(IntegerVecReduceOps, VT, Custom);
815 setOperationAction(IntegerVPOps, VT, Custom);
817 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
819 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
820 VT, Custom);
822 setOperationAction(
823 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
824 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
825 VT, Custom);
827 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
828 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
829 VT, Custom);
831 setOperationAction(ISD::SELECT, VT, Custom);
832 setOperationAction(ISD::SELECT_CC, VT, Expand);
834 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
836 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
837 setTruncStoreAction(VT, OtherVT, Expand);
838 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
839 VT, Expand);
842 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
843 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
845 // Splice
846 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
848 if (Subtarget.hasStdExtZvkb()) {
849 setOperationAction(ISD::BSWAP, VT, Legal);
850 setOperationAction(ISD::VP_BSWAP, VT, Custom);
851 } else {
852 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
853 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
856 if (Subtarget.hasStdExtZvbb()) {
857 setOperationAction(ISD::BITREVERSE, VT, Legal);
858 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
859 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
860 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
861 VT, Custom);
862 } else {
863 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
864 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
865 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
866 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
867 VT, Expand);
869 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
870 // the range of f32.
871 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
872 if (isTypeLegal(FloatVT)) {
873 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
874 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
875 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
876 VT, Custom);
881 // Expand various CCs to best match the RVV ISA, which natively supports UNE
882 // but no other unordered comparisons, and supports all ordered comparisons
883 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
884 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
885 // and we pattern-match those back to the "original", swapping operands once
886 // more. This way we catch both operations and both "vf" and "fv" forms with
887 // fewer patterns.
888 static const ISD::CondCode VFPCCToExpand[] = {
889 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
890 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
891 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
892 };
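// As a sketch of the strategy described above: SETOGT is listed even though
// RVV could express it, so (setogt x, y) is first rewritten to the swapped
// form (setolt y, x) and then matched back during instruction selection,
// letting one set of patterns cover both the vector-vector and the
// vector-scalar ("vf"/"fv") operand orders.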
894 // TODO: support more ops.
895 static const unsigned ZvfhminPromoteOps[] = {
896 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
897 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
898 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
899 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
900 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
901 ISD::FMINIMUM};
903 // TODO: support more vp ops.
904 static const unsigned ZvfhminPromoteVPOps[] = {
905 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
906 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
907 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
908 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
909 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
910 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
911 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
912 ISD::VP_FNEARBYINT, ISD::VP_SETCC};
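// Rough sketch of how the promotion lists above are used: on Zvfhmin-only
// configurations an f16 vector operation such as
//   %r = fadd <vscale x 2 x half> %a, %b
// is promoted to <vscale x 2 x float>, computed there, and truncated back to
// half. nxv32f16 is handled separately below because nxv32f32 is not a legal
// type and the operation must be split instead.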
914 // Sets common operation actions on RVV floating-point vector types.
915 const auto SetCommonVFPActions = [&](MVT VT) {
916 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
917 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
918 // sizes are within one power-of-two of each other. Therefore conversions
919 // between vXf16 and vXf64 must be lowered as sequences which convert via
920 // vXf32.
921 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
922 // Custom-lower insert/extract operations to simplify patterns.
923 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
924 Custom);
925 // Expand various condition codes (explained above).
926 setCondCodeAction(VFPCCToExpand, VT, Expand);
928 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
929 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
931 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
932 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
933 ISD::IS_FPCLASS},
934 VT, Custom);
936 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
938 // Expand FP operations that need libcalls.
939 setOperationAction(ISD::FREM, VT, Expand);
940 setOperationAction(ISD::FPOW, VT, Expand);
941 setOperationAction(ISD::FCOS, VT, Expand);
942 setOperationAction(ISD::FSIN, VT, Expand);
943 setOperationAction(ISD::FSINCOS, VT, Expand);
944 setOperationAction(ISD::FEXP, VT, Expand);
945 setOperationAction(ISD::FEXP2, VT, Expand);
946 setOperationAction(ISD::FEXP10, VT, Expand);
947 setOperationAction(ISD::FLOG, VT, Expand);
948 setOperationAction(ISD::FLOG2, VT, Expand);
949 setOperationAction(ISD::FLOG10, VT, Expand);
951 setOperationAction(ISD::FCOPYSIGN, VT, Legal);
953 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
955 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
956 VT, Custom);
958 setOperationAction(
959 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
960 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
961 VT, Custom);
963 setOperationAction(ISD::SELECT, VT, Custom);
964 setOperationAction(ISD::SELECT_CC, VT, Expand);
966 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
967 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
968 VT, Custom);
970 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
971 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
973 setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
975 setOperationAction(FloatingPointVPOps, VT, Custom);
977 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
978 Custom);
979 setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
980 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
981 VT, Legal);
982 setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
983 ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
984 ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
985 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
986 VT, Custom);
989 // Sets common extload/truncstore actions on RVV floating-point vector
990 // types.
991 const auto SetCommonVFPExtLoadTruncStoreActions =
992 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
993 for (auto SmallVT : SmallerVTs) {
994 setTruncStoreAction(VT, SmallVT, Expand);
995 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
999 if (Subtarget.hasVInstructionsF16()) {
1000 for (MVT VT : F16VecVTs) {
1001 if (!isTypeLegal(VT))
1002 continue;
1003 SetCommonVFPActions(VT);
1005 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1006 for (MVT VT : F16VecVTs) {
1007 if (!isTypeLegal(VT))
1008 continue;
1009 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1010 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1011 Custom);
1012 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1013 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1014 Custom);
1015 setOperationAction(ISD::SELECT_CC, VT, Expand);
1016 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1017 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1018 VT, Custom);
1019 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1020 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1021 VT, Custom);
1022 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1023 // load/store
1024 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1026 // Custom split nxv32f16 since nxv32f32 is not legal.
1027 if (VT == MVT::nxv32f16) {
1028 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1029 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1030 continue;
1032 // Add more promote ops.
1033 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1034 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1035 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1039 if (Subtarget.hasVInstructionsF32()) {
1040 for (MVT VT : F32VecVTs) {
1041 if (!isTypeLegal(VT))
1042 continue;
1043 SetCommonVFPActions(VT);
1044 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1048 if (Subtarget.hasVInstructionsF64()) {
1049 for (MVT VT : F64VecVTs) {
1050 if (!isTypeLegal(VT))
1051 continue;
1052 SetCommonVFPActions(VT);
1053 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1054 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1058 if (Subtarget.useRVVForFixedLengthVectors()) {
1059 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1060 if (!useRVVForFixedLengthVectorVT(VT))
1061 continue;
1063 // By default everything must be expanded.
1064 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1065 setOperationAction(Op, VT, Expand);
1066 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1067 setTruncStoreAction(VT, OtherVT, Expand);
1068 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
1069 OtherVT, VT, Expand);
1072 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1073 // expansion to a build_vector of 0s.
1074 setOperationAction(ISD::UNDEF, VT, Custom);
1076 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1077 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1078 Custom);
1080 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
1081 Custom);
1083 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1084 VT, Custom);
1086 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1088 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1090 setOperationAction(ISD::SETCC, VT, Custom);
1092 setOperationAction(ISD::SELECT, VT, Custom);
1094 setOperationAction(ISD::TRUNCATE, VT, Custom);
1096 setOperationAction(ISD::BITCAST, VT, Custom);
1098 setOperationAction(
1099 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1100 Custom);
1102 setOperationAction(
1103 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1104 Custom);
1106 setOperationAction(
1107 {
1108 ISD::SINT_TO_FP,
1109 ISD::UINT_TO_FP,
1110 ISD::FP_TO_SINT,
1111 ISD::FP_TO_UINT,
1112 ISD::STRICT_SINT_TO_FP,
1113 ISD::STRICT_UINT_TO_FP,
1114 ISD::STRICT_FP_TO_SINT,
1115 ISD::STRICT_FP_TO_UINT,
1116 },
1117 VT, Custom);
1118 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1119 Custom);
1121 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1123 // Operations below are different between masks and other vectors.
1124 if (VT.getVectorElementType() == MVT::i1) {
1125 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1126 ISD::OR, ISD::XOR},
1127 VT, Custom);
1129 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1130 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1131 VT, Custom);
1132 continue;
1135 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1136 // it before type legalization for i64 vectors on RV32. It will then be
1137 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1138 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1139 // improvements first.
1140 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1141 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1142 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1145 setOperationAction(
1146 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1148 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1149 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1150 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1151 ISD::VP_SCATTER},
1152 VT, Custom);
1154 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1155 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1156 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1157 VT, Custom);
1159 setOperationAction(
1160 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1162 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1163 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1164 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1166 setOperationAction(
1167 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
1168 Custom);
1170 setOperationAction(ISD::VSELECT, VT, Custom);
1171 setOperationAction(ISD::SELECT_CC, VT, Expand);
1173 setOperationAction(
1174 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1176 // Custom-lower reduction operations to set up the corresponding custom
1177 // nodes' operands.
1178 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1179 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1180 ISD::VECREDUCE_UMIN},
1181 VT, Custom);
1183 setOperationAction(IntegerVPOps, VT, Custom);
1185 if (Subtarget.hasStdExtZvkb())
1186 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1188 if (Subtarget.hasStdExtZvbb()) {
1189 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1190 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1191 VT, Custom);
1192 } else {
1193 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
1194 // the range of f32.
1195 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1196 if (isTypeLegal(FloatVT))
1197 setOperationAction(
1198 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1199 Custom);
1203 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1204 // There are no extending loads or truncating stores.
1205 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1206 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1207 setTruncStoreAction(VT, InnerVT, Expand);
1210 if (!useRVVForFixedLengthVectorVT(VT))
1211 continue;
1213 // By default everything must be expanded.
1214 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1215 setOperationAction(Op, VT, Expand);
1217 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1218 // expansion to a build_vector of 0s.
1219 setOperationAction(ISD::UNDEF, VT, Custom);
1221 if (VT.getVectorElementType() == MVT::f16 &&
1222 !Subtarget.hasVInstructionsF16()) {
1223 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1224 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1225 Custom);
1226 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1227 setOperationAction(
1228 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1229 Custom);
1230 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1231 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1232 VT, Custom);
1233 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1234 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1235 VT, Custom);
1236 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1237 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1238 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1239 // Don't promote f16 vector operations to f32 if f32 vector type is
1240 // not legal.
1241 // TODO: could split the f16 vector into two vectors and do promotion.
1242 if (!isTypeLegal(F32VecVT))
1243 continue;
1244 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1245 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1246 continue;
1249 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1250 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1251 Custom);
1253 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1254 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1255 ISD::EXTRACT_VECTOR_ELT},
1256 VT, Custom);
1258 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1259 ISD::MGATHER, ISD::MSCATTER},
1260 VT, Custom);
1262 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1263 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1264 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1265 ISD::VP_SCATTER},
1266 VT, Custom);
1268 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1269 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1270 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1271 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1272 VT, Custom);
1274 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1276 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1277 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1278 VT, Custom);
1280 setCondCodeAction(VFPCCToExpand, VT, Expand);
1282 setOperationAction(ISD::SETCC, VT, Custom);
1283 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1284 setOperationAction(ISD::SELECT_CC, VT, Expand);
1286 setOperationAction(ISD::BITCAST, VT, Custom);
1288 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1290 setOperationAction(FloatingPointVPOps, VT, Custom);
1292 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1293 Custom);
1294 setOperationAction(
1295 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1296 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1297 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1298 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1299 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1300 VT, Custom);
1303 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1304 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1305 Custom);
1306 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1307 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1308 if (Subtarget.hasStdExtFOrZfinx())
1309 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1310 if (Subtarget.hasStdExtDOrZdinx())
1311 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1315 if (Subtarget.hasStdExtA()) {
1316 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1317 if (RV64LegalI32 && Subtarget.is64Bit())
1318 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1321 if (Subtarget.hasForcedAtomics()) {
1322 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1323 setOperationAction(
1324 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1325 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1326 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1327 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1328 XLenVT, LibCall);
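// Illustrative example (assuming the usual __sync libcall names): with
// +forced-atomics an operation like
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
// becomes a call to __sync_fetch_and_add_4 rather than an LR/SC or AMO
// sequence, since the A extension is unavailable in that configuration.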
1331 if (Subtarget.hasVendorXTHeadMemIdx()) {
1332 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1333 ++im) {
1334 setIndexedLoadAction(im, MVT::i8, Legal);
1335 setIndexedStoreAction(im, MVT::i8, Legal);
1336 setIndexedLoadAction(im, MVT::i16, Legal);
1337 setIndexedStoreAction(im, MVT::i16, Legal);
1338 setIndexedLoadAction(im, MVT::i32, Legal);
1339 setIndexedStoreAction(im, MVT::i32, Legal);
1341 if (Subtarget.is64Bit()) {
1342 setIndexedLoadAction(im, MVT::i64, Legal);
1343 setIndexedStoreAction(im, MVT::i64, Legal);
1348 // Function alignments.
1349 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1350 setMinFunctionAlignment(FunctionAlignment);
1351 // Set preferred alignments.
1352 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1353 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1355 setMinimumJumpTableEntries(5);
1357 // Jumps are expensive, compared to logic
1358 setJumpIsExpensive();
1360 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1361 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
1362 ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1363 if (Subtarget.is64Bit())
1364 setTargetDAGCombine(ISD::SRA);
1366 if (Subtarget.hasStdExtFOrZfinx())
1367 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1369 if (Subtarget.hasStdExtZbb())
1370 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1372 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1373 setTargetDAGCombine(ISD::TRUNCATE);
1375 if (Subtarget.hasStdExtZbkb())
1376 setTargetDAGCombine(ISD::BITREVERSE);
1377 if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1378 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1379 if (Subtarget.hasStdExtFOrZfinx())
1380 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1381 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1382 if (Subtarget.hasVInstructions())
1383 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1384 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1385 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1386 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
1387 if (Subtarget.hasVendorXTHeadMemPair())
1388 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1389 if (Subtarget.useRVVForFixedLengthVectors())
1390 setTargetDAGCombine(ISD::BITCAST);
1392 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1393 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1395 // Disable strict node mutation.
1396 IsStrictFPEnabled = true;
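// A minimal sketch (not upstream code) of how the tables built by this
// constructor are consumed later, assuming a SelectionDAG context "DAG":
//
//   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
//   if (TLI.getOperationAction(ISD::MUL, MVT::i64) == TargetLowering::Custom)
//     ; // e.g. RV32 with M/Zmmul routes i64 MUL through LowerOperation
//
// This only illustrates the effect of the setOperationAction calls above.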
1399 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1400 LLVMContext &Context,
1401 EVT VT) const {
1402 if (!VT.isVector())
1403 return getPointerTy(DL);
1404 if (Subtarget.hasVInstructions() &&
1405 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1406 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1407 return VT.changeVectorElementTypeToInteger();
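// For example: a scalar compare gets the pointer-sized integer as its result
// type (i64 on RV64), while with RVV enabled a compare of <vscale x 4 x i32>
// produces a <vscale x 4 x i1> mask type, per the logic above.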
1410 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1411 return Subtarget.getXLenVT();
1414 // Return false if we can lower get_vector_length to a vsetvli intrinsic.
1415 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1416 unsigned VF,
1417 bool IsScalable) const {
1418 if (!Subtarget.hasVInstructions())
1419 return true;
1421 if (!IsScalable)
1422 return true;
1424 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1425 return true;
1427 // Don't allow VF=1 if those types aren't legal.
1428 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1429 return true;
1431 // VLEN=32 support is incomplete.
1432 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1433 return true;
1435 // The maximum VF is for the smallest element width with LMUL=8.
1436 // VF must be a power of 2.
1437 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1438 return VF > MaxVF || !isPowerOf2_32(VF);
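// Worked example (assuming the upstream value RISCV::RVVBitsPerBlock == 64):
// MaxVF = (64 / 8) * 8 = 64. A scalable VF of 32 with an i32 or XLen trip
// count therefore returns false here (provided the earlier checks pass) and
// get_vector_length can be lowered to vsetvli, while VF = 128 or any
// non-power-of-2 VF returns true and takes the generic expansion.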
1441 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1442 const CallInst &I,
1443 MachineFunction &MF,
1444 unsigned Intrinsic) const {
1445 auto &DL = I.getModule()->getDataLayout();
1447 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1448 bool IsUnitStrided) {
1449 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1450 Info.ptrVal = I.getArgOperand(PtrOp);
1451 Type *MemTy;
1452 if (IsStore) {
1453 // Store value is the first operand.
1454 MemTy = I.getArgOperand(0)->getType();
1455 } else {
1456 // Use the return type. If it's a segment load, the return type is a struct.
1457 MemTy = I.getType();
1458 if (MemTy->isStructTy())
1459 MemTy = MemTy->getStructElementType(0);
1461 if (!IsUnitStrided)
1462 MemTy = MemTy->getScalarType();
1464 Info.memVT = getValueType(DL, MemTy);
1465 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1466 Info.size = MemoryLocation::UnknownSize;
1467 Info.flags |=
1468 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1469 return true;
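// Sketch of the distinction made above: for a unit-strided access such as
// riscv_vle the full vector type is recorded as memVT, whereas strided or
// indexed forms (IsUnitStrided == false) record only the scalar element type,
// because consecutive elements need not be contiguous in memory; the
// alignment is derived from the element size in both cases.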
1472 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1473 Info.flags |= MachineMemOperand::MONonTemporal;
1475 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1476 switch (Intrinsic) {
1477 default:
1478 return false;
1479 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1480 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1481 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1482 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1483 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1484 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1485 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1486 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1487 case Intrinsic::riscv_masked_cmpxchg_i32:
1488 Info.opc = ISD::INTRINSIC_W_CHAIN;
1489 Info.memVT = MVT::i32;
1490 Info.ptrVal = I.getArgOperand(0);
1491 Info.offset = 0;
1492 Info.align = Align(4);
1493 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1494 MachineMemOperand::MOVolatile;
1495 return true;
1496 case Intrinsic::riscv_masked_strided_load:
1497 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1498 /*IsUnitStrided*/ false);
1499 case Intrinsic::riscv_masked_strided_store:
1500 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1501 /*IsUnitStrided*/ false);
1502 case Intrinsic::riscv_seg2_load:
1503 case Intrinsic::riscv_seg3_load:
1504 case Intrinsic::riscv_seg4_load:
1505 case Intrinsic::riscv_seg5_load:
1506 case Intrinsic::riscv_seg6_load:
1507 case Intrinsic::riscv_seg7_load:
1508 case Intrinsic::riscv_seg8_load:
1509 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1510 /*IsUnitStrided*/ false);
1511 case Intrinsic::riscv_seg2_store:
1512 case Intrinsic::riscv_seg3_store:
1513 case Intrinsic::riscv_seg4_store:
1514 case Intrinsic::riscv_seg5_store:
1515 case Intrinsic::riscv_seg6_store:
1516 case Intrinsic::riscv_seg7_store:
1517 case Intrinsic::riscv_seg8_store:
1518 // Operands are (vec, ..., vec, ptr, vl)
1519 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1520 /*IsStore*/ true,
1521 /*IsUnitStrided*/ false);
1522 case Intrinsic::riscv_vle:
1523 case Intrinsic::riscv_vle_mask:
1524 case Intrinsic::riscv_vleff:
1525 case Intrinsic::riscv_vleff_mask:
1526 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1527 /*IsStore*/ false,
1528 /*IsUnitStrided*/ true);
1529 case Intrinsic::riscv_vse:
1530 case Intrinsic::riscv_vse_mask:
1531 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1532 /*IsStore*/ true,
1533 /*IsUnitStrided*/ true);
1534 case Intrinsic::riscv_vlse:
1535 case Intrinsic::riscv_vlse_mask:
1536 case Intrinsic::riscv_vloxei:
1537 case Intrinsic::riscv_vloxei_mask:
1538 case Intrinsic::riscv_vluxei:
1539 case Intrinsic::riscv_vluxei_mask:
1540 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1541 /*IsStore*/ false,
1542 /*IsUnitStrided*/ false);
1543 case Intrinsic::riscv_vsse:
1544 case Intrinsic::riscv_vsse_mask:
1545 case Intrinsic::riscv_vsoxei:
1546 case Intrinsic::riscv_vsoxei_mask:
1547 case Intrinsic::riscv_vsuxei:
1548 case Intrinsic::riscv_vsuxei_mask:
1549 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1550 /*IsStore*/ true,
1551 /*IsUnitStrided*/ false);
1552 case Intrinsic::riscv_vlseg2:
1553 case Intrinsic::riscv_vlseg3:
1554 case Intrinsic::riscv_vlseg4:
1555 case Intrinsic::riscv_vlseg5:
1556 case Intrinsic::riscv_vlseg6:
1557 case Intrinsic::riscv_vlseg7:
1558 case Intrinsic::riscv_vlseg8:
1559 case Intrinsic::riscv_vlseg2ff:
1560 case Intrinsic::riscv_vlseg3ff:
1561 case Intrinsic::riscv_vlseg4ff:
1562 case Intrinsic::riscv_vlseg5ff:
1563 case Intrinsic::riscv_vlseg6ff:
1564 case Intrinsic::riscv_vlseg7ff:
1565 case Intrinsic::riscv_vlseg8ff:
1566 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1567 /*IsStore*/ false,
1568 /*IsUnitStrided*/ false);
1569 case Intrinsic::riscv_vlseg2_mask:
1570 case Intrinsic::riscv_vlseg3_mask:
1571 case Intrinsic::riscv_vlseg4_mask:
1572 case Intrinsic::riscv_vlseg5_mask:
1573 case Intrinsic::riscv_vlseg6_mask:
1574 case Intrinsic::riscv_vlseg7_mask:
1575 case Intrinsic::riscv_vlseg8_mask:
1576 case Intrinsic::riscv_vlseg2ff_mask:
1577 case Intrinsic::riscv_vlseg3ff_mask:
1578 case Intrinsic::riscv_vlseg4ff_mask:
1579 case Intrinsic::riscv_vlseg5ff_mask:
1580 case Intrinsic::riscv_vlseg6ff_mask:
1581 case Intrinsic::riscv_vlseg7ff_mask:
1582 case Intrinsic::riscv_vlseg8ff_mask:
1583 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1584 /*IsStore*/ false,
1585 /*IsUnitStrided*/ false);
1586 case Intrinsic::riscv_vlsseg2:
1587 case Intrinsic::riscv_vlsseg3:
1588 case Intrinsic::riscv_vlsseg4:
1589 case Intrinsic::riscv_vlsseg5:
1590 case Intrinsic::riscv_vlsseg6:
1591 case Intrinsic::riscv_vlsseg7:
1592 case Intrinsic::riscv_vlsseg8:
1593 case Intrinsic::riscv_vloxseg2:
1594 case Intrinsic::riscv_vloxseg3:
1595 case Intrinsic::riscv_vloxseg4:
1596 case Intrinsic::riscv_vloxseg5:
1597 case Intrinsic::riscv_vloxseg6:
1598 case Intrinsic::riscv_vloxseg7:
1599 case Intrinsic::riscv_vloxseg8:
1600 case Intrinsic::riscv_vluxseg2:
1601 case Intrinsic::riscv_vluxseg3:
1602 case Intrinsic::riscv_vluxseg4:
1603 case Intrinsic::riscv_vluxseg5:
1604 case Intrinsic::riscv_vluxseg6:
1605 case Intrinsic::riscv_vluxseg7:
1606 case Intrinsic::riscv_vluxseg8:
1607 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1608 /*IsStore*/ false,
1609 /*IsUnitStrided*/ false);
1610 case Intrinsic::riscv_vlsseg2_mask:
1611 case Intrinsic::riscv_vlsseg3_mask:
1612 case Intrinsic::riscv_vlsseg4_mask:
1613 case Intrinsic::riscv_vlsseg5_mask:
1614 case Intrinsic::riscv_vlsseg6_mask:
1615 case Intrinsic::riscv_vlsseg7_mask:
1616 case Intrinsic::riscv_vlsseg8_mask:
1617 case Intrinsic::riscv_vloxseg2_mask:
1618 case Intrinsic::riscv_vloxseg3_mask:
1619 case Intrinsic::riscv_vloxseg4_mask:
1620 case Intrinsic::riscv_vloxseg5_mask:
1621 case Intrinsic::riscv_vloxseg6_mask:
1622 case Intrinsic::riscv_vloxseg7_mask:
1623 case Intrinsic::riscv_vloxseg8_mask:
1624 case Intrinsic::riscv_vluxseg2_mask:
1625 case Intrinsic::riscv_vluxseg3_mask:
1626 case Intrinsic::riscv_vluxseg4_mask:
1627 case Intrinsic::riscv_vluxseg5_mask:
1628 case Intrinsic::riscv_vluxseg6_mask:
1629 case Intrinsic::riscv_vluxseg7_mask:
1630 case Intrinsic::riscv_vluxseg8_mask:
1631 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1632 /*IsStore*/ false,
1633 /*IsUnitStrided*/ false);
1634 case Intrinsic::riscv_vsseg2:
1635 case Intrinsic::riscv_vsseg3:
1636 case Intrinsic::riscv_vsseg4:
1637 case Intrinsic::riscv_vsseg5:
1638 case Intrinsic::riscv_vsseg6:
1639 case Intrinsic::riscv_vsseg7:
1640 case Intrinsic::riscv_vsseg8:
1641 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1642 /*IsStore*/ true,
1643 /*IsUnitStrided*/ false);
1644 case Intrinsic::riscv_vsseg2_mask:
1645 case Intrinsic::riscv_vsseg3_mask:
1646 case Intrinsic::riscv_vsseg4_mask:
1647 case Intrinsic::riscv_vsseg5_mask:
1648 case Intrinsic::riscv_vsseg6_mask:
1649 case Intrinsic::riscv_vsseg7_mask:
1650 case Intrinsic::riscv_vsseg8_mask:
1651 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1652 /*IsStore*/ true,
1653 /*IsUnitStrided*/ false);
1654 case Intrinsic::riscv_vssseg2:
1655 case Intrinsic::riscv_vssseg3:
1656 case Intrinsic::riscv_vssseg4:
1657 case Intrinsic::riscv_vssseg5:
1658 case Intrinsic::riscv_vssseg6:
1659 case Intrinsic::riscv_vssseg7:
1660 case Intrinsic::riscv_vssseg8:
1661 case Intrinsic::riscv_vsoxseg2:
1662 case Intrinsic::riscv_vsoxseg3:
1663 case Intrinsic::riscv_vsoxseg4:
1664 case Intrinsic::riscv_vsoxseg5:
1665 case Intrinsic::riscv_vsoxseg6:
1666 case Intrinsic::riscv_vsoxseg7:
1667 case Intrinsic::riscv_vsoxseg8:
1668 case Intrinsic::riscv_vsuxseg2:
1669 case Intrinsic::riscv_vsuxseg3:
1670 case Intrinsic::riscv_vsuxseg4:
1671 case Intrinsic::riscv_vsuxseg5:
1672 case Intrinsic::riscv_vsuxseg6:
1673 case Intrinsic::riscv_vsuxseg7:
1674 case Intrinsic::riscv_vsuxseg8:
1675 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1676 /*IsStore*/ true,
1677 /*IsUnitStrided*/ false);
1678 case Intrinsic::riscv_vssseg2_mask:
1679 case Intrinsic::riscv_vssseg3_mask:
1680 case Intrinsic::riscv_vssseg4_mask:
1681 case Intrinsic::riscv_vssseg5_mask:
1682 case Intrinsic::riscv_vssseg6_mask:
1683 case Intrinsic::riscv_vssseg7_mask:
1684 case Intrinsic::riscv_vssseg8_mask:
1685 case Intrinsic::riscv_vsoxseg2_mask:
1686 case Intrinsic::riscv_vsoxseg3_mask:
1687 case Intrinsic::riscv_vsoxseg4_mask:
1688 case Intrinsic::riscv_vsoxseg5_mask:
1689 case Intrinsic::riscv_vsoxseg6_mask:
1690 case Intrinsic::riscv_vsoxseg7_mask:
1691 case Intrinsic::riscv_vsoxseg8_mask:
1692 case Intrinsic::riscv_vsuxseg2_mask:
1693 case Intrinsic::riscv_vsuxseg3_mask:
1694 case Intrinsic::riscv_vsuxseg4_mask:
1695 case Intrinsic::riscv_vsuxseg5_mask:
1696 case Intrinsic::riscv_vsuxseg6_mask:
1697 case Intrinsic::riscv_vsuxseg7_mask:
1698 case Intrinsic::riscv_vsuxseg8_mask:
1699 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1700 /*IsStore*/ true,
1701 /*IsUnitStrided*/ false);
1705 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1706 const AddrMode &AM, Type *Ty,
1707 unsigned AS,
1708 Instruction *I) const {
1709 // No global is ever allowed as a base.
1710 if (AM.BaseGV)
1711 return false;
1713 // RVV instructions only support register addressing.
1714 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1715 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1717 // Require a 12-bit signed offset.
1718 if (!isInt<12>(AM.BaseOffs))
1719 return false;
1721 switch (AM.Scale) {
1722 case 0: // "r+i" or just "i", depending on HasBaseReg.
1723 break;
1724 case 1:
1725 if (!AM.HasBaseReg) // allow "r+i".
1726 break;
1727 return false; // disallow "r+r" or "r+r+i".
1728 default:
1729 return false;
1732 return true;
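// Illustrative summary of the cases accepted above: a base register plus a
// 12-bit signed offset (e.g. "lw a0, 2044(a1)") and a lone register or
// immediate are legal; reg+reg and any scaled-index form are rejected, and
// RVV vector accesses additionally require a plain register base with no
// offset.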
1735 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1736 return isInt<12>(Imm);
1739 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1740 return isInt<12>(Imm);
1743 // On RV32, 64-bit integers are split into their high and low parts and held
1744 // in two different registers, so the trunc is free since the low register can
1745 // just be used.
1746 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1747 // isTruncateFree?
1748 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1749 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1750 return false;
1751 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1752 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1753 return (SrcBits == 64 && DestBits == 32);
1756 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1757 // We consider i64->i32 free on RV64 since we have good selection of W
1758 // instructions that make promoting operations back to i64 free in many cases.
1759 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1760 !DstVT.isInteger())
1761 return false;
1762 unsigned SrcBits = SrcVT.getSizeInBits();
1763 unsigned DestBits = DstVT.getSizeInBits();
1764 return (SrcBits == 64 && DestBits == 32);
1767 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1768 // Zexts are free if they can be combined with a load.
1769 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1770 // poorly with type legalization of compares preferring sext.
1771 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1772 EVT MemVT = LD->getMemoryVT();
1773 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1774 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1775 LD->getExtensionType() == ISD::ZEXTLOAD))
1776 return true;
1779 return TargetLowering::isZExtFree(Val, VT2);
1782 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1783 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1786 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1787 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1790 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1791 return Subtarget.hasStdExtZbb();
1794 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1795 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1798 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1799 const Instruction &AndI) const {
1800 // We expect to be able to match a bit extraction instruction if the Zbs
1801 // extension is supported and the mask is a power of two. However, we
1802 // conservatively return false if the mask would fit in an ANDI instruction,
1803 // on the basis that it's possible the sinking+duplication of the AND in
1804 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1805 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1806 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1807 return false;
1808 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1809 if (!Mask)
1810 return false;
1811 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
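// Example (illustrative): with Zbs, '(x & 0x800) == 0' benefits since 0x800
// is a power of two that does not fit an ANDI immediate, so it can become
// BEXTI+SEQZ; '(x & 0x400) == 0' is left alone because ANDI already handles
// that mask.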
1814 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1815 EVT VT = Y.getValueType();
1817 // FIXME: Support vectors once we have tests.
1818 if (VT.isVector())
1819 return false;
1821 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1822 !isa<ConstantSDNode>(Y);
1825 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1826 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1827 if (Subtarget.hasStdExtZbs())
1828 return X.getValueType().isScalarInteger();
1829 auto *C = dyn_cast<ConstantSDNode>(Y);
2830 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1831 if (Subtarget.hasVendorXTHeadBs())
1832 return C != nullptr;
1833 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1834 return C && C->getAPIntValue().ule(10);
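// Example (illustrative): testing bit 20 of a scalar is considered cheap with
// Zbs (BEXT/BEXTI plus SEQZ/SNEZ) or, for a constant bit index, with XTheadBs
// (TH.TST); otherwise only bit positions up to 10 qualify, since those masks
// still fit in an ANDI immediate.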
1837 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1838 EVT VT) const {
1839 // Only enable for rvv.
1840 if (!VT.isVector() || !Subtarget.hasVInstructions())
1841 return false;
1843 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1844 return false;
1846 return true;
1849 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1850 Type *Ty) const {
1851 assert(Ty->isIntegerTy());
1853 unsigned BitSize = Ty->getIntegerBitWidth();
1854 if (BitSize > Subtarget.getXLen())
1855 return false;
1857 // Fast path, assume 32-bit immediates are cheap.
1858 int64_t Val = Imm.getSExtValue();
1859 if (isInt<32>(Val))
1860 return true;
1862 // A constant pool entry may be more aligned than the load we're trying to
1863 // replace. If we don't support unaligned scalar mem, prefer the constant
1864 // pool.
1865 // TODO: Can the caller pass down the alignment?
1866 if (!Subtarget.enableUnalignedScalarMem())
1867 return true;
1869 // Prefer to keep the load if it would require many instructions.
1870 // This uses the same threshold we use for constant pools but doesn't
1871 // check useConstantPoolForLargeInts.
1872 // TODO: Should we keep the load only when we're definitely going to emit a
1873 // constant pool?
1875 RISCVMatInt::InstSeq Seq =
1876 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1877 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1880 bool RISCVTargetLowering::
1881 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1882 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1883 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1884 SelectionDAG &DAG) const {
1885 // One interesting pattern that we'd want to form is 'bit extract':
1886 // ((1 >> Y) & 1) ==/!= 0
1887 // But we also need to be careful not to try to reverse that fold.
1889 // Is this '((1 >> Y) & 1)'?
1890 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1891 return false; // Keep the 'bit extract' pattern.
1893 // Will this be '((1 >> Y) & 1)' after the transform?
1894 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1895 return true; // Do form the 'bit extract' pattern.
1897 // If 'X' is a constant, and we transform, then we will immediately
1898 // try to undo the fold, thus causing endless combine loop.
1899 // So only do the transform if X is not a constant. This matches the default
1900 // implementation of this function.
1901 return !XC;
1904 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1905 switch (Opcode) {
1906 case Instruction::Add:
1907 case Instruction::Sub:
1908 case Instruction::Mul:
1909 case Instruction::And:
1910 case Instruction::Or:
1911 case Instruction::Xor:
1912 case Instruction::FAdd:
1913 case Instruction::FSub:
1914 case Instruction::FMul:
1915 case Instruction::FDiv:
1916 case Instruction::ICmp:
1917 case Instruction::FCmp:
1918 return true;
1919 case Instruction::Shl:
1920 case Instruction::LShr:
1921 case Instruction::AShr:
1922 case Instruction::UDiv:
1923 case Instruction::SDiv:
1924 case Instruction::URem:
1925 case Instruction::SRem:
1926 return Operand == 1;
1927 default:
1928 return false;
1933 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1934 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1935 return false;
1937 if (canSplatOperand(I->getOpcode(), Operand))
1938 return true;
1940 auto *II = dyn_cast<IntrinsicInst>(I);
1941 if (!II)
1942 return false;
1944 switch (II->getIntrinsicID()) {
1945 case Intrinsic::fma:
1946 case Intrinsic::vp_fma:
1947 return Operand == 0 || Operand == 1;
1948 case Intrinsic::vp_shl:
1949 case Intrinsic::vp_lshr:
1950 case Intrinsic::vp_ashr:
1951 case Intrinsic::vp_udiv:
1952 case Intrinsic::vp_sdiv:
1953 case Intrinsic::vp_urem:
1954 case Intrinsic::vp_srem:
1955 return Operand == 1;
1956 // These intrinsics are commutative.
1957 case Intrinsic::vp_add:
1958 case Intrinsic::vp_mul:
1959 case Intrinsic::vp_and:
1960 case Intrinsic::vp_or:
1961 case Intrinsic::vp_xor:
1962 case Intrinsic::vp_fadd:
1963 case Intrinsic::vp_fmul:
1964 case Intrinsic::vp_icmp:
1965 case Intrinsic::vp_fcmp:
1966 // These intrinsics have 'vr' versions.
1967 case Intrinsic::vp_sub:
1968 case Intrinsic::vp_fsub:
1969 case Intrinsic::vp_fdiv:
1970 return Operand == 0 || Operand == 1;
1971 default:
1972 return false;
1976 /// Check if sinking \p I's operands to I's basic block is profitable, because
1977 /// the operands can be folded into a target instruction, e.g.
1978 /// splats of scalars can fold into vector instructions.
1979 bool RISCVTargetLowering::shouldSinkOperands(
1980 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1981 using namespace llvm::PatternMatch;
1983 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1984 return false;
1986 for (auto OpIdx : enumerate(I->operands())) {
1987 if (!canSplatOperand(I, OpIdx.index()))
1988 continue;
1990 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1991 // Make sure we are not already sinking this operand
1992 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1993 continue;
1995 // We are looking for a splat that can be sunk.
1996 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1997 m_Undef(), m_ZeroMask())))
1998 continue;
2000 // Don't sink i1 splats.
2001 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2002 continue;
2004 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2005 // and vector registers
2006 for (Use &U : Op->uses()) {
2007 Instruction *Insn = cast<Instruction>(U.getUser());
2008 if (!canSplatOperand(Insn, U.getOperandNo()))
2009 return false;
2012 Ops.push_back(&Op->getOperandUse(0));
2013 Ops.push_back(&OpIdx.value());
2015 return true;
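// Sketch of a sinkable operand (illustrative IR, names are made up): the
// splat feeding the add matches the shuffle-of-insertelement pattern above
// and is not an i1 splat, so the shuffle's use of the insertelement and the
// add's use of the shuffle are both recorded in Ops:
//   %head = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <vscale x 4 x i32> %head,
//                          <vscale x 4 x i32> poison,
//                          <vscale x 4 x i32> zeroinitializer
//   %r = add <vscale x 4 x i32> %v, %splat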
2018 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2019 unsigned Opc = VecOp.getOpcode();
2021 // Assume target opcodes can't be scalarized.
2022 // TODO - do we have any exceptions?
2023 if (Opc >= ISD::BUILTIN_OP_END)
2024 return false;
2026 // If the vector op is not supported, try to convert to scalar.
2027 EVT VecVT = VecOp.getValueType();
2028 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2029 return true;
2031 // If the vector op is supported, but the scalar op is not, the transform may
2032 // not be worthwhile.
2033 // Permit a vector binary operation to be converted to a scalar binary
2034 // operation that is custom lowered with an illegal type.
2035 EVT ScalarVT = VecVT.getScalarType();
2036 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2037 isOperationCustom(Opc, ScalarVT);
2040 bool RISCVTargetLowering::isOffsetFoldingLegal(
2041 const GlobalAddressSDNode *GA) const {
2042 // In order to maximise the opportunity for common subexpression elimination,
2043 // keep a separate ADD node for the global address offset instead of folding
2044 // it in the global address node. Later peephole optimisations may choose to
2045 // fold it back in when profitable.
2046 return false;
2049 // Return one of the following:
2050 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2051 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2052 // positive counterpart, which will be materialized from the first returned
2053 // element. The second returned element indicates that the result should be
2054 // followed by an FNEG.
2055 // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2056 std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2057 EVT VT) const {
2058 if (!Subtarget.hasStdExtZfa())
2059 return std::make_pair(-1, false);
2061 bool IsSupportedVT = false;
2062 if (VT == MVT::f16) {
2063 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2064 } else if (VT == MVT::f32) {
2065 IsSupportedVT = true;
2066 } else if (VT == MVT::f64) {
2067 assert(Subtarget.hasStdExtD() && "Expect D extension");
2068 IsSupportedVT = true;
2071 if (!IsSupportedVT)
2072 return std::make_pair(-1, false);
2074 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2075 if (Index < 0 && Imm.isNegative())
2076 // Try the combination of its positive counterpart + FNEG.
2077 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2078 else
2079 return std::make_pair(Index, false);
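// Example (illustrative, assuming 2.0 is FLI-encodable while -2.0 is not):
// for f32 with Zfa, Imm = 2.0 returns {index-of-2.0, false}; Imm = -2.0 has
// no direct encoding, so {index-of-2.0, true} is returned and the caller is
// expected to emit an FNEG after the FLI.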
2082 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2083 bool ForCodeSize) const {
2084 bool IsLegalVT = false;
2085 if (VT == MVT::f16)
2086 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
2087 else if (VT == MVT::f32)
2088 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2089 else if (VT == MVT::f64)
2090 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2091 else if (VT == MVT::bf16)
2092 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2094 if (!IsLegalVT)
2095 return false;
2097 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2098 return true;
2100 // Cannot create a 64 bit floating-point immediate value for rv32.
2101 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2102 // td can handle +0.0 or -0.0 already.
2103 // -0.0 can be created by fmv + fneg.
2104 return Imm.isZero();
2107 // Special case: fmv + fneg
2108 if (Imm.isNegZero())
2109 return true;
2111 // Building an integer and then converting requires a fmv at the end of
2112 // the integer sequence.
2113 const int Cost =
2114 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2115 Subtarget.getFeatureBits());
2116 return Cost <= FPImmCost;
2119 // TODO: This is very conservative.
2120 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2121 unsigned Index) const {
2122 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2123 return false;
2125 // Only support extracting a fixed vector from a fixed vector for now.
2126 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2127 return false;
2129 unsigned ResElts = ResVT.getVectorNumElements();
2130 unsigned SrcElts = SrcVT.getVectorNumElements();
2132 // Conservatively only handle extracting half of a vector.
2133 // TODO: Relax this.
2134 if ((ResElts * 2) != SrcElts)
2135 return false;
2137 // The smallest type we can slide is i8.
2138 // TODO: We can extract index 0 from a mask vector without a slide.
2139 if (ResVT.getVectorElementType() == MVT::i1)
2140 return false;
2142 // A slide can support an arbitrary index, but we only treat vslidedown.vi as
2143 // cheap.
2144 if (Index >= 32)
2145 return false;
2147 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2148 // the upper half of a vector until we have more test coverage.
2149 return Index == 0 || Index == ResElts;
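// Example (illustrative): extracting the upper half of a fixed v8i32 as
// v4i32 (Index == 4 == ResElts) is reported cheap since it is a single
// vslidedown.vi, while extracting elements 2..5 (Index == 2) is not, per the
// TODO above.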
2152 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2153 CallingConv::ID CC,
2154 EVT VT) const {
2155 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2156 // We might still end up using a GPR but that will be decided based on ABI.
2157 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2158 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
2159 return MVT::f32;
2161 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2163 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2164 return MVT::i64;
2166 return PartVT;
2169 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2170 CallingConv::ID CC,
2171 EVT VT) const {
2172 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2173 // We might still end up using a GPR but that will be decided based on ABI.
2174 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2175 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
2176 return 1;
2178 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2181 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2182 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2183 unsigned &NumIntermediates, MVT &RegisterVT) const {
2184 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2185 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2187 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2188 IntermediateVT = MVT::i64;
2190 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2191 RegisterVT = MVT::i64;
2193 return NumRegs;
2196 // Changes the condition code and swaps operands if necessary, so the SetCC
2197 // operation matches one of the comparisons supported directly by branches
2198 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2199 // with 1/-1.
2200 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2201 ISD::CondCode &CC, SelectionDAG &DAG) {
2202 // If this is a single bit test that can't be handled by ANDI, shift the
2203 // bit to be tested to the MSB and perform a signed compare with 0.
2204 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2205 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2206 isa<ConstantSDNode>(LHS.getOperand(1))) {
2207 uint64_t Mask = LHS.getConstantOperandVal(1);
2208 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2209 unsigned ShAmt = 0;
2210 if (isPowerOf2_64(Mask)) {
2211 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2212 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2213 } else {
2214 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2217 LHS = LHS.getOperand(0);
2218 if (ShAmt != 0)
2219 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2220 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2221 return;
2225 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2226 int64_t C = RHSC->getSExtValue();
2227 switch (CC) {
2228 default: break;
2229 case ISD::SETGT:
2230 // Convert X > -1 to X >= 0.
2231 if (C == -1) {
2232 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2233 CC = ISD::SETGE;
2234 return;
2236 break;
2237 case ISD::SETLT:
2238 // Convert X < 1 to 0 >= X.
2239 if (C == 1) {
2240 RHS = LHS;
2241 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2242 CC = ISD::SETGE;
2243 return;
2245 break;
2249 switch (CC) {
2250 default:
2251 break;
2252 case ISD::SETGT:
2253 case ISD::SETLE:
2254 case ISD::SETUGT:
2255 case ISD::SETULE:
2256 CC = ISD::getSetCCSwappedOperands(CC);
2257 std::swap(LHS, RHS);
2258 break;
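// Worked examples of the rewrites above (illustrative, assuming XLEN = 64):
//   (x & 0x800) == 0  ->  (x << 52) >= 0   ; bit 11 moved to the sign bit
//                                            because 0x800 does not fit ANDI
//   x > -1            ->  x >= 0
//   x > y             ->  y < x             ; SETGT swapped to SETLT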
2262 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2263 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2264 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2265 if (VT.getVectorElementType() == MVT::i1)
2266 KnownSize *= 8;
2268 switch (KnownSize) {
2269 default:
2270 llvm_unreachable("Invalid LMUL.");
2271 case 8:
2272 return RISCVII::VLMUL::LMUL_F8;
2273 case 16:
2274 return RISCVII::VLMUL::LMUL_F4;
2275 case 32:
2276 return RISCVII::VLMUL::LMUL_F2;
2277 case 64:
2278 return RISCVII::VLMUL::LMUL_1;
2279 case 128:
2280 return RISCVII::VLMUL::LMUL_2;
2281 case 256:
2282 return RISCVII::VLMUL::LMUL_4;
2283 case 512:
2284 return RISCVII::VLMUL::LMUL_8;
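// Example (illustrative): nxv4i32 has a known minimum size of 128 bits and
// maps to LMUL_2, while an nxv1i1 mask (1 bit, scaled by 8 above) maps to
// LMUL_F8.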
2288 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2289 switch (LMul) {
2290 default:
2291 llvm_unreachable("Invalid LMUL.");
2292 case RISCVII::VLMUL::LMUL_F8:
2293 case RISCVII::VLMUL::LMUL_F4:
2294 case RISCVII::VLMUL::LMUL_F2:
2295 case RISCVII::VLMUL::LMUL_1:
2296 return RISCV::VRRegClassID;
2297 case RISCVII::VLMUL::LMUL_2:
2298 return RISCV::VRM2RegClassID;
2299 case RISCVII::VLMUL::LMUL_4:
2300 return RISCV::VRM4RegClassID;
2301 case RISCVII::VLMUL::LMUL_8:
2302 return RISCV::VRM8RegClassID;
2306 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2307 RISCVII::VLMUL LMUL = getLMUL(VT);
2308 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2309 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2310 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2311 LMUL == RISCVII::VLMUL::LMUL_1) {
2312 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2313 "Unexpected subreg numbering");
2314 return RISCV::sub_vrm1_0 + Index;
2316 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2317 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2318 "Unexpected subreg numbering");
2319 return RISCV::sub_vrm2_0 + Index;
2321 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2322 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2323 "Unexpected subreg numbering");
2324 return RISCV::sub_vrm4_0 + Index;
2326 llvm_unreachable("Invalid vector type.");
2329 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2330 if (VT.getVectorElementType() == MVT::i1)
2331 return RISCV::VRRegClassID;
2332 return getRegClassIDForLMUL(getLMUL(VT));
2335 // Attempt to decompose a subvector insert/extract between VecVT and
2336 // SubVecVT via subregister indices. Returns the subregister index that
2337 // can perform the subvector insert/extract with the given element index, as
2338 // well as the index corresponding to any leftover subvectors that must be
2339 // further inserted/extracted within the register class for SubVecVT.
2340 std::pair<unsigned, unsigned>
2341 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2342 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2343 const RISCVRegisterInfo *TRI) {
2344 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2345 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2346 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2347 "Register classes not ordered");
2348 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2349 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2350 // Try to compose a subregister index that takes us from the incoming
2351 // LMUL>1 register class down to the outgoing one. At each step we halve
2352 // the LMUL:
2353 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2354 // Note that this is not guaranteed to find a subregister index, such as
2355 // when we are extracting from one VR type to another.
2356 unsigned SubRegIdx = RISCV::NoSubRegister;
2357 for (const unsigned RCID :
2358 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2359 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2360 VecVT = VecVT.getHalfNumVectorElementsVT();
2361 bool IsHi =
2362 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2363 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2364 getSubregIndexByMVT(VecVT, IsHi));
2365 if (IsHi)
2366 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2368 return {SubRegIdx, InsertExtractIdx};
2371 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2372 // stores for those types.
2373 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2374 return !Subtarget.useRVVForFixedLengthVectors() ||
2375 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2378 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2379 if (!ScalarTy.isSimple())
2380 return false;
2381 switch (ScalarTy.getSimpleVT().SimpleTy) {
2382 case MVT::iPTR:
2383 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2384 case MVT::i8:
2385 case MVT::i16:
2386 case MVT::i32:
2387 return true;
2388 case MVT::i64:
2389 return Subtarget.hasVInstructionsI64();
2390 case MVT::f16:
2391 return Subtarget.hasVInstructionsF16();
2392 case MVT::f32:
2393 return Subtarget.hasVInstructionsF32();
2394 case MVT::f64:
2395 return Subtarget.hasVInstructionsF64();
2396 default:
2397 return false;
2402 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2403 return NumRepeatedDivisors;
2406 static SDValue getVLOperand(SDValue Op) {
2407 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2408 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2409 "Unexpected opcode");
2410 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2411 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2412 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2413 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2414 if (!II)
2415 return SDValue();
2416 return Op.getOperand(II->VLOperand + 1 + HasChain);
2419 static bool useRVVForFixedLengthVectorVT(MVT VT,
2420 const RISCVSubtarget &Subtarget) {
2421 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2422 if (!Subtarget.useRVVForFixedLengthVectors())
2423 return false;
2425 // We only support a set of vector types with a consistent maximum fixed size
2426 // across all supported vector element types to avoid legalization issues.
2427 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2428 // fixed-length vector type we support is 1024 bytes.
2429 if (VT.getFixedSizeInBits() > 1024 * 8)
2430 return false;
2432 unsigned MinVLen = Subtarget.getRealMinVLen();
2434 MVT EltVT = VT.getVectorElementType();
2436 // Don't use RVV for vectors we cannot scalarize if required.
2437 switch (EltVT.SimpleTy) {
2438 // i1 is supported but has different rules.
2439 default:
2440 return false;
2441 case MVT::i1:
2442 // Masks can only use a single register.
2443 if (VT.getVectorNumElements() > MinVLen)
2444 return false;
2445 MinVLen /= 8;
2446 break;
2447 case MVT::i8:
2448 case MVT::i16:
2449 case MVT::i32:
2450 break;
2451 case MVT::i64:
2452 if (!Subtarget.hasVInstructionsI64())
2453 return false;
2454 break;
2455 case MVT::f16:
2456 if (!Subtarget.hasVInstructionsF16Minimal())
2457 return false;
2458 break;
2459 case MVT::f32:
2460 if (!Subtarget.hasVInstructionsF32())
2461 return false;
2462 break;
2463 case MVT::f64:
2464 if (!Subtarget.hasVInstructionsF64())
2465 return false;
2466 break;
2469 // Reject elements larger than ELEN.
2470 if (EltVT.getSizeInBits() > Subtarget.getELen())
2471 return false;
2473 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2474 // Don't use RVV for types that don't fit.
2475 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2476 return false;
2478 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2479 // the base fixed length RVV support in place.
2480 if (!VT.isPow2VectorType())
2481 return false;
2483 return true;
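// Example (illustrative): with a real minimum VLEN of 128, a fixed v8i32
// (256 bits) needs divideCeil(256, 128) = LMUL 2 and is accepted, whereas
// v3i32 is rejected because it is not a power-of-2 vector type.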
2486 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2487 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2490 // Return the largest legal scalable vector type that matches VT's element type.
2491 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2492 const RISCVSubtarget &Subtarget) {
2493 // This may be called before legal types are set up.
2494 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2495 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2496 "Expected legal fixed length vector!");
2498 unsigned MinVLen = Subtarget.getRealMinVLen();
2499 unsigned MaxELen = Subtarget.getELen();
2501 MVT EltVT = VT.getVectorElementType();
2502 switch (EltVT.SimpleTy) {
2503 default:
2504 llvm_unreachable("unexpected element type for RVV container");
2505 case MVT::i1:
2506 case MVT::i8:
2507 case MVT::i16:
2508 case MVT::i32:
2509 case MVT::i64:
2510 case MVT::f16:
2511 case MVT::f32:
2512 case MVT::f64: {
2513 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2514 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2515 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2516 unsigned NumElts =
2517 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2518 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2519 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2520 return MVT::getScalableVectorVT(EltVT, NumElts);
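// Example (illustrative): with MinVLen = 128 and ELEN = 64, a fixed v4i16
// gives NumElts = (4 * 64) / 128 = 2, clamped to at least 64 / 64 = 1, so
// the container is nxv2i16 (a fractional LMUL of 1/4 at VLEN = 128).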
2525 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2526 const RISCVSubtarget &Subtarget) {
2527 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2528 Subtarget);
2531 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2532 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2535 // Grow V to consume an entire RVV register.
2536 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2537 const RISCVSubtarget &Subtarget) {
2538 assert(VT.isScalableVector() &&
2539 "Expected to convert into a scalable vector!");
2540 assert(V.getValueType().isFixedLengthVector() &&
2541 "Expected a fixed length vector operand!");
2542 SDLoc DL(V);
2543 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2544 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2547 // Shrink V so it's just big enough to maintain a VT's worth of data.
2548 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2549 const RISCVSubtarget &Subtarget) {
2550 assert(VT.isFixedLengthVector() &&
2551 "Expected to convert into a fixed length vector!");
2552 assert(V.getValueType().isScalableVector() &&
2553 "Expected a scalable vector operand!");
2554 SDLoc DL(V);
2555 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2556 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2559 /// Return the mask type suitable for masking the provided vector type.
2560 /// This is simply an i1 element type vector of the same
2561 /// (possibly scalable) length.
2562 static MVT getMaskTypeFor(MVT VecVT) {
2563 assert(VecVT.isVector());
2564 ElementCount EC = VecVT.getVectorElementCount();
2565 return MVT::getVectorVT(MVT::i1, EC);
2568 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2569 /// vector length VL.
2570 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2571 SelectionDAG &DAG) {
2572 MVT MaskVT = getMaskTypeFor(VecVT);
2573 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2576 static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2577 const RISCVSubtarget &Subtarget) {
2578 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2581 static std::pair<SDValue, SDValue>
2582 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2583 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2584 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2585 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2586 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2587 return {Mask, VL};
2590 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2591 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2592 // the vector type that the fixed-length vector is contained in. Otherwise if
2593 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2594 static std::pair<SDValue, SDValue>
2595 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2596 const RISCVSubtarget &Subtarget) {
2597 if (VecVT.isFixedLengthVector())
2598 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2599 Subtarget);
2600 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2601 MVT XLenVT = Subtarget.getXLenVT();
2602 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2603 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2604 return {Mask, VL};
2607 // As above but assuming the given type is a scalable vector type.
2608 static std::pair<SDValue, SDValue>
2609 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2610 const RISCVSubtarget &Subtarget) {
2611 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2612 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2615 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2616 SelectionDAG &DAG) const {
2617 assert(VecVT.isScalableVector() && "Expected scalable vector");
2618 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2619 VecVT.getVectorElementCount());
2622 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2623 // of either is (currently) supported. This can get us into an infinite loop
2624 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2625 // as a ..., etc.
2626 // Until either (or both) of these can reliably lower any node, reporting that
2627 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2628 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2629 // which is not desirable.
2630 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2631 EVT VT, unsigned DefinedValues) const {
2632 return false;
2635 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2636 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2637 // implementation-defined.
2638 if (!VT.isVector())
2639 return InstructionCost::getInvalid();
2640 unsigned DLenFactor = Subtarget.getDLenFactor();
2641 unsigned Cost;
2642 if (VT.isScalableVector()) {
2643 unsigned LMul;
2644 bool Fractional;
2645 std::tie(LMul, Fractional) =
2646 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2647 if (Fractional)
2648 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2649 else
2650 Cost = (LMul * DLenFactor);
2651 } else {
2652 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2654 return Cost;
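// Example (illustrative, assuming DLEN == VLEN so DLenFactor == 1): an
// LMUL_4 type such as nxv8i32 costs 4, an LMUL_1 type costs 1, and any
// fractional-LMUL type is also treated as cost 1.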
2658 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2659 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2660 /// the operands (index and possibly mask) are handled separately.
2661 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2662 return getLMULCost(VT) * getLMULCost(VT);
2665 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2666 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2667 /// or may track the vrgather.vv cost. It is implementation-dependent.
2668 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2669 return getLMULCost(VT);
2672 /// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2673 /// for the type VT. (This does not cover the vslide1up or vslide1down
2674 /// variants.) Slides may be linear in the number of vregs implied by LMUL,
2675 /// or may track the vrgather.vv cost. It is implementation-dependent.
2676 InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
2677 return getLMULCost(VT);
2680 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2681 const RISCVSubtarget &Subtarget) {
2682 // RISC-V FP-to-int conversions saturate to the destination register size, but
2683 // don't produce 0 for nan. We can use a conversion instruction and fix the
2684 // nan case with a compare and a select.
2685 SDValue Src = Op.getOperand(0);
2687 MVT DstVT = Op.getSimpleValueType();
2688 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2690 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2692 if (!DstVT.isVector()) {
2693 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2694 // the result.
2695 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2696 Src.getValueType() == MVT::bf16) {
2697 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2700 unsigned Opc;
2701 if (SatVT == DstVT)
2702 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2703 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2704 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2705 else
2706 return SDValue();
2707 // FIXME: Support other SatVTs by clamping before or after the conversion.
2709 SDLoc DL(Op);
2710 SDValue FpToInt = DAG.getNode(
2711 Opc, DL, DstVT, Src,
2712 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2714 if (Opc == RISCVISD::FCVT_WU_RV64)
2715 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2717 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2718 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2719 ISD::CondCode::SETUO);
2722 // Vectors.
2724 MVT DstEltVT = DstVT.getVectorElementType();
2725 MVT SrcVT = Src.getSimpleValueType();
2726 MVT SrcEltVT = SrcVT.getVectorElementType();
2727 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2728 unsigned DstEltSize = DstEltVT.getSizeInBits();
2730 // Only handle saturating to the destination type.
2731 if (SatVT != DstEltVT)
2732 return SDValue();
2734 // FIXME: Don't support narrowing by more than 1 step for now.
2735 if (SrcEltSize > (2 * DstEltSize))
2736 return SDValue();
2738 MVT DstContainerVT = DstVT;
2739 MVT SrcContainerVT = SrcVT;
2740 if (DstVT.isFixedLengthVector()) {
2741 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2742 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2743 assert(DstContainerVT.getVectorElementCount() ==
2744 SrcContainerVT.getVectorElementCount() &&
2745 "Expected same element count");
2746 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2749 SDLoc DL(Op);
2751 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2753 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2754 {Src, Src, DAG.getCondCode(ISD::SETNE),
2755 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2757 // Need to widen by more than 1 step, promote the FP type, then do a widening
2758 // convert.
2759 if (DstEltSize > (2 * SrcEltSize)) {
2760 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2761 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2762 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2765 unsigned RVVOpc =
2766 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2767 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2769 SDValue SplatZero = DAG.getNode(
2770 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2771 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2772 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2773 Res, VL);
2775 if (DstVT.isFixedLengthVector())
2776 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2778 return Res;
2781 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2782 switch (Opc) {
2783 case ISD::FROUNDEVEN:
2784 case ISD::STRICT_FROUNDEVEN:
2785 case ISD::VP_FROUNDEVEN:
2786 return RISCVFPRndMode::RNE;
2787 case ISD::FTRUNC:
2788 case ISD::STRICT_FTRUNC:
2789 case ISD::VP_FROUNDTOZERO:
2790 return RISCVFPRndMode::RTZ;
2791 case ISD::FFLOOR:
2792 case ISD::STRICT_FFLOOR:
2793 case ISD::VP_FFLOOR:
2794 return RISCVFPRndMode::RDN;
2795 case ISD::FCEIL:
2796 case ISD::STRICT_FCEIL:
2797 case ISD::VP_FCEIL:
2798 return RISCVFPRndMode::RUP;
2799 case ISD::FROUND:
2800 case ISD::STRICT_FROUND:
2801 case ISD::VP_FROUND:
2802 return RISCVFPRndMode::RMM;
2803 case ISD::FRINT:
2804 return RISCVFPRndMode::DYN;
2807 return RISCVFPRndMode::Invalid;
2810 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2811 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2812 // the integer domain and back. Taking care to avoid converting values that are
2813 // nan or already correct.
2814 static SDValue
2815 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2816 const RISCVSubtarget &Subtarget) {
2817 MVT VT = Op.getSimpleValueType();
2818 assert(VT.isVector() && "Unexpected type");
2820 SDLoc DL(Op);
2822 SDValue Src = Op.getOperand(0);
2824 MVT ContainerVT = VT;
2825 if (VT.isFixedLengthVector()) {
2826 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2827 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2830 SDValue Mask, VL;
2831 if (Op->isVPOpcode()) {
2832 Mask = Op.getOperand(1);
2833 if (VT.isFixedLengthVector())
2834 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2835 Subtarget);
2836 VL = Op.getOperand(2);
2837 } else {
2838 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2841 // Freeze the source since we are increasing the number of uses.
2842 Src = DAG.getFreeze(Src);
2844 // We do the conversion on the absolute value and fix the sign at the end.
2845 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2847 // Determine the largest integer that can be represented exactly. This and
2848 // values larger than it don't have any fractional bits so don't need to
2849 // be converted.
2850 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2851 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2852 APFloat MaxVal = APFloat(FltSem);
2853 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2854 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2855 SDValue MaxValNode =
2856 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2857 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2858 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2860 // If abs(Src) was larger than MaxVal or nan, keep it.
2861 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2862 Mask =
2863 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2864 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2865 Mask, Mask, VL});
2867 // Truncate to integer and convert back to FP.
2868 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2869 MVT XLenVT = Subtarget.getXLenVT();
2870 SDValue Truncated;
2872 switch (Op.getOpcode()) {
2873 default:
2874 llvm_unreachable("Unexpected opcode");
2875 case ISD::FCEIL:
2876 case ISD::VP_FCEIL:
2877 case ISD::FFLOOR:
2878 case ISD::VP_FFLOOR:
2879 case ISD::FROUND:
2880 case ISD::FROUNDEVEN:
2881 case ISD::VP_FROUND:
2882 case ISD::VP_FROUNDEVEN:
2883 case ISD::VP_FROUNDTOZERO: {
2884 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2885 assert(FRM != RISCVFPRndMode::Invalid);
2886 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2887 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2888 break;
2890 case ISD::FTRUNC:
2891 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2892 Mask, VL);
2893 break;
2894 case ISD::FRINT:
2895 case ISD::VP_FRINT:
2896 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2897 break;
2898 case ISD::FNEARBYINT:
2899 case ISD::VP_FNEARBYINT:
2900 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2901 Mask, VL);
2902 break;
2905 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2906 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2907 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2908 Mask, VL);
2910 // Restore the original sign so that -0.0 is preserved.
2911 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2912 Src, Src, Mask, VL);
2914 if (!VT.isFixedLengthVector())
2915 return Truncated;
2917 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
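// Rough shape of the sequence emitted above for a floor on nxv2f32
// (illustrative sketch; the exact instructions depend on selection):
//   vfabs.v      ; |src|
//   vmflt.vf     ; mask of lanes with |src| < 2^23, i.e. lanes that still
//                ; have fractional bits
//   vfcvt.x.f.v  ; to integer with frm = rdn, masked
//   vfcvt.f.x.v  ; back to FP, masked
//   vfsgnj.vv    ; restore the original sign so -0.0 is preserved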
2920 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2921 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
2922 // qNaNs and converting the new source to integer and back to FP.
2923 static SDValue
2924 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2925 const RISCVSubtarget &Subtarget) {
2926 SDLoc DL(Op);
2927 MVT VT = Op.getSimpleValueType();
2928 SDValue Chain = Op.getOperand(0);
2929 SDValue Src = Op.getOperand(1);
2931 MVT ContainerVT = VT;
2932 if (VT.isFixedLengthVector()) {
2933 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2934 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2937 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2939 // Freeze the source since we are increasing the number of uses.
2940 Src = DAG.getFreeze(Src);
2942 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
2943 MVT MaskVT = Mask.getSimpleValueType();
2944 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2945 DAG.getVTList(MaskVT, MVT::Other),
2946 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2947 DAG.getUNDEF(MaskVT), Mask, VL});
2948 Chain = Unorder.getValue(1);
2949 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
2950 DAG.getVTList(ContainerVT, MVT::Other),
2951 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2952 Chain = Src.getValue(1);
2954 // We do the conversion on the absolute value and fix the sign at the end.
2955 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2957 // Determine the largest integer that can be represented exactly. This and
2958 // values larger than it don't have any fractional bits so don't need to
2959 // be converted.
2960 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2961 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2962 APFloat MaxVal = APFloat(FltSem);
2963 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2964 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2965 SDValue MaxValNode =
2966 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2967 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2968 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2970 // If abs(Src) was larger than MaxVal or nan, keep it.
2971 Mask = DAG.getNode(
2972 RISCVISD::SETCC_VL, DL, MaskVT,
2973 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2975 // Truncate to integer and convert back to FP.
2976 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2977 MVT XLenVT = Subtarget.getXLenVT();
2978 SDValue Truncated;
2980 switch (Op.getOpcode()) {
2981 default:
2982 llvm_unreachable("Unexpected opcode");
2983 case ISD::STRICT_FCEIL:
2984 case ISD::STRICT_FFLOOR:
2985 case ISD::STRICT_FROUND:
2986 case ISD::STRICT_FROUNDEVEN: {
2987 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2988 assert(FRM != RISCVFPRndMode::Invalid);
2989 Truncated = DAG.getNode(
2990 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2991 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
2992 break;
2994 case ISD::STRICT_FTRUNC:
2995 Truncated =
2996 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
2997 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
2998 break;
2999 case ISD::STRICT_FNEARBYINT:
3000 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3001 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3002 Mask, VL);
3003 break;
3005 Chain = Truncated.getValue(1);
3007 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3008 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3009 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3010 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3011 Truncated, Mask, VL);
3012 Chain = Truncated.getValue(1);
3015 // Restore the original sign so that -0.0 is preserved.
3016 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3017 Src, Src, Mask, VL);
3019 if (VT.isFixedLengthVector())
3020 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3021 return DAG.getMergeValues({Truncated, Chain}, DL);
3024 static SDValue
3025 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3026 const RISCVSubtarget &Subtarget) {
3027 MVT VT = Op.getSimpleValueType();
3028 if (VT.isVector())
3029 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3031 if (DAG.shouldOptForSize())
3032 return SDValue();
3034 SDLoc DL(Op);
3035 SDValue Src = Op.getOperand(0);
3037 // Create an integer the size of the mantissa with the MSB set. This and all
3038 // values larger than it don't have any fractional bits so don't need to be
3039 // converted.
3040 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3041 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3042 APFloat MaxVal = APFloat(FltSem);
3043 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3044 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3045 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3047 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3048 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3049 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3050 }
3052 // Expand vector LRINT and LLRINT by converting to the integer domain.
3053 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3054 const RISCVSubtarget &Subtarget) {
3055 MVT VT = Op.getSimpleValueType();
3056 assert(VT.isVector() && "Unexpected type");
3058 SDLoc DL(Op);
3059 SDValue Src = Op.getOperand(0);
3060 MVT ContainerVT = VT;
3062 if (VT.isFixedLengthVector()) {
3063 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3064 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3065 }
3067 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3068 SDValue Truncated =
3069 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3071 if (!VT.isFixedLengthVector())
3072 return Truncated;
3074 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3075 }
3077 static SDValue
3078 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3079 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3080 SDValue Offset, SDValue Mask, SDValue VL,
3081 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3082 if (Merge.isUndef())
3083 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3084 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3085 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3086 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3087 }
3089 static SDValue
3090 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3091 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3092 SDValue VL,
3093 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3094 if (Merge.isUndef())
3095 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3096 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3097 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3098 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3099 }
3101 struct VIDSequence {
3102 int64_t StepNumerator;
3103 unsigned StepDenominator;
3104 int64_t Addend;
3105 };
3107 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3108 uint32_t BitWidth) {
3109 APSInt ValInt(BitWidth, !APF.isNegative());
3110 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3111 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3112 // the rounding mode changes the output value, then it is not an exact
3113 // integer.
3114 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3115 bool IsExact;
3116 // If it is out of signed integer range, it will return an invalid operation.
3117 // If it is not an exact integer, IsExact is false.
3118 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3119 APFloatBase::opInvalidOp) ||
3120 !IsExact)
3121 return std::nullopt;
3122 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3123 }
3125 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3126 // to the (non-zero) step S and start value X. This can then be lowered as the
3127 // RVV sequence (VID * S) + X, for example.
3128 // The step S is represented as an integer numerator divided by a positive
3129 // denominator. Note that the implementation currently only identifies
3130 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3131 // cannot detect 2/3, for example.
3132 // Note that this method will also match potentially unappealing index
3133 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3134 // determine whether this is worth generating code for.
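// For example, <i64 1, i64 3, i64 5, i64 7> is matched as (VID * 2) + 1, i.e.
// {StepNumerator = 2, StepDenominator = 1, Addend = 1}, and <i32 0, i32 0,
// i32 1, i32 1> as (VID / 2), i.e. {StepNumerator = 1, StepDenominator = 2,
// Addend = 0}.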
3135 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
3136 unsigned NumElts = Op.getNumOperands();
3137 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3138 bool IsInteger = Op.getValueType().isInteger();
3140 std::optional<unsigned> SeqStepDenom;
3141 std::optional<int64_t> SeqStepNum, SeqAddend;
3142 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3143 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
3144 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3145 // Assume undef elements match the sequence; we just have to be careful
3146 // when interpolating across them.
3147 if (Op.getOperand(Idx).isUndef())
3148 continue;
3150 uint64_t Val;
3151 if (IsInteger) {
3152 // The BUILD_VECTOR must be all constants.
3153 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
3154 return std::nullopt;
3155 Val = Op.getConstantOperandVal(Idx) &
3156 maskTrailingOnes<uint64_t>(EltSizeInBits);
3157 } else {
3158 // The BUILD_VECTOR must be all constants.
3159 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
3160 return std::nullopt;
3161 if (auto ExactInteger = getExactInteger(
3162 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3163 EltSizeInBits))
3164 Val = *ExactInteger;
3165 else
3166 return std::nullopt;
3167 }
3169 if (PrevElt) {
3170 // Calculate the step since the last non-undef element, and ensure
3171 // it's consistent across the entire sequence.
3172 unsigned IdxDiff = Idx - PrevElt->second;
3173 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
3175 // A zero value difference means that we're somewhere in the middle
3176 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3177 // step change before evaluating the sequence.
3178 if (ValDiff == 0)
3179 continue;
3181 int64_t Remainder = ValDiff % IdxDiff;
3182 // Normalize the step if it's greater than 1.
3183 if (Remainder != ValDiff) {
3184 // The difference must cleanly divide the element span.
3185 if (Remainder != 0)
3186 return std::nullopt;
3187 ValDiff /= IdxDiff;
3188 IdxDiff = 1;
3189 }
3191 if (!SeqStepNum)
3192 SeqStepNum = ValDiff;
3193 else if (ValDiff != SeqStepNum)
3194 return std::nullopt;
3196 if (!SeqStepDenom)
3197 SeqStepDenom = IdxDiff;
3198 else if (IdxDiff != *SeqStepDenom)
3199 return std::nullopt;
3200 }
3202 // Record this non-undef element for later.
3203 if (!PrevElt || PrevElt->first != Val)
3204 PrevElt = std::make_pair(Val, Idx);
3205 }
3207 // We need to have logged a step for this to count as a legal index sequence.
3208 if (!SeqStepNum || !SeqStepDenom)
3209 return std::nullopt;
3211 // Loop back through the sequence and validate elements we might have skipped
3212 // while waiting for a valid step. While doing this, log any sequence addend.
3213 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3214 if (Op.getOperand(Idx).isUndef())
3215 continue;
3216 uint64_t Val;
3217 if (IsInteger) {
3218 Val = Op.getConstantOperandVal(Idx) &
3219 maskTrailingOnes<uint64_t>(EltSizeInBits);
3220 } else {
3221 Val = *getExactInteger(
3222 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3223 EltSizeInBits);
3224 }
3225 uint64_t ExpectedVal =
3226 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3227 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
3228 if (!SeqAddend)
3229 SeqAddend = Addend;
3230 else if (Addend != SeqAddend)
3231 return std::nullopt;
3232 }
3234 assert(SeqAddend && "Must have an addend if we have a step");
3236 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3237 }
3239 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3240 // and lower it as a VRGATHER_VX_VL from the source vector.
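// For example, a splat of (extractelt V, 2) for a v4i32 V becomes a vrgather
// of V with scalar index 2, which avoids extracting the element to a scalar
// register and splatting it back.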
3241 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3242 SelectionDAG &DAG,
3243 const RISCVSubtarget &Subtarget) {
3244 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3245 return SDValue();
3246 SDValue Vec = SplatVal.getOperand(0);
3247 // Only perform this optimization on vectors of the same size for simplicity.
3248 // Don't perform this optimization for i1 vectors.
3249 // FIXME: Support i1 vectors, maybe by promoting to i8?
3250 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3251 return SDValue();
3252 SDValue Idx = SplatVal.getOperand(1);
3253 // The index must be a legal type.
3254 if (Idx.getValueType() != Subtarget.getXLenVT())
3255 return SDValue();
3257 MVT ContainerVT = VT;
3258 if (VT.isFixedLengthVector()) {
3259 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3260 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3261 }
3263 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3265 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3266 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3268 if (!VT.isFixedLengthVector())
3269 return Gather;
3271 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3272 }
3275 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3276 /// which constitute a large proportion of the elements. In such cases we can
3277 /// splat a vector with the dominant element and make up the shortfall with
3278 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3279 /// Note that this includes vectors of 2 elements by association. The
3280 /// upper-most element is the "dominant" one, allowing us to use a splat to
3281 /// "insert" the upper element, and an insert of the lower element at position
3282 /// 0, which improves codegen.
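/// For example, <i32 5, i32 5, i32 7, i32 5> can be lowered as a splat of 5
/// followed by a single INSERT_VECTOR_ELT of 7 at index 2.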
3283 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3284 const RISCVSubtarget &Subtarget) {
3285 MVT VT = Op.getSimpleValueType();
3286 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3288 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3290 SDLoc DL(Op);
3291 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3293 MVT XLenVT = Subtarget.getXLenVT();
3294 unsigned NumElts = Op.getNumOperands();
3296 SDValue DominantValue;
3297 unsigned MostCommonCount = 0;
3298 DenseMap<SDValue, unsigned> ValueCounts;
3299 unsigned NumUndefElts =
3300 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3302 // Track the number of scalar loads we know we'd be inserting, estimated as
3303 // any non-zero floating-point constant. Other kinds of element are either
3304 // already in registers or are materialized on demand. The threshold at which
3305 // a vector load is more desirable than several scalar materialization and
3306 // vector-insertion instructions is not known.
3307 unsigned NumScalarLoads = 0;
3309 for (SDValue V : Op->op_values()) {
3310 if (V.isUndef())
3311 continue;
3313 ValueCounts.insert(std::make_pair(V, 0));
3314 unsigned &Count = ValueCounts[V];
3315 if (0 == Count)
3316 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3317 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3319 // Is this value dominant? In case of a tie, prefer the highest element as
3320 // it's cheaper to insert near the beginning of a vector than it is at the
3321 // end.
3322 if (++Count >= MostCommonCount) {
3323 DominantValue = V;
3324 MostCommonCount = Count;
3325 }
3326 }
3328 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3329 unsigned NumDefElts = NumElts - NumUndefElts;
3330 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3332 // Don't perform this optimization when optimizing for size, since
3333 // materializing elements and inserting them tends to cause code bloat.
3334 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3335 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3336 ((MostCommonCount > DominantValueCountThreshold) ||
3337 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3338 // Start by splatting the most common element.
3339 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3341 DenseSet<SDValue> Processed{DominantValue};
3343 // We can handle an insert into the last element (of a splat) via
3344 // v(f)slide1down. This is slightly better than the vslideup insert
3345 // lowering as it avoids the need for a vector group temporary. It
3346 // is also better than using vmerge.vx as it avoids the need to
3347 // materialize the mask in a vector register.
3348 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3349 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3350 LastOp != DominantValue) {
3351 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3352 auto OpCode =
3353 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3354 if (!VT.isFloatingPoint())
3355 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3356 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3357 LastOp, Mask, VL);
3358 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3359 Processed.insert(LastOp);
3360 }
3362 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3363 for (const auto &OpIdx : enumerate(Op->ops())) {
3364 const SDValue &V = OpIdx.value();
3365 if (V.isUndef() || !Processed.insert(V).second)
3366 continue;
3367 if (ValueCounts[V] == 1) {
3368 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3369 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3370 } else {
3371 // Blend in all instances of this value using a VSELECT, using a
3372 // mask where each bit signals whether that element is the one
3373 // we're after.
3374 SmallVector<SDValue> Ops;
3375 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3376 return DAG.getConstant(V == V1, DL, XLenVT);
3377 });
3378 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3379 DAG.getBuildVector(SelMaskTy, DL, Ops),
3380 DAG.getSplatBuildVector(VT, DL, V), Vec);
3381 }
3382 }
3384 return Vec;
3385 }
3387 return SDValue();
3388 }
3390 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3391 const RISCVSubtarget &Subtarget) {
3392 MVT VT = Op.getSimpleValueType();
3393 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3395 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 SDLoc DL(Op);
3398 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3400 MVT XLenVT = Subtarget.getXLenVT();
3401 unsigned NumElts = Op.getNumOperands();
3403 if (VT.getVectorElementType() == MVT::i1) {
3404 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3405 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3406 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3407 }
3409 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3410 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3411 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3412 }
3414 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3415 // scalar integer chunks whose bit-width depends on the number of mask
3416 // bits and XLEN.
3417 // First, determine the most appropriate scalar integer type to use. This
3418 // is at most XLenVT, but may be shrunk to a smaller vector element type
3419 // according to the size of the final vector - use i8 chunks rather than
3420 // XLenVT if we're producing a v8i1. This results in more consistent
3421 // codegen across RV32 and RV64.
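// For example, on RV64 with ELEN=64 a v64i1 constant mask is built as a
// single i64 chunk (a v1i64 build_vector) and then bitcast back to v64i1.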
3422 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3423 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3424 // If we have to use more than one INSERT_VECTOR_ELT then this
3425 // optimization is likely to increase code size; avoid performing it in
3426 // such a case. We can use a load from a constant pool in this case.
3427 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3428 return SDValue();
3429 // Now we can create our integer vector type. Note that it may be larger
3430 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3431 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3432 MVT IntegerViaVecVT =
3433 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3434 IntegerViaVecElts);
3436 uint64_t Bits = 0;
3437 unsigned BitPos = 0, IntegerEltIdx = 0;
3438 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3440 for (unsigned I = 0; I < NumElts;) {
3441 SDValue V = Op.getOperand(I);
3442 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3443 Bits |= ((uint64_t)BitValue << BitPos);
3444 ++BitPos;
3445 ++I;
3447 // Once we accumulate enough bits to fill our scalar type or process the
3448 // last element, insert into our vector and clear our accumulated data.
3449 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3450 if (NumViaIntegerBits <= 32)
3451 Bits = SignExtend64<32>(Bits);
3452 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3453 Elts[IntegerEltIdx] = Elt;
3454 Bits = 0;
3455 BitPos = 0;
3456 IntegerEltIdx++;
3457 }
3458 }
3460 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3462 if (NumElts < NumViaIntegerBits) {
3463 // If we're producing a smaller vector than our minimum legal integer
3464 // type, bitcast to the equivalent (known-legal) mask type, and extract
3465 // our final mask.
3466 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3467 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3468 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3469 DAG.getConstant(0, DL, XLenVT));
3470 } else {
3471 // Else we must have produced an integer type with the same size as the
3472 // mask type; bitcast for the final result.
3473 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3474 Vec = DAG.getBitcast(VT, Vec);
3475 }
3477 return Vec;
3478 }
3480 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3481 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3482 : RISCVISD::VMV_V_X_VL;
3483 if (!VT.isFloatingPoint())
3484 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3485 Splat =
3486 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3487 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3488 }
3490 // Try and match index sequences, which we can lower to the vid instruction
3491 // with optional modifications. An all-undef vector is matched by
3492 // getSplatValue, above.
3493 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3494 int64_t StepNumerator = SimpleVID->StepNumerator;
3495 unsigned StepDenominator = SimpleVID->StepDenominator;
3496 int64_t Addend = SimpleVID->Addend;
3498 assert(StepNumerator != 0 && "Invalid step");
3499 bool Negate = false;
3500 int64_t SplatStepVal = StepNumerator;
3501 unsigned StepOpcode = ISD::MUL;
3502 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3503 // anyway as the shift of 63 won't fit in uimm5.
3504 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3505 isPowerOf2_64(std::abs(StepNumerator))) {
3506 Negate = StepNumerator < 0;
3507 StepOpcode = ISD::SHL;
3508 SplatStepVal = Log2_64(std::abs(StepNumerator));
3509 }
3511 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3512 // threshold since it's the immediate value many RVV instructions accept.
3513 // There is no vmul.vi instruction so ensure the multiply constant can fit in
3514 // a single addi instruction.
3515 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3516 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3517 isPowerOf2_32(StepDenominator) &&
3518 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3519 MVT VIDVT =
3520 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3521 MVT VIDContainerVT =
3522 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3523 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3524 // Convert right out of the scalable type so we can use standard ISD
3525 // nodes for the rest of the computation. If we used scalable types with
3526 // these, we'd lose the fixed-length vector info and generate worse
3527 // vsetvli code.
3528 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3529 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3530 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3531 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3532 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3533 }
3534 if (StepDenominator != 1) {
3535 SDValue SplatStep =
3536 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3537 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3538 }
3539 if (Addend != 0 || Negate) {
3540 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3541 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3542 VID);
3543 }
3544 if (VT.isFloatingPoint()) {
3545 // TODO: Use vfwcvt to reduce register pressure.
3546 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3547 }
3548 return VID;
3549 }
3550 }
3552 // For very small build_vectors, use a single scalar insert of a constant.
3553 // TODO: Base this on constant rematerialization cost, not size.
3554 const unsigned EltBitSize = VT.getScalarSizeInBits();
3555 if (VT.getSizeInBits() <= 32 &&
3556 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3557 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3558 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3559 "Unexpected sequence type");
3560 // If we can use the original VL with the modified element type, this
3561 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3562 // be moved into InsertVSETVLI?
3563 unsigned ViaVecLen =
3564 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3565 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3567 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3568 uint64_t SplatValue = 0;
3569 // Construct the amalgamated value at this larger vector type.
3570 for (const auto &OpIdx : enumerate(Op->op_values())) {
3571 const auto &SeqV = OpIdx.value();
3572 if (!SeqV.isUndef())
3573 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3574 << (OpIdx.index() * EltBitSize));
3575 }
3577 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3578 // achieve better constant materialization.
3579 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3580 SplatValue = SignExtend64<32>(SplatValue);
3582 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3583 DAG.getUNDEF(ViaVecVT),
3584 DAG.getConstant(SplatValue, DL, XLenVT),
3585 DAG.getConstant(0, DL, XLenVT));
3586 if (ViaVecLen != 1)
3587 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3588 MVT::getVectorVT(ViaIntVT, 1), Vec,
3589 DAG.getConstant(0, DL, XLenVT));
3590 return DAG.getBitcast(VT, Vec);
3591 }
3594 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3595 // when re-interpreted as a vector with a larger element type. For example,
3596 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3597 // could be instead splat as
3598 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3599 // TODO: This optimization could also work on non-constant splats, but it
3600 // would require bit-manipulation instructions to construct the splat value.
3601 SmallVector<SDValue> Sequence;
3602 const auto *BV = cast<BuildVectorSDNode>(Op);
3603 if (VT.isInteger() && EltBitSize < 64 &&
3604 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3605 BV->getRepeatedSequence(Sequence) &&
3606 (Sequence.size() * EltBitSize) <= 64) {
3607 unsigned SeqLen = Sequence.size();
3608 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3609 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3610 ViaIntVT == MVT::i64) &&
3611 "Unexpected sequence type");
3613 // If we can use the original VL with the modified element type, this
3614 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3615 // be moved into InsertVSETVLI?
3616 const unsigned RequiredVL = NumElts / SeqLen;
3617 const unsigned ViaVecLen =
3618 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3619 NumElts : RequiredVL;
3620 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3622 unsigned EltIdx = 0;
3623 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3624 uint64_t SplatValue = 0;
3625 // Construct the amalgamated value which can be splatted as this larger
3626 // vector type.
3627 for (const auto &SeqV : Sequence) {
3628 if (!SeqV.isUndef())
3629 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3630 << (EltIdx * EltBitSize));
3631 EltIdx++;
3632 }
3634 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3635 // achieve better constant materialization.
3636 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3637 SplatValue = SignExtend64<32>(SplatValue);
3639 // Since we can't introduce illegal i64 types at this stage, we can only
3640 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3641 // way we can use RVV instructions to splat.
3642 assert((ViaIntVT.bitsLE(XLenVT) ||
3643 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3644 "Unexpected bitcast sequence");
3645 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3646 SDValue ViaVL =
3647 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3648 MVT ViaContainerVT =
3649 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3650 SDValue Splat =
3651 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3652 DAG.getUNDEF(ViaContainerVT),
3653 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3654 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3655 if (ViaVecLen != RequiredVL)
3656 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3657 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3658 DAG.getConstant(0, DL, XLenVT));
3659 return DAG.getBitcast(VT, Splat);
3660 }
3661 }
3663 // If the number of signbits allows, see if we can lower as a <N x i8>.
3664 // Our main goal here is to reduce LMUL (and thus work) required to
3665 // build the constant, but we will also narrow if the resulting
3666 // narrow vector is known to materialize cheaply.
3667 // TODO: We really should be costing the smaller vector. There are
3668 // profitable cases this misses.
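// For example, a v4i32 constant such as <3, -1, 0, -8> has at least 25 sign
// bits in every element, so it can be built as a v4i8 constant and
// sign-extended back to v4i32.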
3669 if (EltBitSize > 8 && VT.isInteger() &&
3670 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3671 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3672 if (EltBitSize - SignBits < 8) {
3673 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3674 DL, Op->ops());
3675 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3676 Source, DAG, Subtarget);
3677 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3678 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3679 }
3680 }
3682 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3683 return Res;
3685 // For constant vectors, use generic constant pool lowering. Otherwise,
3686 // we'd have to materialize constants in GPRs just to move them into the
3687 // vector.
3688 return SDValue();
3689 }
3691 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3692 const RISCVSubtarget &Subtarget) {
3693 MVT VT = Op.getSimpleValueType();
3694 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3696 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3697 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3698 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3700 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3702 SDLoc DL(Op);
3703 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3705 MVT XLenVT = Subtarget.getXLenVT();
3707 if (VT.getVectorElementType() == MVT::i1) {
3708 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3709 // vector type, we have a legal equivalently-sized i8 type, so we can use
3710 // that.
3711 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3712 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3714 SDValue WideVec;
3715 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3716 // For a splat, perform a scalar truncate before creating the wider
3717 // vector.
3718 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3719 DAG.getConstant(1, DL, Splat.getValueType()));
3720 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3721 } else {
3722 SmallVector<SDValue, 8> Ops(Op->op_values());
3723 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3724 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3725 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3726 }
3728 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3729 }
3731 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3732 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3733 return Gather;
3734 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3735 : RISCVISD::VMV_V_X_VL;
3736 if (!VT.isFloatingPoint())
3737 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3738 Splat =
3739 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3740 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3741 }
3743 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3744 return Res;
3746 // Cap the cost at a value linear in the number of elements in the vector.
3747 // The default lowering is to use the stack: the vector store + scalar loads
3748 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
3749 // being (at least) linear in LMUL. As a result, using the slide-based
3750 // lowering for every element ends up costing VL*LMUL.
3751 // TODO: Should we be directly costing the stack alternative? Doing so might
3752 // give us a more accurate upper bound.
3753 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3755 // TODO: unify with TTI getSlideCost.
3756 InstructionCost PerSlideCost = 1;
3757 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3758 default: break;
3759 case RISCVII::VLMUL::LMUL_2:
3760 PerSlideCost = 2;
3761 break;
3762 case RISCVII::VLMUL::LMUL_4:
3763 PerSlideCost = 4;
3764 break;
3765 case RISCVII::VLMUL::LMUL_8:
3766 PerSlideCost = 8;
3767 break;
3768 }
3770 // TODO: Should we be using the build instseq then cost + evaluate scheme
3771 // we use for integer constants here?
3772 unsigned UndefCount = 0;
3773 for (const SDValue &V : Op->ops()) {
3774 if (V.isUndef()) {
3775 UndefCount++;
3776 continue;
3777 }
3778 if (UndefCount) {
3779 LinearBudget -= PerSlideCost;
3780 UndefCount = 0;
3781 }
3782 LinearBudget -= PerSlideCost;
3783 }
3784 if (UndefCount) {
3785 LinearBudget -= PerSlideCost;
3786 }
3788 if (LinearBudget < 0)
3789 return SDValue();
3791 assert((!VT.isFloatingPoint() ||
3792 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3793 "Illegal type which will result in reserved encoding");
3795 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3797 SDValue Vec = DAG.getUNDEF(ContainerVT);
3798 UndefCount = 0;
3799 for (SDValue V : Op->ops()) {
3800 if (V.isUndef()) {
3801 UndefCount++;
3802 continue;
3803 }
3804 if (UndefCount) {
3805 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3806 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3807 Vec, Offset, Mask, VL, Policy);
3808 UndefCount = 0;
3809 }
3810 auto OpCode =
3811 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3812 if (!VT.isFloatingPoint())
3813 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
3814 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3815 V, Mask, VL);
3816 }
3817 if (UndefCount) {
3818 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3819 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3820 Vec, Offset, Mask, VL, Policy);
3821 }
3822 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3823 }
3825 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3826 SDValue Lo, SDValue Hi, SDValue VL,
3827 SelectionDAG &DAG) {
3828 if (!Passthru)
3829 Passthru = DAG.getUNDEF(VT);
3830 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3831 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3832 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3833 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3834 // node in order to try and match RVV vector/scalar instructions.
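// For example, Lo == Hi == 0xFFFFFFFF (an i64 splat of -1) takes this path:
// vmv.v.x sign-extends the 32-bit scalar -1 into each 64-bit element.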
3835 if ((LoC >> 31) == HiC)
3836 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3838 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
3839 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
3840 // vlmax vsetvli or vsetivli to change the VL.
3841 // FIXME: Support larger constants?
3842 // FIXME: Support non-constant VLs by saturating?
3843 if (LoC == HiC) {
3844 SDValue NewVL;
3845 if (isAllOnesConstant(VL) ||
3846 (isa<RegisterSDNode>(VL) &&
3847 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
3848 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
3849 else if (isa<ConstantSDNode>(VL) &&
3850 isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
3851 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
3853 if (NewVL) {
3854 MVT InterVT =
3855 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3856 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
3857 DAG.getUNDEF(InterVT), Lo,
3858 DAG.getRegister(RISCV::X0, MVT::i32));
3859 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3860 }
3861 }
3862 }
3864 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3865 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3866 isa<ConstantSDNode>(Hi.getOperand(1)) &&
3867 Hi.getConstantOperandVal(1) == 31)
3868 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3870 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
3871 // even if it might be sign extended.
3872 if (Hi.isUndef())
3873 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3875 // Fall back to a stack store and stride x0 vector load.
3876 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3877 Hi, VL);
3878 }
3880 // Called by type legalization to handle splat of i64 on RV32.
3881 // FIXME: We can optimize this when the type has sign or zero bits in one
3882 // of the halves.
3883 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3884 SDValue Scalar, SDValue VL,
3885 SelectionDAG &DAG) {
3886 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3887 SDValue Lo, Hi;
3888 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3889 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3890 }
3892 // This function lowers a splat of a scalar operand Splat with the vector
3893 // length VL. It ensures the final sequence is type legal, which is useful when
3894 // lowering a splat after type legalization.
3895 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3896 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3897 const RISCVSubtarget &Subtarget) {
3898 bool HasPassthru = Passthru && !Passthru.isUndef();
3899 if (!HasPassthru && !Passthru)
3900 Passthru = DAG.getUNDEF(VT);
3901 if (VT.isFloatingPoint())
3902 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3904 MVT XLenVT = Subtarget.getXLenVT();
3906 // Simplest case is that the operand needs to be promoted to XLenVT.
3907 if (Scalar.getValueType().bitsLE(XLenVT)) {
3908 // If the operand is a constant, sign extend to increase our chances
3909 // of being able to use a .vi instruction. ANY_EXTEND would become a
3910 // zero extend and the simm5 check in isel would fail.
3911 // FIXME: Should we ignore the upper bits in isel instead?
3912 unsigned ExtOpc =
3913 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3914 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3915 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3916 }
3918 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3919 "Unexpected scalar for splat lowering!");
3921 if (isOneConstant(VL) && isNullConstant(Scalar))
3922 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3923 DAG.getConstant(0, DL, XLenVT), VL);
3925 // Otherwise use the more complicated splatting algorithm.
3926 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3927 }
3929 static MVT getLMUL1VT(MVT VT) {
3930 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3931 "Unexpected vector MVT");
3932 return MVT::getScalableVectorVT(
3933 VT.getVectorElementType(),
3934 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3935 }
3937 // This function lowers an insert of a scalar operand Scalar into lane
3938 // 0 of the vector regardless of the value of VL. The contents of the
3939 // remaining lanes of the result vector are unspecified. VL is assumed
3940 // to be non-zero.
3941 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3942 const SDLoc &DL, SelectionDAG &DAG,
3943 const RISCVSubtarget &Subtarget) {
3944 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
3946 const MVT XLenVT = Subtarget.getXLenVT();
3947 SDValue Passthru = DAG.getUNDEF(VT);
3949 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3950 isNullConstant(Scalar.getOperand(1))) {
3951 SDValue ExtractedVal = Scalar.getOperand(0);
3952 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
3953 MVT ExtractedContainerVT = ExtractedVT;
3954 if (ExtractedContainerVT.isFixedLengthVector()) {
3955 ExtractedContainerVT = getContainerForFixedLengthVector(
3956 DAG, ExtractedContainerVT, Subtarget);
3957 ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
3958 DAG, Subtarget);
3959 }
3960 if (ExtractedContainerVT.bitsLE(VT))
3961 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
3962 DAG.getConstant(0, DL, XLenVT));
3963 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
3964 DAG.getConstant(0, DL, XLenVT));
3965 }
3968 if (VT.isFloatingPoint())
3969 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
3970 DAG.getUNDEF(VT), Scalar, VL);
3972 // Avoid the tricky legalization cases by falling back to using the
3973 // splat code which already handles it gracefully.
3974 if (!Scalar.getValueType().bitsLE(XLenVT))
3975 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3976 DAG.getConstant(1, DL, XLenVT),
3977 VT, DL, DAG, Subtarget);
3979 // If the operand is a constant, sign extend to increase our chances
3980 // of being able to use a .vi instruction. ANY_EXTEND would become a
3981 // zero extend and the simm5 check in isel would fail.
3982 // FIXME: Should we ignore the upper bits in isel instead?
3983 unsigned ExtOpc =
3984 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3985 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3986 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
3987 DAG.getUNDEF(VT), Scalar, VL);
3988 }
3990 // Is this a shuffle that extracts either the even or the odd elements of a vector?
3991 // That is, specifically, either (a) or (b) below.
3992 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
3993 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
3994 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3995 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3996 // Returns {Src Vector, Even Elements} on success.
3997 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3998 SDValue V2, ArrayRef<int> Mask,
3999 const RISCVSubtarget &Subtarget) {
4000 // Need to be able to widen the vector.
4001 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4002 return false;
4004 // Both inputs must be extracts.
4005 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4006 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4007 return false;
4009 // Extracting from the same source.
4010 SDValue Src = V1.getOperand(0);
4011 if (Src != V2.getOperand(0))
4012 return false;
4014 // Src needs to have twice the number of elements.
4015 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4016 return false;
4018 // The extracts must extract the two halves of the source.
4019 if (V1.getConstantOperandVal(1) != 0 ||
4020 V2.getConstantOperandVal(1) != Mask.size())
4021 return false;
4023 // First index must be the first even or odd element from V1.
4024 if (Mask[0] != 0 && Mask[0] != 1)
4025 return false;
4027 // The others must increase by 2 each time.
4028 // TODO: Support undef elements?
4029 for (unsigned i = 1; i != Mask.size(); ++i)
4030 if (Mask[i] != Mask[i - 1] + 2)
4031 return false;
4033 return true;
4034 }
4036 /// Is this shuffle interleaving contiguous elements from one vector into the
4037 /// even elements and contiguous elements from another vector into the odd
4038 /// elements. \p EvenSrc will contain the element that should be in the first
4039 /// even element. \p OddSrc will contain the element that should be in the first
4040 /// odd element. These can be the first element in a source or the element half
4041 /// way through the source.
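/// For example, with two v8i8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the two low halves, giving EvenSrc == 0 and OddSrc == 8.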
4042 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4043 int &OddSrc, const RISCVSubtarget &Subtarget) {
4044 // We need to be able to widen elements to the next larger integer type.
4045 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4046 return false;
4048 int Size = Mask.size();
4049 int NumElts = VT.getVectorNumElements();
4050 assert(Size == (int)NumElts && "Unexpected mask size");
4052 SmallVector<unsigned, 2> StartIndexes;
4053 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4054 return false;
4056 EvenSrc = StartIndexes[0];
4057 OddSrc = StartIndexes[1];
4059 // One source should be low half of first vector.
4060 if (EvenSrc != 0 && OddSrc != 0)
4061 return false;
4063 // Subvectors will be extracted from either the start of the two input
4064 // vectors, or from the start and middle of the first vector if it's a unary
4065 // interleave.
4066 // In both cases, HalfNumElts will be extracted.
4067 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4068 // we'll create an illegal extract_subvector.
4069 // FIXME: We could support other values using a slidedown first.
4070 int HalfNumElts = NumElts / 2;
4071 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4072 }
4074 /// Match shuffles that concatenate two vectors, rotate the concatenation,
4075 /// and then extract the original number of elements from the rotated result.
4076 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4077 /// returned rotation amount is for a rotate right, where elements move from
4078 /// higher elements to lower elements. \p LoSrc indicates the first source
4079 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4080 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4081 /// 0 or 1 if a rotation is found.
4083 /// NOTE: We talk about rotate to the right which matches how bit shift and
4084 /// rotate instructions are described where LSBs are on the right, but LLVM IR
4085 /// and the table below write vectors with the lowest elements on the left.
4086 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4087 int Size = Mask.size();
4089 // We need to detect various ways of spelling a rotation:
4090 // [11, 12, 13, 14, 15, 0, 1, 2]
4091 // [-1, 12, 13, 14, -1, -1, 1, -1]
4092 // [-1, -1, -1, -1, -1, -1, 1, 2]
4093 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4094 // [-1, 4, 5, 6, -1, -1, 9, -1]
4095 // [-1, 4, 5, 6, -1, -1, -1, -1]
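// For Size == 8, all six of these masks are recognized as a rotation of 3;
// they differ only in which source supplies each piece and in which lanes
// are undef.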
4096 int Rotation = 0;
4097 LoSrc = -1;
4098 HiSrc = -1;
4099 for (int i = 0; i != Size; ++i) {
4100 int M = Mask[i];
4101 if (M < 0)
4102 continue;
4104 // Determine where a rotate vector would have started.
4105 int StartIdx = i - (M % Size);
4106 // The identity rotation isn't interesting, stop.
4107 if (StartIdx == 0)
4108 return -1;
4110 // If we found the tail of a vector the rotation must be the missing
4111 // front. If we found the head of a vector, it must be how much of the
4112 // head.
4113 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4115 if (Rotation == 0)
4116 Rotation = CandidateRotation;
4117 else if (Rotation != CandidateRotation)
4118 // The rotations don't match, so we can't match this mask.
4119 return -1;
4121 // Compute which value this mask is pointing at.
4122 int MaskSrc = M < Size ? 0 : 1;
4124 // Compute which of the two target values this index should be assigned to.
4125 // This reflects whether the high elements are remaining or the low elements
4126 // are remaining.
4127 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4129 // Either set up this value if we've not encountered it before, or check
4130 // that it remains consistent.
4131 if (TargetSrc < 0)
4132 TargetSrc = MaskSrc;
4133 else if (TargetSrc != MaskSrc)
4134 // This may be a rotation, but it pulls from the inputs in some
4135 // unsupported interleaving.
4136 return -1;
4137 }
4139 // Check that we successfully analyzed the mask, and normalize the results.
4140 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4141 assert((LoSrc >= 0 || HiSrc >= 0) &&
4142 "Failed to find a rotated input vector!");
4144 return Rotation;
4145 }
4147 // Lower a deinterleave shuffle to vnsrl.
4148 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4149 // -> [p, q, r, s] (EvenElts == false)
4150 // VT is the type of the vector to return, <[vscale x ]n x ty>
4151 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4152 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4153 bool EvenElts,
4154 const RISCVSubtarget &Subtarget,
4155 SelectionDAG &DAG) {
4156 // The result is a vector of type <m x n x ty>
4157 MVT ContainerVT = VT;
4158 // Convert fixed vectors to scalable if needed
4159 if (ContainerVT.isFixedLengthVector()) {
4160 assert(Src.getSimpleValueType().isFixedLengthVector());
4161 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4163 // The source is a vector of type <m x n*2 x ty>
4164 MVT SrcContainerVT =
4165 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4166 ContainerVT.getVectorElementCount() * 2);
4167 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4168 }
4170 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4172 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4173 // This also converts FP to int.
4174 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4175 MVT WideSrcContainerVT = MVT::getVectorVT(
4176 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4177 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4179 // The integer version of the container type.
4180 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4182 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4183 // the original element size.
4184 unsigned Shift = EvenElts ? 0 : EltBits;
4185 SDValue SplatShift = DAG.getNode(
4186 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4187 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4188 SDValue Res =
4189 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4190 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4191 // Cast back to FP if needed.
4192 Res = DAG.getBitcast(ContainerVT, Res);
4194 if (VT.isFixedLengthVector())
4195 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4196 return Res;
4197 }
4199 // Lower the following shuffle to vslidedown.
4200 // a)
4201 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
4202 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
4203 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4204 // b)
4205 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
4206 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
4207 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
4208 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
4209 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
4210 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4211 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4212 SDValue V1, SDValue V2,
4213 ArrayRef<int> Mask,
4214 const RISCVSubtarget &Subtarget,
4215 SelectionDAG &DAG) {
4216 auto findNonEXTRACT_SUBVECTORParent =
4217 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4218 uint64_t Offset = 0;
4219 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4220 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4221 // a scalable vector. But we don't want to match that case.
4222 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4223 Offset += Parent.getConstantOperandVal(1);
4224 Parent = Parent.getOperand(0);
4226 return std::make_pair(Parent, Offset);
4227 };
4229 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4230 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4232 // Extracting from the same source.
4233 SDValue Src = V1Src;
4234 if (Src != V2Src)
4235 return SDValue();
4237 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4238 SmallVector<int, 16> NewMask(Mask);
4239 for (size_t i = 0; i != NewMask.size(); ++i) {
4240 if (NewMask[i] == -1)
4241 continue;
4243 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4244 NewMask[i] = NewMask[i] + V1IndexOffset;
4245 } else {
4246 // Subtracting NewMask.size() is needed. Otherwise, case b) above would be
4247 // <5,6,7,12> instead of <5,6,7,8>.
4248 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4249 }
4250 }
4252 // First index must be known and non-zero. It will be used as the slidedown
4253 // amount.
4254 if (NewMask[0] <= 0)
4255 return SDValue();
4257 // NewMask must also be contiguous.
4258 for (unsigned i = 1; i != NewMask.size(); ++i)
4259 if (NewMask[i - 1] + 1 != NewMask[i])
4260 return SDValue();
4262 MVT XLenVT = Subtarget.getXLenVT();
4263 MVT SrcVT = Src.getSimpleValueType();
4264 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4265 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4266 SDValue Slidedown =
4267 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4268 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4269 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4270 return DAG.getNode(
4271 ISD::EXTRACT_SUBVECTOR, DL, VT,
4272 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4273 DAG.getConstant(0, DL, XLenVT));
4274 }
4276 // Because vslideup leaves the destination elements at the start intact, we can
4277 // use it to perform shuffles that insert subvectors:
4279 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4280 // ->
4281 // vsetvli zero, 8, e8, mf2, ta, ma
4282 // vslideup.vi v8, v9, 4
4284 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4285 // ->
4286 // vsetvli zero, 5, e8, mf2, tu, ma
4287 // vslideup.vi v8, v9, 2
4288 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4289 SDValue V1, SDValue V2,
4290 ArrayRef<int> Mask,
4291 const RISCVSubtarget &Subtarget,
4292 SelectionDAG &DAG) {
4293 unsigned NumElts = VT.getVectorNumElements();
4294 int NumSubElts, Index;
4295 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4296 Index))
4297 return SDValue();
4299 bool OpsSwapped = Mask[Index] < (int)NumElts;
4300 SDValue InPlace = OpsSwapped ? V2 : V1;
4301 SDValue ToInsert = OpsSwapped ? V1 : V2;
4303 MVT XLenVT = Subtarget.getXLenVT();
4304 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4305 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4306 // We slide up by the index that the subvector is being inserted at, and set
4307 // VL to the index + the number of elements being inserted.
4308 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
4309 // If we're adding a suffix to the in-place vector, i.e. inserting right
4310 // up to the very end of it, then we don't actually care about the tail.
4311 if (NumSubElts + Index >= (int)NumElts)
4312 Policy |= RISCVII::TAIL_AGNOSTIC;
4314 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4315 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4316 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4318 SDValue Res;
4319 // If we're inserting into the lowest elements, use a tail undisturbed
4320 // vmv.v.v.
4321 if (Index == 0)
4322 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4323 VL);
4324 else
4325 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4326 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4327 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4328 }
4330 /// Match v(f)slide1up/down idioms. These operations involve sliding
4331 /// N-1 elements to make room for an inserted scalar at one end.
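/// For example, for v4i8 the mask <0, 4, 5, 6> applied to (splat of X, V2) is
/// a vslide1up of V2 inserting X at the front, and <5, 6, 7, 0> is a
/// vslide1down of V2 inserting X at the back.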
4332 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4333 SDValue V1, SDValue V2,
4334 ArrayRef<int> Mask,
4335 const RISCVSubtarget &Subtarget,
4336 SelectionDAG &DAG) {
4337 bool OpsSwapped = false;
4338 if (!isa<BuildVectorSDNode>(V1)) {
4339 if (!isa<BuildVectorSDNode>(V2))
4340 return SDValue();
4341 std::swap(V1, V2);
4342 OpsSwapped = true;
4343 }
4344 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4345 if (!Splat)
4346 return SDValue();
4348 // Return true if the mask could describe a slide of Mask.size() - 1
4349 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4350 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4351 const unsigned S = (Offset > 0) ? 0 : -Offset;
4352 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4353 for (unsigned i = S; i != E; ++i)
4354 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4355 return false;
4356 return true;
4357 };
4359 const unsigned NumElts = VT.getVectorNumElements();
4360 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4361 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4362 return SDValue();
4364 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4365 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4366 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4367 return SDValue();
4369 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4370 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4371 auto OpCode = IsVSlidedown ?
4372 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4373 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4374 if (!VT.isFloatingPoint())
4375 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4376 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4377 DAG.getUNDEF(ContainerVT),
4378 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4379 Splat, TrueMask, VL);
4380 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4381 }
4383 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4384 // to create an interleaved vector of <[vscale x] n*2 x ty>.
4385 // This requires that the size of ty is less than the subtarget's maximum ELEN.
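// For example, with i8 elements, interleaving even value a with odd value b
// yields the i16 value a + 256*b: vwaddu.vv computes a + b and vwmaccu.vx
// then adds b * 255 (a + b + 255*b == a + (b << 8)).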
4386 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4387 const SDLoc &DL, SelectionDAG &DAG,
4388 const RISCVSubtarget &Subtarget) {
4389 MVT VecVT = EvenV.getSimpleValueType();
4390 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4391 // Convert fixed vectors to scalable if needed
4392 if (VecContainerVT.isFixedLengthVector()) {
4393 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4394 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4395 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4396 }
4398 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4400 // We're working with a vector of the same size as the resulting
4401 // interleaved vector, but with half the number of elements and
4402 // twice the SEW (Hence the restriction on not using the maximum
4403 // ELEN)
4404 MVT WideVT =
4405 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4406 VecVT.getVectorElementCount());
4407 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4408 if (WideContainerVT.isFixedLengthVector())
4409 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4411 // Bitcast the input vectors to integers in case they are FP
4412 VecContainerVT = VecContainerVT.changeTypeToInteger();
4413 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4414 OddV = DAG.getBitcast(VecContainerVT, OddV);
4416 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4417 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4419 SDValue Interleaved;
4420 if (Subtarget.hasStdExtZvbb()) {
4421 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4422 SDValue OffsetVec =
4423 DAG.getSplatVector(VecContainerVT, DL,
4424 DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
4425 Subtarget.getXLenVT()));
4426 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4427 OffsetVec, Passthru, Mask, VL);
4428 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4429 Interleaved, EvenV, Passthru, Mask, VL);
4430 } else {
4431 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4432 // vwaddu.vv
4433 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4434 OddV, Passthru, Mask, VL);
4436 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
4437 SDValue AllOnesVec = DAG.getSplatVector(
4438 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4439 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4440 OddV, AllOnesVec, Passthru, Mask, VL);
4442 // Add the two together so we get
4443 // (OddV * 0xff...ff) + (OddV + EvenV)
4444 // = (OddV * 0x100...00) + EvenV
4445 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4446 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4447 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4448 Interleaved, OddsMul, Passthru, Mask, VL);
4451 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4452 MVT ResultContainerVT = MVT::getVectorVT(
4453 VecVT.getVectorElementType(), // Make sure to use original type
4454 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4455 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4457 // Convert back to a fixed vector if needed
4458 MVT ResultVT =
4459 MVT::getVectorVT(VecVT.getVectorElementType(),
4460 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4461 if (ResultVT.isFixedLengthVector())
4462 Interleaved =
4463 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4465 return Interleaved;
4466 }
4468 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
4469 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4470 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4471 SelectionDAG &DAG,
4472 const RISCVSubtarget &Subtarget) {
4473 SDLoc DL(SVN);
4474 MVT VT = SVN->getSimpleValueType(0);
4475 SDValue V = SVN->getOperand(0);
4476 unsigned NumElts = VT.getVectorNumElements();
4478 assert(VT.getVectorElementType() == MVT::i1);
4480 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4481 SVN->getMask().size()) ||
4482 !SVN->getOperand(1).isUndef())
4483 return SDValue();
4485 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4486 EVT ViaVT = EVT::getVectorVT(
4487 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4488 EVT ViaBitVT =
4489 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4491 // If we don't have zvbb or the larger element type > ELEN, the operation will
4492 // be illegal.
4493 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4494 ViaVT) ||
4495 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4496 return SDValue();
4498 // If the bit vector doesn't fit exactly into the larger element type, we need
4499 // to insert it into the larger vector and then shift the reversed bits down
4500 // afterwards to get rid of the gap introduced.
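// Illustrative example: a v4i1 reverse is done with a v1i8 BITREVERSE; the
// four reversed bits end up in bits 7..4, so a logical shift right by
// 8 - 4 = 4 moves them back to bits 3..0 before the subvector extract.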
4501 if (ViaEltSize > NumElts)
4502 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4503 V, DAG.getVectorIdxConstant(0, DL));
4505 SDValue Res =
4506 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4508 // Shift the reversed bits down (logical shift right) if the vector didn't
4509 // exactly fit into the larger element type.
4510 if (ViaEltSize > NumElts)
4511 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4512 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4514 Res = DAG.getBitcast(ViaBitVT, Res);
4516 if (ViaEltSize > NumElts)
4517 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4518 DAG.getVectorIdxConstant(0, DL));
4519 return Res;
4520 }
4522 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4523 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4524 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
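// Illustrative example: the v8i8 mask <1, 0, 3, 2, 5, 4, 7, 6> swaps adjacent
// bytes, i.e. a v4i16 rotate by 8, which is canonicalized below to a BSWAP so
// it can be selected as vrev8.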
4525 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4526 SelectionDAG &DAG,
4527 const RISCVSubtarget &Subtarget) {
4528 SDLoc DL(SVN);
4530 EVT VT = SVN->getValueType(0);
4531 unsigned NumElts = VT.getVectorNumElements();
4532 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4533 unsigned NumSubElts, RotateAmt;
4534 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4535 NumElts, NumSubElts, RotateAmt))
4536 return SDValue();
4537 MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4538 NumElts / NumSubElts);
4540 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4541 if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
4542 return SDValue();
4544 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4546 SDValue Rotate;
4547 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4548 // so canonicalize to vrev8.
4549 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4550 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4551 else
4552 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4553 DAG.getConstant(RotateAmt, DL, RotateVT));
4555 return DAG.getBitcast(VT, Rotate);
4556 }
4558 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4559 const RISCVSubtarget &Subtarget) {
4560 SDValue V1 = Op.getOperand(0);
4561 SDValue V2 = Op.getOperand(1);
4562 SDLoc DL(Op);
4563 MVT XLenVT = Subtarget.getXLenVT();
4564 MVT VT = Op.getSimpleValueType();
4565 unsigned NumElts = VT.getVectorNumElements();
4566 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4568 if (VT.getVectorElementType() == MVT::i1) {
4569 // Lower to a vror.vi of a larger element type if possible before we promote
4570 // i1s to i8s.
4571 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4572 return V;
4573 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4574 return V;
4576 // Promote i1 shuffle to i8 shuffle.
4577 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4578 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4579 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4580 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4581 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4582 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4583 ISD::SETNE);
4586 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4588 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4590 if (SVN->isSplat()) {
4591 const int Lane = SVN->getSplatIndex();
4592 if (Lane >= 0) {
4593 MVT SVT = VT.getVectorElementType();
4595 // Turn splatted vector load into a strided load with an X0 stride.
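// Illustrative example: splatting lane 2 of a loaded v4i32 re-points the
// address at base+8 and loads just that element; for SEW=64 on RV32 the splat
// is instead expressed as a vlse with rs2=x0 so every lane reads that address.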
4596 SDValue V = V1;
4597 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4598 // with undef.
4599 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4600 int Offset = Lane;
4601 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4602 int OpElements =
4603 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4604 V = V.getOperand(Offset / OpElements);
4605 Offset %= OpElements;
4608 // We need to ensure the load isn't atomic or volatile.
4609 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4610 auto *Ld = cast<LoadSDNode>(V);
4611 Offset *= SVT.getStoreSize();
4612 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
4613 TypeSize::Fixed(Offset), DL);
4615 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4616 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4617 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4618 SDValue IntID =
4619 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4620 SDValue Ops[] = {Ld->getChain(),
4621 IntID,
4622 DAG.getUNDEF(ContainerVT),
4623 NewAddr,
4624 DAG.getRegister(RISCV::X0, XLenVT),
4625 VL};
4626 SDValue NewLoad = DAG.getMemIntrinsicNode(
4627 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4628 DAG.getMachineFunction().getMachineMemOperand(
4629 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4630 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4631 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4634 // Otherwise use a scalar load and splat. This will give the best
4635 // opportunity to fold a splat into the operation. ISel can turn it into
4636 // the x0 strided load if we aren't able to fold away the select.
4637 if (SVT.isFloatingPoint())
4638 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4639 Ld->getPointerInfo().getWithOffset(Offset),
4640 Ld->getOriginalAlign(),
4641 Ld->getMemOperand()->getFlags());
4642 else
4643 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4644 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4645 Ld->getOriginalAlign(),
4646 Ld->getMemOperand()->getFlags());
4647 DAG.makeEquivalentMemoryOrdering(Ld, V);
4649 unsigned Opc =
4650 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4651 SDValue Splat =
4652 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4653 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4656 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4657 assert(Lane < (int)NumElts && "Unexpected lane!");
4658 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4659 V1, DAG.getConstant(Lane, DL, XLenVT),
4660 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4661 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4665 ArrayRef<int> Mask = SVN->getMask();
4667 if (SDValue V =
4668 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4669 return V;
4671 if (SDValue V =
4672 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4673 return V;
4675 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
4676 // available.
4677 if (Subtarget.hasStdExtZvkb())
4678 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4679 return V;
4681 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4682 // be undef which can be handled with a single SLIDEDOWN/UP.
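// Illustrative example: for the single-source mask <2, 3, 0, 1> (NumElts = 4,
// Rotation = 2), HiV is slid down by 2 to fill lanes 0-1 and LoV is slid up
// by NumElts - Rotation = 2 to fill lanes 2-3.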
4683 int LoSrc, HiSrc;
4684 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4685 if (Rotation > 0) {
4686 SDValue LoV, HiV;
4687 if (LoSrc >= 0) {
4688 LoV = LoSrc == 0 ? V1 : V2;
4689 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4691 if (HiSrc >= 0) {
4692 HiV = HiSrc == 0 ? V1 : V2;
4693 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4696 // We found a rotation. We need to slide HiV down by Rotation. Then we need
4697 // to slide LoV up by (NumElts - Rotation).
4698 unsigned InvRotate = NumElts - Rotation;
4700 SDValue Res = DAG.getUNDEF(ContainerVT);
4701 if (HiV) {
4702 // Even though we could use a smaller VL, we don't, so as to avoid a
4703 // vsetivli toggle.
4704 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4705 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4707 if (LoV)
4708 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4709 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4710 RISCVII::TAIL_AGNOSTIC);
4712 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4715 // If this is a deinterleave and we can widen the vector, then we can use
4716 // vnsrl to deinterleave.
4717 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4718 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4719 Subtarget, DAG);
4722 if (SDValue V =
4723 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4724 return V;
4726 // Detect an interleave shuffle and lower to
4727 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
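// e.g. with two v4i8 sources the mask <0, 4, 1, 5> is such an interleave:
// EvenV is the low half of V1 and OddV is the low half of V2.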
4728 int EvenSrc, OddSrc;
4729 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4730 // Extract the halves of the vectors.
4731 MVT HalfVT = VT.getHalfNumVectorElementsVT();
4733 int Size = Mask.size();
4734 SDValue EvenV, OddV;
4735 assert(EvenSrc >= 0 && "Undef source?");
4736 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4737 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4738 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4740 assert(OddSrc >= 0 && "Undef source?");
4741 OddV = (OddSrc / Size) == 0 ? V1 : V2;
4742 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4743 DAG.getConstant(OddSrc % Size, DL, XLenVT));
4745 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4748 // Detect shuffles which can be re-expressed as vector selects; these are
4749 // shuffles in which each element in the destination is taken from an element
4750 // at the corresponding index in either source vectors.
4751 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4752 int MaskIndex = MaskIdx.value();
4753 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4756 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4758 SmallVector<SDValue> MaskVals;
4759 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4760 // merged with a second vrgather.
4761 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4763 // By default we preserve the original operand order, and use a mask to
4764 // select LHS as true and RHS as false. However, since RVV vector selects may
4765 // feature splats but only on the LHS, we may choose to invert our mask and
4766 // instead select between RHS and LHS.
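// Illustrative example: for NumElts = 4 the mask <0, 5, 2, 7> takes every
// lane from the same position in one of the sources, so (with no operand
// swap) it lowers to a vselect with mask <1, 0, 1, 0>, choosing V1 in lanes
// 0/2 and V2 in lanes 1/3.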
4767 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4768 bool InvertMask = IsSelect == SwapOps;
4770 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
4771 // half.
4772 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4774 // Now construct the mask that will be used by the vselect or blended
4775 // vrgather operation. For vrgathers, construct the appropriate indices into
4776 // each vector.
4777 for (int MaskIndex : Mask) {
4778 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
4779 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4780 if (!IsSelect) {
4781 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4782 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4783 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4784 : DAG.getUNDEF(XLenVT));
4785 GatherIndicesRHS.push_back(
4786 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4787 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4788 if (IsLHSOrUndefIndex && MaskIndex >= 0)
4789 ++LHSIndexCounts[MaskIndex];
4790 if (!IsLHSOrUndefIndex)
4791 ++RHSIndexCounts[MaskIndex - NumElts];
4795 if (SwapOps) {
4796 std::swap(V1, V2);
4797 std::swap(GatherIndicesLHS, GatherIndicesRHS);
4800 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4801 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4802 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4804 if (IsSelect)
4805 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4807 // We might be able to express the shuffle as a bitrotate. But even if we
4808 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
4809 // shifts and a vor will have a higher throughput than a vrgather.
4810 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4811 return V;
4813 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4814 // On such a large vector we're unable to use i8 as the index type.
4815 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4816 // may involve vector splitting if we're already at LMUL=8, or our
4817 // user-supplied maximum fixed-length LMUL.
4818 return SDValue();
4821 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
4822 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
4823 MVT IndexVT = VT.changeTypeToInteger();
4824 // Since we can't introduce illegal index types at this stage, use i16 and
4825 // vrgatherei16 if the corresponding index type for plain vrgather is greater
4826 // than XLenVT.
4827 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
4828 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4829 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4832 // If the mask allows, we can do all the index computation in 16 bits. This
4833 // requires less work and less register pressure at high LMUL, and creates
4834 // smaller constants which may be cheaper to materialize.
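// Illustrative example (assuming VLEN=128): a v32i32 shuffle would need a
// v32i32 index operand occupying an LMUL=8 register group, whereas v32i16
// indices fit in LMUL=4, halving the register pressure of the indices.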
4835 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
4836 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
4837 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4838 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4841 MVT IndexContainerVT =
4842 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
4844 SDValue Gather;
4845 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
4846 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
4847 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
4848 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
4849 Subtarget);
4850 } else {
4851 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4852 // If only one index is used, we can use a "splat" vrgather.
4853 // TODO: We can splat the most-common index and fix-up any stragglers, if
4854 // that's beneficial.
4855 if (LHSIndexCounts.size() == 1) {
4856 int SplatIndex = LHSIndexCounts.begin()->getFirst();
4857 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
4858 DAG.getConstant(SplatIndex, DL, XLenVT),
4859 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4860 } else {
4861 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
4862 LHSIndices =
4863 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
4865 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
4866 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4870 // If a second vector operand is used by this shuffle, blend it in with an
4871 // additional vrgather.
4872 if (!V2.isUndef()) {
4873 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4875 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4876 SelectMask =
4877 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
4879 // If only one index is used, we can use a "splat" vrgather.
4880 // TODO: We can splat the most-common index and fix-up any stragglers, if
4881 // that's beneficial.
4882 if (RHSIndexCounts.size() == 1) {
4883 int SplatIndex = RHSIndexCounts.begin()->getFirst();
4884 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
4885 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
4886 SelectMask, VL);
4887 } else {
4888 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
4889 RHSIndices =
4890 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
4891 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
4892 SelectMask, VL);
4896 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4897 }
4899 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
4900 // Support splats for any type. These should type legalize well.
4901 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
4902 return true;
4904 // Only support legal VTs for other shuffles for now.
4905 if (!isTypeLegal(VT))
4906 return false;
4908 MVT SVT = VT.getSimpleVT();
4910 // Not for i1 vectors.
4911 if (SVT.getScalarType() == MVT::i1)
4912 return false;
4914 int Dummy1, Dummy2;
4915 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
4916 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
4917 }
4919 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
4920 // the exponent.
4921 SDValue
4922 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
4923 SelectionDAG &DAG) const {
4924 MVT VT = Op.getSimpleValueType();
4925 unsigned EltSize = VT.getScalarSizeInBits();
4926 SDValue Src = Op.getOperand(0);
4927 SDLoc DL(Op);
4928 MVT ContainerVT = VT;
4930 SDValue Mask, VL;
4931 if (Op->isVPOpcode()) {
4932 Mask = Op.getOperand(1);
4933 if (VT.isFixedLengthVector())
4934 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
4935 Subtarget);
4936 VL = Op.getOperand(2);
4939 // We choose an FP type that can represent the value exactly when possible.
4940 // Otherwise we use a round-towards-zero conversion so the result still has the correct exponent.
4941 // TODO: Use f16 for i8 when possible?
4942 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
4943 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
4944 FloatEltVT = MVT::f32;
4945 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
4947 // Legal types should have been checked in the RISCVTargetLowering
4948 // constructor.
4949 // TODO: Splitting may make sense in some cases.
4950 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
4951 "Expected legal float type!");
4953 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
4954 // The trailing zero count is equal to log2 of this single bit value.
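// Illustrative example (i16 elements, via f32): for Src = 0x0070, X & -X
// isolates bit 4 (0x0010); 16.0f has a biased exponent of 127 + 4, so
// subtracting the bias below recovers cttz = 4.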
4955 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
4956 SDValue Neg = DAG.getNegative(Src, DL, VT);
4957 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
4958 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
4959 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
4960 Src, Mask, VL);
4961 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
4964 // We have a legal FP type, convert to it.
4965 SDValue FloatVal;
4966 if (FloatVT.bitsGT(VT)) {
4967 if (Op->isVPOpcode())
4968 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
4969 else
4970 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
4971 } else {
4972 // Use RTZ to avoid rounding influencing exponent of FloatVal.
4973 if (VT.isFixedLengthVector()) {
4974 ContainerVT = getContainerForFixedLengthVector(VT);
4975 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4977 if (!Op->isVPOpcode())
4978 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4979 SDValue RTZRM =
4980 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
4981 MVT ContainerFloatVT =
4982 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
4983 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
4984 Src, Mask, RTZRM, VL);
4985 if (VT.isFixedLengthVector())
4986 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
4988 // Bitcast to integer and shift the exponent to the LSB.
4989 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
4990 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
4991 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
4993 SDValue Exp;
4994 // Restore to the original type. The truncation after SRL lets it be selected as vnsrl.
4995 if (Op->isVPOpcode()) {
4996 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
4997 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
4998 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
4999 } else {
5000 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5001 DAG.getConstant(ShiftAmt, DL, IntVT));
5002 if (IntVT.bitsLT(VT))
5003 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5004 else if (IntVT.bitsGT(VT))
5005 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5008 // The exponent contains log2 of the value in biased form.
5009 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5010 // For trailing zeros, we just need to subtract the bias.
5011 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5012 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5013 DAG.getConstant(ExponentBias, DL, VT));
5014 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5015 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5016 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5018 // For leading zeros, we need to remove the bias and convert from log2 to
5019 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
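// Illustrative example (i16 elements, via f32): 0x0070 converts to 112.0f
// with biased exponent 127 + 6, and (127 + 15) - (127 + 6) = 9, which matches
// the 9 leading zeros of 0x0070 in 16 bits.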
5020 unsigned Adjust = ExponentBias + (EltSize - 1);
5021 SDValue Res;
5022 if (Op->isVPOpcode())
5023 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5024 Mask, VL);
5025 else
5026 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5028 // With a zero input the result above equals Adjust, which is greater than
5029 // EltSize, so we can clamp it with min(Res, EltSize) for CTLZ.
5030 if (Op.getOpcode() == ISD::CTLZ)
5031 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5032 else if (Op.getOpcode() == ISD::VP_CTLZ)
5033 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5034 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5035 return Res;
5036 }
5038 // While RVV has alignment restrictions, we should always be able to load as a
5039 // legal equivalently-sized byte-typed vector instead. This method is
5040 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5041 // the load is already correctly-aligned, it returns SDValue().
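// Illustrative example: an under-aligned load of <vscale x 4 x i32> is
// re-expressed as a <vscale x 16 x i8> load of the same size in bytes and the
// result is bitcast back, since byte loads carry no alignment requirement.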
5042 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5043 SelectionDAG &DAG) const {
5044 auto *Load = cast<LoadSDNode>(Op);
5045 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5047 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5048 Load->getMemoryVT(),
5049 *Load->getMemOperand()))
5050 return SDValue();
5052 SDLoc DL(Op);
5053 MVT VT = Op.getSimpleValueType();
5054 unsigned EltSizeBits = VT.getScalarSizeInBits();
5055 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5056 "Unexpected unaligned RVV load type");
5057 MVT NewVT =
5058 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5059 assert(NewVT.isValid() &&
5060 "Expecting equally-sized RVV vector types to be legal");
5061 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5062 Load->getPointerInfo(), Load->getOriginalAlign(),
5063 Load->getMemOperand()->getFlags());
5064 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5065 }
5067 // While RVV has alignment restrictions, we should always be able to store as a
5068 // legal equivalently-sized byte-typed vector instead. This method is
5069 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5070 // returns SDValue() if the store is already correctly aligned.
5071 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5072 SelectionDAG &DAG) const {
5073 auto *Store = cast<StoreSDNode>(Op);
5074 assert(Store && Store->getValue().getValueType().isVector() &&
5075 "Expected vector store");
5077 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5078 Store->getMemoryVT(),
5079 *Store->getMemOperand()))
5080 return SDValue();
5082 SDLoc DL(Op);
5083 SDValue StoredVal = Store->getValue();
5084 MVT VT = StoredVal.getSimpleValueType();
5085 unsigned EltSizeBits = VT.getScalarSizeInBits();
5086 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5087 "Unexpected unaligned RVV store type");
5088 MVT NewVT =
5089 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5090 assert(NewVT.isValid() &&
5091 "Expecting equally-sized RVV vector types to be legal");
5092 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5093 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5094 Store->getPointerInfo(), Store->getOriginalAlign(),
5095 Store->getMemOperand()->getFlags());
5096 }
5098 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5099 const RISCVSubtarget &Subtarget) {
5100 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5102 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5104 // All simm32 constants should be handled by isel.
5105 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5106 // this check redundant, but small immediates are common so this check
5107 // should have better compile time.
5108 if (isInt<32>(Imm))
5109 return Op;
5111 // We only need to cost the immediate, if constant pool lowering is enabled.
5112 if (!Subtarget.useConstantPoolForLargeInts())
5113 return Op;
5115 RISCVMatInt::InstSeq Seq =
5116 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
5117 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5118 return Op;
5120 // Optimizations below are disabled for opt size. If we're optimizing for
5121 // size, use a constant pool.
5122 if (DAG.shouldOptForSize())
5123 return SDValue();
5125 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5126 // do that if it will avoid a constant pool.
5127 // It will require an extra temporary register though.
5128 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5129 // the low and high 32 bits are the same and bits 31 and 63 are set.
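// Illustrative example: 0x0000100000001000 can be built as X = 0x1000
// followed by (ADD (SLLI X, 32), X), trading a constant-pool load for one
// extra temporary register.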
5130 unsigned ShiftAmt, AddOpc;
5131 RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq(
5132 Imm, Subtarget.getFeatureBits(), ShiftAmt, AddOpc);
5133 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5134 return Op;
5136 return SDValue();
5137 }
5139 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5140 const RISCVSubtarget &Subtarget) {
5141 SDLoc dl(Op);
5142 AtomicOrdering FenceOrdering =
5143 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5144 SyncScope::ID FenceSSID =
5145 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5147 if (Subtarget.hasStdExtZtso()) {
5148 // The only fence that needs an instruction is a sequentially-consistent
5149 // cross-thread fence.
5150 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5151 FenceSSID == SyncScope::System)
5152 return Op;
5154 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5155 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5158 // singlethread fences only synchronize with signal handlers on the same
5159 // thread and thus only need to preserve instruction order, not actually
5160 // enforce memory ordering.
5161 if (FenceSSID == SyncScope::SingleThread)
5162 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5163 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5165 return Op;
5166 }
5168 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5169 SelectionDAG &DAG) const {
5170 SDLoc DL(Op);
5171 MVT VT = Op.getSimpleValueType();
5172 MVT XLenVT = Subtarget.getXLenVT();
5173 unsigned Check = Op.getConstantOperandVal(1);
5174 unsigned TDCMask = 0;
5175 if (Check & fcSNan)
5176 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5177 if (Check & fcQNan)
5178 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5179 if (Check & fcPosInf)
5180 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5181 if (Check & fcNegInf)
5182 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5183 if (Check & fcPosNormal)
5184 TDCMask |= RISCV::FPMASK_Positive_Normal;
5185 if (Check & fcNegNormal)
5186 TDCMask |= RISCV::FPMASK_Negative_Normal;
5187 if (Check & fcPosSubnormal)
5188 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5189 if (Check & fcNegSubnormal)
5190 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5191 if (Check & fcPosZero)
5192 TDCMask |= RISCV::FPMASK_Positive_Zero;
5193 if (Check & fcNegZero)
5194 TDCMask |= RISCV::FPMASK_Negative_Zero;
5196 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5198 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5200 if (VT.isVector()) {
5201 SDValue Op0 = Op.getOperand(0);
5202 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5204 if (VT.isScalableVector()) {
5205 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5206 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5207 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5208 Mask = Op.getOperand(2);
5209 VL = Op.getOperand(3);
5211 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5212 VL, Op->getFlags());
5213 if (IsOneBitMask)
5214 return DAG.getSetCC(DL, VT, FPCLASS,
5215 DAG.getConstant(TDCMask, DL, DstVT),
5216 ISD::CondCode::SETEQ);
5217 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5218 DAG.getConstant(TDCMask, DL, DstVT));
5219 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5220 ISD::SETNE);
5223 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5224 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5225 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5226 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5227 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5228 Mask = Op.getOperand(2);
5229 MVT MaskContainerVT =
5230 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5231 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5232 VL = Op.getOperand(3);
5234 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5236 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5237 Mask, VL, Op->getFlags());
5239 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5240 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5241 if (IsOneBitMask) {
5242 SDValue VMSEQ =
5243 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5244 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5245 DAG.getUNDEF(ContainerVT), Mask, VL});
5246 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5248 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5249 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5251 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5252 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5253 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5255 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5256 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5257 DAG.getUNDEF(ContainerVT), Mask, VL});
5258 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5261 SDValue FPCLASS =
5262 DAG.getNode(RISCVISD::FPCLASS, DL, XLenVT, Op.getOperand(0));
5263 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FPCLASS, TDCMaskV);
5264 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5265 ISD::CondCode::SETNE);
5266 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5267 }
5269 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5270 // operations propagate nans.
5271 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5272 const RISCVSubtarget &Subtarget) {
5273 SDLoc DL(Op);
5274 MVT VT = Op.getSimpleValueType();
5276 SDValue X = Op.getOperand(0);
5277 SDValue Y = Op.getOperand(1);
5279 if (!VT.isVector()) {
5280 MVT XLenVT = Subtarget.getXLenVT();
5282 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5283 // ensures that when one input is a nan, the other will also be a nan
5284 // allowing the nan to propagate. If both inputs are nan, this will swap the
5285 // inputs which is harmless.
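// Illustrative example: for fmaximum(NaN, 1.0), X fails the SETOEQ self
// comparison, so Y is replaced by X and fmax sees (NaN, NaN), returning NaN
// as the NaN-propagating maximum operation requires.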
5287 SDValue NewY = Y;
5288 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5289 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5290 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5293 SDValue NewX = X;
5294 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5295 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5296 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5299 unsigned Opc =
5300 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5301 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5304 // Check for no-NaN inputs before converting the fixed-length vectors to scalable.
5305 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5306 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5308 MVT ContainerVT = VT;
5309 if (VT.isFixedLengthVector()) {
5310 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5311 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5312 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5315 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5317 SDValue NewY = Y;
5318 if (!XIsNeverNan) {
5319 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5320 {X, X, DAG.getCondCode(ISD::SETOEQ),
5321 DAG.getUNDEF(ContainerVT), Mask, VL});
5322 NewY =
5323 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
5326 SDValue NewX = X;
5327 if (!YIsNeverNan) {
5328 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5329 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5330 DAG.getUNDEF(ContainerVT), Mask, VL});
5331 NewX =
5332 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
5335 unsigned Opc =
5336 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
5337 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5338 DAG.getUNDEF(ContainerVT), Mask, VL);
5339 if (VT.isFixedLengthVector())
5340 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5341 return Res;
5342 }
5344 /// Get the RISC-V target-specific VL op for a given SDNode.
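/// For example, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL, while
/// the i1-vector forms of AND/OR/XOR map to VMAND_VL/VMOR_VL/VMXOR_VL.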
5345 static unsigned getRISCVVLOp(SDValue Op) {
5346 #define OP_CASE(NODE) \
5347 case ISD::NODE: \
5348 return RISCVISD::NODE##_VL;
5349 #define VP_CASE(NODE) \
5350 case ISD::VP_##NODE: \
5351 return RISCVISD::NODE##_VL;
5352 // clang-format off
5353 switch (Op.getOpcode()) {
5354 default:
5355 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5356 OP_CASE(ADD)
5357 OP_CASE(SUB)
5358 OP_CASE(MUL)
5359 OP_CASE(MULHS)
5360 OP_CASE(MULHU)
5361 OP_CASE(SDIV)
5362 OP_CASE(SREM)
5363 OP_CASE(UDIV)
5364 OP_CASE(UREM)
5365 OP_CASE(SHL)
5366 OP_CASE(SRA)
5367 OP_CASE(SRL)
5368 OP_CASE(ROTL)
5369 OP_CASE(ROTR)
5370 OP_CASE(BSWAP)
5371 OP_CASE(CTTZ)
5372 OP_CASE(CTLZ)
5373 OP_CASE(CTPOP)
5374 OP_CASE(BITREVERSE)
5375 OP_CASE(SADDSAT)
5376 OP_CASE(UADDSAT)
5377 OP_CASE(SSUBSAT)
5378 OP_CASE(USUBSAT)
5379 OP_CASE(FADD)
5380 OP_CASE(FSUB)
5381 OP_CASE(FMUL)
5382 OP_CASE(FDIV)
5383 OP_CASE(FNEG)
5384 OP_CASE(FABS)
5385 OP_CASE(FSQRT)
5386 OP_CASE(SMIN)
5387 OP_CASE(SMAX)
5388 OP_CASE(UMIN)
5389 OP_CASE(UMAX)
5390 OP_CASE(STRICT_FADD)
5391 OP_CASE(STRICT_FSUB)
5392 OP_CASE(STRICT_FMUL)
5393 OP_CASE(STRICT_FDIV)
5394 OP_CASE(STRICT_FSQRT)
5395 VP_CASE(ADD) // VP_ADD
5396 VP_CASE(SUB) // VP_SUB
5397 VP_CASE(MUL) // VP_MUL
5398 VP_CASE(SDIV) // VP_SDIV
5399 VP_CASE(SREM) // VP_SREM
5400 VP_CASE(UDIV) // VP_UDIV
5401 VP_CASE(UREM) // VP_UREM
5402 VP_CASE(SHL) // VP_SHL
5403 VP_CASE(FADD) // VP_FADD
5404 VP_CASE(FSUB) // VP_FSUB
5405 VP_CASE(FMUL) // VP_FMUL
5406 VP_CASE(FDIV) // VP_FDIV
5407 VP_CASE(FNEG) // VP_FNEG
5408 VP_CASE(FABS) // VP_FABS
5409 VP_CASE(SMIN) // VP_SMIN
5410 VP_CASE(SMAX) // VP_SMAX
5411 VP_CASE(UMIN) // VP_UMIN
5412 VP_CASE(UMAX) // VP_UMAX
5413 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5414 VP_CASE(SETCC) // VP_SETCC
5415 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5416 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5417 VP_CASE(BITREVERSE) // VP_BITREVERSE
5418 VP_CASE(BSWAP) // VP_BSWAP
5419 VP_CASE(CTLZ) // VP_CTLZ
5420 VP_CASE(CTTZ) // VP_CTTZ
5421 VP_CASE(CTPOP) // VP_CTPOP
5422 case ISD::CTLZ_ZERO_UNDEF:
5423 case ISD::VP_CTLZ_ZERO_UNDEF:
5424 return RISCVISD::CTLZ_VL;
5425 case ISD::CTTZ_ZERO_UNDEF:
5426 case ISD::VP_CTTZ_ZERO_UNDEF:
5427 return RISCVISD::CTTZ_VL;
5428 case ISD::FMA:
5429 case ISD::VP_FMA:
5430 return RISCVISD::VFMADD_VL;
5431 case ISD::STRICT_FMA:
5432 return RISCVISD::STRICT_VFMADD_VL;
5433 case ISD::AND:
5434 case ISD::VP_AND:
5435 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5436 return RISCVISD::VMAND_VL;
5437 return RISCVISD::AND_VL;
5438 case ISD::OR:
5439 case ISD::VP_OR:
5440 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5441 return RISCVISD::VMOR_VL;
5442 return RISCVISD::OR_VL;
5443 case ISD::XOR:
5444 case ISD::VP_XOR:
5445 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5446 return RISCVISD::VMXOR_VL;
5447 return RISCVISD::XOR_VL;
5448 case ISD::VP_SELECT:
5449 return RISCVISD::VSELECT_VL;
5450 case ISD::VP_MERGE:
5451 return RISCVISD::VP_MERGE_VL;
5452 case ISD::VP_ASHR:
5453 return RISCVISD::SRA_VL;
5454 case ISD::VP_LSHR:
5455 return RISCVISD::SRL_VL;
5456 case ISD::VP_SQRT:
5457 return RISCVISD::FSQRT_VL;
5458 case ISD::VP_SIGN_EXTEND:
5459 return RISCVISD::VSEXT_VL;
5460 case ISD::VP_ZERO_EXTEND:
5461 return RISCVISD::VZEXT_VL;
5462 case ISD::VP_FP_TO_SINT:
5463 return RISCVISD::VFCVT_RTZ_X_F_VL;
5464 case ISD::VP_FP_TO_UINT:
5465 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5466 case ISD::FMINNUM:
5467 case ISD::VP_FMINNUM:
5468 return RISCVISD::VFMIN_VL;
5469 case ISD::FMAXNUM:
5470 case ISD::VP_FMAXNUM:
5471 return RISCVISD::VFMAX_VL;
5473 // clang-format on
5474 #undef OP_CASE
5475 #undef VP_CASE
5476 }
5478 /// Return true if a RISC-V target-specific op has a merge operand.
5479 static bool hasMergeOp(unsigned Opcode) {
5480 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5481 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5482 "not a RISC-V target specific op");
5483 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5484 125 &&
5485 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5486 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5487 21 &&
5488 "adding target specific op should update this function");
5489 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5490 return true;
5491 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5492 return true;
5493 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5494 return true;
5495 if (Opcode == RISCVISD::SETCC_VL)
5496 return true;
5497 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5498 return true;
5499 return false;
5500 }
5502 /// Return true if a RISC-V target-specific op has a mask operand.
5503 static bool hasMaskOp(unsigned Opcode) {
5504 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5505 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5506 "not a RISC-V target specific op");
5507 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5508 125 &&
5509 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5510 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5511 21 &&
5512 "adding target specific op should update this function");
5513 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5514 return true;
5515 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5516 return true;
5517 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5518 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5519 return true;
5520 return false;
5521 }
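// Split an operation on an illegally large vector type into two operations on
// the extracted low and high halves, reusing any non-vector operands for both
// halves, and concatenate the results.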
5523 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5524 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5525 SDLoc DL(Op);
5527 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5528 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5530 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5531 if (!Op.getOperand(j).getValueType().isVector()) {
5532 LoOperands[j] = Op.getOperand(j);
5533 HiOperands[j] = Op.getOperand(j);
5534 continue;
5536 std::tie(LoOperands[j], HiOperands[j]) =
5537 DAG.SplitVector(Op.getOperand(j), DL);
5540 SDValue LoRes =
5541 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5542 SDValue HiRes =
5543 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5545 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5546 }
5548 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5549 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5550 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5551 SDLoc DL(Op);
5553 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5554 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5556 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5557 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5558 std::tie(LoOperands[j], HiOperands[j]) =
5559 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5560 continue;
5562 if (!Op.getOperand(j).getValueType().isVector()) {
5563 LoOperands[j] = Op.getOperand(j);
5564 HiOperands[j] = Op.getOperand(j);
5565 continue;
5567 std::tie(LoOperands[j], HiOperands[j]) =
5568 DAG.SplitVector(Op.getOperand(j), DL);
5571 SDValue LoRes =
5572 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5573 SDValue HiRes =
5574 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5576 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5577 }
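// Split a VP reduction over an illegally large vector: the low half is
// reduced first and its result is fed in as the start value of the high-half
// reduction, chaining the two partial reductions.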
5579 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5580 SDLoc DL(Op);
5582 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5583 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5584 auto [EVLLo, EVLHi] =
5585 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5587 SDValue ResLo =
5588 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5589 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5590 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5591 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5592 }
5594 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
5595 SelectionDAG &DAG) const {
5596 switch (Op.getOpcode()) {
5597 default:
5598 report_fatal_error("unimplemented operand");
5599 case ISD::ATOMIC_FENCE:
5600 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
5601 case ISD::GlobalAddress:
5602 return lowerGlobalAddress(Op, DAG);
5603 case ISD::BlockAddress:
5604 return lowerBlockAddress(Op, DAG);
5605 case ISD::ConstantPool:
5606 return lowerConstantPool(Op, DAG);
5607 case ISD::JumpTable:
5608 return lowerJumpTable(Op, DAG);
5609 case ISD::GlobalTLSAddress:
5610 return lowerGlobalTLSAddress(Op, DAG);
5611 case ISD::Constant:
5612 return lowerConstant(Op, DAG, Subtarget);
5613 case ISD::SELECT:
5614 return lowerSELECT(Op, DAG);
5615 case ISD::BRCOND:
5616 return lowerBRCOND(Op, DAG);
5617 case ISD::VASTART:
5618 return lowerVASTART(Op, DAG);
5619 case ISD::FRAMEADDR:
5620 return lowerFRAMEADDR(Op, DAG);
5621 case ISD::RETURNADDR:
5622 return lowerRETURNADDR(Op, DAG);
5623 case ISD::SHL_PARTS:
5624 return lowerShiftLeftParts(Op, DAG);
5625 case ISD::SRA_PARTS:
5626 return lowerShiftRightParts(Op, DAG, true);
5627 case ISD::SRL_PARTS:
5628 return lowerShiftRightParts(Op, DAG, false);
5629 case ISD::ROTL:
5630 case ISD::ROTR:
5631 if (Op.getValueType().isFixedLengthVector()) {
5632 assert(Subtarget.hasStdExtZvkb());
5633 return lowerToScalableOp(Op, DAG);
5635 assert(Subtarget.hasVendorXTHeadBb() &&
5636 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
5637 "Unexpected custom legalization");
5638 // XTHeadBb only supports rotate by constant.
5639 if (!isa<ConstantSDNode>(Op.getOperand(1)))
5640 return SDValue();
5641 return Op;
5642 case ISD::BITCAST: {
5643 SDLoc DL(Op);
5644 EVT VT = Op.getValueType();
5645 SDValue Op0 = Op.getOperand(0);
5646 EVT Op0VT = Op0.getValueType();
5647 MVT XLenVT = Subtarget.getXLenVT();
5648 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
5649 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
5650 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5651 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
5652 return FPConv;
5654 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
5655 Subtarget.hasStdExtZfbfmin()) {
5656 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
5657 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
5658 return FPConv;
5660 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
5661 Subtarget.hasStdExtFOrZfinx()) {
5662 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5663 SDValue FPConv =
5664 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
5665 return FPConv;
5667 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
5668 Subtarget.hasStdExtZfa()) {
5669 SDValue Lo, Hi;
5670 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
5671 SDValue RetReg =
5672 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5673 return RetReg;
5676 // Consider other scalar<->scalar casts as legal if the types are legal.
5677 // Otherwise expand them.
5678 if (!VT.isVector() && !Op0VT.isVector()) {
5679 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
5680 return Op;
5681 return SDValue();
5684 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
5685 "Unexpected types");
5687 if (VT.isFixedLengthVector()) {
5688 // We can handle fixed length vector bitcasts with a simple replacement
5689 // in isel.
5690 if (Op0VT.isFixedLengthVector())
5691 return Op;
5692 // When bitcasting from scalar to fixed-length vector, insert the scalar
5693 // into a one-element vector of the result type, and perform a vector
5694 // bitcast.
5695 if (!Op0VT.isVector()) {
5696 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
5697 if (!isTypeLegal(BVT))
5698 return SDValue();
5699 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
5700 DAG.getUNDEF(BVT), Op0,
5701 DAG.getConstant(0, DL, XLenVT)));
5703 return SDValue();
5705 // Custom-legalize bitcasts from fixed-length vector types to scalar types
5706 // thus: bitcast the vector to a one-element vector type whose element type
5707 // is the same as the result type, and extract the first element.
5708 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
5709 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
5710 if (!isTypeLegal(BVT))
5711 return SDValue();
5712 SDValue BVec = DAG.getBitcast(BVT, Op0);
5713 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
5714 DAG.getConstant(0, DL, XLenVT));
5716 return SDValue();
5718 case ISD::INTRINSIC_WO_CHAIN:
5719 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5720 case ISD::INTRINSIC_W_CHAIN:
5721 return LowerINTRINSIC_W_CHAIN(Op, DAG);
5722 case ISD::INTRINSIC_VOID:
5723 return LowerINTRINSIC_VOID(Op, DAG);
5724 case ISD::IS_FPCLASS:
5725 return LowerIS_FPCLASS(Op, DAG);
5726 case ISD::BITREVERSE: {
5727 MVT VT = Op.getSimpleValueType();
5728 if (VT.isFixedLengthVector()) {
5729 assert(Subtarget.hasStdExtZvbb());
5730 return lowerToScalableOp(Op, DAG);
5732 SDLoc DL(Op);
5733 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
5734 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
5735 // Expand bitreverse to a bswap(rev8) followed by brev8.
5736 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
5737 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
5739 case ISD::TRUNCATE:
5740 // Only custom-lower vector truncates
5741 if (!Op.getSimpleValueType().isVector())
5742 return Op;
5743 return lowerVectorTruncLike(Op, DAG);
5744 case ISD::ANY_EXTEND:
5745 case ISD::ZERO_EXTEND:
5746 if (Op.getOperand(0).getValueType().isVector() &&
5747 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5748 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
5749 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
5750 case ISD::SIGN_EXTEND:
5751 if (Op.getOperand(0).getValueType().isVector() &&
5752 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5753 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
5754 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
5755 case ISD::SPLAT_VECTOR_PARTS:
5756 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
5757 case ISD::INSERT_VECTOR_ELT:
5758 return lowerINSERT_VECTOR_ELT(Op, DAG);
5759 case ISD::EXTRACT_VECTOR_ELT:
5760 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5761 case ISD::SCALAR_TO_VECTOR: {
5762 MVT VT = Op.getSimpleValueType();
5763 SDLoc DL(Op);
5764 SDValue Scalar = Op.getOperand(0);
5765 if (VT.getVectorElementType() == MVT::i1) {
5766 MVT WideVT = VT.changeVectorElementType(MVT::i8);
5767 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
5768 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
5770 MVT ContainerVT = VT;
5771 if (VT.isFixedLengthVector())
5772 ContainerVT = getContainerForFixedLengthVector(VT);
5773 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5774 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
5775 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
5776 DAG.getUNDEF(ContainerVT), Scalar, VL);
5777 if (VT.isFixedLengthVector())
5778 V = convertFromScalableVector(VT, V, DAG, Subtarget);
5779 return V;
5781 case ISD::VSCALE: {
5782 MVT XLenVT = Subtarget.getXLenVT();
5783 MVT VT = Op.getSimpleValueType();
5784 SDLoc DL(Op);
5785 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
5786 // We define our scalable vector types for lmul=1 to use a 64 bit known
5787 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
5788 // vscale as VLENB / 8.
5789 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
5790 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
5791 report_fatal_error("Support for VLEN==32 is incomplete.");
5792 // We assume VLENB is a multiple of 8. We manually choose the best shift
5793 // here because SimplifyDemandedBits isn't always able to simplify it.
5794 uint64_t Val = Op.getConstantOperandVal(0);
5795 if (isPowerOf2_64(Val)) {
5796 uint64_t Log2 = Log2_64(Val);
5797 if (Log2 < 3)
5798 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
5799 DAG.getConstant(3 - Log2, DL, VT));
5800 else if (Log2 > 3)
5801 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
5802 DAG.getConstant(Log2 - 3, DL, XLenVT));
5803 } else if ((Val % 8) == 0) {
5804 // If the multiplier is a multiple of 8, scale it down to avoid needing
5805 // to shift the VLENB value.
5806 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
5807 DAG.getConstant(Val / 8, DL, XLenVT));
5808 } else {
5809 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
5810 DAG.getConstant(3, DL, XLenVT));
5811 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
5812 DAG.getConstant(Val, DL, XLenVT));
5814 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5816 case ISD::FPOWI: {
5817 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
5818 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
5819 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
5820 Op.getOperand(1).getValueType() == MVT::i32) {
5821 SDLoc DL(Op);
5822 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
5823 SDValue Powi =
5824 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
5825 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
5826 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5828 return SDValue();
5830 case ISD::FMAXIMUM:
5831 case ISD::FMINIMUM:
5832 if (Op.getValueType() == MVT::nxv32f16 &&
5833 (Subtarget.hasVInstructionsF16Minimal() &&
5834 !Subtarget.hasVInstructionsF16()))
5835 return SplitVectorOp(Op, DAG);
5836 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
5837 case ISD::FP_EXTEND: {
5838 SDLoc DL(Op);
5839 EVT VT = Op.getValueType();
5840 SDValue Op0 = Op.getOperand(0);
5841 EVT Op0VT = Op0.getValueType();
5842 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
5843 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5844 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
5845 SDValue FloatVal =
5846 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5847 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
5850 if (!Op.getValueType().isVector())
5851 return Op;
5852 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5854 case ISD::FP_ROUND: {
5855 SDLoc DL(Op);
5856 EVT VT = Op.getValueType();
5857 SDValue Op0 = Op.getOperand(0);
5858 EVT Op0VT = Op0.getValueType();
5859 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
5860 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
5861 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
5862 Subtarget.hasStdExtDOrZdinx()) {
5863 SDValue FloatVal =
5864 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
5865 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5866 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
5869 if (!Op.getValueType().isVector())
5870 return Op;
5871 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5873 case ISD::STRICT_FP_ROUND:
5874 case ISD::STRICT_FP_EXTEND:
5875 return lowerStrictFPExtendOrRoundLike(Op, DAG);
5876 case ISD::SINT_TO_FP:
5877 case ISD::UINT_TO_FP:
5878 if (Op.getValueType().isVector() &&
5879 Op.getValueType().getScalarType() == MVT::f16 &&
5880 (Subtarget.hasVInstructionsF16Minimal() &&
5881 !Subtarget.hasVInstructionsF16())) {
5882 if (Op.getValueType() == MVT::nxv32f16)
5883 return SplitVectorOp(Op, DAG);
5884 // int -> f32
5885 SDLoc DL(Op);
5886 MVT NVT =
5887 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
5888 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
5889 // f32 -> f16
5890 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
5891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5893 [[fallthrough]];
5894 case ISD::FP_TO_SINT:
5895 case ISD::FP_TO_UINT:
5896 if (SDValue Op1 = Op.getOperand(0);
5897 Op1.getValueType().isVector() &&
5898 Op1.getValueType().getScalarType() == MVT::f16 &&
5899 (Subtarget.hasVInstructionsF16Minimal() &&
5900 !Subtarget.hasVInstructionsF16())) {
5901 if (Op1.getValueType() == MVT::nxv32f16)
5902 return SplitVectorOp(Op, DAG);
5903 // f16 -> f32
5904 SDLoc DL(Op);
5905 MVT NVT = MVT::getVectorVT(MVT::f32,
5906 Op1.getValueType().getVectorElementCount());
5907 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
5908 // f32 -> int
5909 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
5911 [[fallthrough]];
5912 case ISD::STRICT_FP_TO_SINT:
5913 case ISD::STRICT_FP_TO_UINT:
5914 case ISD::STRICT_SINT_TO_FP:
5915 case ISD::STRICT_UINT_TO_FP: {
5916 // RVV can only do fp<->int conversions to types half or double the size
5917 // of the source. We custom-lower any conversions that do two hops into
5918 // sequences.
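// For example, nxv2i8 -> nxv2f64 is lowered as a sign/zero extend to
// nxv2i32 followed by a single nxv2i32 -> nxv2f64 conversion.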
5919 MVT VT = Op.getSimpleValueType();
5920 if (!VT.isVector())
5921 return Op;
5922 SDLoc DL(Op);
5923 bool IsStrict = Op->isStrictFPOpcode();
5924 SDValue Src = Op.getOperand(0 + IsStrict);
5925 MVT EltVT = VT.getVectorElementType();
5926 MVT SrcVT = Src.getSimpleValueType();
5927 MVT SrcEltVT = SrcVT.getVectorElementType();
5928 unsigned EltSize = EltVT.getSizeInBits();
5929 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
5930 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
5931 "Unexpected vector element types");
5933 bool IsInt2FP = SrcEltVT.isInteger();
5934 // Widening conversions
5935 if (EltSize > (2 * SrcEltSize)) {
5936 if (IsInt2FP) {
5937 // Do a regular integer sign/zero extension then convert to float.
5938 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
5939 VT.getVectorElementCount());
5940 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
5941 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
5942 ? ISD::ZERO_EXTEND
5943 : ISD::SIGN_EXTEND;
5944 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
5945 if (IsStrict)
5946 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
5947 Op.getOperand(0), Ext);
5948 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
5950 // FP2Int
5951 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
5952 // Do one doubling fp_extend then complete the operation by converting
5953 // to int.
5954 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5955 if (IsStrict) {
5956 auto [FExt, Chain] =
5957 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
5958 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
5960 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
5961 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
5964 // Narrowing conversions
5965 if (SrcEltSize > (2 * EltSize)) {
5966 if (IsInt2FP) {
5967 // One narrowing int_to_fp, then an fp_round.
5968 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
5969 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5970 if (IsStrict) {
5971 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
5972 DAG.getVTList(InterimFVT, MVT::Other),
5973 Op.getOperand(0), Src);
5974 SDValue Chain = Int2FP.getValue(1);
5975 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
5977 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
5978 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
5980 // FP2Int
5981 // One narrowing fp_to_int, then truncate the integer. If the float isn't
5982 // representable by the integer, the result is poison.
5983 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
5984 VT.getVectorElementCount());
5985 if (IsStrict) {
5986 SDValue FP2Int =
5987 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
5988 Op.getOperand(0), Src);
5989 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5990 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
5992 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
5993 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5996 // Scalable vectors can exit here. Patterns will handle equally-sized
5997 // conversions as well as the halving/doubling ones.
5998 if (!VT.isFixedLengthVector())
5999 return Op;
6001 // For fixed-length vectors we lower to a custom "VL" node.
6002 unsigned RVVOpc = 0;
6003 switch (Op.getOpcode()) {
6004 default:
6005 llvm_unreachable("Impossible opcode");
6006 case ISD::FP_TO_SINT:
6007 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6008 break;
6009 case ISD::FP_TO_UINT:
6010 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6011 break;
6012 case ISD::SINT_TO_FP:
6013 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6014 break;
6015 case ISD::UINT_TO_FP:
6016 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6017 break;
6018 case ISD::STRICT_FP_TO_SINT:
6019 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6020 break;
6021 case ISD::STRICT_FP_TO_UINT:
6022 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6023 break;
6024 case ISD::STRICT_SINT_TO_FP:
6025 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6026 break;
6027 case ISD::STRICT_UINT_TO_FP:
6028 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6029 break;
6032 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6033 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6034 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6035 "Expected same element count");
6037 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6039 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6040 if (IsStrict) {
6041 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6042 Op.getOperand(0), Src, Mask, VL);
6043 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6044 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6046 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6047 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6049 case ISD::FP_TO_SINT_SAT:
6050 case ISD::FP_TO_UINT_SAT:
6051 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6052 case ISD::FP_TO_BF16: {
6053 // Custom lower to ensure the libcall return is passed in an FPR on hard
6054 // float ABIs.
6055 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6056 SDLoc DL(Op);
6057 MakeLibCallOptions CallOptions;
6058 RTLIB::Libcall LC =
6059 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6060 SDValue Res =
6061 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6062 if (Subtarget.is64Bit() && !RV64LegalI32)
6063 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6064 return DAG.getBitcast(MVT::i32, Res);
6066 case ISD::BF16_TO_FP: {
6067 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6068 MVT VT = Op.getSimpleValueType();
6069 SDLoc DL(Op);
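// bf16 has the same sign/exponent layout as the upper half of f32, so
// shifting the bf16 bits into the top 16 bits of the integer operand and
// reinterpreting them as f32 gives an exact extension.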
6070 Op = DAG.getNode(
6071 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6072 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6073 SDValue Res = Subtarget.is64Bit()
6074 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6075 : DAG.getBitcast(MVT::f32, Op);
6076 // fp_extend if the target VT is bigger than f32.
6077 if (VT != MVT::f32)
6078 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6079 return Res;
6081 case ISD::FP_TO_FP16: {
6082 // Custom lower to ensure the libcall return is passed in an FPR on hard
6083 // float ABIs.
6084 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6085 SDLoc DL(Op);
6086 MakeLibCallOptions CallOptions;
6087 RTLIB::Libcall LC =
6088 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6089 SDValue Res =
6090 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6091 if (Subtarget.is64Bit() && !RV64LegalI32)
6092 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6093 return DAG.getBitcast(MVT::i32, Res);
6095 case ISD::FP16_TO_FP: {
6096 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6097 // float ABIs.
6098 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6099 SDLoc DL(Op);
6100 MakeLibCallOptions CallOptions;
6101 SDValue Arg = Subtarget.is64Bit()
6102 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6103 Op.getOperand(0))
6104 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6105 SDValue Res =
6106 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6107 .first;
6108 return Res;
6110 case ISD::FTRUNC:
6111 case ISD::FCEIL:
6112 case ISD::FFLOOR:
6113 case ISD::FNEARBYINT:
6114 case ISD::FRINT:
6115 case ISD::FROUND:
6116 case ISD::FROUNDEVEN:
6117 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6118 case ISD::LRINT:
6119 case ISD::LLRINT:
6120 return lowerVectorXRINT(Op, DAG, Subtarget);
6121 case ISD::VECREDUCE_ADD:
6122 case ISD::VECREDUCE_UMAX:
6123 case ISD::VECREDUCE_SMAX:
6124 case ISD::VECREDUCE_UMIN:
6125 case ISD::VECREDUCE_SMIN:
6126 return lowerVECREDUCE(Op, DAG);
6127 case ISD::VECREDUCE_AND:
6128 case ISD::VECREDUCE_OR:
6129 case ISD::VECREDUCE_XOR:
6130 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6131 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6132 return lowerVECREDUCE(Op, DAG);
6133 case ISD::VECREDUCE_FADD:
6134 case ISD::VECREDUCE_SEQ_FADD:
6135 case ISD::VECREDUCE_FMIN:
6136 case ISD::VECREDUCE_FMAX:
6137 return lowerFPVECREDUCE(Op, DAG);
6138 case ISD::VP_REDUCE_ADD:
6139 case ISD::VP_REDUCE_UMAX:
6140 case ISD::VP_REDUCE_SMAX:
6141 case ISD::VP_REDUCE_UMIN:
6142 case ISD::VP_REDUCE_SMIN:
6143 case ISD::VP_REDUCE_FADD:
6144 case ISD::VP_REDUCE_SEQ_FADD:
6145 case ISD::VP_REDUCE_FMIN:
6146 case ISD::VP_REDUCE_FMAX:
6147 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6148 (Subtarget.hasVInstructionsF16Minimal() &&
6149 !Subtarget.hasVInstructionsF16()))
6150 return SplitVectorReductionOp(Op, DAG);
6151 return lowerVPREDUCE(Op, DAG);
6152 case ISD::VP_REDUCE_AND:
6153 case ISD::VP_REDUCE_OR:
6154 case ISD::VP_REDUCE_XOR:
6155 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6156 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6157 return lowerVPREDUCE(Op, DAG);
6158 case ISD::UNDEF: {
6159 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6160 return convertFromScalableVector(Op.getSimpleValueType(),
6161 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6163 case ISD::INSERT_SUBVECTOR:
6164 return lowerINSERT_SUBVECTOR(Op, DAG);
6165 case ISD::EXTRACT_SUBVECTOR:
6166 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6167 case ISD::VECTOR_DEINTERLEAVE:
6168 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6169 case ISD::VECTOR_INTERLEAVE:
6170 return lowerVECTOR_INTERLEAVE(Op, DAG);
6171 case ISD::STEP_VECTOR:
6172 return lowerSTEP_VECTOR(Op, DAG);
6173 case ISD::VECTOR_REVERSE:
6174 return lowerVECTOR_REVERSE(Op, DAG);
6175 case ISD::VECTOR_SPLICE:
6176 return lowerVECTOR_SPLICE(Op, DAG);
6177 case ISD::BUILD_VECTOR:
6178 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6179 case ISD::SPLAT_VECTOR:
6180 if (Op.getValueType().getScalarType() == MVT::f16 &&
6181 (Subtarget.hasVInstructionsF16Minimal() &&
6182 !Subtarget.hasVInstructionsF16())) {
6183 if (Op.getValueType() == MVT::nxv32f16)
6184 return SplitVectorOp(Op, DAG);
6185 SDLoc DL(Op);
6186 SDValue NewScalar =
6187 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6188 SDValue NewSplat = DAG.getNode(
6189 ISD::SPLAT_VECTOR, DL,
6190 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6191 NewScalar);
6192 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6193 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6195 if (Op.getValueType().getVectorElementType() == MVT::i1)
6196 return lowerVectorMaskSplat(Op, DAG);
6197 return SDValue();
6198 case ISD::VECTOR_SHUFFLE:
6199 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6200 case ISD::CONCAT_VECTORS: {
6201 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6202 // better than going through the stack, as the default expansion does.
6203 SDLoc DL(Op);
6204 MVT VT = Op.getSimpleValueType();
6205 unsigned NumOpElts =
6206 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6207 SDValue Vec = DAG.getUNDEF(VT);
6208 for (const auto &OpIdx : enumerate(Op->ops())) {
6209 SDValue SubVec = OpIdx.value();
6210 // Don't insert undef subvectors.
6211 if (SubVec.isUndef())
6212 continue;
6213 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6214 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
6216 return Vec;
6218 case ISD::LOAD:
6219 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6220 return V;
6221 if (Op.getValueType().isFixedLengthVector())
6222 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6223 return Op;
6224 case ISD::STORE:
6225 if (auto V = expandUnalignedRVVStore(Op, DAG))
6226 return V;
6227 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6228 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6229 return Op;
6230 case ISD::MLOAD:
6231 case ISD::VP_LOAD:
6232 return lowerMaskedLoad(Op, DAG);
6233 case ISD::MSTORE:
6234 case ISD::VP_STORE:
6235 return lowerMaskedStore(Op, DAG);
6236 case ISD::SELECT_CC: {
6237 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6238 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6239 // into separate SETCC+SELECT just like LegalizeDAG.
6240 SDValue Tmp1 = Op.getOperand(0);
6241 SDValue Tmp2 = Op.getOperand(1);
6242 SDValue True = Op.getOperand(2);
6243 SDValue False = Op.getOperand(3);
6244 EVT VT = Op.getValueType();
6245 SDValue CC = Op.getOperand(4);
6246 EVT CmpVT = Tmp1.getValueType();
6247 EVT CCVT =
6248 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6249 SDLoc DL(Op);
6250 SDValue Cond =
6251 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6252 return DAG.getSelect(DL, VT, Cond, True, False);
6254 case ISD::SETCC: {
6255 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6256 if (OpVT.isScalarInteger()) {
6257 MVT VT = Op.getSimpleValueType();
6258 SDValue LHS = Op.getOperand(0);
6259 SDValue RHS = Op.getOperand(1);
6260 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6261 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6262 "Unexpected CondCode");
6264 SDLoc DL(Op);
6266 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6267 // convert this to the equivalent of (set(u)ge X, C+1) by using
6268 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6269 // in a register.
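// For example, (setgt X, 5) becomes (xori (slti X, 6), 1), since
// X > 5 is equivalent to !(X < 6).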
6270 if (isa<ConstantSDNode>(RHS)) {
6271 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6272 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6273 // If this is an unsigned compare and the constant is -1, incrementing
6274 // the constant would change behavior. The result should be false.
6275 if (CCVal == ISD::SETUGT && Imm == -1)
6276 return DAG.getConstant(0, DL, VT);
6277 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6278 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6279 SDValue SetCC = DAG.getSetCC(
6280 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6281 return DAG.getLogicalNOT(DL, SetCC, VT);
6285 // Not a constant we could handle, swap the operands and condition code to
6286 // SETLT/SETULT.
6287 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6288 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6291 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6292 (Subtarget.hasVInstructionsF16Minimal() &&
6293 !Subtarget.hasVInstructionsF16()))
6294 return SplitVectorOp(Op, DAG);
6296 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6298 case ISD::ADD:
6299 case ISD::SUB:
6300 case ISD::MUL:
6301 case ISD::MULHS:
6302 case ISD::MULHU:
6303 case ISD::AND:
6304 case ISD::OR:
6305 case ISD::XOR:
6306 case ISD::SDIV:
6307 case ISD::SREM:
6308 case ISD::UDIV:
6309 case ISD::UREM:
6310 case ISD::BSWAP:
6311 case ISD::CTPOP:
6312 return lowerToScalableOp(Op, DAG);
6313 case ISD::SHL:
6314 case ISD::SRA:
6315 case ISD::SRL:
6316 if (Op.getSimpleValueType().isFixedLengthVector())
6317 return lowerToScalableOp(Op, DAG);
6318 // This can be called for an i32 shift amount that needs to be promoted.
6319 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6320 "Unexpected custom legalisation");
6321 return SDValue();
6322 case ISD::FADD:
6323 case ISD::FSUB:
6324 case ISD::FMUL:
6325 case ISD::FDIV:
6326 case ISD::FNEG:
6327 case ISD::FABS:
6328 case ISD::FSQRT:
6329 case ISD::FMA:
6330 case ISD::FMINNUM:
6331 case ISD::FMAXNUM:
6332 if (Op.getValueType() == MVT::nxv32f16 &&
6333 (Subtarget.hasVInstructionsF16Minimal() &&
6334 !Subtarget.hasVInstructionsF16()))
6335 return SplitVectorOp(Op, DAG);
6336 [[fallthrough]];
6337 case ISD::SADDSAT:
6338 case ISD::UADDSAT:
6339 case ISD::SSUBSAT:
6340 case ISD::USUBSAT:
6341 case ISD::SMIN:
6342 case ISD::SMAX:
6343 case ISD::UMIN:
6344 case ISD::UMAX:
6345 return lowerToScalableOp(Op, DAG);
6346 case ISD::ABS:
6347 case ISD::VP_ABS:
6348 return lowerABS(Op, DAG);
6349 case ISD::CTLZ:
6350 case ISD::CTLZ_ZERO_UNDEF:
6351 case ISD::CTTZ:
6352 case ISD::CTTZ_ZERO_UNDEF:
6353 if (Subtarget.hasStdExtZvbb())
6354 return lowerToScalableOp(Op, DAG);
6355 assert(Op.getOpcode() != ISD::CTTZ);
6356 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6357 case ISD::VSELECT:
6358 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6359 case ISD::FCOPYSIGN:
6360 if (Op.getValueType() == MVT::nxv32f16 &&
6361 (Subtarget.hasVInstructionsF16Minimal() &&
6362 !Subtarget.hasVInstructionsF16()))
6363 return SplitVectorOp(Op, DAG);
6364 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6365 case ISD::STRICT_FADD:
6366 case ISD::STRICT_FSUB:
6367 case ISD::STRICT_FMUL:
6368 case ISD::STRICT_FDIV:
6369 case ISD::STRICT_FSQRT:
6370 case ISD::STRICT_FMA:
6371 return lowerToScalableOp(Op, DAG);
6372 case ISD::STRICT_FSETCC:
6373 case ISD::STRICT_FSETCCS:
6374 return lowerVectorStrictFSetcc(Op, DAG);
6375 case ISD::STRICT_FCEIL:
6376 case ISD::STRICT_FRINT:
6377 case ISD::STRICT_FFLOOR:
6378 case ISD::STRICT_FTRUNC:
6379 case ISD::STRICT_FNEARBYINT:
6380 case ISD::STRICT_FROUND:
6381 case ISD::STRICT_FROUNDEVEN:
6382 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6383 case ISD::MGATHER:
6384 case ISD::VP_GATHER:
6385 return lowerMaskedGather(Op, DAG);
6386 case ISD::MSCATTER:
6387 case ISD::VP_SCATTER:
6388 return lowerMaskedScatter(Op, DAG);
6389 case ISD::GET_ROUNDING:
6390 return lowerGET_ROUNDING(Op, DAG);
6391 case ISD::SET_ROUNDING:
6392 return lowerSET_ROUNDING(Op, DAG);
6393 case ISD::EH_DWARF_CFA:
6394 return lowerEH_DWARF_CFA(Op, DAG);
6395 case ISD::VP_SELECT:
6396 case ISD::VP_MERGE:
6397 case ISD::VP_ADD:
6398 case ISD::VP_SUB:
6399 case ISD::VP_MUL:
6400 case ISD::VP_SDIV:
6401 case ISD::VP_UDIV:
6402 case ISD::VP_SREM:
6403 case ISD::VP_UREM:
6404 return lowerVPOp(Op, DAG);
6405 case ISD::VP_AND:
6406 case ISD::VP_OR:
6407 case ISD::VP_XOR:
6408 return lowerLogicVPOp(Op, DAG);
6409 case ISD::VP_FADD:
6410 case ISD::VP_FSUB:
6411 case ISD::VP_FMUL:
6412 case ISD::VP_FDIV:
6413 case ISD::VP_FNEG:
6414 case ISD::VP_FABS:
6415 case ISD::VP_SQRT:
6416 case ISD::VP_FMA:
6417 case ISD::VP_FMINNUM:
6418 case ISD::VP_FMAXNUM:
6419 case ISD::VP_FCOPYSIGN:
6420 if (Op.getValueType() == MVT::nxv32f16 &&
6421 (Subtarget.hasVInstructionsF16Minimal() &&
6422 !Subtarget.hasVInstructionsF16()))
6423 return SplitVPOp(Op, DAG);
6424 [[fallthrough]];
6425 case ISD::VP_ASHR:
6426 case ISD::VP_LSHR:
6427 case ISD::VP_SHL:
6428 return lowerVPOp(Op, DAG);
6429 case ISD::VP_IS_FPCLASS:
6430 return LowerIS_FPCLASS(Op, DAG);
6431 case ISD::VP_SIGN_EXTEND:
6432 case ISD::VP_ZERO_EXTEND:
6433 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6434 return lowerVPExtMaskOp(Op, DAG);
6435 return lowerVPOp(Op, DAG);
6436 case ISD::VP_TRUNCATE:
6437 return lowerVectorTruncLike(Op, DAG);
6438 case ISD::VP_FP_EXTEND:
6439 case ISD::VP_FP_ROUND:
6440 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6441 case ISD::VP_SINT_TO_FP:
6442 case ISD::VP_UINT_TO_FP:
6443 if (Op.getValueType().isVector() &&
6444 Op.getValueType().getScalarType() == MVT::f16 &&
6445 (Subtarget.hasVInstructionsF16Minimal() &&
6446 !Subtarget.hasVInstructionsF16())) {
6447 if (Op.getValueType() == MVT::nxv32f16)
6448 return SplitVPOp(Op, DAG);
6449 // int -> f32
6450 SDLoc DL(Op);
6451 MVT NVT =
6452 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6453 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6454 // f32 -> f16
6455 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6456 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6458 [[fallthrough]];
6459 case ISD::VP_FP_TO_SINT:
6460 case ISD::VP_FP_TO_UINT:
6461 if (SDValue Op1 = Op.getOperand(0);
6462 Op1.getValueType().isVector() &&
6463 Op1.getValueType().getScalarType() == MVT::f16 &&
6464 (Subtarget.hasVInstructionsF16Minimal() &&
6465 !Subtarget.hasVInstructionsF16())) {
6466 if (Op1.getValueType() == MVT::nxv32f16)
6467 return SplitVPOp(Op, DAG);
6468 // f16 -> f32
6469 SDLoc DL(Op);
6470 MVT NVT = MVT::getVectorVT(MVT::f32,
6471 Op1.getValueType().getVectorElementCount());
6472 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6473 // f32 -> int
6474 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6475 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6477 return lowerVPFPIntConvOp(Op, DAG);
6478 case ISD::VP_SETCC:
6479 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6480 (Subtarget.hasVInstructionsF16Minimal() &&
6481 !Subtarget.hasVInstructionsF16()))
6482 return SplitVPOp(Op, DAG);
6483 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6484 return lowerVPSetCCMaskOp(Op, DAG);
6485 [[fallthrough]];
6486 case ISD::VP_SMIN:
6487 case ISD::VP_SMAX:
6488 case ISD::VP_UMIN:
6489 case ISD::VP_UMAX:
6490 case ISD::VP_BITREVERSE:
6491 case ISD::VP_BSWAP:
6492 return lowerVPOp(Op, DAG);
6493 case ISD::VP_CTLZ:
6494 case ISD::VP_CTLZ_ZERO_UNDEF:
6495 if (Subtarget.hasStdExtZvbb())
6496 return lowerVPOp(Op, DAG);
6497 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6498 case ISD::VP_CTTZ:
6499 case ISD::VP_CTTZ_ZERO_UNDEF:
6500 if (Subtarget.hasStdExtZvbb())
6501 return lowerVPOp(Op, DAG);
6502 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6503 case ISD::VP_CTPOP:
6504 return lowerVPOp(Op, DAG);
6505 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
6506 return lowerVPStridedLoad(Op, DAG);
6507 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
6508 return lowerVPStridedStore(Op, DAG);
6509 case ISD::VP_FCEIL:
6510 case ISD::VP_FFLOOR:
6511 case ISD::VP_FRINT:
6512 case ISD::VP_FNEARBYINT:
6513 case ISD::VP_FROUND:
6514 case ISD::VP_FROUNDEVEN:
6515 case ISD::VP_FROUNDTOZERO:
6516 if (Op.getValueType() == MVT::nxv32f16 &&
6517 (Subtarget.hasVInstructionsF16Minimal() &&
6518 !Subtarget.hasVInstructionsF16()))
6519 return SplitVPOp(Op, DAG);
6520 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6524 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
6525 SelectionDAG &DAG, unsigned Flags) {
6526 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
6529 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
6530 SelectionDAG &DAG, unsigned Flags) {
6531 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
6532 Flags);
6535 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
6536 SelectionDAG &DAG, unsigned Flags) {
6537 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6538 N->getOffset(), Flags);
6541 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
6542 SelectionDAG &DAG, unsigned Flags) {
6543 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
6546 template <class NodeTy>
6547 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6548 bool IsLocal, bool IsExternWeak) const {
6549 SDLoc DL(N);
6550 EVT Ty = getPointerTy(DAG.getDataLayout());
6552 // When HWASAN is used and tagging of global variables is enabled,
6553 // they should be accessed via the GOT, since the tagged address of a global
6554 // is incompatible with existing code models. This also applies to non-pic
6555 // mode.
6556 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
6557 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6558 if (IsLocal && !Subtarget.allowTaggedGlobals())
6559 // Use PC-relative addressing to access the symbol. This generates the
6560 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
6561 // %pcrel_lo(auipc)).
6562 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6564 // Use PC-relative addressing to access the GOT for this symbol, then load
6565 // the address from the GOT. This generates the pattern (PseudoLGA sym),
6566 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6567 SDValue Load =
6568 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6569 MachineFunction &MF = DAG.getMachineFunction();
6570 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6571 MachinePointerInfo::getGOT(MF),
6572 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6573 MachineMemOperand::MOInvariant,
6574 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6575 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6576 return Load;
6579 switch (getTargetMachine().getCodeModel()) {
6580 default:
6581 report_fatal_error("Unsupported code model for lowering");
6582 case CodeModel::Small: {
6583 // Generate a sequence for accessing addresses within the first 2 GiB of
6584 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
6585 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
6586 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
6587 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6588 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
6590 case CodeModel::Medium: {
6591 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
6592 if (IsExternWeak) {
6593 // An extern weak symbol may be undefined, i.e. have value 0, which may
6594 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
6595 // symbol. This generates the pattern (PseudoLGA sym), which expands to
6596 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
6597 SDValue Load =
6598 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
6599 MachineFunction &MF = DAG.getMachineFunction();
6600 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6601 MachinePointerInfo::getGOT(MF),
6602 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6603 MachineMemOperand::MOInvariant,
6604 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6605 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6606 return Load;
6609 // Generate a sequence for accessing addresses within any 2GiB range within
6610 // the address space. This generates the pattern (PseudoLLA sym), which
6611 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
6612 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
6617 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
6618 SelectionDAG &DAG) const {
6619 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6620 assert(N->getOffset() == 0 && "unexpected offset in global node");
6621 const GlobalValue *GV = N->getGlobal();
6622 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
6625 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
6626 SelectionDAG &DAG) const {
6627 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
6629 return getAddr(N, DAG);
6632 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
6633 SelectionDAG &DAG) const {
6634 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
6636 return getAddr(N, DAG);
6639 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
6640 SelectionDAG &DAG) const {
6641 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
6643 return getAddr(N, DAG);
6646 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
6647 SelectionDAG &DAG,
6648 bool UseGOT) const {
6649 SDLoc DL(N);
6650 EVT Ty = getPointerTy(DAG.getDataLayout());
6651 const GlobalValue *GV = N->getGlobal();
6652 MVT XLenVT = Subtarget.getXLenVT();
6654 if (UseGOT) {
6655 // Use PC-relative addressing to access the GOT for this TLS symbol, then
6656 // load the address from the GOT and add the thread pointer. This generates
6657 // the pattern (PseudoLA_TLS_IE sym), which expands to
6658 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
6659 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6660 SDValue Load =
6661 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
6662 MachineFunction &MF = DAG.getMachineFunction();
6663 MachineMemOperand *MemOp = MF.getMachineMemOperand(
6664 MachinePointerInfo::getGOT(MF),
6665 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
6666 MachineMemOperand::MOInvariant,
6667 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
6668 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
6670 // Add the thread pointer.
6671 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6672 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
6675 // Generate a sequence for accessing the address relative to the thread
6676 // pointer, with the appropriate adjustment for the thread pointer offset.
6677 // This generates the pattern
6678 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
6679 SDValue AddrHi =
6680 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
6681 SDValue AddrAdd =
6682 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
6683 SDValue AddrLo =
6684 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
6686 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
6687 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
6688 SDValue MNAdd =
6689 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
6690 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
6693 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
6694 SelectionDAG &DAG) const {
6695 SDLoc DL(N);
6696 EVT Ty = getPointerTy(DAG.getDataLayout());
6697 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
6698 const GlobalValue *GV = N->getGlobal();
6700 // Use a PC-relative addressing mode to access the global dynamic GOT address.
6701 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
6702 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
6703 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
6704 SDValue Load =
6705 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
6707 // Prepare argument list to generate call.
6708 ArgListTy Args;
6709 ArgListEntry Entry;
6710 Entry.Node = Load;
6711 Entry.Ty = CallTy;
6712 Args.push_back(Entry);
6714 // Setup call to __tls_get_addr.
6715 TargetLowering::CallLoweringInfo CLI(DAG);
6716 CLI.setDebugLoc(DL)
6717 .setChain(DAG.getEntryNode())
6718 .setLibCallee(CallingConv::C, CallTy,
6719 DAG.getExternalSymbol("__tls_get_addr", Ty),
6720 std::move(Args));
6722 return LowerCallTo(CLI).first;
6725 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
6726 SelectionDAG &DAG) const {
6727 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
6728 assert(N->getOffset() == 0 && "unexpected offset in global node");
6730 if (DAG.getTarget().useEmulatedTLS())
6731 return LowerToTLSEmulatedModel(N, DAG);
6733 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
6735 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
6736 CallingConv::GHC)
6737 report_fatal_error("In GHC calling convention TLS is not supported");
6739 SDValue Addr;
6740 switch (Model) {
6741 case TLSModel::LocalExec:
6742 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
6743 break;
6744 case TLSModel::InitialExec:
6745 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
6746 break;
6747 case TLSModel::LocalDynamic:
6748 case TLSModel::GeneralDynamic:
6749 Addr = getDynamicTLSAddr(N, DAG);
6750 break;
6753 return Addr;
6756 // Return true if Val is equal to (setcc LHS, RHS, CC).
6757 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
6758 // Otherwise, return std::nullopt.
6759 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
6760 ISD::CondCode CC, SDValue Val) {
6761 assert(Val->getOpcode() == ISD::SETCC);
6762 SDValue LHS2 = Val.getOperand(0);
6763 SDValue RHS2 = Val.getOperand(1);
6764 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
6766 if (LHS == LHS2 && RHS == RHS2) {
6767 if (CC == CC2)
6768 return true;
6769 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
6770 return false;
6771 } else if (LHS == RHS2 && RHS == LHS2) {
6772 CC2 = ISD::getSetCCSwappedOperands(CC2);
6773 if (CC == CC2)
6774 return true;
6775 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
6776 return false;
6779 return std::nullopt;
6782 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
6783 const RISCVSubtarget &Subtarget) {
6784 SDValue CondV = N->getOperand(0);
6785 SDValue TrueV = N->getOperand(1);
6786 SDValue FalseV = N->getOperand(2);
6787 MVT VT = N->getSimpleValueType(0);
6788 SDLoc DL(N);
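// These folds rely on the condition being a 0/1 boolean value: -c is then
// either 0 or all-ones, and (c-1) is either all-ones or 0, so the select
// collapses into a mask and a bitwise op.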
6790 if (!Subtarget.hasShortForwardBranchOpt()) {
6791 // (select c, -1, y) -> -c | y
6792 if (isAllOnesConstant(TrueV)) {
6793 SDValue Neg = DAG.getNegative(CondV, DL, VT);
6794 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
6796 // (select c, y, -1) -> (c-1) | y
6797 if (isAllOnesConstant(FalseV)) {
6798 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
6799 DAG.getAllOnesConstant(DL, VT));
6800 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
6803 // (select c, 0, y) -> (c-1) & y
6804 if (isNullConstant(TrueV)) {
6805 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
6806 DAG.getAllOnesConstant(DL, VT));
6807 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
6809 // (select c, y, 0) -> -c & y
6810 if (isNullConstant(FalseV)) {
6811 SDValue Neg = DAG.getNegative(CondV, DL, VT);
6812 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
6816 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
6817 // when both truev and falsev are also setcc.
6818 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
6819 FalseV.getOpcode() == ISD::SETCC) {
6820 SDValue LHS = CondV.getOperand(0);
6821 SDValue RHS = CondV.getOperand(1);
6822 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6824 // (select x, x, y) -> x | y
6825 // (select !x, x, y) -> x & y
6826 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
6827 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
6828 FalseV);
6830 // (select x, y, x) -> x & y
6831 // (select !x, y, x) -> x | y
6832 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
6833 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
6834 FalseV);
6838 return SDValue();
6841 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
6842 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
6843 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
6844 // being `0` or `-1`. In such cases we can replace `select` with `and`.
6845 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
6846 // than `c0`?
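// For example, (add (select cond, x, -1), 1) is rewritten to
// (select cond, (add x, 1), 0), which combineSelectToBinOp can then turn
// into a masked AND.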
6847 static SDValue
6848 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
6849 const RISCVSubtarget &Subtarget) {
6850 if (Subtarget.hasShortForwardBranchOpt())
6851 return SDValue();
6853 unsigned SelOpNo = 0;
6854 SDValue Sel = BO->getOperand(0);
6855 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
6856 SelOpNo = 1;
6857 Sel = BO->getOperand(1);
6860 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
6861 return SDValue();
6863 unsigned ConstSelOpNo = 1;
6864 unsigned OtherSelOpNo = 2;
6865 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
6866 ConstSelOpNo = 2;
6867 OtherSelOpNo = 1;
6869 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
6870 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
6871 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
6872 return SDValue();
6874 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
6875 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
6876 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
6877 return SDValue();
6879 SDLoc DL(Sel);
6880 EVT VT = BO->getValueType(0);
6882 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
6883 if (SelOpNo == 1)
6884 std::swap(NewConstOps[0], NewConstOps[1]);
6886 SDValue NewConstOp =
6887 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
6888 if (!NewConstOp)
6889 return SDValue();
6891 const APInt &NewConstAPInt =
6892 cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
6893 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
6894 return SDValue();
6896 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
6897 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
6898 if (SelOpNo == 1)
6899 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
6900 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
6902 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
6903 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
6904 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
6907 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
6908 SDValue CondV = Op.getOperand(0);
6909 SDValue TrueV = Op.getOperand(1);
6910 SDValue FalseV = Op.getOperand(2);
6911 SDLoc DL(Op);
6912 MVT VT = Op.getSimpleValueType();
6913 MVT XLenVT = Subtarget.getXLenVT();
6915 // Lower vector SELECTs to VSELECTs by splatting the condition.
6916 if (VT.isVector()) {
6917 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
6918 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
6919 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
6922 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
6923 // nodes to implement the SELECT. Performing the lowering here allows for
6924 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
6925 // sequence or RISCVISD::SELECT_CC node (branch-based select).
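// Recall the Zicond/XVentanaCondOps semantics:
//   czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
//   czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1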
6926 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
6927 VT.isScalarInteger()) {
6928 // (select c, t, 0) -> (czero_eqz t, c)
6929 if (isNullConstant(FalseV))
6930 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
6931 // (select c, 0, f) -> (czero_nez f, c)
6932 if (isNullConstant(TrueV))
6933 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
6935 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
6936 if (TrueV.getOpcode() == ISD::AND &&
6937 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
6938 return DAG.getNode(
6939 ISD::OR, DL, VT, TrueV,
6940 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6941 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
6942 if (FalseV.getOpcode() == ISD::AND &&
6943 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
6944 return DAG.getNode(
6945 ISD::OR, DL, VT, FalseV,
6946 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
6948 // Try some other optimizations before falling back to generic lowering.
6949 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6950 return V;
6952 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
6953 // Unless we have the short forward branch optimization.
6954 if (!Subtarget.hasShortForwardBranchOpt())
6955 return DAG.getNode(
6956 ISD::OR, DL, VT,
6957 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
6958 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6961 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6962 return V;
6964 if (Op.hasOneUse()) {
6965 unsigned UseOpc = Op->use_begin()->getOpcode();
6966 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
6967 SDNode *BinOp = *Op->use_begin();
6968 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BinOp,
6969 DAG, Subtarget)) {
6970 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
6971 return lowerSELECT(NewSel, DAG);
6976 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
6977 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
6978 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
6979 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
6980 if (FPTV && FPFV) {
6981 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
6982 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
6983 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
6984 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
6985 DAG.getConstant(1, DL, XLenVT));
6986 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
6990 // If the condition is not an integer SETCC which operates on XLenVT, we need
6991 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
6992 // (select condv, truev, falsev)
6993 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
6994 if (CondV.getOpcode() != ISD::SETCC ||
6995 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
6996 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6997 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
6999 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7001 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7004 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7005 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7006 // advantage of the integer compare+branch instructions. i.e.:
7007 // (select (setcc lhs, rhs, cc), truev, falsev)
7008 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7009 SDValue LHS = CondV.getOperand(0);
7010 SDValue RHS = CondV.getOperand(1);
7011 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7013 // Special case for a select of 2 constants that have a difference of 1.
7014 // Normally this is done by DAGCombine, but if the select is introduced by
7015 // type legalization or op legalization, we miss it. Restricting to SETLT
7016 // case for now because that is what signed saturating add/sub need.
7017 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7018 // but we would probably want to swap the true/false values if the condition
7019 // is SETGE/SETLE to avoid an XORI.
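// For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4),
// and (select (setlt a, b), 4, 5) becomes (sub 5, (setlt a, b)).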
7020 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7021 CCVal == ISD::SETLT) {
7022 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
7023 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
7024 if (TrueVal - 1 == FalseVal)
7025 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7026 if (TrueVal + 1 == FalseVal)
7027 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7030 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7031 // 1 < x ? x : 1 -> 0 < x ? x : 1
7032 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7033 RHS == TrueV && LHS == FalseV) {
7034 LHS = DAG.getConstant(0, DL, VT);
7035 // 0 <u x is the same as x != 0.
7036 if (CCVal == ISD::SETULT) {
7037 std::swap(LHS, RHS);
7038 CCVal = ISD::SETNE;
7042 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7043 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7044 RHS == FalseV) {
7045 RHS = DAG.getConstant(0, DL, VT);
7048 SDValue TargetCC = DAG.getCondCode(CCVal);
7050 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7051 // (select (setcc lhs, rhs, CC), constant, falsev)
7052 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7053 std::swap(TrueV, FalseV);
7054 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7057 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7058 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7061 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7062 SDValue CondV = Op.getOperand(1);
7063 SDLoc DL(Op);
7064 MVT XLenVT = Subtarget.getXLenVT();
7066 if (CondV.getOpcode() == ISD::SETCC &&
7067 CondV.getOperand(0).getValueType() == XLenVT) {
7068 SDValue LHS = CondV.getOperand(0);
7069 SDValue RHS = CondV.getOperand(1);
7070 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7072 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7074 SDValue TargetCC = DAG.getCondCode(CCVal);
7075 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7076 LHS, RHS, TargetCC, Op.getOperand(2));
7079 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7080 CondV, DAG.getConstant(0, DL, XLenVT),
7081 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7084 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7085 MachineFunction &MF = DAG.getMachineFunction();
7086 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7088 SDLoc DL(Op);
7089 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7090 getPointerTy(MF.getDataLayout()));
7092 // vastart just stores the address of the VarArgsFrameIndex slot into the
7093 // memory location argument.
7094 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7095 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7096 MachinePointerInfo(SV));
7099 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7100 SelectionDAG &DAG) const {
7101 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7102 MachineFunction &MF = DAG.getMachineFunction();
7103 MachineFrameInfo &MFI = MF.getFrameInfo();
7104 MFI.setFrameAddressIsTaken(true);
7105 Register FrameReg = RI.getFrameRegister(MF);
7106 int XLenInBytes = Subtarget.getXLen() / 8;
7108 EVT VT = Op.getValueType();
7109 SDLoc DL(Op);
7110 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7111 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
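// With a frame pointer, the caller's frame pointer is spilled at
// fp - 2*XLenInBytes and the return address at fp - XLenInBytes (see
// lowerRETURNADDR below), so walk the chain by loading from that slot.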
7112 while (Depth--) {
7113 int Offset = -(XLenInBytes * 2);
7114 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7115 DAG.getIntPtrConstant(Offset, DL));
7116 FrameAddr =
7117 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7119 return FrameAddr;
7122 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7123 SelectionDAG &DAG) const {
7124 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7125 MachineFunction &MF = DAG.getMachineFunction();
7126 MachineFrameInfo &MFI = MF.getFrameInfo();
7127 MFI.setReturnAddressIsTaken(true);
7128 MVT XLenVT = Subtarget.getXLenVT();
7129 int XLenInBytes = Subtarget.getXLen() / 8;
7131 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7132 return SDValue();
7134 EVT VT = Op.getValueType();
7135 SDLoc DL(Op);
7136 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7137 if (Depth) {
7138 int Off = -XLenInBytes;
7139 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7140 SDValue Offset = DAG.getConstant(Off, DL, VT);
7141 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7142 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7143 MachinePointerInfo());
7146 // Return the value of the return address register, marking it an implicit
7147 // live-in.
7148 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7149 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7152 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7153 SelectionDAG &DAG) const {
7154 SDLoc DL(Op);
7155 SDValue Lo = Op.getOperand(0);
7156 SDValue Hi = Op.getOperand(1);
7157 SDValue Shamt = Op.getOperand(2);
7158 EVT VT = Lo.getValueType();
7160 // if Shamt-XLEN < 0: // Shamt < XLEN
7161 // Lo = Lo << Shamt
7162 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
7163 // else:
7164 // Lo = 0
7165 // Hi = Lo << (Shamt-XLEN)
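// Note that for Shamt < XLEN, XLEN-1 - Shamt equals XLEN-1 ^ Shamt because
// XLEN-1 has all of the relevant low bits set, so the SUB below matches the
// XOR written in the pseudocode above.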
7167 SDValue Zero = DAG.getConstant(0, DL, VT);
7168 SDValue One = DAG.getConstant(1, DL, VT);
7169 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7170 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7171 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7172 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7174 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7175 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7176 SDValue ShiftRightLo =
7177 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7178 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7179 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7180 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7182 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7184 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7185 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7187 SDValue Parts[2] = {Lo, Hi};
7188 return DAG.getMergeValues(Parts, DL);
7191 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7192 bool IsSRA) const {
7193 SDLoc DL(Op);
7194 SDValue Lo = Op.getOperand(0);
7195 SDValue Hi = Op.getOperand(1);
7196 SDValue Shamt = Op.getOperand(2);
7197 EVT VT = Lo.getValueType();
7199 // SRA expansion:
7200 // if Shamt-XLEN < 0: // Shamt < XLEN
7201 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ XLEN-1))
7202 // Hi = Hi >>s Shamt
7203 // else:
7204 // Lo = Hi >>s (Shamt-XLEN);
7205 // Hi = Hi >>s (XLEN-1)
7207 // SRL expansion:
7208 // if Shamt-XLEN < 0: // Shamt < XLEN
7209 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ XLEN-1))
7210 // Hi = Hi >>u Shamt
7211 // else:
7212 // Lo = Hi >>u (Shamt-XLEN);
7213 // Hi = 0;
7215 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7217 SDValue Zero = DAG.getConstant(0, DL, VT);
7218 SDValue One = DAG.getConstant(1, DL, VT);
7219 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7220 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7221 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7222 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7224 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7225 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7226 SDValue ShiftLeftHi =
7227 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7228 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7229 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7230 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7231 SDValue HiFalse =
7232 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7234 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7236 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7237 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7239 SDValue Parts[2] = {Lo, Hi};
7240 return DAG.getMergeValues(Parts, DL);
7243 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
7244 // legal equivalently-sized i8 type, so we can use that as a go-between.
7245 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7246 SelectionDAG &DAG) const {
7247 SDLoc DL(Op);
7248 MVT VT = Op.getSimpleValueType();
7249 SDValue SplatVal = Op.getOperand(0);
7250 // All-zeros or all-ones splats are handled specially.
7251 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7252 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7253 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7255 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7256 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7257 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
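// For a non-constant scalar, only bit 0 is meaningful for an i1 splat, so
// mask it to a single bit and compare the widened i8 splat against zero.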
7259 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7260 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7261 DAG.getConstant(1, DL, SplatVal.getValueType()));
7262 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7263 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7264 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7267 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7268 // illegal (currently only vXi64 RV32).
7269 // FIXME: We could also catch non-constant sign-extended i32 values and lower
7270 // them to VMV_V_X_VL.
7271 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7272 SelectionDAG &DAG) const {
7273 SDLoc DL(Op);
7274 MVT VecVT = Op.getSimpleValueType();
7275 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7276 "Unexpected SPLAT_VECTOR_PARTS lowering");
7278 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7279 SDValue Lo = Op.getOperand(0);
7280 SDValue Hi = Op.getOperand(1);
7282 MVT ContainerVT = VecVT;
7283 if (VecVT.isFixedLengthVector())
7284 ContainerVT = getContainerForFixedLengthVector(VecVT);
7286 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7288 SDValue Res =
7289 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7291 if (VecVT.isFixedLengthVector())
7292 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7294 return Res;
7297 // Custom-lower extensions from mask vectors by using a vselect either with 1
7298 // for zero/any-extension or -1 for sign-extension:
7299 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7300 // Note that any-extension is lowered identically to zero-extension.
7301 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7302 int64_t ExtTrueVal) const {
7303 SDLoc DL(Op);
7304 MVT VecVT = Op.getSimpleValueType();
7305 SDValue Src = Op.getOperand(0);
7306 // Only custom-lower extensions from mask types
7307 assert(Src.getValueType().isVector() &&
7308 Src.getValueType().getVectorElementType() == MVT::i1);
7310 if (VecVT.isScalableVector()) {
7311 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7312 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7313 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7316 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7317 MVT I1ContainerVT =
7318 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7320 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7322 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7324 MVT XLenVT = Subtarget.getXLenVT();
7325 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7326 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7328 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7329 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7330 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7331 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7332 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
7333 SplatTrueVal, SplatZero, VL);
7335 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7338 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7339 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7340 MVT ExtVT = Op.getSimpleValueType();
7341 // Only custom-lower extensions from fixed-length vector types.
7342 if (!ExtVT.isFixedLengthVector())
7343 return Op;
7344 MVT VT = Op.getOperand(0).getSimpleValueType();
7345 // Grab the canonical container type for the extended type. Infer the smaller
7346 // type from that to ensure the same number of vector elements, as we know
7347 // the LMUL will be sufficient to hold the smaller type.
7348 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7349 // Derive the source container type from it manually to ensure the same
7350 // number of vector elements between source and dest.
7351 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7352 ContainerExtVT.getVectorElementCount());
7354 SDValue Op1 =
7355 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7357 SDLoc DL(Op);
7358 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7360 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7362 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7365 // Custom-lower truncations from vectors to mask vectors by using a mask and a
7366 // setcc operation:
7367 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
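// Illustrative example (typical selection): truncating v8i8 to v8i1 keeps
// only bit 0 of each element and compares against zero, i.e. roughly
// vand.vi vd, vs, 1 followed by vmsne.vi v0, vd, 0.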
7368 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7369 SelectionDAG &DAG) const {
7370 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7371 SDLoc DL(Op);
7372 EVT MaskVT = Op.getValueType();
7373 // Only expect to custom-lower truncations to mask types
7374 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7375 "Unexpected type for vector mask lowering");
7376 SDValue Src = Op.getOperand(0);
7377 MVT VecVT = Src.getSimpleValueType();
7378 SDValue Mask, VL;
7379 if (IsVPTrunc) {
7380 Mask = Op.getOperand(1);
7381 VL = Op.getOperand(2);
7383 // If this is a fixed vector, we need to convert it to a scalable vector.
7384 MVT ContainerVT = VecVT;
7386 if (VecVT.isFixedLengthVector()) {
7387 ContainerVT = getContainerForFixedLengthVector(VecVT);
7388 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7389 if (IsVPTrunc) {
7390 MVT MaskContainerVT =
7391 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7392 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7396 if (!IsVPTrunc) {
7397 std::tie(Mask, VL) =
7398 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7401 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7402 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7404 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7405 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7406 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7407 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7409 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7410 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7411 DAG.getUNDEF(ContainerVT), Mask, VL);
7412 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7413 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7414 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7415 if (MaskVT.isFixedLengthVector())
7416 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7417 return Trunc;
7420 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7421 SelectionDAG &DAG) const {
7422 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7423 SDLoc DL(Op);
7425 MVT VT = Op.getSimpleValueType();
7426 // Only custom-lower vector truncates
7427 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7429 // Truncates to mask types are handled differently
7430 if (VT.getVectorElementType() == MVT::i1)
7431 return lowerVectorMaskTruncLike(Op, DAG);
7433 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7434 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7435 // truncate by one power of two at a time.
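// For example, an i64 -> i8 truncate is emitted as three such nodes
// (i64 -> i32 -> i16 -> i8), each of which typically selects to a narrowing
// shift (vnsrl.wi with a zero shift amount).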
7436 MVT DstEltVT = VT.getVectorElementType();
7438 SDValue Src = Op.getOperand(0);
7439 MVT SrcVT = Src.getSimpleValueType();
7440 MVT SrcEltVT = SrcVT.getVectorElementType();
7442 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7443 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7444 "Unexpected vector truncate lowering");
7446 MVT ContainerVT = SrcVT;
7447 SDValue Mask, VL;
7448 if (IsVPTrunc) {
7449 Mask = Op.getOperand(1);
7450 VL = Op.getOperand(2);
7452 if (SrcVT.isFixedLengthVector()) {
7453 ContainerVT = getContainerForFixedLengthVector(SrcVT);
7454 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7455 if (IsVPTrunc) {
7456 MVT MaskVT = getMaskTypeFor(ContainerVT);
7457 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7461 SDValue Result = Src;
7462 if (!IsVPTrunc) {
7463 std::tie(Mask, VL) =
7464 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7467 LLVMContext &Context = *DAG.getContext();
7468 const ElementCount Count = ContainerVT.getVectorElementCount();
7469 do {
7470 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
7471 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
7472 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
7473 Mask, VL);
7474 } while (SrcEltVT != DstEltVT);
7476 if (SrcVT.isFixedLengthVector())
7477 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
7479 return Result;
7482 SDValue
7483 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
7484 SelectionDAG &DAG) const {
7485 SDLoc DL(Op);
7486 SDValue Chain = Op.getOperand(0);
7487 SDValue Src = Op.getOperand(1);
7488 MVT VT = Op.getSimpleValueType();
7489 MVT SrcVT = Src.getSimpleValueType();
7490 MVT ContainerVT = VT;
7491 if (VT.isFixedLengthVector()) {
7492 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7493 ContainerVT =
7494 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7495 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7498 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7500 // RVV can only widen/truncate fp to types double/half the size of the source.
7501 if ((VT.getVectorElementType() == MVT::f64 &&
7502 SrcVT.getVectorElementType() == MVT::f16) ||
7503 (VT.getVectorElementType() == MVT::f16 &&
7504 SrcVT.getVectorElementType() == MVT::f64)) {
7505 // For double rounding, the intermediate rounding should be round-to-odd.
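// (Rounding to nearest twice can differ from rounding once; round-to-odd in
// the intermediate f32 step preserves enough information for the final
// f32 -> f16 rounding to match a single correctly-rounded conversion.)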
7506 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7507 ? RISCVISD::STRICT_FP_EXTEND_VL
7508 : RISCVISD::STRICT_VFNCVT_ROD_VL;
7509 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7510 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
7511 Chain, Src, Mask, VL);
7512 Chain = Src.getValue(1);
7515 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
7516 ? RISCVISD::STRICT_FP_EXTEND_VL
7517 : RISCVISD::STRICT_FP_ROUND_VL;
7518 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7519 Chain, Src, Mask, VL);
7520 if (VT.isFixedLengthVector()) {
7521 // StrictFP operations have two result values. Their lowered result should
7522 // have the same number of results.
7523 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
7524 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
7526 return Res;
7529 SDValue
7530 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
7531 SelectionDAG &DAG) const {
7532 bool IsVP =
7533 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
7534 bool IsExtend =
7535 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
7536 // RVV can only truncate fp to types half the size of the source. We
7537 // custom-lower f64->f16 rounds via RVV's round-to-odd float
7538 // conversion instruction.
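// Illustrative sequence for an f64 -> f16 round: first narrow f64 -> f32
// with the round-to-odd vfncvt.rod.f.f.w, then f32 -> f16 with a regular
// vfncvt.f.f.w; the direct (single-step) cases skip the intermediate
// conversion.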
7539 SDLoc DL(Op);
7540 MVT VT = Op.getSimpleValueType();
7542 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7544 SDValue Src = Op.getOperand(0);
7545 MVT SrcVT = Src.getSimpleValueType();
7547 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
7548 SrcVT.getVectorElementType() != MVT::f16);
7549 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
7550 SrcVT.getVectorElementType() != MVT::f64);
7552 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
7554 // Prepare any fixed-length vector operands.
7555 MVT ContainerVT = VT;
7556 SDValue Mask, VL;
7557 if (IsVP) {
7558 Mask = Op.getOperand(1);
7559 VL = Op.getOperand(2);
7561 if (VT.isFixedLengthVector()) {
7562 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7563 ContainerVT =
7564 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
7565 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7566 if (IsVP) {
7567 MVT MaskVT = getMaskTypeFor(ContainerVT);
7568 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7572 if (!IsVP)
7573 std::tie(Mask, VL) =
7574 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7576 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
7578 if (IsDirectConv) {
7579 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
7580 if (VT.isFixedLengthVector())
7581 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
7582 return Src;
7585 unsigned InterConvOpc =
7586 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
7588 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
7589 SDValue IntermediateConv =
7590 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
7591 SDValue Result =
7592 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
7593 if (VT.isFixedLengthVector())
7594 return convertFromScalableVector(VT, Result, DAG, Subtarget);
7595 return Result;
7598 // Given a scalable vector type and an index into it, returns the type for the
7599 // smallest subvector that the index fits in. This can be used to reduce LMUL
7600 // for operations like vslidedown.
7602 // E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
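// Worked example for the code below (assuming Zvl128b and i32 elements):
// MinVLMAX = 128/32 = 4, so MaxIdx = 3 fits in the LMUL1 type nxv2i32, while
// MaxIdx = 5 in an nxv8i32 only shrinks as far as the LMUL2 type nxv4i32.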
7603 static std::optional<MVT>
7604 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
7605 const RISCVSubtarget &Subtarget) {
7606 assert(VecVT.isScalableVector());
7607 const unsigned EltSize = VecVT.getScalarSizeInBits();
7608 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
7609 const unsigned MinVLMAX = VectorBitsMin / EltSize;
7610 MVT SmallerVT;
7611 if (MaxIdx < MinVLMAX)
7612 SmallerVT = getLMUL1VT(VecVT);
7613 else if (MaxIdx < MinVLMAX * 2)
7614 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
7615 else if (MaxIdx < MinVLMAX * 4)
7616 SmallerVT = getLMUL1VT(VecVT)
7617 .getDoubleNumVectorElementsVT()
7618 .getDoubleNumVectorElementsVT();
7619 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
7620 return std::nullopt;
7621 return SmallerVT;
7624 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
7625 // first position of a vector, and that vector is slid up to the insert index.
7626 // By limiting the active vector length to index+1 and merging with the
7627 // original vector (with an undisturbed tail policy for elements >= VL), we
7628 // achieve the desired result of leaving all elements untouched except the one
7629 // at VL-1, which is replaced with the desired value.
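// Illustrative sequence for inserting a GPR-sized scalar at index 2 of a
// v4i32: vmv.s.x writes the value to element 0 of a temporary, then a
// vslideup by 2 with VL = 3 and a tail-undisturbed policy merges it into the
// original vector.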
7630 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
7631 SelectionDAG &DAG) const {
7632 SDLoc DL(Op);
7633 MVT VecVT = Op.getSimpleValueType();
7634 SDValue Vec = Op.getOperand(0);
7635 SDValue Val = Op.getOperand(1);
7636 SDValue Idx = Op.getOperand(2);
7638 if (VecVT.getVectorElementType() == MVT::i1) {
7639 // FIXME: For now we just promote to an i8 vector and insert into that,
7640 // but this is probably not optimal.
7641 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7642 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7643 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
7644 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
7647 MVT ContainerVT = VecVT;
7648 // If the operand is a fixed-length vector, convert to a scalable one.
7649 if (VecVT.isFixedLengthVector()) {
7650 ContainerVT = getContainerForFixedLengthVector(VecVT);
7651 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7654 MVT OrigContainerVT = ContainerVT;
7655 SDValue OrigVec = Vec;
7656 // If we know the index we're going to insert at, we can shrink Vec so that
7657 // we're performing the scalar inserts and slideup on a smaller LMUL.
7658 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
7659 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
7660 DL, DAG, Subtarget)) {
7661 ContainerVT = *ShrunkVT;
7662 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
7663 DAG.getVectorIdxConstant(0, DL));
7667 MVT XLenVT = Subtarget.getXLenVT();
7669 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
7670 // Even i64-element vectors on RV32 can be lowered without scalar
7671 // legalization if the most-significant 32 bits of the value are not affected
7672 // by the sign-extension of the lower 32 bits.
7673 // TODO: We could also catch sign extensions of a 32-bit value.
7674 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
7675 const auto *CVal = cast<ConstantSDNode>(Val);
7676 if (isInt<32>(CVal->getSExtValue())) {
7677 IsLegalInsert = true;
7678 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
7682 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7684 SDValue ValInVec;
7686 if (IsLegalInsert) {
7687 unsigned Opc =
7688 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
7689 if (isNullConstant(Idx)) {
7690 if (!VecVT.isFloatingPoint())
7691 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
7692 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
7694 if (ContainerVT != OrigContainerVT)
7695 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7696 Vec, DAG.getVectorIdxConstant(0, DL));
7697 if (!VecVT.isFixedLengthVector())
7698 return Vec;
7699 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
7701 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
7702 } else {
7703 // On RV32, i64-element vectors must be specially handled to place the
7704 // value at element 0, by using two vslide1down instructions in sequence on
7705 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
7706 // this.
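// Illustrative sequence (RV32, VL limited to 2 on the SEW=32 view):
// vslide1down.vx vd, vs, lo followed by vslide1down.vx vd, vd, hi leaves the
// i64 value (hi:lo) in element 0 after the bitcast back.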
7707 SDValue ValLo, ValHi;
7708 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
7709 MVT I32ContainerVT =
7710 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
7711 SDValue I32Mask =
7712 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
7713 // Limit the active VL to two.
7714 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
7715 // If the Idx is 0 we can insert directly into the vector.
7716 if (isNullConstant(Idx)) {
7717 // First slide in the lo value, then the hi value above it. We use slide1down
7718 // to avoid the register group overlap constraint of vslide1up.
7719 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7720 Vec, Vec, ValLo, I32Mask, InsertI64VL);
7721 // If the source vector is undef don't pass along the tail elements from
7722 // the previous slide1down.
7723 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
7724 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7725 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
7726 // Bitcast back to the right container type.
7727 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
7729 if (ContainerVT != OrigContainerVT)
7730 ValInVec =
7731 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7732 ValInVec, DAG.getVectorIdxConstant(0, DL));
7733 if (!VecVT.isFixedLengthVector())
7734 return ValInVec;
7735 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
7738 // First slide in the lo value, then the hi value above it. We use slide1down
7739 // to avoid the register group overlap constraint of vslide1up.
7740 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7741 DAG.getUNDEF(I32ContainerVT),
7742 DAG.getUNDEF(I32ContainerVT), ValLo,
7743 I32Mask, InsertI64VL);
7744 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
7745 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
7746 I32Mask, InsertI64VL);
7747 // Bitcast back to the right container type.
7748 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
7751 // Now that the value is in a vector, slide it into position.
7752 SDValue InsertVL =
7753 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
7755 // Use tail agnostic policy if Idx is the last index of Vec.
7756 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
7757 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
7758 cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
7759 VecVT.getVectorNumElements())
7760 Policy = RISCVII::TAIL_AGNOSTIC;
7761 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
7762 Idx, Mask, InsertVL, Policy);
7764 if (ContainerVT != OrigContainerVT)
7765 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
7766 Slideup, DAG.getVectorIdxConstant(0, DL));
7767 if (!VecVT.isFixedLengthVector())
7768 return Slideup;
7769 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
7772 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
7773 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
7774 // types this is done using VMV_X_S to allow us to glean information about the
7775 // sign bits of the result.
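// Illustrative sequence for extracting element 5 of an integer vector:
// something like vslidedown.vi vd, vs, 5 with VL = 1 followed by
// vmv.x.s rd, vd.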
7776 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
7777 SelectionDAG &DAG) const {
7778 SDLoc DL(Op);
7779 SDValue Idx = Op.getOperand(1);
7780 SDValue Vec = Op.getOperand(0);
7781 EVT EltVT = Op.getValueType();
7782 MVT VecVT = Vec.getSimpleValueType();
7783 MVT XLenVT = Subtarget.getXLenVT();
7785 if (VecVT.getVectorElementType() == MVT::i1) {
7786 // Use vfirst.m to extract the first bit.
7787 if (isNullConstant(Idx)) {
7788 MVT ContainerVT = VecVT;
7789 if (VecVT.isFixedLengthVector()) {
7790 ContainerVT = getContainerForFixedLengthVector(VecVT);
7791 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7793 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7794 SDValue Vfirst =
7795 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
7796 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
7797 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
7798 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
7800 if (VecVT.isFixedLengthVector()) {
7801 unsigned NumElts = VecVT.getVectorNumElements();
7802 if (NumElts >= 8) {
7803 MVT WideEltVT;
7804 unsigned WidenVecLen;
7805 SDValue ExtractElementIdx;
7806 SDValue ExtractBitIdx;
7807 unsigned MaxEEW = Subtarget.getELen();
7808 MVT LargestEltVT = MVT::getIntegerVT(
7809 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
7810 if (NumElts <= LargestEltVT.getSizeInBits()) {
7811 assert(isPowerOf2_32(NumElts) &&
7812 "the number of elements should be power of 2");
7813 WideEltVT = MVT::getIntegerVT(NumElts);
7814 WidenVecLen = 1;
7815 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
7816 ExtractBitIdx = Idx;
7817 } else {
7818 WideEltVT = LargestEltVT;
7819 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
7820 // extract element index = index / element width
7821 ExtractElementIdx = DAG.getNode(
7822 ISD::SRL, DL, XLenVT, Idx,
7823 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
7824 // mask bit index = index % element width
7825 ExtractBitIdx = DAG.getNode(
7826 ISD::AND, DL, XLenVT, Idx,
7827 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
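// Worked example (illustrative, RV64 with ELEN=64): a v128i1 source is
// viewed as v2i64, so bit Idx lives in word Idx >> 6 at bit position
// Idx & 63, which the extract, shift and mask below recover.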
7829 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
7830 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
7831 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
7832 Vec, ExtractElementIdx);
7833 // Extract the bit from GPR.
7834 SDValue ShiftRight =
7835 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
7836 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
7837 DAG.getConstant(1, DL, XLenVT));
7838 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
7841 // Otherwise, promote to an i8 vector and extract from that.
7842 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
7843 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
7844 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
7847 // If this is a fixed vector, we need to convert it to a scalable vector.
7848 MVT ContainerVT = VecVT;
7849 if (VecVT.isFixedLengthVector()) {
7850 ContainerVT = getContainerForFixedLengthVector(VecVT);
7851 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7854 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
7855 // contains our index.
7856 std::optional<uint64_t> MaxIdx;
7857 if (VecVT.isFixedLengthVector())
7858 MaxIdx = VecVT.getVectorNumElements() - 1;
7859 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
7860 MaxIdx = IdxC->getZExtValue();
7861 if (MaxIdx) {
7862 if (auto SmallerVT =
7863 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
7864 ContainerVT = *SmallerVT;
7865 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
7866 DAG.getConstant(0, DL, XLenVT));
7870 // If after narrowing, the required slide is still greater than LMUL2,
7871 // fall back to generic expansion and go through the stack. This is done
7872 // for a subtle reason: extracting *all* elements out of a vector is
7873 // widely expected to be linear in vector size, but because vslidedown
7874 // is linear in LMUL, performing N extracts using vslidedown becomes
7875 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
7876 // seems to have the same problem (the store is linear in LMUL), but the
7877 // generic expansion *memoizes* the store, and thus for many extracts of
7878 // the same vector we end up with one store and a bunch of loads.
7879 // TODO: We don't have the same code for insert_vector_elt because we
7880 // have BUILD_VECTOR and handle the degenerate case there. Should we
7881 // consider adding an inverse BUILD_VECTOR node?
7882 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
7883 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
7884 return SDValue();
7886 // If the index is 0, the vector is already in the right position.
7887 if (!isNullConstant(Idx)) {
7888 // Use a VL of 1 to avoid processing more elements than we need.
7889 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
7890 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
7891 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
7894 if (!EltVT.isInteger()) {
7895 // Floating-point extracts are handled in TableGen.
7896 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
7897 DAG.getConstant(0, DL, XLenVT));
7900 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
7901 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
7904 // Some RVV intrinsics may claim that they want an integer operand to be
7905 // promoted or expanded.
7906 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
7907 const RISCVSubtarget &Subtarget) {
7908 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
7909 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
7910 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
7911 "Unexpected opcode");
7913 if (!Subtarget.hasVInstructions())
7914 return SDValue();
7916 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
7917 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
7918 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
7920 SDLoc DL(Op);
7922 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
7923 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
7924 if (!II || !II->hasScalarOperand())
7925 return SDValue();
7927 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
7928 assert(SplatOp < Op.getNumOperands());
7930 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
7931 SDValue &ScalarOp = Operands[SplatOp];
7932 MVT OpVT = ScalarOp.getSimpleValueType();
7933 MVT XLenVT = Subtarget.getXLenVT();
7935 // If this isn't a scalar, or its type is XLenVT we're done.
7936 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
7937 return SDValue();
7939 // Simplest case is that the operand needs to be promoted to XLenVT.
7940 if (OpVT.bitsLT(XLenVT)) {
7941 // If the operand is a constant, sign extend to increase our chances
7942 // of being able to use a .vi instruction. ANY_EXTEND would become a
7943 // zero extend and the simm5 check in isel would fail.
7944 // FIXME: Should we ignore the upper bits in isel instead?
7945 unsigned ExtOpc =
7946 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
7947 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
7948 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7951 // Use the previous operand to get the vXi64 VT. The result might be a mask
7952 // VT for compares. Using the previous operand assumes that the previous
7953 // operand will never have a smaller element size than a scalar operand and
7954 // that a widening operation never uses SEW=64.
7955 // NOTE: If this fails the below assert, we can probably just find the
7956 // element count from any operand or result and use it to construct the VT.
7957 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
7958 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
7960 // The more complex case is when the scalar is larger than XLenVT.
7961 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
7962 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
7964 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
7965 // instruction to sign-extend since SEW>XLEN.
7966 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
7967 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
7968 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7971 switch (IntNo) {
7972 case Intrinsic::riscv_vslide1up:
7973 case Intrinsic::riscv_vslide1down:
7974 case Intrinsic::riscv_vslide1up_mask:
7975 case Intrinsic::riscv_vslide1down_mask: {
7976 // We need to special case these when the scalar is larger than XLen.
7977 unsigned NumOps = Op.getNumOperands();
7978 bool IsMasked = NumOps == 7;
7980 // Convert the vector source to the equivalent nxvXi32 vector.
7981 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
7982 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
7983 SDValue ScalarLo, ScalarHi;
7984 std::tie(ScalarLo, ScalarHi) =
7985 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
7987 // Double the VL since we halved SEW.
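// E.g. (illustrative, VLEN >= 128): a vslide1down on nxv2i64 with a
// constant AVL of 4 is performed on the nxv4i32 view with an i32 VL of 8,
// since each i64 element corresponds to two i32 elements.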
7988 SDValue AVL = getVLOperand(Op);
7989 SDValue I32VL;
7991 // Optimize for constant AVL
7992 if (isa<ConstantSDNode>(AVL)) {
7993 unsigned EltSize = VT.getScalarSizeInBits();
7994 unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
7996 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
7997 unsigned MaxVLMAX =
7998 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
8000 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8001 unsigned MinVLMAX =
8002 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
8004 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
8005 if (AVLInt <= MinVLMAX) {
8006 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8007 } else if (AVLInt >= 2 * MaxVLMAX) {
8008 // Just set vl to VLMAX in this situation
8009 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8010 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8011 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8012 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8013 SDValue SETVLMAX = DAG.getTargetConstant(
8014 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8015 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8016 LMUL);
8017 } else {
8018 // For AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working VL depends on
8019 // the hardware implementation, so leave I32VL unset and let the
8020 // vsetvli-based code below handle it.
8023 if (!I32VL) {
8024 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8025 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8026 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8027 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8028 SDValue SETVL =
8029 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8030 // Use a vsetvli instruction to get the VL actually used, which depends on
8031 // the hardware implementation.
8032 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8033 SEW, LMUL);
8034 I32VL =
8035 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8038 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8040 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8041 // instructions.
8042 SDValue Passthru;
8043 if (IsMasked)
8044 Passthru = DAG.getUNDEF(I32VT);
8045 else
8046 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8048 if (IntNo == Intrinsic::riscv_vslide1up ||
8049 IntNo == Intrinsic::riscv_vslide1up_mask) {
8050 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8051 ScalarHi, I32Mask, I32VL);
8052 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8053 ScalarLo, I32Mask, I32VL);
8054 } else {
8055 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8056 ScalarLo, I32Mask, I32VL);
8057 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8058 ScalarHi, I32Mask, I32VL);
8061 // Convert back to nxvXi64.
8062 Vec = DAG.getBitcast(VT, Vec);
8064 if (!IsMasked)
8065 return Vec;
8066 // Apply mask after the operation.
8067 SDValue Mask = Operands[NumOps - 3];
8068 SDValue MaskedOff = Operands[1];
8069 // Assume Policy operand is the last operand.
8070 uint64_t Policy =
8071 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
8072 // We don't need to select maskedoff if it's undef.
8073 if (MaskedOff.isUndef())
8074 return Vec;
8075 // TAMU (tail agnostic, mask undisturbed)
8076 if (Policy == RISCVII::TAIL_AGNOSTIC)
8077 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
8078 AVL);
8079 // TUMA or TUMU: Currently we always emit a TUMU policy regardless of TUMA.
8080 // That's fine because vmerge does not care about the mask policy.
8081 return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8082 AVL);
8086 // We need to convert the scalar to a splat vector.
8087 SDValue VL = getVLOperand(Op);
8088 assert(VL.getValueType() == XLenVT);
8089 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8090 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8093 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8094 // scalable vector llvm.get.vector.length for now.
8096 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
8097 // (vscale * VF). The vscale and VF are independent of element width. We use
8098 // SEW=8 for the vsetvli because it is the only element width that supports all
8099 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8100 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8101 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8102 // SEW and LMUL are better for the surrounding vector instructions.
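// Worked example (illustrative): for VF = 4, LMul1VF = 64/8 = 8, so the
// request is fractional with LMUL = mf2; a vsetvli with e8, mf2 has
// VLMAX = VLEN/16 = vscale * 4, matching the requested vscale * VF.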
8103 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8104 const RISCVSubtarget &Subtarget) {
8105 MVT XLenVT = Subtarget.getXLenVT();
8107 // The smallest LMUL is only valid for the smallest element width.
8108 const unsigned ElementWidth = 8;
8110 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8111 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8112 // We don't support VF==1 with ELEN==32.
8113 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
8115 unsigned VF = N->getConstantOperandVal(2);
8116 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8117 "Unexpected VF");
8118 (void)MinVF;
8120 bool Fractional = VF < LMul1VF;
8121 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8122 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8123 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8125 SDLoc DL(N);
8127 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8128 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8130 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8132 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8133 SDValue Res =
8134 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8135 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8138 // LMUL * VLEN should be greater than or equal to EGS * SEW
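// E.g. (illustrative): a Zvkned instruction with EGS = 4 and SEW = 32 needs
// EGW = 128 bits, so an LMUL=1 operand requires VLEN >= 128 while an LMUL=2
// operand is accepted with VLEN >= 64.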
8139 static inline bool isValidEGW(int EGS, EVT VT,
8140 const RISCVSubtarget &Subtarget) {
8141 return (Subtarget.getRealMinVLen() *
8142 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8143 EGS * VT.getScalarSizeInBits();
8146 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8147 SelectionDAG &DAG) const {
8148 unsigned IntNo = Op.getConstantOperandVal(0);
8149 SDLoc DL(Op);
8150 MVT XLenVT = Subtarget.getXLenVT();
8152 switch (IntNo) {
8153 default:
8154 break; // Don't custom lower most intrinsics.
8155 case Intrinsic::thread_pointer: {
8156 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8157 return DAG.getRegister(RISCV::X4, PtrVT);
8159 case Intrinsic::riscv_orc_b:
8160 case Intrinsic::riscv_brev8:
8161 case Intrinsic::riscv_sha256sig0:
8162 case Intrinsic::riscv_sha256sig1:
8163 case Intrinsic::riscv_sha256sum0:
8164 case Intrinsic::riscv_sha256sum1:
8165 case Intrinsic::riscv_sm3p0:
8166 case Intrinsic::riscv_sm3p1: {
8167 unsigned Opc;
8168 switch (IntNo) {
8169 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8170 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8171 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8172 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8173 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8174 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8175 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8176 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8179 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8180 SDValue NewOp =
8181 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8182 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8183 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8186 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8188 case Intrinsic::riscv_sm4ks:
8189 case Intrinsic::riscv_sm4ed: {
8190 unsigned Opc =
8191 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8193 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8194 SDValue NewOp0 =
8195 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8196 SDValue NewOp1 =
8197 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8198 SDValue Res =
8199 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8200 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8203 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8204 Op.getOperand(3));
8206 case Intrinsic::riscv_zip:
8207 case Intrinsic::riscv_unzip: {
8208 unsigned Opc =
8209 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8210 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8212 case Intrinsic::riscv_clmul:
8213 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8214 SDValue NewOp0 =
8215 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8216 SDValue NewOp1 =
8217 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8218 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8219 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8221 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8222 Op.getOperand(2));
8223 case Intrinsic::riscv_clmulh:
8224 case Intrinsic::riscv_clmulr: {
8225 unsigned Opc =
8226 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8227 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8228 SDValue NewOp0 =
8229 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8230 SDValue NewOp1 =
8231 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8232 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8233 DAG.getConstant(32, DL, MVT::i64));
8234 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8235 DAG.getConstant(32, DL, MVT::i64));
8236 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8237 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8238 DAG.getConstant(32, DL, MVT::i64));
8239 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8242 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8244 case Intrinsic::experimental_get_vector_length:
8245 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8246 case Intrinsic::riscv_vmv_x_s: {
8247 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8248 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8250 case Intrinsic::riscv_vfmv_f_s:
8251 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8252 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
8253 case Intrinsic::riscv_vmv_v_x:
8254 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8255 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8256 Subtarget);
8257 case Intrinsic::riscv_vfmv_v_f:
8258 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8259 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8260 case Intrinsic::riscv_vmv_s_x: {
8261 SDValue Scalar = Op.getOperand(2);
8263 if (Scalar.getValueType().bitsLE(XLenVT)) {
8264 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8265 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8266 Op.getOperand(1), Scalar, Op.getOperand(3));
8269 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8271 // This is an i64 value that lives in two scalar registers. We have to
8272 // insert this in a convoluted way. First we build a vXi64 splat containing
8273 // the two values that we assemble using some bit math. Next we'll use
8274 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8275 // to merge element 0 from our splat into the source vector.
8276 // FIXME: This is probably not the best way to do this, but it is
8277 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8278 // point.
8279 // sw lo, (a0)
8280 // sw hi, 4(a0)
8281 // vlse vX, (a0)
8283 // vid.v vVid
8284 // vmseq.vx mMask, vVid, 0
8285 // vmerge.vvm vDest, vSrc, vVal, mMask
8286 MVT VT = Op.getSimpleValueType();
8287 SDValue Vec = Op.getOperand(1);
8288 SDValue VL = getVLOperand(Op);
8290 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8291 if (Op.getOperand(1).isUndef())
8292 return SplattedVal;
8293 SDValue SplattedIdx =
8294 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8295 DAG.getConstant(0, DL, MVT::i32), VL);
8297 MVT MaskVT = getMaskTypeFor(VT);
8298 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
8299 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8300 SDValue SelectCond =
8301 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8302 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
8303 DAG.getUNDEF(MaskVT), Mask, VL});
8304 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
8305 Vec, VL);
8307 // EGS * EEW >= 128 bits
8308 case Intrinsic::riscv_vaesdf_vv:
8309 case Intrinsic::riscv_vaesdf_vs:
8310 case Intrinsic::riscv_vaesdm_vv:
8311 case Intrinsic::riscv_vaesdm_vs:
8312 case Intrinsic::riscv_vaesef_vv:
8313 case Intrinsic::riscv_vaesef_vs:
8314 case Intrinsic::riscv_vaesem_vv:
8315 case Intrinsic::riscv_vaesem_vs:
8316 case Intrinsic::riscv_vaeskf1:
8317 case Intrinsic::riscv_vaeskf2:
8318 case Intrinsic::riscv_vaesz_vs:
8319 case Intrinsic::riscv_vsm4k:
8320 case Intrinsic::riscv_vsm4r_vv:
8321 case Intrinsic::riscv_vsm4r_vs: {
8322 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8323 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8324 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8325 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8326 return Op;
8328 // EGS * EEW >= 256 bits
8329 case Intrinsic::riscv_vsm3c:
8330 case Intrinsic::riscv_vsm3me: {
8331 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
8332 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
8333 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
8334 return Op;
8336 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
8337 case Intrinsic::riscv_vsha2ch:
8338 case Intrinsic::riscv_vsha2cl:
8339 case Intrinsic::riscv_vsha2ms: {
8340 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
8341 !Subtarget.hasStdExtZvknhb())
8342 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
8343 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8344 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8345 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8346 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8347 return Op;
8349 case Intrinsic::riscv_sf_vc_v_x:
8350 case Intrinsic::riscv_sf_vc_v_i:
8351 case Intrinsic::riscv_sf_vc_v_xv:
8352 case Intrinsic::riscv_sf_vc_v_iv:
8353 case Intrinsic::riscv_sf_vc_v_vv:
8354 case Intrinsic::riscv_sf_vc_v_fv:
8355 case Intrinsic::riscv_sf_vc_v_xvv:
8356 case Intrinsic::riscv_sf_vc_v_ivv:
8357 case Intrinsic::riscv_sf_vc_v_vvv:
8358 case Intrinsic::riscv_sf_vc_v_fvv:
8359 case Intrinsic::riscv_sf_vc_v_xvw:
8360 case Intrinsic::riscv_sf_vc_v_ivw:
8361 case Intrinsic::riscv_sf_vc_v_vvw:
8362 case Intrinsic::riscv_sf_vc_v_fvw: {
8363 MVT VT = Op.getSimpleValueType();
8365 if (!VT.isFixedLengthVector())
8366 break;
8368 SmallVector<SDValue, 6> Ops;
8369 for (const SDValue &V : Op->op_values()) {
8370 // Skip non-fixed vector operands.
8371 if (!V.getValueType().isFixedLengthVector()) {
8372 Ops.push_back(V);
8373 continue;
8376 MVT OpContainerVT =
8377 getContainerForFixedLengthVector(V.getSimpleValueType());
8378 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8381 MVT RetContainerVT = getContainerForFixedLengthVector(VT);
8382 SDValue Scalable =
8383 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetContainerVT, Ops);
8384 return convertFromScalableVector(VT, Scalable, DAG, Subtarget);
8388 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8391 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
8392 SelectionDAG &DAG) const {
8393 unsigned IntNo = Op.getConstantOperandVal(1);
8394 switch (IntNo) {
8395 default:
8396 break;
8397 case Intrinsic::riscv_masked_strided_load: {
8398 SDLoc DL(Op);
8399 MVT XLenVT = Subtarget.getXLenVT();
8401 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8402 // the selection of the masked intrinsics doesn't do this for us.
8403 SDValue Mask = Op.getOperand(5);
8404 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8406 MVT VT = Op->getSimpleValueType(0);
8407 MVT ContainerVT = VT;
8408 if (VT.isFixedLengthVector())
8409 ContainerVT = getContainerForFixedLengthVector(VT);
8411 SDValue PassThru = Op.getOperand(2);
8412 if (!IsUnmasked) {
8413 MVT MaskVT = getMaskTypeFor(ContainerVT);
8414 if (VT.isFixedLengthVector()) {
8415 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8416 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8420 auto *Load = cast<MemIntrinsicSDNode>(Op);
8421 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8422 SDValue Ptr = Op.getOperand(3);
8423 SDValue Stride = Op.getOperand(4);
8424 SDValue Result, Chain;
8426 // TODO: We restrict this to unmasked loads currently in consideration of
8427 // the complexity of handling all-false masks.
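// A zero stride means every lane reads the same address, so a single scalar
// load plus a splat (lowerScalarSplat below) produces the same result
// without any vector memory traffic.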
8428 if (IsUnmasked && isNullConstant(Stride)) {
8429 MVT ScalarVT = ContainerVT.getVectorElementType();
8430 SDValue ScalarLoad =
8431 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
8432 ScalarVT, Load->getMemOperand());
8433 Chain = ScalarLoad.getValue(1);
8434 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
8435 Subtarget);
8436 } else {
8437 SDValue IntID = DAG.getTargetConstant(
8438 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
8439 XLenVT);
8441 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
8442 if (IsUnmasked)
8443 Ops.push_back(DAG.getUNDEF(ContainerVT));
8444 else
8445 Ops.push_back(PassThru);
8446 Ops.push_back(Ptr);
8447 Ops.push_back(Stride);
8448 if (!IsUnmasked)
8449 Ops.push_back(Mask);
8450 Ops.push_back(VL);
8451 if (!IsUnmasked) {
8452 SDValue Policy =
8453 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
8454 Ops.push_back(Policy);
8457 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8458 Result =
8459 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8460 Load->getMemoryVT(), Load->getMemOperand());
8461 Chain = Result.getValue(1);
8463 if (VT.isFixedLengthVector())
8464 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8465 return DAG.getMergeValues({Result, Chain}, DL);
8467 case Intrinsic::riscv_seg2_load:
8468 case Intrinsic::riscv_seg3_load:
8469 case Intrinsic::riscv_seg4_load:
8470 case Intrinsic::riscv_seg5_load:
8471 case Intrinsic::riscv_seg6_load:
8472 case Intrinsic::riscv_seg7_load:
8473 case Intrinsic::riscv_seg8_load: {
8474 SDLoc DL(Op);
8475 static const Intrinsic::ID VlsegInts[7] = {
8476 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
8477 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
8478 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
8479 Intrinsic::riscv_vlseg8};
8480 unsigned NF = Op->getNumValues() - 1;
8481 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8482 MVT XLenVT = Subtarget.getXLenVT();
8483 MVT VT = Op->getSimpleValueType(0);
8484 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8486 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8487 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
8488 auto *Load = cast<MemIntrinsicSDNode>(Op);
8489 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
8490 ContainerVTs.push_back(MVT::Other);
8491 SDVTList VTs = DAG.getVTList(ContainerVTs);
8492 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
8493 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
8494 Ops.push_back(Op.getOperand(2));
8495 Ops.push_back(VL);
8496 SDValue Result =
8497 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8498 Load->getMemoryVT(), Load->getMemOperand());
8499 SmallVector<SDValue, 9> Results;
8500 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
8501 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
8502 DAG, Subtarget));
8503 Results.push_back(Result.getValue(NF));
8504 return DAG.getMergeValues(Results, DL);
8506 case Intrinsic::riscv_sf_vc_v_x_se:
8507 case Intrinsic::riscv_sf_vc_v_i_se:
8508 case Intrinsic::riscv_sf_vc_v_xv_se:
8509 case Intrinsic::riscv_sf_vc_v_iv_se:
8510 case Intrinsic::riscv_sf_vc_v_vv_se:
8511 case Intrinsic::riscv_sf_vc_v_fv_se:
8512 case Intrinsic::riscv_sf_vc_v_xvv_se:
8513 case Intrinsic::riscv_sf_vc_v_ivv_se:
8514 case Intrinsic::riscv_sf_vc_v_vvv_se:
8515 case Intrinsic::riscv_sf_vc_v_fvv_se:
8516 case Intrinsic::riscv_sf_vc_v_xvw_se:
8517 case Intrinsic::riscv_sf_vc_v_ivw_se:
8518 case Intrinsic::riscv_sf_vc_v_vvw_se:
8519 case Intrinsic::riscv_sf_vc_v_fvw_se: {
8520 MVT VT = Op.getSimpleValueType();
8522 if (!VT.isFixedLengthVector())
8523 break;
8525 SmallVector<SDValue, 6> Ops;
8526 for (const SDValue &V : Op->op_values()) {
8527 // Skip non-fixed vector operands.
8528 if (!V.getValueType().isFixedLengthVector()) {
8529 Ops.push_back(V);
8530 continue;
8533 MVT OpContainerVT =
8534 getContainerForFixedLengthVector(V.getSimpleValueType());
8535 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8538 SDLoc DL(Op);
8539 MVT RetContainerVT = getContainerForFixedLengthVector(VT);
8540 SDVTList VTs = DAG.getVTList({RetContainerVT, MVT::Other});
8541 SDValue ScalableVector = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
8542 SDValue FixedVector =
8543 convertFromScalableVector(VT, ScalableVector, DAG, Subtarget);
8544 return DAG.getMergeValues({FixedVector, ScalableVector.getValue(1)}, DL);
8548 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8551 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8552 SelectionDAG &DAG) const {
8553 unsigned IntNo = Op.getConstantOperandVal(1);
8554 switch (IntNo) {
8555 default:
8556 break;
8557 case Intrinsic::riscv_masked_strided_store: {
8558 SDLoc DL(Op);
8559 MVT XLenVT = Subtarget.getXLenVT();
8561 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8562 // the selection of the masked intrinsics doesn't do this for us.
8563 SDValue Mask = Op.getOperand(5);
8564 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8566 SDValue Val = Op.getOperand(2);
8567 MVT VT = Val.getSimpleValueType();
8568 MVT ContainerVT = VT;
8569 if (VT.isFixedLengthVector()) {
8570 ContainerVT = getContainerForFixedLengthVector(VT);
8571 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
8573 if (!IsUnmasked) {
8574 MVT MaskVT = getMaskTypeFor(ContainerVT);
8575 if (VT.isFixedLengthVector())
8576 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8579 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8581 SDValue IntID = DAG.getTargetConstant(
8582 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
8583 XLenVT);
8585 auto *Store = cast<MemIntrinsicSDNode>(Op);
8586 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
8587 Ops.push_back(Val);
8588 Ops.push_back(Op.getOperand(3)); // Ptr
8589 Ops.push_back(Op.getOperand(4)); // Stride
8590 if (!IsUnmasked)
8591 Ops.push_back(Mask);
8592 Ops.push_back(VL);
8594 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
8595 Ops, Store->getMemoryVT(),
8596 Store->getMemOperand());
8598 case Intrinsic::riscv_seg2_store:
8599 case Intrinsic::riscv_seg3_store:
8600 case Intrinsic::riscv_seg4_store:
8601 case Intrinsic::riscv_seg5_store:
8602 case Intrinsic::riscv_seg6_store:
8603 case Intrinsic::riscv_seg7_store:
8604 case Intrinsic::riscv_seg8_store: {
8605 SDLoc DL(Op);
8606 static const Intrinsic::ID VssegInts[] = {
8607 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
8608 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
8609 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
8610 Intrinsic::riscv_vsseg8};
8611 // Operands are (chain, int_id, vec*, ptr, vl)
8612 unsigned NF = Op->getNumOperands() - 4;
8613 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
8614 MVT XLenVT = Subtarget.getXLenVT();
8615 MVT VT = Op->getOperand(2).getSimpleValueType();
8616 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8618 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8619 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
8620 SDValue Ptr = Op->getOperand(NF + 2);
8622 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
8623 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
8624 for (unsigned i = 0; i < NF; i++)
8625 Ops.push_back(convertToScalableVector(
8626 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
8627 Ops.append({Ptr, VL});
8629 return DAG.getMemIntrinsicNode(
8630 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
8631 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
8633 case Intrinsic::riscv_sf_vc_x_se_e8mf8:
8634 case Intrinsic::riscv_sf_vc_x_se_e8mf4:
8635 case Intrinsic::riscv_sf_vc_x_se_e8mf2:
8636 case Intrinsic::riscv_sf_vc_x_se_e8m1:
8637 case Intrinsic::riscv_sf_vc_x_se_e8m2:
8638 case Intrinsic::riscv_sf_vc_x_se_e8m4:
8639 case Intrinsic::riscv_sf_vc_x_se_e8m8:
8640 case Intrinsic::riscv_sf_vc_x_se_e16mf4:
8641 case Intrinsic::riscv_sf_vc_x_se_e16mf2:
8642 case Intrinsic::riscv_sf_vc_x_se_e16m1:
8643 case Intrinsic::riscv_sf_vc_x_se_e16m2:
8644 case Intrinsic::riscv_sf_vc_x_se_e16m4:
8645 case Intrinsic::riscv_sf_vc_x_se_e16m8:
8646 case Intrinsic::riscv_sf_vc_x_se_e32mf2:
8647 case Intrinsic::riscv_sf_vc_x_se_e32m1:
8648 case Intrinsic::riscv_sf_vc_x_se_e32m2:
8649 case Intrinsic::riscv_sf_vc_x_se_e32m4:
8650 case Intrinsic::riscv_sf_vc_x_se_e32m8:
8651 case Intrinsic::riscv_sf_vc_x_se_e64m1:
8652 case Intrinsic::riscv_sf_vc_x_se_e64m2:
8653 case Intrinsic::riscv_sf_vc_x_se_e64m4:
8654 case Intrinsic::riscv_sf_vc_x_se_e64m8:
8655 case Intrinsic::riscv_sf_vc_i_se_e8mf8:
8656 case Intrinsic::riscv_sf_vc_i_se_e8mf4:
8657 case Intrinsic::riscv_sf_vc_i_se_e8mf2:
8658 case Intrinsic::riscv_sf_vc_i_se_e8m1:
8659 case Intrinsic::riscv_sf_vc_i_se_e8m2:
8660 case Intrinsic::riscv_sf_vc_i_se_e8m4:
8661 case Intrinsic::riscv_sf_vc_i_se_e8m8:
8662 case Intrinsic::riscv_sf_vc_i_se_e16mf4:
8663 case Intrinsic::riscv_sf_vc_i_se_e16mf2:
8664 case Intrinsic::riscv_sf_vc_i_se_e16m1:
8665 case Intrinsic::riscv_sf_vc_i_se_e16m2:
8666 case Intrinsic::riscv_sf_vc_i_se_e16m4:
8667 case Intrinsic::riscv_sf_vc_i_se_e16m8:
8668 case Intrinsic::riscv_sf_vc_i_se_e32mf2:
8669 case Intrinsic::riscv_sf_vc_i_se_e32m1:
8670 case Intrinsic::riscv_sf_vc_i_se_e32m2:
8671 case Intrinsic::riscv_sf_vc_i_se_e32m4:
8672 case Intrinsic::riscv_sf_vc_i_se_e32m8:
8673 case Intrinsic::riscv_sf_vc_i_se_e64m1:
8674 case Intrinsic::riscv_sf_vc_i_se_e64m2:
8675 case Intrinsic::riscv_sf_vc_i_se_e64m4:
8676 case Intrinsic::riscv_sf_vc_i_se_e64m8:
8677 case Intrinsic::riscv_sf_vc_xv_se:
8678 case Intrinsic::riscv_sf_vc_iv_se:
8679 case Intrinsic::riscv_sf_vc_vv_se:
8680 case Intrinsic::riscv_sf_vc_fv_se:
8681 case Intrinsic::riscv_sf_vc_xvv_se:
8682 case Intrinsic::riscv_sf_vc_ivv_se:
8683 case Intrinsic::riscv_sf_vc_vvv_se:
8684 case Intrinsic::riscv_sf_vc_fvv_se:
8685 case Intrinsic::riscv_sf_vc_xvw_se:
8686 case Intrinsic::riscv_sf_vc_ivw_se:
8687 case Intrinsic::riscv_sf_vc_vvw_se:
8688 case Intrinsic::riscv_sf_vc_fvw_se: {
8689 if (!llvm::any_of(Op->op_values(), [&](const SDValue &V) {
8690 return V.getValueType().isFixedLengthVector();
8692 break;
8694 SmallVector<SDValue, 6> Ops;
8695 for (const SDValue &V : Op->op_values()) {
8696 // Skip non-fixed vector operands.
8697 if (!V.getValueType().isFixedLengthVector()) {
8698 Ops.push_back(V);
8699 continue;
8702 MVT OpContainerVT =
8703 getContainerForFixedLengthVector(V.getSimpleValueType());
8704 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
8707 return DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
8711 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8714 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
8715 switch (ISDOpcode) {
8716 default:
8717 llvm_unreachable("Unhandled reduction");
8718 case ISD::VP_REDUCE_ADD:
8719 case ISD::VECREDUCE_ADD:
8720 return RISCVISD::VECREDUCE_ADD_VL;
8721 case ISD::VP_REDUCE_UMAX:
8722 case ISD::VECREDUCE_UMAX:
8723 return RISCVISD::VECREDUCE_UMAX_VL;
8724 case ISD::VP_REDUCE_SMAX:
8725 case ISD::VECREDUCE_SMAX:
8726 return RISCVISD::VECREDUCE_SMAX_VL;
8727 case ISD::VP_REDUCE_UMIN:
8728 case ISD::VECREDUCE_UMIN:
8729 return RISCVISD::VECREDUCE_UMIN_VL;
8730 case ISD::VP_REDUCE_SMIN:
8731 case ISD::VECREDUCE_SMIN:
8732 return RISCVISD::VECREDUCE_SMIN_VL;
8733 case ISD::VP_REDUCE_AND:
8734 case ISD::VECREDUCE_AND:
8735 return RISCVISD::VECREDUCE_AND_VL;
8736 case ISD::VP_REDUCE_OR:
8737 case ISD::VECREDUCE_OR:
8738 return RISCVISD::VECREDUCE_OR_VL;
8739 case ISD::VP_REDUCE_XOR:
8740 case ISD::VECREDUCE_XOR:
8741 return RISCVISD::VECREDUCE_XOR_VL;
8742 case ISD::VP_REDUCE_FADD:
8743 return RISCVISD::VECREDUCE_FADD_VL;
8744 case ISD::VP_REDUCE_SEQ_FADD:
8745 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
8746 case ISD::VP_REDUCE_FMAX:
8747 return RISCVISD::VECREDUCE_FMAX_VL;
8748 case ISD::VP_REDUCE_FMIN:
8749 return RISCVISD::VECREDUCE_FMIN_VL;
8754 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
8755 SelectionDAG &DAG,
8756 bool IsVP) const {
8757 SDLoc DL(Op);
8758 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
8759 MVT VecVT = Vec.getSimpleValueType();
8760 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
8761 Op.getOpcode() == ISD::VECREDUCE_OR ||
8762 Op.getOpcode() == ISD::VECREDUCE_XOR ||
8763 Op.getOpcode() == ISD::VP_REDUCE_AND ||
8764 Op.getOpcode() == ISD::VP_REDUCE_OR ||
8765 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
8766 "Unexpected reduction lowering");
8768 MVT XLenVT = Subtarget.getXLenVT();
8770 MVT ContainerVT = VecVT;
8771 if (VecVT.isFixedLengthVector()) {
8772 ContainerVT = getContainerForFixedLengthVector(VecVT);
8773 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8776 SDValue Mask, VL;
8777 if (IsVP) {
8778 Mask = Op.getOperand(2);
8779 VL = Op.getOperand(3);
8780 } else {
8781 std::tie(Mask, VL) =
8782 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8785 unsigned BaseOpc;
8786 ISD::CondCode CC;
8787 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8789 switch (Op.getOpcode()) {
8790 default:
8791 llvm_unreachable("Unhandled reduction");
8792 case ISD::VECREDUCE_AND:
8793 case ISD::VP_REDUCE_AND: {
8794 // vcpop ~x == 0
8795 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
8796 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
8797 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8798 CC = ISD::SETEQ;
8799 BaseOpc = ISD::AND;
8800 break;
8802 case ISD::VECREDUCE_OR:
8803 case ISD::VP_REDUCE_OR:
8804 // vcpop x != 0
8805 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8806 CC = ISD::SETNE;
8807 BaseOpc = ISD::OR;
8808 break;
8809 case ISD::VECREDUCE_XOR:
8810 case ISD::VP_REDUCE_XOR: {
8811 // ((vcpop x) & 1) != 0
8812 SDValue One = DAG.getConstant(1, DL, XLenVT);
8813 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
8814 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
8815 CC = ISD::SETNE;
8816 BaseOpc = ISD::XOR;
8817 break;
8821 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
8822 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
8824 if (!IsVP)
8825 return SetCC;
8827 // Now include the start value in the operation.
8828 // Note that we must return the start value when no elements are operated
8829 // upon. The vcpop instructions we've emitted in each case above will return
8830 // 0 for an inactive vector, and so we've already received the neutral value:
8831 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
8832 // can simply include the start value.
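// For example, with an EVL of zero a VP_REDUCE_AND sees vcpop == 0, so the
// setcc (0 == 0) produces 1, and ANDing that with the start value returns the
// start value unchanged, as required.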
8833 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
8836 static bool isNonZeroAVL(SDValue AVL) {
8837 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
8838 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
8839 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
8840 (ImmAVL && ImmAVL->getZExtValue() >= 1);
8843 /// Helper to lower a reduction sequence of the form:
8844 /// scalar = reduce_op vec, scalar_start
8845 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
8846 SDValue StartValue, SDValue Vec, SDValue Mask,
8847 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
8848 const RISCVSubtarget &Subtarget) {
8849 const MVT VecVT = Vec.getSimpleValueType();
8850 const MVT M1VT = getLMUL1VT(VecVT);
8851 const MVT XLenVT = Subtarget.getXLenVT();
8852 const bool NonZeroAVL = isNonZeroAVL(VL);
8854 // The reduction needs an LMUL1 input; do the splat at either LMUL1
8855 // or the original VT if fractional.
8856 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
8857 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
8858 // prove it is non-zero. For the AVL=0 case, we need the scalar to
8859 // be the result of the reduction operation.
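// Concretely: when we cannot prove the AVL non-zero, the start value is
// splatted with VL=1 and also used as the passthru, so a reduction that
// updates no elements still leaves the start value in element 0 of the result.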
8860 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
8861 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
8862 DAG, Subtarget);
8863 if (M1VT != InnerVT)
8864 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
8865 DAG.getUNDEF(M1VT),
8866 InitialValue, DAG.getConstant(0, DL, XLenVT));
8867 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
8868 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
8869 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
8870 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
8871 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
8872 DAG.getConstant(0, DL, XLenVT));
8875 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
8876 SelectionDAG &DAG) const {
8877 SDLoc DL(Op);
8878 SDValue Vec = Op.getOperand(0);
8879 EVT VecEVT = Vec.getValueType();
8881 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
8883 // Due to ordering in legalize types we may have a vector type that needs to
8884 // be split. Do that manually so we can get down to a legal type.
8885 while (getTypeAction(*DAG.getContext(), VecEVT) ==
8886 TargetLowering::TypeSplitVector) {
8887 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
8888 VecEVT = Lo.getValueType();
8889 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
8892 // TODO: The type may need to be widened rather than split. Or widened before
8893 // it can be split.
8894 if (!isTypeLegal(VecEVT))
8895 return SDValue();
8897 MVT VecVT = VecEVT.getSimpleVT();
8898 MVT VecEltVT = VecVT.getVectorElementType();
8899 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
8901 MVT ContainerVT = VecVT;
8902 if (VecVT.isFixedLengthVector()) {
8903 ContainerVT = getContainerForFixedLengthVector(VecVT);
8904 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8907 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8909 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
8910 switch (BaseOpc) {
8911 case ISD::AND:
8912 case ISD::OR:
8913 case ISD::UMAX:
8914 case ISD::UMIN:
8915 case ISD::SMAX:
8916 case ISD::SMIN:
8917 MVT XLenVT = Subtarget.getXLenVT();
8918 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
8919 DAG.getConstant(0, DL, XLenVT));
8921 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
8922 Mask, VL, DL, DAG, Subtarget);
8925 // Given a reduction op, this function returns the matching reduction opcode,
8926 // the vector SDValue and the scalar SDValue required to lower this to a
8927 // RISCVISD node.
8928 static std::tuple<unsigned, SDValue, SDValue>
8929 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
8930 const RISCVSubtarget &Subtarget) {
8931 SDLoc DL(Op);
8932 auto Flags = Op->getFlags();
8933 unsigned Opcode = Op.getOpcode();
8934 switch (Opcode) {
8935 default:
8936 llvm_unreachable("Unhandled reduction");
8937 case ISD::VECREDUCE_FADD: {
8938 // Use positive zero if we can. It is cheaper to materialize.
8939 SDValue Zero =
8940 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
8941 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
8943 case ISD::VECREDUCE_SEQ_FADD:
8944 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
8945 Op.getOperand(0));
8946 case ISD::VECREDUCE_FMIN:
8947 case ISD::VECREDUCE_FMAX: {
8948 MVT XLenVT = Subtarget.getXLenVT();
8949 SDValue Front =
8950 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
8951 DAG.getConstant(0, DL, XLenVT));
8952 unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
8953 ? RISCVISD::VECREDUCE_FMIN_VL
8954 : RISCVISD::VECREDUCE_FMAX_VL;
8955 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
8960 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
8961 SelectionDAG &DAG) const {
8962 SDLoc DL(Op);
8963 MVT VecEltVT = Op.getSimpleValueType();
8965 unsigned RVVOpcode;
8966 SDValue VectorVal, ScalarVal;
8967 std::tie(RVVOpcode, VectorVal, ScalarVal) =
8968 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
8969 MVT VecVT = VectorVal.getSimpleValueType();
8971 MVT ContainerVT = VecVT;
8972 if (VecVT.isFixedLengthVector()) {
8973 ContainerVT = getContainerForFixedLengthVector(VecVT);
8974 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
8977 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8978 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
8979 VectorVal, Mask, VL, DL, DAG, Subtarget);
8982 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
8983 SelectionDAG &DAG) const {
8984 SDLoc DL(Op);
8985 SDValue Vec = Op.getOperand(1);
8986 EVT VecEVT = Vec.getValueType();
8988 // TODO: The type may need to be widened rather than split. Or widened before
8989 // it can be split.
8990 if (!isTypeLegal(VecEVT))
8991 return SDValue();
8993 MVT VecVT = VecEVT.getSimpleVT();
8994 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
8996 if (VecVT.isFixedLengthVector()) {
8997 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
8998 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9001 SDValue VL = Op.getOperand(3);
9002 SDValue Mask = Op.getOperand(2);
9003 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9004 Vec, Mask, VL, DL, DAG, Subtarget);
9007 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9008 SelectionDAG &DAG) const {
9009 SDValue Vec = Op.getOperand(0);
9010 SDValue SubVec = Op.getOperand(1);
9011 MVT VecVT = Vec.getSimpleValueType();
9012 MVT SubVecVT = SubVec.getSimpleValueType();
9014 SDLoc DL(Op);
9015 MVT XLenVT = Subtarget.getXLenVT();
9016 unsigned OrigIdx = Op.getConstantOperandVal(2);
9017 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9019 // We don't have the ability to slide mask vectors up indexed by their i1
9020 // elements; the smallest we can do is i8. Often we are able to bitcast to
9021 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9022 // into a scalable one, we might not necessarily have enough scalable
9023 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
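// For example, inserting v16i1 into nxv8i1 at index 8 can instead be done as
// inserting v2i8 into nxv1i8 at index 1.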
9024 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9025 (OrigIdx != 0 || !Vec.isUndef())) {
9026 if (VecVT.getVectorMinNumElements() >= 8 &&
9027 SubVecVT.getVectorMinNumElements() >= 8) {
9028 assert(OrigIdx % 8 == 0 && "Invalid index");
9029 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9030 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9031 "Unexpected mask vector lowering");
9032 OrigIdx /= 8;
9033 SubVecVT =
9034 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9035 SubVecVT.isScalableVector());
9036 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9037 VecVT.isScalableVector());
9038 Vec = DAG.getBitcast(VecVT, Vec);
9039 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9040 } else {
9041 // We can't slide this mask vector up indexed by its i1 elements.
9042 // This poses a problem when we wish to insert a scalable vector which
9043 // can't be re-expressed as a larger type. Just choose the slow path and
9044 // extend to a larger type, then truncate back down.
9045 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9046 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9047 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9048 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9049 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9050 Op.getOperand(2));
9051 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9052 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9056 // If the subvector is a fixed-length type, we cannot use subregister
9057 // manipulation to simplify the codegen; we don't know which register of an
9058 // LMUL group contains the specific subvector as we only know the minimum
9059 // register size. Therefore we must slide the vector group up the full
9060 // amount.
9061 if (SubVecVT.isFixedLengthVector()) {
9062 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9063 return Op;
9064 MVT ContainerVT = VecVT;
9065 if (VecVT.isFixedLengthVector()) {
9066 ContainerVT = getContainerForFixedLengthVector(VecVT);
9067 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9070 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9071 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9072 DAG.getUNDEF(ContainerVT), SubVec,
9073 DAG.getConstant(0, DL, XLenVT));
9074 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9075 return DAG.getBitcast(Op.getValueType(), SubVec);
9078 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9079 DAG.getUNDEF(ContainerVT), SubVec,
9080 DAG.getConstant(0, DL, XLenVT));
9081 SDValue Mask =
9082 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9083 // Set the vector length to only the number of elements we care about. Note
9084 // that for slideup this includes the offset.
9085 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9086 SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
9088 // Use tail agnostic policy if we're inserting over Vec's tail.
9089 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9090 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9091 Policy = RISCVII::TAIL_AGNOSTIC;
9093 // If we're inserting into the lowest elements, use a tail undisturbed
9094 // vmv.v.v.
9095 if (OrigIdx == 0) {
9096 SubVec =
9097 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9098 } else {
9099 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9100 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9101 SlideupAmt, Mask, VL, Policy);
9104 if (VecVT.isFixedLengthVector())
9105 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9106 return DAG.getBitcast(Op.getValueType(), SubVec);
9109 unsigned SubRegIdx, RemIdx;
9110 std::tie(SubRegIdx, RemIdx) =
9111 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9112 VecVT, SubVecVT, OrigIdx, TRI);
9114 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9115 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9116 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9117 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9119 // 1. If the Idx has been completely eliminated and this subvector's size is
9120 // that of a vector register or a multiple thereof, or the surrounding elements are
9121 // undef, then this is a subvector insert which naturally aligns to a vector
9122 // register. These can easily be handled using subregister manipulation.
9123 // 2. If the subvector is smaller than a vector register, then the insertion
9124 // must preserve the undisturbed elements of the register. We do this by
9125 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9126 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9127 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9128 // LMUL=1 type back into the larger vector (resolving to another subregister
9129 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9130 // to avoid allocating a large register group to hold our subvector.
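// For example, inserting nxv2i32 into nxv8i32 at index 2 lands exactly on a
// vector register boundary and reduces to case 1, whereas inserting a
// fractional type such as nxv1i32 at index 1 leaves RemIdx != 0 and takes the
// slide-based path of case 2.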
9131 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9132 return Op;
9134 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9135 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
9136 // (in our case undisturbed). This means we can set up a subvector insertion
9137 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9138 // size of the subvector.
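// For example, a vslideup with OFFSET = 3 and VL = 5 leaves elements 0..2
// untouched, writes the source's elements 0..1 into elements 3..4, and leaves
// elements from 5 upwards to the (undisturbed) tail policy.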
9139 MVT InterSubVT = VecVT;
9140 SDValue AlignedExtract = Vec;
9141 unsigned AlignedIdx = OrigIdx - RemIdx;
9142 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9143 InterSubVT = getLMUL1VT(VecVT);
9144 // Extract a subvector equal to the nearest full vector register type. This
9145 // should resolve to an EXTRACT_SUBREG instruction.
9146 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9147 DAG.getConstant(AlignedIdx, DL, XLenVT));
9150 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9151 DAG.getUNDEF(InterSubVT), SubVec,
9152 DAG.getConstant(0, DL, XLenVT));
9154 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9156 VL = computeVLMax(SubVecVT, DL, DAG);
9158 // If we're inserting into the lowest elements, use a tail undisturbed
9159 // vmv.v.v.
9160 if (RemIdx == 0) {
9161 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9162 SubVec, VL);
9163 } else {
9164 SDValue SlideupAmt =
9165 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9167 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9168 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9170 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9171 SlideupAmt, Mask, VL);
9174 // If required, insert this subvector back into the correct vector register.
9175 // This should resolve to an INSERT_SUBREG instruction.
9176 if (VecVT.bitsGT(InterSubVT))
9177 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9178 DAG.getConstant(AlignedIdx, DL, XLenVT));
9180 // We might have bitcast from a mask type: cast back to the original type if
9181 // required.
9182 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9185 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9186 SelectionDAG &DAG) const {
9187 SDValue Vec = Op.getOperand(0);
9188 MVT SubVecVT = Op.getSimpleValueType();
9189 MVT VecVT = Vec.getSimpleValueType();
9191 SDLoc DL(Op);
9192 MVT XLenVT = Subtarget.getXLenVT();
9193 unsigned OrigIdx = Op.getConstantOperandVal(1);
9194 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9196 // We don't have the ability to slide mask vectors down indexed by their i1
9197 // elements; the smallest we can do is i8. Often we are able to bitcast to
9198 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9199 // from a scalable one, we might not necessarily have enough scalable
9200 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
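// For example, extracting v8i1 from nxv64i1 at index 8 can instead be done as
// extracting v1i8 from nxv8i8 at index 1.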
9201 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9202 if (VecVT.getVectorMinNumElements() >= 8 &&
9203 SubVecVT.getVectorMinNumElements() >= 8) {
9204 assert(OrigIdx % 8 == 0 && "Invalid index");
9205 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9206 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9207 "Unexpected mask vector lowering");
9208 OrigIdx /= 8;
9209 SubVecVT =
9210 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9211 SubVecVT.isScalableVector());
9212 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9213 VecVT.isScalableVector());
9214 Vec = DAG.getBitcast(VecVT, Vec);
9215 } else {
9216 // We can't slide this mask vector down, indexed by its i1 elements.
9217 // This poses a problem when we wish to extract a scalable vector which
9218 // can't be re-expressed as a larger type. Just choose the slow path and
9219 // extend to a larger type, then truncate back down.
9220 // TODO: We could probably improve this when extracting certain fixed-length
9221 // vectors from fixed-length vectors, where we can extract as i8 and shift the
9222 // correct element right to reach the desired subvector.
9223 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9224 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9225 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9226 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9227 Op.getOperand(1));
9228 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9229 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9233 // With an index of 0 this is a cast-like subvector extract, which can be
9234 // performed with subregister operations.
9235 if (OrigIdx == 0)
9236 return Op;
9238 // If the subvector is a fixed-length type, we cannot use subregister
9239 // manipulation to simplify the codegen; we don't know which register of an
9240 // LMUL group contains the specific subvector as we only know the minimum
9241 // register size. Therefore we must slide the vector group down the full
9242 // amount.
9243 if (SubVecVT.isFixedLengthVector()) {
9244 MVT ContainerVT = VecVT;
9245 if (VecVT.isFixedLengthVector()) {
9246 ContainerVT = getContainerForFixedLengthVector(VecVT);
9247 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9250 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9251 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9252 if (auto ShrunkVT =
9253 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9254 ContainerVT = *ShrunkVT;
9255 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9256 DAG.getVectorIdxConstant(0, DL));
9259 SDValue Mask =
9260 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9261 // Set the vector length to only the number of elements we care about. This
9262 // avoids sliding down elements we're going to discard straight away.
9263 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
9264 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9265 SDValue Slidedown =
9266 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9267 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9268 // Now we can use a cast-like subvector extract to get the result.
9269 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9270 DAG.getConstant(0, DL, XLenVT));
9271 return DAG.getBitcast(Op.getValueType(), Slidedown);
9274 unsigned SubRegIdx, RemIdx;
9275 std::tie(SubRegIdx, RemIdx) =
9276 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9277 VecVT, SubVecVT, OrigIdx, TRI);
9279 // If the Idx has been completely eliminated then this is a subvector extract
9280 // which naturally aligns to a vector register. These can easily be handled
9281 // using subregister manipulation.
9282 if (RemIdx == 0)
9283 return Op;
9285 // Else SubVecVT is a fractional LMUL and may need to be slid down.
9286 assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);
9288 // If the vector type is an LMUL-group type, extract a subvector equal to the
9289 // nearest full vector register type.
9290 MVT InterSubVT = VecVT;
9291 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9292 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
9293 // we should have successfully decomposed the extract into a subregister.
9294 assert(SubRegIdx != RISCV::NoSubRegister);
9295 InterSubVT = getLMUL1VT(VecVT);
9296 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
9299 // Slide this vector register down by the desired number of elements in order
9300 // to place the desired subvector starting at element 0.
9301 SDValue SlidedownAmt =
9302 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9304 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
9305 SDValue Slidedown =
9306 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
9307 Vec, SlidedownAmt, Mask, VL);
9309 // Now the vector is in the right position, extract our final subvector. This
9310 // should resolve to a COPY.
9311 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9312 DAG.getConstant(0, DL, XLenVT));
9314 // We might have bitcast from a mask type: cast back to the original type if
9315 // required.
9316 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
9319 // Widen a vector's operands to i8, then truncate its results back to the
9320 // original type, typically i1. All operand and result types must be the same.
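// Results are narrowed back to i1 with a setcc against zero, so any non-zero
// lane in the widened result maps to true.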
9321 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
9322 SelectionDAG &DAG) {
9323 MVT VT = N.getSimpleValueType();
9324 MVT WideVT = VT.changeVectorElementType(MVT::i8);
9325 SmallVector<SDValue, 4> WideOps;
9326 for (SDValue Op : N->ops()) {
9327 assert(Op.getSimpleValueType() == VT &&
9328 "Operands and result must be same type");
9329 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
9332 unsigned NumVals = N->getNumValues();
9334 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
9335 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
9336 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
9337 SmallVector<SDValue, 4> TruncVals;
9338 for (unsigned I = 0; I < NumVals; I++) {
9339 TruncVals.push_back(
9340 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
9341 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
9344 if (TruncVals.size() > 1)
9345 return DAG.getMergeValues(TruncVals, DL);
9346 return TruncVals.front();
9349 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
9350 SelectionDAG &DAG) const {
9351 SDLoc DL(Op);
9352 MVT VecVT = Op.getSimpleValueType();
9353 MVT XLenVT = Subtarget.getXLenVT();
9355 assert(VecVT.isScalableVector() &&
9356 "vector_interleave on non-scalable vector!");
9358 // 1-bit element vectors need to be widened to e8
9359 if (VecVT.getVectorElementType() == MVT::i1)
9360 return widenVectorOpsToi8(Op, DL, DAG);
9362 // If the VT is LMUL=8, we need to split and reassemble.
9363 if (VecVT.getSizeInBits().getKnownMinValue() ==
9364 (8 * RISCV::RVVBitsPerBlock)) {
9365 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9366 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9367 EVT SplitVT = Op0Lo.getValueType();
9369 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9370 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
9371 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
9372 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
9374 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9375 ResLo.getValue(0), ResHi.getValue(0));
9376 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
9377 ResHi.getValue(1));
9378 return DAG.getMergeValues({Even, Odd}, DL);
9381 // Concatenate the two vectors as one vector to deinterleave
9382 MVT ConcatVT =
9383 MVT::getVectorVT(VecVT.getVectorElementType(),
9384 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9385 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9386 Op.getOperand(0), Op.getOperand(1));
9388 // We want to operate on all lanes, so get the mask and VL for it
9389 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
9390 SDValue Passthru = DAG.getUNDEF(ConcatVT);
9392 // We can deinterleave through vnsrl.wi if the element type is smaller than
9393 // ELEN
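// Viewing the concatenated vector as elements of width 2*SEW, a narrowing
// shift-right of 0 keeps the low (even) halves and a shift of SEW keeps the
// high (odd) halves, which recovers the even and odd element sequences.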
9394 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9395 SDValue Even =
9396 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
9397 SDValue Odd =
9398 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
9399 return DAG.getMergeValues({Even, Odd}, DL);
9402 // For the indices, use the same SEW to avoid an extra vsetvli
9403 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
9404 // Create a vector of even indices {0, 2, 4, ...}
9405 SDValue EvenIdx =
9406 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
9407 // Create a vector of odd indices {1, 3, 5, ... }
9408 SDValue OddIdx =
9409 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
9411 // Gather the even and odd elements into two separate vectors
9412 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9413 Concat, EvenIdx, Passthru, Mask, VL);
9414 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
9415 Concat, OddIdx, Passthru, Mask, VL);
9417 // Extract the result half of the gather for even and odd
9418 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
9419 DAG.getConstant(0, DL, XLenVT));
9420 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
9421 DAG.getConstant(0, DL, XLenVT));
9423 return DAG.getMergeValues({Even, Odd}, DL);
9426 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
9427 SelectionDAG &DAG) const {
9428 SDLoc DL(Op);
9429 MVT VecVT = Op.getSimpleValueType();
9431 assert(VecVT.isScalableVector() &&
9432 "vector_interleave on non-scalable vector!");
9434 // i1 vectors need to be widened to i8
9435 if (VecVT.getVectorElementType() == MVT::i1)
9436 return widenVectorOpsToi8(Op, DL, DAG);
9438 MVT XLenVT = Subtarget.getXLenVT();
9439 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
9441 // If the VT is LMUL=8, we need to split and reassemble.
9442 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
9443 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9444 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
9445 EVT SplitVT = Op0Lo.getValueType();
9447 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9448 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
9449 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
9450 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
9452 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9453 ResLo.getValue(0), ResLo.getValue(1));
9454 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
9455 ResHi.getValue(0), ResHi.getValue(1));
9456 return DAG.getMergeValues({Lo, Hi}, DL);
9459 SDValue Interleaved;
9461 // If the element type is smaller than ELEN, then we can interleave with
9462 // vwaddu.vv and vwmaccu.vx
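// Roughly: vwaddu.vv computes zext(a) + zext(b), and vwmaccu.vx with the
// scalar (2^SEW - 1) then adds (2^SEW - 1) * zext(b), giving
// zext(a) + 2^SEW * zext(b), i.e. each widened element holds a in its low
// half and b in its high half, which is the interleaved layout.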
9463 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
9464 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
9465 DAG, Subtarget);
9466 } else {
9467 // Otherwise, fall back to using vrgatherei16.vv
9468 MVT ConcatVT =
9469 MVT::getVectorVT(VecVT.getVectorElementType(),
9470 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
9471 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
9472 Op.getOperand(0), Op.getOperand(1));
9474 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
9476 // 0 1 2 3 4 5 6 7 ...
9477 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
9479 // 1 1 1 1 1 1 1 1 ...
9480 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
9482 // 1 0 1 0 1 0 1 0 ...
9483 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
9484 OddMask = DAG.getSetCC(
9485 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
9486 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
9487 ISD::CondCode::SETNE);
9489 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
9491 // Build up the index vector for interleaving the concatenated vector
9492 // 0 0 1 1 2 2 3 3 ...
9493 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
9494 // 0 n 1 n+1 2 n+2 3 n+3 ...
9495 Idx =
9496 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
9498 // Then perform the interleave
9499 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
9500 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
9501 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
9502 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
9505 // Extract the two halves from the interleaved result
9506 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9507 DAG.getVectorIdxConstant(0, DL));
9508 SDValue Hi = DAG.getNode(
9509 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
9510 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
9512 return DAG.getMergeValues({Lo, Hi}, DL);
9515 // Lower step_vector to the vid instruction. Any step value other than 1 must
9516 // be accounted for by manual expansion.
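// For example, a step of 4 becomes vid.v followed by a shift left by 2, while
// a step of 3 multiplies the vid.v result by a splat of 3.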
9517 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
9518 SelectionDAG &DAG) const {
9519 SDLoc DL(Op);
9520 MVT VT = Op.getSimpleValueType();
9521 assert(VT.isScalableVector() && "Expected scalable vector");
9522 MVT XLenVT = Subtarget.getXLenVT();
9523 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
9524 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9525 uint64_t StepValImm = Op.getConstantOperandVal(0);
9526 if (StepValImm != 1) {
9527 if (isPowerOf2_64(StepValImm)) {
9528 SDValue StepVal =
9529 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9530 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
9531 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
9532 } else {
9533 SDValue StepVal = lowerScalarSplat(
9534 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
9535 VL, VT, DL, DAG, Subtarget);
9536 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
9539 return StepVec;
9542 // Implement vector_reverse using vrgather.vv with indices determined by
9543 // subtracting the id of each element from (VLMAX-1). This will convert
9544 // the indices like so:
9545 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
9546 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
9547 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
9548 SelectionDAG &DAG) const {
9549 SDLoc DL(Op);
9550 MVT VecVT = Op.getSimpleValueType();
9551 if (VecVT.getVectorElementType() == MVT::i1) {
9552 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9553 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
9554 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
9555 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
9557 unsigned EltSize = VecVT.getScalarSizeInBits();
9558 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
9559 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
9560 unsigned MaxVLMAX =
9561 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
9563 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
9564 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
9566 // If this is SEW=8 and VLMAX is potentially more than 256, we need
9567 // to use vrgatherei16.vv.
9568 // TODO: It's also possible to use vrgatherei16.vv for other types to
9569 // decrease register width for the index calculation.
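// For example, with a maximum VLEN of 512 at LMUL=8 and SEW=8, VLMAX is 512,
// which cannot be encoded in 8-bit index elements, hence vrgatherei16.vv.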
9570 if (MaxVLMAX > 256 && EltSize == 8) {
9571 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
9572 // Reverse each half, then reassemble them in reverse order.
9573 // NOTE: It's also possible that after splitting, VLMAX no longer requires
9574 // vrgatherei16.vv.
9575 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
9576 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
9577 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
9578 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
9579 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
9580 // Reassemble the low and high pieces reversed.
9581 // FIXME: This is a CONCAT_VECTORS.
9582 SDValue Res =
9583 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
9584 DAG.getIntPtrConstant(0, DL));
9585 return DAG.getNode(
9586 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
9587 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
9590 // Just promote the int type to i16 which will double the LMUL.
9591 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
9592 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
9595 MVT XLenVT = Subtarget.getXLenVT();
9596 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9598 // Calculate VLMAX-1 for the desired SEW.
9599 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
9600 computeVLMax(VecVT, DL, DAG),
9601 DAG.getConstant(1, DL, XLenVT));
9603 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
9604 bool IsRV32E64 =
9605 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
9606 SDValue SplatVL;
9607 if (!IsRV32E64)
9608 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
9609 else
9610 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
9611 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
9613 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
9614 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
9615 DAG.getUNDEF(IntVT), Mask, VL);
9617 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
9618 DAG.getUNDEF(VecVT), Mask, VL);
9621 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
9622 SelectionDAG &DAG) const {
9623 SDLoc DL(Op);
9624 SDValue V1 = Op.getOperand(0);
9625 SDValue V2 = Op.getOperand(1);
9626 MVT XLenVT = Subtarget.getXLenVT();
9627 MVT VecVT = Op.getSimpleValueType();
9629 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
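// The splice is built by sliding V1 down by the splice offset (bringing its
// trailing elements to the front) and then sliding V2 up by VLMAX - offset on
// top of that result.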
9631 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
9632 SDValue DownOffset, UpOffset;
9633 if (ImmValue >= 0) {
9634 // The operand is a TargetConstant; we need to rebuild it as a regular
9635 // constant.
9636 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
9637 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
9638 } else {
9639 // The operand is a TargetConstant; we need to rebuild it as a regular
9640 // constant rather than negating the original operand.
9641 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
9642 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
9645 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
9647 SDValue SlideDown =
9648 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
9649 DownOffset, TrueMask, UpOffset);
9650 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
9651 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
9652 RISCVII::TAIL_AGNOSTIC);
9655 SDValue
9656 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
9657 SelectionDAG &DAG) const {
9658 SDLoc DL(Op);
9659 auto *Load = cast<LoadSDNode>(Op);
9661 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9662 Load->getMemoryVT(),
9663 *Load->getMemOperand()) &&
9664 "Expecting a correctly-aligned load");
9666 MVT VT = Op.getSimpleValueType();
9667 MVT XLenVT = Subtarget.getXLenVT();
9668 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9670 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
9672 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
9673 SDValue IntID = DAG.getTargetConstant(
9674 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
9675 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
9676 if (!IsMaskOp)
9677 Ops.push_back(DAG.getUNDEF(ContainerVT));
9678 Ops.push_back(Load->getBasePtr());
9679 Ops.push_back(VL);
9680 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9681 SDValue NewLoad =
9682 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9683 Load->getMemoryVT(), Load->getMemOperand());
9685 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
9686 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
9689 SDValue
9690 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
9691 SelectionDAG &DAG) const {
9692 SDLoc DL(Op);
9693 auto *Store = cast<StoreSDNode>(Op);
9695 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
9696 Store->getMemoryVT(),
9697 *Store->getMemOperand()) &&
9698 "Expecting a correctly-aligned store");
9700 SDValue StoreVal = Store->getValue();
9701 MVT VT = StoreVal.getSimpleValueType();
9702 MVT XLenVT = Subtarget.getXLenVT();
9704 // If the size is less than a byte, we need to pad with zeros to make a byte.
9705 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
9706 VT = MVT::v8i1;
9707 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
9708 DAG.getConstant(0, DL, VT), StoreVal,
9709 DAG.getIntPtrConstant(0, DL));
9712 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9714 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
9716 SDValue NewValue =
9717 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
9719 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
9720 SDValue IntID = DAG.getTargetConstant(
9721 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
9722 return DAG.getMemIntrinsicNode(
9723 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
9724 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
9725 Store->getMemoryVT(), Store->getMemOperand());
9728 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
9729 SelectionDAG &DAG) const {
9730 SDLoc DL(Op);
9731 MVT VT = Op.getSimpleValueType();
9733 const auto *MemSD = cast<MemSDNode>(Op);
9734 EVT MemVT = MemSD->getMemoryVT();
9735 MachineMemOperand *MMO = MemSD->getMemOperand();
9736 SDValue Chain = MemSD->getChain();
9737 SDValue BasePtr = MemSD->getBasePtr();
9739 SDValue Mask, PassThru, VL;
9740 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
9741 Mask = VPLoad->getMask();
9742 PassThru = DAG.getUNDEF(VT);
9743 VL = VPLoad->getVectorLength();
9744 } else {
9745 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
9746 Mask = MLoad->getMask();
9747 PassThru = MLoad->getPassThru();
9750 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9752 MVT XLenVT = Subtarget.getXLenVT();
9754 MVT ContainerVT = VT;
9755 if (VT.isFixedLengthVector()) {
9756 ContainerVT = getContainerForFixedLengthVector(VT);
9757 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9758 if (!IsUnmasked) {
9759 MVT MaskVT = getMaskTypeFor(ContainerVT);
9760 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9764 if (!VL)
9765 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9767 unsigned IntID =
9768 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
9769 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9770 if (IsUnmasked)
9771 Ops.push_back(DAG.getUNDEF(ContainerVT));
9772 else
9773 Ops.push_back(PassThru);
9774 Ops.push_back(BasePtr);
9775 if (!IsUnmasked)
9776 Ops.push_back(Mask);
9777 Ops.push_back(VL);
9778 if (!IsUnmasked)
9779 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
9781 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9783 SDValue Result =
9784 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
9785 Chain = Result.getValue(1);
9787 if (VT.isFixedLengthVector())
9788 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9790 return DAG.getMergeValues({Result, Chain}, DL);
9793 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
9794 SelectionDAG &DAG) const {
9795 SDLoc DL(Op);
9797 const auto *MemSD = cast<MemSDNode>(Op);
9798 EVT MemVT = MemSD->getMemoryVT();
9799 MachineMemOperand *MMO = MemSD->getMemOperand();
9800 SDValue Chain = MemSD->getChain();
9801 SDValue BasePtr = MemSD->getBasePtr();
9802 SDValue Val, Mask, VL;
9804 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
9805 Val = VPStore->getValue();
9806 Mask = VPStore->getMask();
9807 VL = VPStore->getVectorLength();
9808 } else {
9809 const auto *MStore = cast<MaskedStoreSDNode>(Op);
9810 Val = MStore->getValue();
9811 Mask = MStore->getMask();
9814 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9816 MVT VT = Val.getSimpleValueType();
9817 MVT XLenVT = Subtarget.getXLenVT();
9819 MVT ContainerVT = VT;
9820 if (VT.isFixedLengthVector()) {
9821 ContainerVT = getContainerForFixedLengthVector(VT);
9823 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9824 if (!IsUnmasked) {
9825 MVT MaskVT = getMaskTypeFor(ContainerVT);
9826 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9830 if (!VL)
9831 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9833 unsigned IntID =
9834 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
9835 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9836 Ops.push_back(Val);
9837 Ops.push_back(BasePtr);
9838 if (!IsUnmasked)
9839 Ops.push_back(Mask);
9840 Ops.push_back(VL);
9842 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
9843 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
9846 SDValue
9847 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
9848 SelectionDAG &DAG) const {
9849 MVT InVT = Op.getOperand(0).getSimpleValueType();
9850 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
9852 MVT VT = Op.getSimpleValueType();
9854 SDValue Op1 =
9855 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
9856 SDValue Op2 =
9857 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
9859 SDLoc DL(Op);
9860 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
9861 DAG, Subtarget);
9862 MVT MaskVT = getMaskTypeFor(ContainerVT);
9864 SDValue Cmp =
9865 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9866 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
9868 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
9871 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
9872 SelectionDAG &DAG) const {
9873 unsigned Opc = Op.getOpcode();
9874 SDLoc DL(Op);
9875 SDValue Chain = Op.getOperand(0);
9876 SDValue Op1 = Op.getOperand(1);
9877 SDValue Op2 = Op.getOperand(2);
9878 SDValue CC = Op.getOperand(3);
9879 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
9880 MVT VT = Op.getSimpleValueType();
9881 MVT InVT = Op1.getSimpleValueType();
9883 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
9884 // condition codes.
9885 if (Opc == ISD::STRICT_FSETCCS) {
9886 // Expand strict_fsetccs(x, y, oeq) to
9887 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
9888 SDVTList VTList = Op->getVTList();
9889 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
9890 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
9891 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
9892 Op2, OLECCVal);
9893 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
9894 Op1, OLECCVal);
9895 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
9896 Tmp1.getValue(1), Tmp2.getValue(1));
9897 // Tmp1 and Tmp2 might be the same node.
9898 if (Tmp1 != Tmp2)
9899 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
9900 return DAG.getMergeValues({Tmp1, OutChain}, DL);
9903 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
9904 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
9905 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
9906 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
9907 Op2, OEQCCVal);
9908 SDValue Res = DAG.getNOT(DL, OEQ, VT);
9909 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
9913 MVT ContainerInVT = InVT;
9914 if (InVT.isFixedLengthVector()) {
9915 ContainerInVT = getContainerForFixedLengthVector(InVT);
9916 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
9917 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
9919 MVT MaskVT = getMaskTypeFor(ContainerInVT);
9921 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
9923 SDValue Res;
9924 if (Opc == ISD::STRICT_FSETCC &&
9925 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
9926 CCVal == ISD::SETOLE)) {
9927 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
9928 // is only active when both input elements are ordered.
9929 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
9930 SDValue OrderMask1 = DAG.getNode(
9931 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
9932 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
9933 True, VL});
9934 SDValue OrderMask2 = DAG.getNode(
9935 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
9936 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
9937 True, VL});
9938 Mask =
9939 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
9940 // Use Mask as the merge operand to let the result be 0 if either of the
9941 // inputs is unordered.
9942 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
9943 DAG.getVTList(MaskVT, MVT::Other),
9944 {Chain, Op1, Op2, CC, Mask, Mask, VL});
9945 } else {
9946 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
9947 : RISCVISD::STRICT_FSETCCS_VL;
9948 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
9949 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
9952 if (VT.isFixedLengthVector()) {
9953 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
9954 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
9956 return Res;
9959 // Lower vector ABS to smax(X, sub(0, X)).
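// This typically selects down to a vrsub with immediate 0 followed by a vmax.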
9960 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
9961 SDLoc DL(Op);
9962 MVT VT = Op.getSimpleValueType();
9963 SDValue X = Op.getOperand(0);
9965 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
9966 "Unexpected type for ISD::ABS");
9968 MVT ContainerVT = VT;
9969 if (VT.isFixedLengthVector()) {
9970 ContainerVT = getContainerForFixedLengthVector(VT);
9971 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
9974 SDValue Mask, VL;
9975 if (Op->getOpcode() == ISD::VP_ABS) {
9976 Mask = Op->getOperand(1);
9977 if (VT.isFixedLengthVector())
9978 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
9979 Subtarget);
9980 VL = Op->getOperand(2);
9981 } else
9982 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
9984 SDValue SplatZero = DAG.getNode(
9985 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
9986 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
9987 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
9988 DAG.getUNDEF(ContainerVT), Mask, VL);
9989 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
9990 DAG.getUNDEF(ContainerVT), Mask, VL);
9992 if (VT.isFixedLengthVector())
9993 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
9994 return Max;
9997 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
9998 SDValue Op, SelectionDAG &DAG) const {
9999 SDLoc DL(Op);
10000 MVT VT = Op.getSimpleValueType();
10001 SDValue Mag = Op.getOperand(0);
10002 SDValue Sign = Op.getOperand(1);
10003 assert(Mag.getValueType() == Sign.getValueType() &&
10004 "Can only handle COPYSIGN with matching types.");
10006 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10007 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10008 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10010 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10012 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10013 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10015 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10018 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10019 SDValue Op, SelectionDAG &DAG) const {
10020 MVT VT = Op.getSimpleValueType();
10021 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10023 MVT I1ContainerVT =
10024 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10026 SDValue CC =
10027 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10028 SDValue Op1 =
10029 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10030 SDValue Op2 =
10031 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10033 SDLoc DL(Op);
10034 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10036 SDValue Select =
10037 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
10039 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10042 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10043 SelectionDAG &DAG) const {
10044 unsigned NewOpc = getRISCVVLOp(Op);
10045 bool HasMergeOp = hasMergeOp(NewOpc);
10046 bool HasMask = hasMaskOp(NewOpc);
10048 MVT VT = Op.getSimpleValueType();
10049 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10051 // Create list of operands by converting existing ones to scalable types.
10052 SmallVector<SDValue, 6> Ops;
10053 for (const SDValue &V : Op->op_values()) {
10054 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10056 // Pass through non-vector operands.
10057 if (!V.getValueType().isVector()) {
10058 Ops.push_back(V);
10059 continue;
10062 // "cast" fixed length vector to a scalable vector.
10063 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10064 "Only fixed length vectors are supported!");
10065 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10068 SDLoc DL(Op);
10069 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10070 if (HasMergeOp)
10071 Ops.push_back(DAG.getUNDEF(ContainerVT));
10072 if (HasMask)
10073 Ops.push_back(Mask);
10074 Ops.push_back(VL);
10076 // StrictFP operations have two result values. Their lowered results should
10077 // have the same result count.
10078 if (Op->isStrictFPOpcode()) {
10079 SDValue ScalableRes =
10080 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10081 Op->getFlags());
10082 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10083 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10086 SDValue ScalableRes =
10087 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10088 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10091 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10092 // * Operands of each node are assumed to be in the same order.
10093 // * The EVL operand is promoted from i32 to i64 on RV64.
10094 // * Fixed-length vectors are converted to their scalable-vector container
10095 // types.
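// For example, a VP_ADD on fixed-length operands has each vector operand
// converted to its container type, keeps the mask and EVL operands in place
// (with a dummy merge value inserted before the mask when the VL node takes
// one), and is emitted as RISCVISD::ADD_VL before the result is converted
// back to the fixed-length type.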
10096 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10097 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10098 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10100 SDLoc DL(Op);
10101 MVT VT = Op.getSimpleValueType();
10102 SmallVector<SDValue, 4> Ops;
10104 MVT ContainerVT = VT;
10105 if (VT.isFixedLengthVector())
10106 ContainerVT = getContainerForFixedLengthVector(VT);
10108 for (const auto &OpIdx : enumerate(Op->ops())) {
10109 SDValue V = OpIdx.value();
10110 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10111 // Add dummy merge value before the mask.
10112 if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
10113 Ops.push_back(DAG.getUNDEF(ContainerVT));
10114 // Pass through operands which aren't fixed-length vectors.
10115 if (!V.getValueType().isFixedLengthVector()) {
10116 Ops.push_back(V);
10117 continue;
10119 // "cast" fixed length vector to a scalable vector.
10120 MVT OpVT = V.getSimpleValueType();
10121 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10122 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10123 "Only fixed length vectors are supported!");
10124 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10127 if (!VT.isFixedLengthVector())
10128 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10130 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10132 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10135 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10136 SelectionDAG &DAG) const {
10137 SDLoc DL(Op);
10138 MVT VT = Op.getSimpleValueType();
10140 SDValue Src = Op.getOperand(0);
10141 // NOTE: Mask is dropped.
10142 SDValue VL = Op.getOperand(2);
10144 MVT ContainerVT = VT;
10145 if (VT.isFixedLengthVector()) {
10146 ContainerVT = getContainerForFixedLengthVector(VT);
10147 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10148 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10151 MVT XLenVT = Subtarget.getXLenVT();
10152 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10153 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10154 DAG.getUNDEF(ContainerVT), Zero, VL);
10156 SDValue SplatValue = DAG.getConstant(
10157 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10158 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10159 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10161 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
10162 Splat, ZeroSplat, VL);
10163 if (!VT.isFixedLengthVector())
10164 return Result;
10165 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10168 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10169 SelectionDAG &DAG) const {
10170 SDLoc DL(Op);
10171 MVT VT = Op.getSimpleValueType();
10173 SDValue Op1 = Op.getOperand(0);
10174 SDValue Op2 = Op.getOperand(1);
10175 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10176 // NOTE: Mask is dropped.
10177 SDValue VL = Op.getOperand(4);
10179 MVT ContainerVT = VT;
10180 if (VT.isFixedLengthVector()) {
10181 ContainerVT = getContainerForFixedLengthVector(VT);
10182 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10183 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10186 SDValue Result;
10187 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10189 switch (Condition) {
10190 default:
10191 break;
10192 // X != Y --> (X^Y)
10193 case ISD::SETNE:
10194 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10195 break;
10196 // X == Y --> ~(X^Y)
10197 case ISD::SETEQ: {
10198 SDValue Temp =
10199 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10200 Result =
10201 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
10202 break;
10204 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
10205 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
10206 case ISD::SETGT:
10207 case ISD::SETULT: {
10208 SDValue Temp =
10209 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10210 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
10211 break;
10213 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
10214 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
10215 case ISD::SETLT:
10216 case ISD::SETUGT: {
10217 SDValue Temp =
10218 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10219 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
10220 break;
10222 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
10223 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
10224 case ISD::SETGE:
10225 case ISD::SETULE: {
10226 SDValue Temp =
10227 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10228 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
10229 break;
10231 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
10232 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
10233 case ISD::SETLE:
10234 case ISD::SETUGE: {
10235 SDValue Temp =
10236 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10237 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
10238 break;
10242 if (!VT.isFixedLengthVector())
10243 return Result;
10244 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10247 // Lower Floating-Point/Integer Type-Convert VP SDNodes
10248 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
10249 SelectionDAG &DAG) const {
10250 SDLoc DL(Op);
10252 SDValue Src = Op.getOperand(0);
10253 SDValue Mask = Op.getOperand(1);
10254 SDValue VL = Op.getOperand(2);
10255 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10257 MVT DstVT = Op.getSimpleValueType();
10258 MVT SrcVT = Src.getSimpleValueType();
10259 if (DstVT.isFixedLengthVector()) {
10260 DstVT = getContainerForFixedLengthVector(DstVT);
10261 SrcVT = getContainerForFixedLengthVector(SrcVT);
10262 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10263 MVT MaskVT = getMaskTypeFor(DstVT);
10264 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10267 unsigned DstEltSize = DstVT.getScalarSizeInBits();
10268 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
10270 SDValue Result;
10271 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
10272 if (SrcVT.isInteger()) {
10273 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10275 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
10276 ? RISCVISD::VSEXT_VL
10277 : RISCVISD::VZEXT_VL;
10279 // Do we need to do any pre-widening before converting?
10280 if (SrcEltSize == 1) {
10281 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10284 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10285 DAG.getUNDEF(IntVT), Zero, VL);
10286 SDValue One = DAG.getConstant(
10287 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
10288 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
10289 DAG.getUNDEF(IntVT), One, VL);
10290 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
10291 ZeroSplat, VL);
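// For example, a vp.sitofp from an i1 vector first rewrites the mask as the
// integers 0/-1 in IntVT, so the FP conversion below sees proper signed values.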
10292 } else if (DstEltSize > (2 * SrcEltSize)) {
10293 // Widen before converting.
10294 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
10295 DstVT.getVectorElementCount());
10296 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
10299 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10300 } else {
10301 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10302 "Wrong input/output vector types");
10304 // Convert f16 to f32 then convert f32 to i64.
10305 if (DstEltSize > (2 * SrcEltSize)) {
10306 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10307 MVT InterimFVT =
10308 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10309 Src =
10310 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
10313 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
10315 } else { // Narrowing + Conversion
10316 if (SrcVT.isInteger()) {
10317 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
10318 // First do a narrowing convert to an FP type half the size, then round
10319 // the FP type to a small FP type if needed.
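// For example, an i64 -> f16 conversion is emitted as i64 -> f32 (narrowing
// convert) followed by an f32 -> f16 fp_round.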
10321 MVT InterimFVT = DstVT;
10322 if (SrcEltSize > (2 * DstEltSize)) {
10323 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
10324 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
10325 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
10328 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
10330 if (InterimFVT != DstVT) {
10331 Src = Result;
10332 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
10334 } else {
10335 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
10336 "Wrong input/output vector types");
10337 // First do a narrowing conversion to an integer half the size, then
10338 // truncate if needed.
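// For example, an f64 -> i8 conversion is emitted as f64 -> i32 followed by
// vector truncates i32 -> i16 -> i8.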
10340 if (DstEltSize == 1) {
10341 // First convert to the same size integer, then convert to mask using
10342 // setcc.
10343 assert(SrcEltSize >= 16 && "Unexpected FP type!");
10344 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
10345 DstVT.getVectorElementCount());
10346 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10348 // Compare the integer result to 0. The integer should be 0 or 1/-1,
10349 // otherwise the conversion was undefined.
10350 MVT XLenVT = Subtarget.getXLenVT();
10351 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10352 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
10353 DAG.getUNDEF(InterimIVT), SplatZero, VL);
10354 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
10355 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
10356 DAG.getUNDEF(DstVT), Mask, VL});
10357 } else {
10358 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10359 DstVT.getVectorElementCount());
10361 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
10363 while (InterimIVT != DstVT) {
10364 SrcEltSize /= 2;
10365 Src = Result;
10366 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
10367 DstVT.getVectorElementCount());
10368 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
10369 Src, Mask, VL);
10375 MVT VT = Op.getSimpleValueType();
10376 if (!VT.isFixedLengthVector())
10377 return Result;
10378 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10381 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 MVT VT = Op.getSimpleValueType();
10384 if (VT.getVectorElementType() != MVT::i1)
10385 return lowerVPOp(Op, DAG);
10387 // It is safe to drop the mask parameter as masked-off elements are undef.
10388 SDValue Op1 = Op->getOperand(0);
10389 SDValue Op2 = Op->getOperand(1);
10390 SDValue VL = Op->getOperand(3);
10392 MVT ContainerVT = VT;
10393 const bool IsFixed = VT.isFixedLengthVector();
10394 if (IsFixed) {
10395 ContainerVT = getContainerForFixedLengthVector(VT);
10396 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10397 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10400 SDLoc DL(Op);
10401 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
10402 if (!IsFixed)
10403 return Val;
10404 return convertFromScalableVector(VT, Val, DAG, Subtarget);
10407 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
10408 SelectionDAG &DAG) const {
10409 SDLoc DL(Op);
10410 MVT XLenVT = Subtarget.getXLenVT();
10411 MVT VT = Op.getSimpleValueType();
10412 MVT ContainerVT = VT;
10413 if (VT.isFixedLengthVector())
10414 ContainerVT = getContainerForFixedLengthVector(VT);
10416 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10418 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
10419 // Check if the mask is known to be all ones
10420 SDValue Mask = VPNode->getMask();
10421 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10423 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
10424 : Intrinsic::riscv_vlse_mask,
10425 DL, XLenVT);
10426 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
10427 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
10428 VPNode->getStride()};
10429 if (!IsUnmasked) {
10430 if (VT.isFixedLengthVector()) {
10431 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
10432 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10434 Ops.push_back(Mask);
10436 Ops.push_back(VPNode->getVectorLength());
10437 if (!IsUnmasked) {
10438 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10439 Ops.push_back(Policy);
10442 SDValue Result =
10443 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10444 VPNode->getMemoryVT(), VPNode->getMemOperand());
10445 SDValue Chain = Result.getValue(1);
10447 if (VT.isFixedLengthVector())
10448 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10450 return DAG.getMergeValues({Result, Chain}, DL);
10453 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
10454 SelectionDAG &DAG) const {
10455 SDLoc DL(Op);
10456 MVT XLenVT = Subtarget.getXLenVT();
10458 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
10459 SDValue StoreVal = VPNode->getValue();
10460 MVT VT = StoreVal.getSimpleValueType();
10461 MVT ContainerVT = VT;
10462 if (VT.isFixedLengthVector()) {
10463 ContainerVT = getContainerForFixedLengthVector(VT);
10464 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10467 // Check if the mask is known to be all ones
10468 SDValue Mask = VPNode->getMask();
10469 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10471 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
10472 : Intrinsic::riscv_vsse_mask,
10473 DL, XLenVT);
10474 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
10475 VPNode->getBasePtr(), VPNode->getStride()};
10476 if (!IsUnmasked) {
10477 if (VT.isFixedLengthVector()) {
10478 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
10479 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10481 Ops.push_back(Mask);
10483 Ops.push_back(VPNode->getVectorLength());
10485 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
10486 Ops, VPNode->getMemoryVT(),
10487 VPNode->getMemOperand());
10490 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
10491 // matched to a RVV indexed load. The RVV indexed load instructions only
10492 // support the "unsigned unscaled" addressing mode; indices are implicitly
10493 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
10494 // signed or scaled indexing is extended to the XLEN value type and scaled
10495 // accordingly.
10496 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
10497 SelectionDAG &DAG) const {
10498 SDLoc DL(Op);
10499 MVT VT = Op.getSimpleValueType();
10501 const auto *MemSD = cast<MemSDNode>(Op.getNode());
10502 EVT MemVT = MemSD->getMemoryVT();
10503 MachineMemOperand *MMO = MemSD->getMemOperand();
10504 SDValue Chain = MemSD->getChain();
10505 SDValue BasePtr = MemSD->getBasePtr();
10507 ISD::LoadExtType LoadExtType;
10508 SDValue Index, Mask, PassThru, VL;
10510 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
10511 Index = VPGN->getIndex();
10512 Mask = VPGN->getMask();
10513 PassThru = DAG.getUNDEF(VT);
10514 VL = VPGN->getVectorLength();
10515 // VP doesn't support extending loads.
10516 LoadExtType = ISD::NON_EXTLOAD;
10517 } else {
10518 // Else it must be a MGATHER.
10519 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
10520 Index = MGN->getIndex();
10521 Mask = MGN->getMask();
10522 PassThru = MGN->getPassThru();
10523 LoadExtType = MGN->getExtensionType();
10526 MVT IndexVT = Index.getSimpleValueType();
10527 MVT XLenVT = Subtarget.getXLenVT();
10529 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
10530 "Unexpected VTs!");
10531 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
10532 // Targets have to explicitly opt-in for extending vector loads.
10533 assert(LoadExtType == ISD::NON_EXTLOAD &&
10534 "Unexpected extending MGATHER/VP_GATHER");
10535 (void)LoadExtType;
10537 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
10538 // the selection of the masked intrinsics doesn't do this for us.
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10541 MVT ContainerVT = VT;
10542 if (VT.isFixedLengthVector()) {
10543 ContainerVT = getContainerForFixedLengthVector(VT);
10544 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
10545 ContainerVT.getVectorElementCount());
10547 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
10549 if (!IsUnmasked) {
10550 MVT MaskVT = getMaskTypeFor(ContainerVT);
10551 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10552 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10556 if (!VL)
10557 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10559 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
10560 IndexVT = IndexVT.changeVectorElementType(XLenVT);
10561 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
10564 unsigned IntID =
10565 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
10566 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10567 if (IsUnmasked)
10568 Ops.push_back(DAG.getUNDEF(ContainerVT));
10569 else
10570 Ops.push_back(PassThru);
10571 Ops.push_back(BasePtr);
10572 Ops.push_back(Index);
10573 if (!IsUnmasked)
10574 Ops.push_back(Mask);
10575 Ops.push_back(VL);
10576 if (!IsUnmasked)
10577 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10579 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10580 SDValue Result =
10581 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10582 Chain = Result.getValue(1);
10584 if (VT.isFixedLengthVector())
10585 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10587 return DAG.getMergeValues({Result, Chain}, DL);
10590 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
10591 // matched to a RVV indexed store. The RVV indexed store instructions only
10592 // support the "unsigned unscaled" addressing mode; indices are implicitly
10593 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
10594 // signed or scaled indexing is extended to the XLEN value type and scaled
10595 // accordingly.
10596 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
10597 SelectionDAG &DAG) const {
10598 SDLoc DL(Op);
10599 const auto *MemSD = cast<MemSDNode>(Op.getNode());
10600 EVT MemVT = MemSD->getMemoryVT();
10601 MachineMemOperand *MMO = MemSD->getMemOperand();
10602 SDValue Chain = MemSD->getChain();
10603 SDValue BasePtr = MemSD->getBasePtr();
10605 bool IsTruncatingStore = false;
10606 SDValue Index, Mask, Val, VL;
10608 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
10609 Index = VPSN->getIndex();
10610 Mask = VPSN->getMask();
10611 Val = VPSN->getValue();
10612 VL = VPSN->getVectorLength();
10613 // VP doesn't support truncating stores.
10614 IsTruncatingStore = false;
10615 } else {
10616 // Else it must be a MSCATTER.
10617 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
10618 Index = MSN->getIndex();
10619 Mask = MSN->getMask();
10620 Val = MSN->getValue();
10621 IsTruncatingStore = MSN->isTruncatingStore();
10624 MVT VT = Val.getSimpleValueType();
10625 MVT IndexVT = Index.getSimpleValueType();
10626 MVT XLenVT = Subtarget.getXLenVT();
10628 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
10629 "Unexpected VTs!");
10630 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
10631 // Targets have to explicitly opt-in for extending vector loads and
10632 // truncating vector stores.
10633 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
10634 (void)IsTruncatingStore;
10636 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
10637 // the selection of the masked intrinsics doesn't do this for us.
10638 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10640 MVT ContainerVT = VT;
10641 if (VT.isFixedLengthVector()) {
10642 ContainerVT = getContainerForFixedLengthVector(VT);
10643 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
10644 ContainerVT.getVectorElementCount());
10646 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
10647 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10649 if (!IsUnmasked) {
10650 MVT MaskVT = getMaskTypeFor(ContainerVT);
10651 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10655 if (!VL)
10656 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10658 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
10659 IndexVT = IndexVT.changeVectorElementType(XLenVT);
10660 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
10663 unsigned IntID =
10664 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
10665 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10666 Ops.push_back(Val);
10667 Ops.push_back(BasePtr);
10668 Ops.push_back(Index);
10669 if (!IsUnmasked)
10670 Ops.push_back(Mask);
10671 Ops.push_back(VL);
10673 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10674 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10677 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
10678 SelectionDAG &DAG) const {
10679 const MVT XLenVT = Subtarget.getXLenVT();
10680 SDLoc DL(Op);
10681 SDValue Chain = Op->getOperand(0);
10682 SDValue SysRegNo = DAG.getTargetConstant(
10683 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
10684 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
10685 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
10687 // The encoding used for the rounding mode in RISC-V differs from that used by
10688 // FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as an
10689 // index into a table consisting of a sequence of 4-bit fields, each holding the
10690 // corresponding FLT_ROUNDS mode.
10691 static const int Table =
10692 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
10693 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
10694 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
10695 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
10696 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
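// Each 4-bit field is extracted as (Table >> (FRM * 4)) & 7. For example, if
// FRM holds RTZ, the lookup below yields int(RoundingMode::TowardZero), the
// FLT_ROUNDS encoding for round-toward-zero.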
10698 SDValue Shift =
10699 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
10700 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
10701 DAG.getConstant(Table, DL, XLenVT), Shift);
10702 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
10703 DAG.getConstant(7, DL, XLenVT));
10705 return DAG.getMergeValues({Masked, Chain}, DL);
10708 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
10709 SelectionDAG &DAG) const {
10710 const MVT XLenVT = Subtarget.getXLenVT();
10711 SDLoc DL(Op);
10712 SDValue Chain = Op->getOperand(0);
10713 SDValue RMValue = Op->getOperand(1);
10714 SDValue SysRegNo = DAG.getTargetConstant(
10715 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
10717 // The encoding used for the rounding mode in RISC-V differs from that used by
10718 // FLT_ROUNDS. To convert between them, the C rounding mode is used as an index
10719 // into a table consisting of a sequence of 4-bit fields, each holding the
10720 // corresponding RISC-V mode.
10721 static const unsigned Table =
10722 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
10723 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
10724 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
10725 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
10726 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
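// For example, an incoming FLT_ROUNDS value of NearestTiesToEven selects the
// field holding RISCVFPRndMode::RNE, which is what gets written to FRM below.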
10728 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
10730 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
10731 DAG.getConstant(2, DL, XLenVT));
10732 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
10733 DAG.getConstant(Table, DL, XLenVT), Shift);
10734 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
10735 DAG.getConstant(0x7, DL, XLenVT));
10736 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
10737 RMValue);
10740 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
10741 SelectionDAG &DAG) const {
10742 MachineFunction &MF = DAG.getMachineFunction();
10744 bool isRISCV64 = Subtarget.is64Bit();
10745 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10747 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
10748 return DAG.getFrameIndex(FI, PtrVT);
10751 // Returns the opcode of the target-specific SDNode that implements the 32-bit
10752 // form of the given Opcode.
10753 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
10754 switch (Opcode) {
10755 default:
10756 llvm_unreachable("Unexpected opcode");
10757 case ISD::SHL:
10758 return RISCVISD::SLLW;
10759 case ISD::SRA:
10760 return RISCVISD::SRAW;
10761 case ISD::SRL:
10762 return RISCVISD::SRLW;
10763 case ISD::SDIV:
10764 return RISCVISD::DIVW;
10765 case ISD::UDIV:
10766 return RISCVISD::DIVUW;
10767 case ISD::UREM:
10768 return RISCVISD::REMUW;
10769 case ISD::ROTL:
10770 return RISCVISD::ROLW;
10771 case ISD::ROTR:
10772 return RISCVISD::RORW;
10776 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
10777 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
10778 // otherwise be promoted to i64, making it difficult to select the
10779 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
10780 // type i8/i16/i32 is lost.
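// For example, an i32 SDIV on RV64 becomes
// (trunc i32 (DIVW (any_extend i64 LHS), (any_extend i64 RHS))).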
10781 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
10782 unsigned ExtOpc = ISD::ANY_EXTEND) {
10783 SDLoc DL(N);
10784 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
10785 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
10786 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
10787 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
10788 // ReplaceNodeResults requires we maintain the same type for the return value.
10789 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
10792 // Converts the given 32-bit operation to an i64 operation with sign extension
10793 // semantics to reduce the number of sign extension instructions.
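// For example, an i32 add becomes
// (trunc i32 (sext_inreg (add (any_extend X), (any_extend Y)), i32)),
// which records that the upper 32 bits are a sign extension of the result.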
10794 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
10795 SDLoc DL(N);
10796 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
10797 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10798 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
10799 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
10800 DAG.getValueType(MVT::i32));
10801 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
10804 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
10805 SmallVectorImpl<SDValue> &Results,
10806 SelectionDAG &DAG) const {
10807 SDLoc DL(N);
10808 switch (N->getOpcode()) {
10809 default:
10810 llvm_unreachable("Don't know how to custom type legalize this operation!");
10811 case ISD::STRICT_FP_TO_SINT:
10812 case ISD::STRICT_FP_TO_UINT:
10813 case ISD::FP_TO_SINT:
10814 case ISD::FP_TO_UINT: {
10815 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
10816 "Unexpected custom legalisation");
10817 bool IsStrict = N->isStrictFPOpcode();
10818 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
10819 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
10820 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
10821 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
10822 TargetLowering::TypeSoftenFloat) {
10823 if (!isTypeLegal(Op0.getValueType()))
10824 return;
10825 if (IsStrict) {
10826 SDValue Chain = N->getOperand(0);
10827 // In absence of Zfh, promote f16 to f32, then convert.
10828 if (Op0.getValueType() == MVT::f16 &&
10829 !Subtarget.hasStdExtZfhOrZhinx()) {
10830 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
10831 {Chain, Op0});
10832 Chain = Op0.getValue(1);
10834 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
10835 : RISCVISD::STRICT_FCVT_WU_RV64;
10836 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
10837 SDValue Res = DAG.getNode(
10838 Opc, DL, VTs, Chain, Op0,
10839 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
10840 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10841 Results.push_back(Res.getValue(1));
10842 return;
10844 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
10845 // convert.
10846 if ((Op0.getValueType() == MVT::f16 &&
10847 !Subtarget.hasStdExtZfhOrZhinx()) ||
10848 Op0.getValueType() == MVT::bf16)
10849 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
10851 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
10852 SDValue Res =
10853 DAG.getNode(Opc, DL, MVT::i64, Op0,
10854 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
10855 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10856 return;
10858 // If the FP type needs to be softened, emit a library call using the 'si'
10859 // version. If we left it to default legalization we'd end up with 'di'. If
10860 // the FP type doesn't need to be softened just let generic type
10861 // legalization promote the result type.
10862 RTLIB::Libcall LC;
10863 if (IsSigned)
10864 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
10865 else
10866 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
10867 MakeLibCallOptions CallOptions;
10868 EVT OpVT = Op0.getValueType();
10869 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
10870 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
10871 SDValue Result;
10872 std::tie(Result, Chain) =
10873 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
10874 Results.push_back(Result);
10875 if (IsStrict)
10876 Results.push_back(Chain);
10877 break;
10879 case ISD::LROUND: {
10880 SDValue Op0 = N->getOperand(0);
10881 EVT Op0VT = Op0.getValueType();
10882 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
10883 TargetLowering::TypeSoftenFloat) {
10884 if (!isTypeLegal(Op0VT))
10885 return;
10887 // In absence of Zfh, promote f16 to f32, then convert.
10888 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
10889 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
10891 SDValue Res =
10892 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
10893 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
10894 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10895 return;
10897 // If the FP type needs to be softened, emit a library call to lround. We'll
10898 // need to truncate the result. We assume any value that doesn't fit in i32
10899 // is allowed to return an unspecified value.
10900 RTLIB::Libcall LC =
10901 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
10902 MakeLibCallOptions CallOptions;
10903 EVT OpVT = Op0.getValueType();
10904 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
10905 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
10906 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
10907 Results.push_back(Result);
10908 break;
10910 case ISD::READCYCLECOUNTER: {
10911 assert(!Subtarget.is64Bit() &&
10912 "READCYCLECOUNTER only has custom type legalization on riscv32");
10914 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10915 SDValue RCW =
10916 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
10918 Results.push_back(
10919 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
10920 Results.push_back(RCW.getValue(2));
10921 break;
10923 case ISD::LOAD: {
10924 if (!ISD::isNON_EXTLoad(N))
10925 return;
10927 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
10928 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
10929 LoadSDNode *Ld = cast<LoadSDNode>(N);
10931 SDLoc dl(N);
10932 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
10933 Ld->getBasePtr(), Ld->getMemoryVT(),
10934 Ld->getMemOperand());
10935 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
10936 Results.push_back(Res.getValue(1));
10937 return;
10939 case ISD::MUL: {
10940 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
10941 unsigned XLen = Subtarget.getXLen();
10942 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
10943 if (Size > XLen) {
10944 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
10945 SDValue LHS = N->getOperand(0);
10946 SDValue RHS = N->getOperand(1);
10947 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
10949 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
10950 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
10951 // We need exactly one side to be unsigned.
10952 if (LHSIsU == RHSIsU)
10953 return;
10955 auto MakeMULPair = [&](SDValue S, SDValue U) {
10956 MVT XLenVT = Subtarget.getXLenVT();
10957 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
10958 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
10959 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
10960 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
10961 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
10964 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
10965 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
10967 // The other operand should be signed, but still prefer MULH when
10968 // possible.
10969 if (RHSIsU && LHSIsS && !RHSIsS)
10970 Results.push_back(MakeMULPair(LHS, RHS));
10971 else if (LHSIsU && RHSIsS && !LHSIsS)
10972 Results.push_back(MakeMULPair(RHS, LHS));
10974 return;
10976 [[fallthrough]];
10978 case ISD::ADD:
10979 case ISD::SUB:
10980 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
10981 "Unexpected custom legalisation");
10982 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
10983 break;
10984 case ISD::SHL:
10985 case ISD::SRA:
10986 case ISD::SRL:
10987 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
10988 "Unexpected custom legalisation");
10989 if (N->getOperand(1).getOpcode() != ISD::Constant) {
10990 // If we can use a BSET instruction, allow default promotion to apply.
10991 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
10992 isOneConstant(N->getOperand(0)))
10993 break;
10994 Results.push_back(customLegalizeToWOp(N, DAG));
10995 break;
10998 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
10999 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
11000 // shift amount.
11001 if (N->getOpcode() == ISD::SHL) {
11002 SDLoc DL(N);
11003 SDValue NewOp0 =
11004 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11005 SDValue NewOp1 =
11006 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
11007 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
11008 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11009 DAG.getValueType(MVT::i32));
11010 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11013 break;
11014 case ISD::ROTL:
11015 case ISD::ROTR:
11016 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11017 "Unexpected custom legalisation");
11018 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
11019 Subtarget.hasVendorXTHeadBb()) &&
11020 "Unexpected custom legalization");
11021 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
11022 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
11023 return;
11024 Results.push_back(customLegalizeToWOp(N, DAG));
11025 break;
11026 case ISD::CTTZ:
11027 case ISD::CTTZ_ZERO_UNDEF:
11028 case ISD::CTLZ:
11029 case ISD::CTLZ_ZERO_UNDEF: {
11030 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11031 "Unexpected custom legalisation");
11033 SDValue NewOp0 =
11034 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11035 bool IsCTZ =
11036 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
11037 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
11038 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
11039 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11040 return;
11042 case ISD::SDIV:
11043 case ISD::UDIV:
11044 case ISD::UREM: {
11045 MVT VT = N->getSimpleValueType(0);
11046 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
11047 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
11048 "Unexpected custom legalisation");
11049 // Don't promote division/remainder by constant since we should expand those
11050 // to multiply by magic constant.
11051 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
11052 if (N->getOperand(1).getOpcode() == ISD::Constant &&
11053 !isIntDivCheap(N->getValueType(0), Attr))
11054 return;
11056 // If the input is i32, use ANY_EXTEND since the W instructions don't read
11057 // the upper 32 bits. For other types we need to sign or zero extend
11058 // based on the opcode.
11059 unsigned ExtOpc = ISD::ANY_EXTEND;
11060 if (VT != MVT::i32)
11061 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
11062 : ISD::ZERO_EXTEND;
11064 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
11065 break;
11067 case ISD::SADDO: {
11068 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11069 "Unexpected custom legalisation");
11071 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
11072 // use the default legalization.
11073 if (!isa<ConstantSDNode>(N->getOperand(1)))
11074 return;
11076 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11077 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11078 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
11079 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11080 DAG.getValueType(MVT::i32));
11082 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
11084 // For an addition, the result should be less than one of the operands (LHS)
11085 // if and only if the other operand (RHS) is negative, otherwise there will
11086 // be overflow.
11087 // For a subtraction, the result should be less than one of the operands
11088 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11089 // otherwise there will be overflow.
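// For example, i32 INT_MAX + 1: the sign-extended 32-bit result wraps to
// INT_MIN, so Res < LHS while RHS is not negative; the XOR of the two
// conditions below is therefore 1, signalling overflow.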
11090 EVT OType = N->getValueType(1);
11091 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
11092 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
11094 SDValue Overflow =
11095 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
11096 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11097 Results.push_back(Overflow);
11098 return;
11100 case ISD::UADDO:
11101 case ISD::USUBO: {
11102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11103 "Unexpected custom legalisation");
11104 bool IsAdd = N->getOpcode() == ISD::UADDO;
11105 // Create an ADDW or SUBW.
11106 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11107 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11108 SDValue Res =
11109 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
11110 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
11111 DAG.getValueType(MVT::i32));
11113 SDValue Overflow;
11114 if (IsAdd && isOneConstant(RHS)) {
11115 // Special case uaddo X, 1 overflowed if the addition result is 0.
11116 // The general case (X + C) < C is not necessarily beneficial. Although we
11117 // reduce the live range of X, we may introduce the materialization of
11118 // constant C, especially when the setcc result is used by a branch. We have
11119 // no compare-with-constant-and-branch instructions.
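// For example, uaddo(X, 1): the 32-bit sum is zero only when X was
// 0xffffffff, which is exactly the overflowing case.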
11120 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
11121 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
11122 } else if (IsAdd && isAllOnesConstant(RHS)) {
11123 // Special case uaddo X, -1 overflowed if X != 0.
11124 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
11125 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
11126 } else {
11127 // Sign extend the LHS and perform an unsigned compare with the ADDW
11128 // result. Since the inputs are sign extended from i32, this is equivalent
11129 // to comparing the lower 32 bits.
11130 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11131 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
11132 IsAdd ? ISD::SETULT : ISD::SETUGT);
11135 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11136 Results.push_back(Overflow);
11137 return;
11139 case ISD::UADDSAT:
11140 case ISD::USUBSAT: {
11141 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11142 "Unexpected custom legalisation");
11143 if (Subtarget.hasStdExtZbb()) {
11144 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
11145 // sign extend allows overflow of the lower 32 bits to be detected on
11146 // the promoted size.
11147 SDValue LHS =
11148 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
11149 SDValue RHS =
11150 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
11151 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
11152 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11153 return;
11156 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
11157 // promotion for UADDO/USUBO.
11158 Results.push_back(expandAddSubSat(N, DAG));
11159 return;
11161 case ISD::ABS: {
11162 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11163 "Unexpected custom legalisation");
11165 if (Subtarget.hasStdExtZbb()) {
11166 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
11167 // This allows us to remember that the result is sign extended. Expanding
11168 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
11169 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
11170 N->getOperand(0));
11171 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
11172 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
11173 return;
11176 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
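// For example, X = -5: Y = -1, xor(X, Y) = 4, and sub(4, Y) = 5 == |X|.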
11177 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11179 // Freeze the source so we can increase its use count.
11180 Src = DAG.getFreeze(Src);
11182 // Copy sign bit to all bits using the sraiw pattern.
11183 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
11184 DAG.getValueType(MVT::i32));
11185 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
11186 DAG.getConstant(31, DL, MVT::i64));
11188 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
11189 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
11191 // NOTE: The result is only required to be anyextended, but sext is
11192 // consistent with type legalization of sub.
11193 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
11194 DAG.getValueType(MVT::i32));
11195 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
11196 return;
11198 case ISD::BITCAST: {
11199 EVT VT = N->getValueType(0);
11200 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
11201 SDValue Op0 = N->getOperand(0);
11202 EVT Op0VT = Op0.getValueType();
11203 MVT XLenVT = Subtarget.getXLenVT();
11204 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
11205 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
11206 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11207 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11208 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
11209 Subtarget.hasStdExtZfbfmin()) {
11210 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
11211 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
11212 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
11213 Subtarget.hasStdExtFOrZfinx()) {
11214 SDValue FPConv =
11215 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
11216 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
11217 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
11218 Subtarget.hasStdExtZfa()) {
11219 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
11220 DAG.getVTList(MVT::i32, MVT::i32), Op0);
11221 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
11222 NewReg.getValue(0), NewReg.getValue(1));
11223 Results.push_back(RetReg);
11224 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
11225 isTypeLegal(Op0VT)) {
11226 // Custom-legalize bitcasts from fixed-length vector types to illegal
11227 // scalar types in order to improve codegen. Bitcast the vector to a
11228 // one-element vector type whose element type is the same as the result
11229 // type, and extract the first element.
11230 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
11231 if (isTypeLegal(BVT)) {
11232 SDValue BVec = DAG.getBitcast(BVT, Op0);
11233 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
11234 DAG.getConstant(0, DL, XLenVT)));
11237 break;
11239 case RISCVISD::BREV8: {
11240 MVT VT = N->getSimpleValueType(0);
11241 MVT XLenVT = Subtarget.getXLenVT();
11242 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
11243 "Unexpected custom legalisation");
11244 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11245 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
11246 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
11247 // ReplaceNodeResults requires we maintain the same type for the return
11248 // value.
11249 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
11250 break;
11252 case ISD::EXTRACT_VECTOR_ELT: {
11253 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
11254 // type is illegal (currently only vXi64 RV32).
11255 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
11256 // transferred to the destination register. We issue two of these from the
11257 // upper and lower halves of the SEW-bit vector element, slid down to the
11258 // first element.
11259 SDValue Vec = N->getOperand(0);
11260 SDValue Idx = N->getOperand(1);
11262 // The vector type hasn't been legalized yet so we can't issue target
11263 // specific nodes if it needs legalization.
11264 // FIXME: We would manually legalize if it's important.
11265 if (!isTypeLegal(Vec.getValueType()))
11266 return;
11268 MVT VecVT = Vec.getSimpleValueType();
11270 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
11271 VecVT.getVectorElementType() == MVT::i64 &&
11272 "Unexpected EXTRACT_VECTOR_ELT legalization");
11274 // If this is a fixed vector, we need to convert it to a scalable vector.
11275 MVT ContainerVT = VecVT;
11276 if (VecVT.isFixedLengthVector()) {
11277 ContainerVT = getContainerForFixedLengthVector(VecVT);
11278 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11281 MVT XLenVT = Subtarget.getXLenVT();
11283 // Use a VL of 1 to avoid processing more elements than we need.
11284 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
11286 // Unless the index is known to be 0, we must slide the vector down to get
11287 // the desired element into index 0.
11288 if (!isNullConstant(Idx)) {
11289 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11290 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
11293 // Extract the lower XLEN bits of the correct vector element.
11294 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11296 // To extract the upper XLEN bits of the vector element, shift the first
11297 // element right by 32 bits and re-extract the lower XLEN bits.
11298 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11299 DAG.getUNDEF(ContainerVT),
11300 DAG.getConstant(32, DL, XLenVT), VL);
11301 SDValue LShr32 =
11302 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
11303 DAG.getUNDEF(ContainerVT), Mask, VL);
11305 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11307 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11308 break;
11310 case ISD::INTRINSIC_WO_CHAIN: {
11311 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
11312 switch (IntNo) {
11313 default:
11314 llvm_unreachable(
11315 "Don't know how to custom type legalize this intrinsic!");
11316 case Intrinsic::experimental_get_vector_length: {
11317 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
11318 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11319 return;
11321 case Intrinsic::riscv_orc_b:
11322 case Intrinsic::riscv_brev8:
11323 case Intrinsic::riscv_sha256sig0:
11324 case Intrinsic::riscv_sha256sig1:
11325 case Intrinsic::riscv_sha256sum0:
11326 case Intrinsic::riscv_sha256sum1:
11327 case Intrinsic::riscv_sm3p0:
11328 case Intrinsic::riscv_sm3p1: {
11329 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11330 return;
11331 unsigned Opc;
11332 switch (IntNo) {
11333 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11334 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11335 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11336 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11337 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11338 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11339 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11340 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11343 SDValue NewOp =
11344 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11345 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
11346 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11347 return;
11349 case Intrinsic::riscv_sm4ks:
11350 case Intrinsic::riscv_sm4ed: {
11351 unsigned Opc =
11352 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11353 SDValue NewOp0 =
11354 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11355 SDValue NewOp1 =
11356 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11357 SDValue Res =
11358 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
11359 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11360 return;
11362 case Intrinsic::riscv_clmul: {
11363 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11364 return;
11366 SDValue NewOp0 =
11367 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11368 SDValue NewOp1 =
11369 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11370 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
11371 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11372 return;
11374 case Intrinsic::riscv_clmulh:
11375 case Intrinsic::riscv_clmulr: {
11376 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
11377 return;
11379 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
11380 // to the full 128-bit clmul result of multiplying two xlen values.
11381 // Perform clmulr or clmulh on the shifted values. Finally, extract the
11382 // upper 32 bits.
11384 // The alternative is to mask the inputs to 32 bits and use clmul, but
11385 // that requires two shifts to mask each input without zext.w.
11386 // FIXME: If the inputs are known zero extended or could be freely
11387 // zero extended, the mask form would be better.
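// For example, for clmulh: clmulh(a << 32, b << 32) equals the full 64-bit
// clmul(a, b), so the srl by 32 below leaves its upper half, which is the
// desired 32-bit clmulh(a, b).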
11388 SDValue NewOp0 =
11389 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11390 SDValue NewOp1 =
11391 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
11392 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
11393 DAG.getConstant(32, DL, MVT::i64));
11394 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
11395 DAG.getConstant(32, DL, MVT::i64));
11396 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
11397 : RISCVISD::CLMULR;
11398 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
11399 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
11400 DAG.getConstant(32, DL, MVT::i64));
11401 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11402 return;
11404 case Intrinsic::riscv_vmv_x_s: {
11405 EVT VT = N->getValueType(0);
11406 MVT XLenVT = Subtarget.getXLenVT();
11407 if (VT.bitsLT(XLenVT)) {
11408 // Simple case: just extract using vmv.x.s and truncate.
11409 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
11410 Subtarget.getXLenVT(), N->getOperand(1));
11411 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
11412 return;
11415 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
11416 "Unexpected custom legalization");
11418 // We need to do the move in two steps.
11419 SDValue Vec = N->getOperand(1);
11420 MVT VecVT = Vec.getSimpleValueType();
11422 // First extract the lower XLEN bits of the element.
11423 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11425 // To extract the upper XLEN bits of the vector element, shift the first
11426 // element right by 32 bits and re-extract the lower XLEN bits.
11427 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
11429 SDValue ThirtyTwoV =
11430 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
11431 DAG.getConstant(32, DL, XLenVT), VL);
11432 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
11433 DAG.getUNDEF(VecVT), Mask, VL);
11434 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
11436 Results.push_back(
11437 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
11438 break;
11441 break;
11443 case ISD::VECREDUCE_ADD:
11444 case ISD::VECREDUCE_AND:
11445 case ISD::VECREDUCE_OR:
11446 case ISD::VECREDUCE_XOR:
11447 case ISD::VECREDUCE_SMAX:
11448 case ISD::VECREDUCE_UMAX:
11449 case ISD::VECREDUCE_SMIN:
11450 case ISD::VECREDUCE_UMIN:
11451 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
11452 Results.push_back(V);
11453 break;
11454 case ISD::VP_REDUCE_ADD:
11455 case ISD::VP_REDUCE_AND:
11456 case ISD::VP_REDUCE_OR:
11457 case ISD::VP_REDUCE_XOR:
11458 case ISD::VP_REDUCE_SMAX:
11459 case ISD::VP_REDUCE_UMAX:
11460 case ISD::VP_REDUCE_SMIN:
11461 case ISD::VP_REDUCE_UMIN:
11462 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
11463 Results.push_back(V);
11464 break;
11465 case ISD::GET_ROUNDING: {
11466 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
11467 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
11468 Results.push_back(Res.getValue(0));
11469 Results.push_back(Res.getValue(1));
11470 break;
11475 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
11476 /// which corresponds to it.
11477 static unsigned getVecReduceOpcode(unsigned Opc) {
11478 switch (Opc) {
11479 default:
11480 llvm_unreachable("Unhandled binary to transfrom reduction");
11481 case ISD::ADD:
11482 return ISD::VECREDUCE_ADD;
11483 case ISD::UMAX:
11484 return ISD::VECREDUCE_UMAX;
11485 case ISD::SMAX:
11486 return ISD::VECREDUCE_SMAX;
11487 case ISD::UMIN:
11488 return ISD::VECREDUCE_UMIN;
11489 case ISD::SMIN:
11490 return ISD::VECREDUCE_SMIN;
11491 case ISD::AND:
11492 return ISD::VECREDUCE_AND;
11493 case ISD::OR:
11494 return ISD::VECREDUCE_OR;
11495 case ISD::XOR:
11496 return ISD::VECREDUCE_XOR;
11497 case ISD::FADD:
11498 // Note: This is the associative form of the generic reduction opcode.
11499 return ISD::VECREDUCE_FADD;
11503 /// Perform two related transforms whose purpose is to incrementally recognize
11504 /// an explode_vector followed by scalar reduction as a vector reduction node.
11505 /// This exists to recover from a deficiency in SLP which can't handle
11506 /// forests with multiple roots sharing common nodes. In some cases, one
11507 /// of the trees will be vectorized, and the other will remain (unprofitably)
11508 /// scalarized.
11509 static SDValue
11510 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
11511 const RISCVSubtarget &Subtarget) {
11513 // This transform needs to run before all integer types have been legalized
11514 // to i64 (so that the vector element type matches the add type), and while
11515 // it's safe to introduce odd sized vector types.
11516 if (DAG.NewNodesMustHaveLegalTypes)
11517 return SDValue();
11519 // Without V, this transform isn't useful. We could form the (illegal)
11520 // operations and let them be scalarized again, but there's really no point.
11521 if (!Subtarget.hasVInstructions())
11522 return SDValue();
11524 const SDLoc DL(N);
11525 const EVT VT = N->getValueType(0);
11526 const unsigned Opc = N->getOpcode();
11528 // For FADD, we only handle the case with reassociation allowed. We
11529 // could handle strict reduction order, but at the moment, there's no
11530 // known reason to, and the complexity isn't worth it.
11531 // TODO: Handle fminnum and fmaxnum here
11532 if (!VT.isInteger() &&
11533 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
11534 return SDValue();
11536 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
11537 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
11538 "Inconsistent mappings");
11539 SDValue LHS = N->getOperand(0);
11540 SDValue RHS = N->getOperand(1);
11542 if (!LHS.hasOneUse() || !RHS.hasOneUse())
11543 return SDValue();
11545 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11546 std::swap(LHS, RHS);
11548 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11549 !isa<ConstantSDNode>(RHS.getOperand(1)))
11550 return SDValue();
11552 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
11553 SDValue SrcVec = RHS.getOperand(0);
11554 EVT SrcVecVT = SrcVec.getValueType();
11555 assert(SrcVecVT.getVectorElementType() == VT);
11556 if (SrcVecVT.isScalableVector())
11557 return SDValue();
11559 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
11560 return SDValue();
11562 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
11563 // reduce_op (extract_subvector [2 x VT] from V). This will form the
11564 // root of our reduction tree. TODO: We could extend this to any two
11565 // adjacent aligned constant indices if desired.
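// For example, (add (extractelt v4i32 V, 0), (extractelt v4i32 V, 1)) becomes
// (vecreduce_add (extract_subvector v2i32 V, 0)).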
11566 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11567 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
11568 uint64_t LHSIdx =
11569 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
11570 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
11571 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
11572 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
11573 DAG.getVectorIdxConstant(0, DL));
11574 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
11578 // Match (binop (reduce (extract_subvector V, 0)),
11579 //        (extract_vector_elt V, sizeof(SubVec)))
11580 // into a reduction of one more element from the original vector V.
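// For example, (add (vecreduce_add (extract_subvector v2i32 V, 0)),
// (extractelt V, 2)) becomes (vecreduce_add (extract_subvector v3i32 V, 0)).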
11581 if (LHS.getOpcode() != ReduceOpc)
11582 return SDValue();
11584 SDValue ReduceVec = LHS.getOperand(0);
11585 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
11586 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
11587 isNullConstant(ReduceVec.getOperand(1)) &&
11588 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
11589 // For illegal types (e.g. 3xi32), most will be combined again into a
11590 // wider (hopefully legal) type. If this is a terminal state, we are
11591 // relying on type legalization here to produce something reasonable
11592 // and this lowering quality could probably be improved. (TODO)
11593 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
11594 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
11595 DAG.getVectorIdxConstant(0, DL));
11596 auto Flags = ReduceVec->getFlags();
11597 Flags.intersectWith(N->getFlags());
11598 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
11601 return SDValue();
11605 // Try to fold (<bop> x, (reduction.<bop> vec, start))
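// For example, (add X, (extractelt (vecreduce_add_vl ..., (splat 0), ...), 0))
// can use X as the reduction's start value instead, removing the scalar add,
// provided the existing start value is the neutral element (0 for add).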
11606 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
11607 const RISCVSubtarget &Subtarget) {
11608 auto BinOpToRVVReduce = [](unsigned Opc) {
11609 switch (Opc) {
11610 default:
11611 llvm_unreachable("Unhandled binary to transfrom reduction");
11612 case ISD::ADD:
11613 return RISCVISD::VECREDUCE_ADD_VL;
11614 case ISD::UMAX:
11615 return RISCVISD::VECREDUCE_UMAX_VL;
11616 case ISD::SMAX:
11617 return RISCVISD::VECREDUCE_SMAX_VL;
11618 case ISD::UMIN:
11619 return RISCVISD::VECREDUCE_UMIN_VL;
11620 case ISD::SMIN:
11621 return RISCVISD::VECREDUCE_SMIN_VL;
11622 case ISD::AND:
11623 return RISCVISD::VECREDUCE_AND_VL;
11624 case ISD::OR:
11625 return RISCVISD::VECREDUCE_OR_VL;
11626 case ISD::XOR:
11627 return RISCVISD::VECREDUCE_XOR_VL;
11628 case ISD::FADD:
11629 return RISCVISD::VECREDUCE_FADD_VL;
11630 case ISD::FMAXNUM:
11631 return RISCVISD::VECREDUCE_FMAX_VL;
11632 case ISD::FMINNUM:
11633 return RISCVISD::VECREDUCE_FMIN_VL;
11637 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
11638 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11639 isNullConstant(V.getOperand(1)) &&
11640 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
11643 unsigned Opc = N->getOpcode();
11644 unsigned ReduceIdx;
11645 if (IsReduction(N->getOperand(0), Opc))
11646 ReduceIdx = 0;
11647 else if (IsReduction(N->getOperand(1), Opc))
11648 ReduceIdx = 1;
11649 else
11650 return SDValue();
11652 // Skip if this is an FADD that disallows reassociation, which this combine requires.
11653 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
11654 return SDValue();
11656 SDValue Extract = N->getOperand(ReduceIdx);
11657 SDValue Reduce = Extract.getOperand(0);
11658 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
11659 return SDValue();
11661 SDValue ScalarV = Reduce.getOperand(2);
11662 EVT ScalarVT = ScalarV.getValueType();
11663 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
11664 ScalarV.getOperand(0)->isUndef() &&
11665 isNullConstant(ScalarV.getOperand(2)))
11666 ScalarV = ScalarV.getOperand(1);
11668 // Make sure that ScalarV is a splat with VL=1.
11669 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
11670 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
11671 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
11672 return SDValue();
11674 if (!isNonZeroAVL(ScalarV.getOperand(2)))
11675 return SDValue();
11677 // Check that the scalar of ScalarV is the neutral element.
11678 // TODO: Deal with value other than neutral element.
11679 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
11681 return SDValue();
11683 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
11684 // FIXME: We might be able to improve this if operand 0 is undef.
11685 if (!isNonZeroAVL(Reduce.getOperand(5)))
11686 return SDValue();
11688 SDValue NewStart = N->getOperand(1 - ReduceIdx);
11690 SDLoc DL(N);
11691 SDValue NewScalarV =
11692 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
11693 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
11695 // If we looked through an INSERT_SUBVECTOR we need to restore it.
11696 if (ScalarVT != ScalarV.getValueType())
11697 NewScalarV =
11698 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
11699 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
11701 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
11702 NewScalarV, Reduce.getOperand(3),
11703 Reduce.getOperand(4), Reduce.getOperand(5)};
11704 SDValue NewReduce =
11705 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
11706 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
11707 Extract.getOperand(1));
11710 // Optimize (add (shl x, c0), (shl y, c1)) ->
11711 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
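// E.g., with c0 = 2 and c1 = 5: (add (shl x, 2), (shl y, 5))
// -> (shl (add (shl y, 3), x), 2), i.e. an SH3ADD followed by an SLLI by 2.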
11712 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
11713 const RISCVSubtarget &Subtarget) {
11714 // Perform this optimization only if the Zba extension is enabled.
11715 if (!Subtarget.hasStdExtZba())
11716 return SDValue();
11718 // Skip for vector types and larger types.
11719 EVT VT = N->getValueType(0);
11720 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
11721 return SDValue();
11723 // The two operand nodes must be SHL and have no other use.
11724 SDValue N0 = N->getOperand(0);
11725 SDValue N1 = N->getOperand(1);
11726 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
11727 !N0->hasOneUse() || !N1->hasOneUse())
11728 return SDValue();
11730 // Check c0 and c1.
11731 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
11732 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
11733 if (!N0C || !N1C)
11734 return SDValue();
11735 int64_t C0 = N0C->getSExtValue();
11736 int64_t C1 = N1C->getSExtValue();
11737 if (C0 <= 0 || C1 <= 0)
11738 return SDValue();
11740 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
11741 int64_t Bits = std::min(C0, C1);
11742 int64_t Diff = std::abs(C0 - C1);
11743 if (Diff != 1 && Diff != 2 && Diff != 3)
11744 return SDValue();
11746 // Build nodes.
11747 SDLoc DL(N);
11748 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
11749 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
11750 SDValue NA0 =
11751 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
11752 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
11753 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
11756 // Combine a constant select operand into its use:
11758 // (and (select cond, -1, c), x)
11759 // -> (select cond, x, (and x, c)) [AllOnes=1]
11760 // (or (select cond, 0, c), x)
11761 // -> (select cond, x, (or x, c)) [AllOnes=0]
11762 // (xor (select cond, 0, c), x)
11763 // -> (select cond, x, (xor x, c)) [AllOnes=0]
11764 // (add (select cond, 0, c), x)
11765 // -> (select cond, x, (add x, c)) [AllOnes=0]
11766 // (sub x, (select cond, 0, c))
11767 // -> (select cond, x, (sub x, c)) [AllOnes=0]
11768 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
11769 SelectionDAG &DAG, bool AllOnes,
11770 const RISCVSubtarget &Subtarget) {
11771 EVT VT = N->getValueType(0);
11773 // Skip vectors.
11774 if (VT.isVector())
11775 return SDValue();
11777 if (!Subtarget.hasShortForwardBranchOpt()) {
11778 // (select cond, x, (and x, c)) has custom lowering with Zicond.
11779 if ((!Subtarget.hasStdExtZicond() &&
11780 !Subtarget.hasVendorXVentanaCondOps()) ||
11781 N->getOpcode() != ISD::AND)
11782 return SDValue();
11784 // Maybe harmful when the condition code has multiple uses.
11785 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
11786 return SDValue();
11788 // Maybe harmful when VT is wider than XLen.
11789 if (VT.getSizeInBits() > Subtarget.getXLen())
11790 return SDValue();
11793 if ((Slct.getOpcode() != ISD::SELECT &&
11794 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
11795 !Slct.hasOneUse())
11796 return SDValue();
11798 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
11799 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
11802 bool SwapSelectOps;
11803 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
11804 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
11805 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
11806 SDValue NonConstantVal;
11807 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
11808 SwapSelectOps = false;
11809 NonConstantVal = FalseVal;
11810 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
11811 SwapSelectOps = true;
11812 NonConstantVal = TrueVal;
11813 } else
11814 return SDValue();
11816 // Slct is now known to be the desired identity constant when CC is true.
11817 TrueVal = OtherOp;
11818 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
11819 // Unless SwapSelectOps says the condition should be false.
11820 if (SwapSelectOps)
11821 std::swap(TrueVal, FalseVal);
11823 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
11824 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
11825 {Slct.getOperand(0), Slct.getOperand(1),
11826 Slct.getOperand(2), TrueVal, FalseVal});
11828 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
11829 {Slct.getOperand(0), TrueVal, FalseVal});
11832 // Attempt combineSelectAndUse on each operand of a commutative operator N.
11833 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
11834 bool AllOnes,
11835 const RISCVSubtarget &Subtarget) {
11836 SDValue N0 = N->getOperand(0);
11837 SDValue N1 = N->getOperand(1);
11838 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
11839 return Result;
11840 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
11841 return Result;
11842 return SDValue();
11845 // Transform (add (mul x, c0), c1) ->
11846 // (add (mul (add x, c1/c0), c0), c1%c0).
11847 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
11848 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
11849 // to an infinite loop in DAGCombine if transformed.
11850 // Or transform (add (mul x, c0), c1) ->
11851 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
11852 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
11853 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
11854 // lead to an infinite loop in DAGCombine if transformed.
11855 // Or transform (add (mul x, c0), c1) ->
11856 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
11857 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
11858 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
11859 // lead to an infinite loop in DAGCombine if transformed.
11860 // Or transform (add (mul x, c0), c1) ->
11861 // (mul (add x, c1/c0), c0).
11862 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
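// E.g., with c0 = 100 and c1 = 4099: c1 is not simm12, but c1/c0 = 40 and
// c1%c0 = 99 are, and c0*(c1/c0) = 4000 is not, so we rewrite
// (add (mul x, 100), 4099) -> (add (mul (add x, 40), 100), 99).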
11863 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
11864 const RISCVSubtarget &Subtarget) {
11865 // Skip for vector types and larger types.
11866 EVT VT = N->getValueType(0);
11867 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
11868 return SDValue();
11869 // The first operand node must be a MUL and have no other use.
11870 SDValue N0 = N->getOperand(0);
11871 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
11872 return SDValue();
11873 // Check if c0 and c1 match the conditions above.
11874 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
11875 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
11876 if (!N0C || !N1C)
11877 return SDValue();
11878 // If N0C has multiple uses it's possible one of the cases in
11879 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
11880 // in an infinite loop.
11881 if (!N0C->hasOneUse())
11882 return SDValue();
11883 int64_t C0 = N0C->getSExtValue();
11884 int64_t C1 = N1C->getSExtValue();
11885 int64_t CA, CB;
11886 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
11887 return SDValue();
11888 // Search for proper CA (non-zero) and CB that both are simm12.
11889 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
11890 !isInt<12>(C0 * (C1 / C0))) {
11891 CA = C1 / C0;
11892 CB = C1 % C0;
11893 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
11894 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
11895 CA = C1 / C0 + 1;
11896 CB = C1 % C0 - C0;
11897 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
11898 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
11899 CA = C1 / C0 - 1;
11900 CB = C1 % C0 + C0;
11901 } else
11902 return SDValue();
11903 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
11904 SDLoc DL(N);
11905 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
11906 DAG.getConstant(CA, DL, VT));
11907 SDValue New1 =
11908 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
11909 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
11912 // Try to turn (add (xor bool, 1), -1) into (neg bool).
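// For bool in {0, 1}, (xor bool, 1) computes 1 - bool, so adding -1 gives
// 0 - bool, i.e. (neg bool).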
11913 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
11914 SDValue N0 = N->getOperand(0);
11915 SDValue N1 = N->getOperand(1);
11916 EVT VT = N->getValueType(0);
11917 SDLoc DL(N);
11919 // RHS should be -1.
11920 if (!isAllOnesConstant(N1))
11921 return SDValue();
11923 // Look for (xor X, 1).
11924 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
11925 return SDValue();
11927 // First xor input should be 0 or 1.
11928 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
11929 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
11930 return SDValue();
11932 // Emit a negate of the setcc.
11933 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
11934 N0.getOperand(0));
11937 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
11938 const RISCVSubtarget &Subtarget) {
11939 if (SDValue V = combineAddOfBooleanXor(N, DAG))
11940 return V;
11941 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
11942 return V;
11943 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
11944 return V;
11945 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
11946 return V;
11947 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
11948 return V;
11950 // fold (add (select lhs, rhs, cc, 0, y), x) ->
11951 // (select lhs, rhs, cc, x, (add x, y))
11952 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
11955 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
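// E.g., (sub 5, (seteq x, y)) -> (add (setne x, y), 4), using
// (seteq x, y) == 1 - (setne x, y).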
11956 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
11957 SDValue N0 = N->getOperand(0);
11958 SDValue N1 = N->getOperand(1);
11959 EVT VT = N->getValueType(0);
11960 SDLoc DL(N);
11962 // Require a constant LHS.
11963 auto *N0C = dyn_cast<ConstantSDNode>(N0);
11964 if (!N0C)
11965 return SDValue();
11967 // All our optimizations involve subtracting 1 from the immediate and forming
11968 // an ADDI. Make sure the new immediate is valid for an ADDI.
11969 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
11970 if (!ImmValMinus1.isSignedIntN(12))
11971 return SDValue();
11973 SDValue NewLHS;
11974 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
11975 // (sub constant, (setcc x, y, eq/neq)) ->
11976 // (add (setcc x, y, neq/eq), constant - 1)
11977 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
11978 EVT SetCCOpVT = N1.getOperand(0).getValueType();
11979 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
11980 return SDValue();
11981 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
11982 NewLHS =
11983 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
11984 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
11985 N1.getOperand(0).getOpcode() == ISD::SETCC) {
11986 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
11987 // Since setcc returns a bool the xor is equivalent to 1-setcc.
11988 NewLHS = N1.getOperand(0);
11989 } else
11990 return SDValue();
11992 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
11993 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
11996 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
11997 const RISCVSubtarget &Subtarget) {
11998 if (SDValue V = combineSubOfBoolean(N, DAG))
11999 return V;
12001 SDValue N0 = N->getOperand(0);
12002 SDValue N1 = N->getOperand(1);
12003 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
12004 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
12005 isNullConstant(N1.getOperand(1))) {
12006 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
12007 if (CCVal == ISD::SETLT) {
12008 EVT VT = N->getValueType(0);
12009 SDLoc DL(N);
12010 unsigned ShAmt = N0.getValueSizeInBits() - 1;
12011 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
12012 DAG.getConstant(ShAmt, DL, VT));
12016 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
12017 // (select lhs, rhs, cc, x, (sub x, y))
12018 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
12021 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
12022 // Legalizing setcc can introduce xors like this. Doing this transform reduces
12023 // the number of xors and may allow the xor to fold into a branch condition.
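// E.g., (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1) and
//       (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1).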
12024 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
12025 SDValue N0 = N->getOperand(0);
12026 SDValue N1 = N->getOperand(1);
12027 bool IsAnd = N->getOpcode() == ISD::AND;
12029 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
12030 return SDValue();
12032 if (!N0.hasOneUse() || !N1.hasOneUse())
12033 return SDValue();
12035 SDValue N01 = N0.getOperand(1);
12036 SDValue N11 = N1.getOperand(1);
12038 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
12039 // (xor X, -1) based on the upper bits of the other operand being 0. If the
12040 // operation is And, allow one of the Xors to use -1.
12041 if (isOneConstant(N01)) {
12042 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
12043 return SDValue();
12044 } else if (isOneConstant(N11)) {
12045 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
12046 if (!(IsAnd && isAllOnesConstant(N01)))
12047 return SDValue();
12048 } else
12049 return SDValue();
12051 EVT VT = N->getValueType(0);
12053 SDValue N00 = N0.getOperand(0);
12054 SDValue N10 = N1.getOperand(0);
12056 // The LHS of the xors needs to be 0/1.
12057 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12058 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
12059 return SDValue();
12061 // Invert the opcode and insert a new xor.
12062 SDLoc DL(N);
12063 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12064 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
12065 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
12068 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
12069 const RISCVSubtarget &Subtarget) {
12070 SDValue N0 = N->getOperand(0);
12071 EVT VT = N->getValueType(0);
12073 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
12074 // extending X. This is safe since we only need the LSB after the shift and
12075 // shift amounts larger than 31 would produce poison. If we wait until
12076 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12077 // to use a BEXT instruction.
12078 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
12079 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
12080 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
12081 SDLoc DL(N0);
12082 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12083 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12084 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12085 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
12088 return SDValue();
12091 // Combines two comparison operations and a logic operation into one selection
12092 // operation (min, max) and logic operation. Returns the newly constructed node
12093 // if the conditions for the optimization are satisfied.
12094 static SDValue performANDCombine(SDNode *N,
12095 TargetLowering::DAGCombinerInfo &DCI,
12096 const RISCVSubtarget &Subtarget) {
12097 SelectionDAG &DAG = DCI.DAG;
12099 SDValue N0 = N->getOperand(0);
12100 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
12101 // extending X. This is safe since we only need the LSB after the shift and
12102 // shift amounts larger than 31 would produce poison. If we wait until
12103 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
12104 // to use a BEXT instruction.
12105 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
12106 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
12107 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
12108 N0.hasOneUse()) {
12109 SDLoc DL(N);
12110 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
12111 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
12112 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
12113 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
12114 DAG.getConstant(1, DL, MVT::i64));
12115 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
12118 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12119 return V;
12120 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12121 return V;
12123 if (DCI.isAfterLegalizeDAG())
12124 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12125 return V;
12127 // fold (and (select lhs, rhs, cc, -1, y), x) ->
12128 // (select lhs, rhs, cc, x, (and x, y))
12129 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
12132 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
12133 // FIXME: Generalize to other binary operators with same operand.
12134 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
12135 SelectionDAG &DAG) {
12136 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
12138 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
12139 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
12140 !N0.hasOneUse() || !N1.hasOneUse())
12141 return SDValue();
12143 // Should have the same condition.
12144 SDValue Cond = N0.getOperand(1);
12145 if (Cond != N1.getOperand(1))
12146 return SDValue();
12148 SDValue TrueV = N0.getOperand(0);
12149 SDValue FalseV = N1.getOperand(0);
12151 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
12152 TrueV.getOperand(1) != FalseV.getOperand(1) ||
12153 !isOneConstant(TrueV.getOperand(1)) ||
12154 !TrueV.hasOneUse() || !FalseV.hasOneUse())
12155 return SDValue();
12157 EVT VT = N->getValueType(0);
12158 SDLoc DL(N);
12160 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
12161 Cond);
12162 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
12163 Cond);
12164 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
12165 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
12168 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
12169 const RISCVSubtarget &Subtarget) {
12170 SelectionDAG &DAG = DCI.DAG;
12172 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12173 return V;
12174 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12175 return V;
12177 if (DCI.isAfterLegalizeDAG())
12178 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
12179 return V;
12181 // Look for an OR of CZERO_EQZ/NEZ with the same condition (the select idiom).
12182 // We may be able to pull a common operation out of the true and false value.
12183 SDValue N0 = N->getOperand(0);
12184 SDValue N1 = N->getOperand(1);
12185 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
12186 return V;
12187 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
12188 return V;
12190 // fold (or (select cond, 0, y), x) ->
12191 // (select cond, x, (or x, y))
12192 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12195 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
12196 const RISCVSubtarget &Subtarget) {
12197 SDValue N0 = N->getOperand(0);
12198 SDValue N1 = N->getOperand(1);
12200 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
12201 // NOTE: Assumes ROL being legal means ROLW is legal.
12202 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12203 if (N0.getOpcode() == RISCVISD::SLLW &&
12204 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
12205 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
12206 SDLoc DL(N);
12207 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
12208 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
12211 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
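// This holds because !(constant < y) == (y <= constant) == (y < constant + 1)
// for signed compares, and the simm12 check on (constant + 1) below rules out
// overflow.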
12212 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
12213 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
12214 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12215 if (ConstN00 && CC == ISD::SETLT) {
12216 EVT VT = N0.getValueType();
12217 SDLoc DL(N0);
12218 const APInt &Imm = ConstN00->getAPIntValue();
12219 if ((Imm + 1).isSignedIntN(12))
12220 return DAG.getSetCC(DL, VT, N0.getOperand(1),
12221 DAG.getConstant(Imm + 1, DL, VT), CC);
12225 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
12226 return V;
12227 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
12228 return V;
12230 // fold (xor (select cond, 0, y), x) ->
12231 // (select cond, x, (xor x, y))
12232 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
12235 /// According to the property that indexed load/store instructions zero-extend
12236 /// their indices, try to narrow the type of the index operand.
12237 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
12238 if (isIndexTypeSigned(IndexType))
12239 return false;
12241 if (!N->hasOneUse())
12242 return false;
12244 EVT VT = N.getValueType();
12245 SDLoc DL(N);
12247 // In general, what we're doing here is seeing if we can sink a truncate to
12248 // a smaller element type into the expression tree building our index.
12249 // TODO: We can generalize this and handle a bunch more cases if useful.
12251 // Narrow a buildvector to the narrowest element type. This requires less
12252 // work and less register pressure at high LMUL, and creates smaller constants
12253 // which may be cheaper to materialize.
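// E.g., a v4i64 constant index vector {0, 8, 16, 24} has at most 5 active
// bits, so it can be truncated to v4i8.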
12254 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
12255 KnownBits Known = DAG.computeKnownBits(N);
12256 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
12257 LLVMContext &C = *DAG.getContext();
12258 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
12259 if (ResultVT.bitsLT(VT.getVectorElementType())) {
12260 N = DAG.getNode(ISD::TRUNCATE, DL,
12261 VT.changeVectorElementType(ResultVT), N);
12262 return true;
12266 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
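// E.g., (shl (zext v4i8 X to v4i64), splat 2) only needs 8 + 2 = 10 bits per
// element, so the extend and shift can instead be done in v4i16.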
12267 if (N.getOpcode() != ISD::SHL)
12268 return false;
12270 SDValue N0 = N.getOperand(0);
12271 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
12272 N0.getOpcode() != RISCVISD::VZEXT_VL)
12273 return false;
12274 if (!N0->hasOneUse())
12275 return false;
12277 APInt ShAmt;
12278 SDValue N1 = N.getOperand(1);
12279 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
12280 return false;
12282 SDValue Src = N0.getOperand(0);
12283 EVT SrcVT = Src.getValueType();
12284 unsigned SrcElen = SrcVT.getScalarSizeInBits();
12285 unsigned ShAmtV = ShAmt.getZExtValue();
12286 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
12287 NewElen = std::max(NewElen, 8U);
12289 // Skip if NewElen is not narrower than the original extended type.
12290 if (NewElen >= N0.getValueType().getScalarSizeInBits())
12291 return false;
12293 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
12294 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
12296 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
12297 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
12298 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
12299 return true;
12302 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
12303 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
12304 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
12305 // can become a sext.w instead of a shift pair.
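// E.g., for C1 = 0x80000000 the new constant C1' is 0xffffffff80000000, which
// a single LUI can materialize on RV64.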
12306 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
12307 const RISCVSubtarget &Subtarget) {
12308 SDValue N0 = N->getOperand(0);
12309 SDValue N1 = N->getOperand(1);
12310 EVT VT = N->getValueType(0);
12311 EVT OpVT = N0.getValueType();
12313 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
12314 return SDValue();
12316 // RHS needs to be a constant.
12317 auto *N1C = dyn_cast<ConstantSDNode>(N1);
12318 if (!N1C)
12319 return SDValue();
12321 // LHS needs to be (and X, 0xffffffff).
12322 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
12323 !isa<ConstantSDNode>(N0.getOperand(1)) ||
12324 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
12325 return SDValue();
12327 // Looking for an equality compare.
12328 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12329 if (!isIntEqualitySetCC(Cond))
12330 return SDValue();
12332 // Don't do this if the sign bit is provably zero; it will be turned back into
12333 // an AND.
12334 APInt SignMask = APInt::getOneBitSet(64, 31);
12335 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
12336 return SDValue();
12338 const APInt &C1 = N1C->getAPIntValue();
12340 SDLoc dl(N);
12341 // If the constant is larger than 2^32 - 1 it is impossible for both sides
12342 // to be equal.
12343 if (C1.getActiveBits() > 32)
12344 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
12346 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
12347 N0.getOperand(0), DAG.getValueType(MVT::i32));
12348 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
12349 dl, OpVT), Cond);
12352 static SDValue
12353 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
12354 const RISCVSubtarget &Subtarget) {
12355 SDValue Src = N->getOperand(0);
12356 EVT VT = N->getValueType(0);
12358 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
12359 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12360 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
12361 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
12362 Src.getOperand(0));
12364 return SDValue();
12367 namespace {
12368 // Forward declaration of the structure holding the necessary information to
12369 // apply a combine.
12370 struct CombineResult;
12372 /// Helper class for folding sign/zero extensions.
12373 /// In particular, this class is used for the following combines:
12374 /// add_vl -> vwadd(u) | vwadd(u)_w
12375 /// sub_vl -> vwsub(u) | vwsub(u)_w
12376 /// mul_vl -> vwmul(u) | vwmul_su
12378 /// An object of this class represents an operand of the operation we want to
12379 /// combine.
12380 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
12381 /// NodeExtensionHelper for `a` and one for `b`.
12383 /// This class abstracts away how the extension is materialized and
12384 /// how its Mask, VL, and number of users affect the combines.
12386 /// In particular:
12387 /// - VWADD_W is conceptually == add(op0, sext(op1))
12388 /// - VWADDU_W == add(op0, zext(op1))
12389 /// - VWSUB_W == sub(op0, sext(op1))
12390 /// - VWSUBU_W == sub(op0, zext(op1))
12392 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
12393 /// zext|sext(smaller_value).
12394 struct NodeExtensionHelper {
12395 /// Records if this operand is like being zero extended.
12396 bool SupportsZExt;
12397 /// Records if this operand is like being sign extended.
12398 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
12399 /// instance, a splat constant (e.g., 3) would support being both sign and
12400 /// zero extended.
12401 bool SupportsSExt;
12402 /// This boolean captures whether we care if this operand would still be
12403 /// around after the folding happens.
12404 bool EnforceOneUse;
12405 /// Records if this operand's mask needs to match the mask of the operation
12406 /// that it will fold into.
12407 bool CheckMask;
12408 /// Value of the Mask for this operand.
12409 /// It may be SDValue().
12410 SDValue Mask;
12411 /// Value of the vector length operand.
12412 /// It may be SDValue().
12413 SDValue VL;
12414 /// Original value that this NodeExtensionHelper represents.
12415 SDValue OrigOperand;
12417 /// Get the value feeding the extension or the value itself.
12418 /// E.g., for zext(a), this would return a.
12419 SDValue getSource() const {
12420 switch (OrigOperand.getOpcode()) {
12421 case RISCVISD::VSEXT_VL:
12422 case RISCVISD::VZEXT_VL:
12423 return OrigOperand.getOperand(0);
12424 default:
12425 return OrigOperand;
12429 /// Check if this instance represents a splat.
12430 bool isSplat() const {
12431 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
12434 /// Get or create a value that can feed \p Root with the given extension \p
12435 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
12436 /// \see ::getSource().
12437 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
12438 std::optional<bool> SExt) const {
12439 if (!SExt.has_value())
12440 return OrigOperand;
12442 MVT NarrowVT = getNarrowType(Root);
12444 SDValue Source = getSource();
12445 if (Source.getValueType() == NarrowVT)
12446 return Source;
12448 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
12450 // If we need an extension, we should be changing the type.
12451 SDLoc DL(Root);
12452 auto [Mask, VL] = getMaskAndVL(Root);
12453 switch (OrigOperand.getOpcode()) {
12454 case RISCVISD::VSEXT_VL:
12455 case RISCVISD::VZEXT_VL:
12456 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
12457 case RISCVISD::VMV_V_X_VL:
12458 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
12459 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
12460 default:
12461 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
12462 // and that operand should already have the right NarrowVT so no
12463 // extension should be required at this point.
12464 llvm_unreachable("Unsupported opcode");
12468 /// Helper function to get the narrow type for \p Root.
12469 /// The narrow type is the type of \p Root where we divided the size of each
12470 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
12471 /// \pre The size of the type of the elements of Root must be a multiple of 2
12472 /// and be greater than 16.
12473 static MVT getNarrowType(const SDNode *Root) {
12474 MVT VT = Root->getSimpleValueType(0);
12476 // Determine the narrow size.
12477 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
12478 assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
12479 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
12480 VT.getVectorElementCount());
12481 return NarrowVT;
12484 /// Return the opcode required to materialize the folding of the sign
12485 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
12486 /// both operands for \p Opcode.
12487 /// Put differently, get the opcode to materialize:
12488 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
12489 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
12490 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
12491 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
12492 switch (Opcode) {
12493 case RISCVISD::ADD_VL:
12494 case RISCVISD::VWADD_W_VL:
12495 case RISCVISD::VWADDU_W_VL:
12496 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
12497 case RISCVISD::MUL_VL:
12498 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
12499 case RISCVISD::SUB_VL:
12500 case RISCVISD::VWSUB_W_VL:
12501 case RISCVISD::VWSUBU_W_VL:
12502 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
12503 default:
12504 llvm_unreachable("Unexpected opcode");
12508 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
12509 /// newOpcode(a, b).
12510 static unsigned getSUOpcode(unsigned Opcode) {
12511 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
12512 return RISCVISD::VWMULSU_VL;
12515 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
12516 /// newOpcode(a, b).
12517 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
12518 switch (Opcode) {
12519 case RISCVISD::ADD_VL:
12520 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
12521 case RISCVISD::SUB_VL:
12522 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
12523 default:
12524 llvm_unreachable("Unexpected opcode");
12528 using CombineToTry = std::function<std::optional<CombineResult>(
12529 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
12530 const NodeExtensionHelper & /*RHS*/)>;
12532 /// Check if this node needs to be fully folded or extended for all users.
12533 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
12535 /// Helper method to set the various fields of this struct based on the
12536 /// type of \p Root.
12537 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
12538 SupportsZExt = false;
12539 SupportsSExt = false;
12540 EnforceOneUse = true;
12541 CheckMask = true;
12542 switch (OrigOperand.getOpcode()) {
12543 case RISCVISD::VZEXT_VL:
12544 SupportsZExt = true;
12545 Mask = OrigOperand.getOperand(1);
12546 VL = OrigOperand.getOperand(2);
12547 break;
12548 case RISCVISD::VSEXT_VL:
12549 SupportsSExt = true;
12550 Mask = OrigOperand.getOperand(1);
12551 VL = OrigOperand.getOperand(2);
12552 break;
12553 case RISCVISD::VMV_V_X_VL: {
12554 // Historically, we didn't care about splat values not disappearing during
12555 // combines.
12556 EnforceOneUse = false;
12557 CheckMask = false;
12558 VL = OrigOperand.getOperand(2);
12560 // The operand is a splat of a scalar.
12562 // The passthru must be undef for the operation to be tail agnostic.
12563 if (!OrigOperand.getOperand(0).isUndef())
12564 break;
12566 // Get the scalar value.
12567 SDValue Op = OrigOperand.getOperand(1);
12569 // See if we have enough sign bits or zero bits in the scalar to use a
12570 // widening opcode by splatting to smaller element size.
12571 MVT VT = Root->getSimpleValueType(0);
12572 unsigned EltBits = VT.getScalarSizeInBits();
12573 unsigned ScalarBits = Op.getValueSizeInBits();
12574 // Make sure we're getting all element bits from the scalar register.
12575 // FIXME: Support implicit sign extension of vmv.v.x?
12576 if (ScalarBits < EltBits)
12577 break;
12579 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
12580 // If the narrow type cannot be expressed with a legal VMV,
12581 // this is not a valid candidate.
12582 if (NarrowSize < 8)
12583 break;
12585 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
12586 SupportsSExt = true;
12587 if (DAG.MaskedValueIsZero(Op,
12588 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
12589 SupportsZExt = true;
12590 break;
12592 default:
12593 break;
12597 /// Check if \p Root supports any extension folding combines.
12598 static bool isSupportedRoot(const SDNode *Root) {
12599 switch (Root->getOpcode()) {
12600 case RISCVISD::ADD_VL:
12601 case RISCVISD::MUL_VL:
12602 case RISCVISD::VWADD_W_VL:
12603 case RISCVISD::VWADDU_W_VL:
12604 case RISCVISD::SUB_VL:
12605 case RISCVISD::VWSUB_W_VL:
12606 case RISCVISD::VWSUBU_W_VL:
12607 return true;
12608 default:
12609 return false;
12613 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
12614 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
12615 assert(isSupportedRoot(Root) && "Trying to build a helper with an "
12616 "unsupported root");
12617 assert(OperandIdx < 2 && "Requesting something other than LHS or RHS");
12618 OrigOperand = Root->getOperand(OperandIdx);
12620 unsigned Opc = Root->getOpcode();
12621 switch (Opc) {
12622 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
12623 // <ADD|SUB>(LHS, S|ZEXT(RHS))
12624 case RISCVISD::VWADD_W_VL:
12625 case RISCVISD::VWADDU_W_VL:
12626 case RISCVISD::VWSUB_W_VL:
12627 case RISCVISD::VWSUBU_W_VL:
12628 if (OperandIdx == 1) {
12629 SupportsZExt =
12630 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
12631 SupportsSExt = !SupportsZExt;
12632 std::tie(Mask, VL) = getMaskAndVL(Root);
12633 CheckMask = true;
12634 // There's no existing extension here, so we don't have to worry about
12635 // making sure it gets removed.
12636 EnforceOneUse = false;
12637 break;
12639 [[fallthrough]];
12640 default:
12641 fillUpExtensionSupport(Root, DAG);
12642 break;
12646 /// Check if this operand is compatible with the given vector length \p VL.
12647 bool isVLCompatible(SDValue VL) const {
12648 return this->VL != SDValue() && this->VL == VL;
12651 /// Check if this operand is compatible with the given \p Mask.
12652 bool isMaskCompatible(SDValue Mask) const {
12653 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
12656 /// Helper function to get the Mask and VL from \p Root.
12657 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
12658 assert(isSupportedRoot(Root) && "Unexpected root");
12659 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
12662 /// Check if the Mask and VL of this operand are compatible with \p Root.
12663 bool areVLAndMaskCompatible(const SDNode *Root) const {
12664 auto [Mask, VL] = getMaskAndVL(Root);
12665 return isMaskCompatible(Mask) && isVLCompatible(VL);
12668 /// Helper function to check if \p N is commutative with respect to the
12669 /// foldings that are supported by this class.
12670 static bool isCommutative(const SDNode *N) {
12671 switch (N->getOpcode()) {
12672 case RISCVISD::ADD_VL:
12673 case RISCVISD::MUL_VL:
12674 case RISCVISD::VWADD_W_VL:
12675 case RISCVISD::VWADDU_W_VL:
12676 return true;
12677 case RISCVISD::SUB_VL:
12678 case RISCVISD::VWSUB_W_VL:
12679 case RISCVISD::VWSUBU_W_VL:
12680 return false;
12681 default:
12682 llvm_unreachable("Unexpected opcode");
12686 /// Get a list of combine to try for folding extensions in \p Root.
12687 /// Note that each returned CombineToTry function doesn't actually modify
12688 /// anything. Instead they produce an optional CombineResult that, if not
12689 /// std::nullopt, needs to be materialized for the combine to be applied.
12690 /// \see CombineResult::materialize.
12691 /// If the related CombineToTry function returns std::nullopt, that means the
12692 /// combine didn't match.
12693 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
12696 /// Helper structure that holds all the necessary information to materialize a
12697 /// combine that does some extension folding.
12698 struct CombineResult {
12699 /// Opcode to be generated when materializing the combine.
12700 unsigned TargetOpcode;
12701 // No value means no extension is needed. If extension is needed, the value
12702 // indicates if it needs to be sign extended.
12703 std::optional<bool> SExtLHS;
12704 std::optional<bool> SExtRHS;
12705 /// Root of the combine.
12706 SDNode *Root;
12707 /// LHS of the TargetOpcode.
12708 NodeExtensionHelper LHS;
12709 /// RHS of the TargetOpcode.
12710 NodeExtensionHelper RHS;
12712 CombineResult(unsigned TargetOpcode, SDNode *Root,
12713 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
12714 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
12715 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
12716 Root(Root), LHS(LHS), RHS(RHS) {}
12718 /// Return a value that uses TargetOpcode and that can be used to replace
12719 /// Root.
12720 /// The actual replacement is *not* done in that method.
12721 SDValue materialize(SelectionDAG &DAG) const {
12722 SDValue Mask, VL, Merge;
12723 std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
12724 Merge = Root->getOperand(2);
12725 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
12726 LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
12727 RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
12728 Mask, VL);
12732 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
12733 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
12734 /// are zext) and LHS and RHS can be folded into Root.
12735 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
12737 /// \note If the pattern can match with both zext and sext, the returned
12738 /// CombineResult will feature the zext result.
12740 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12741 /// can be used to apply the pattern.
12742 static std::optional<CombineResult>
12743 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
12744 const NodeExtensionHelper &RHS, bool AllowSExt,
12745 bool AllowZExt) {
12746 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
12747 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
12748 return std::nullopt;
12749 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
12750 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
12751 Root->getOpcode(), /*IsSExt=*/false),
12752 Root, LHS, /*SExtLHS=*/false, RHS,
12753 /*SExtRHS=*/false);
12754 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
12755 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
12756 Root->getOpcode(), /*IsSExt=*/true),
12757 Root, LHS, /*SExtLHS=*/true, RHS,
12758 /*SExtRHS=*/true);
12759 return std::nullopt;
12762 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
12763 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
12764 /// are zext) and LHS and RHS can be folded into Root.
12766 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12767 /// can be used to apply the pattern.
12768 static std::optional<CombineResult>
12769 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
12770 const NodeExtensionHelper &RHS) {
12771 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
12772 /*AllowZExt=*/true);
12775 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
12777 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12778 /// can be used to apply the pattern.
12779 static std::optional<CombineResult>
12780 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
12781 const NodeExtensionHelper &RHS) {
12782 if (!RHS.areVLAndMaskCompatible(Root))
12783 return std::nullopt;
12785 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
12786 // sext/zext?
12787 // Control this behavior behind an option (AllowSplatInVW_W) for testing
12788 // purposes.
12789 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
12790 return CombineResult(
12791 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
12792 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
12793 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
12794 return CombineResult(
12795 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
12796 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
12797 return std::nullopt;
12800 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
12802 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12803 /// can be used to apply the pattern.
12804 static std::optional<CombineResult>
12805 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
12806 const NodeExtensionHelper &RHS) {
12807 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
12808 /*AllowZExt=*/false);
12811 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
12813 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12814 /// can be used to apply the pattern.
12815 static std::optional<CombineResult>
12816 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
12817 const NodeExtensionHelper &RHS) {
12818 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
12819 /*AllowZExt=*/true);
12822 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
12824 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
12825 /// can be used to apply the pattern.
12826 static std::optional<CombineResult>
12827 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
12828 const NodeExtensionHelper &RHS) {
12829 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
12830 return std::nullopt;
12831 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
12832 return std::nullopt;
12833 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
12834 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
12837 SmallVector<NodeExtensionHelper::CombineToTry>
12838 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
12839 SmallVector<CombineToTry> Strategies;
12840 switch (Root->getOpcode()) {
12841 case RISCVISD::ADD_VL:
12842 case RISCVISD::SUB_VL:
12843 // add|sub -> vwadd(u)|vwsub(u)
12844 Strategies.push_back(canFoldToVWWithSameExtension);
12845 // add|sub -> vwadd(u)_w|vwsub(u)_w
12846 Strategies.push_back(canFoldToVW_W);
12847 break;
12848 case RISCVISD::MUL_VL:
12849 // mul -> vwmul(u)
12850 Strategies.push_back(canFoldToVWWithSameExtension);
12851 // mul -> vwmulsu
12852 Strategies.push_back(canFoldToVW_SU);
12853 break;
12854 case RISCVISD::VWADD_W_VL:
12855 case RISCVISD::VWSUB_W_VL:
12856 // vwadd_w|vwsub_w -> vwadd|vwsub
12857 Strategies.push_back(canFoldToVWWithSEXT);
12858 break;
12859 case RISCVISD::VWADDU_W_VL:
12860 case RISCVISD::VWSUBU_W_VL:
12861 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
12862 Strategies.push_back(canFoldToVWWithZEXT);
12863 break;
12864 default:
12865 llvm_unreachable("Unexpected opcode");
12867 return Strategies;
12869 } // End anonymous namespace.
12871 /// Combine a binary operation to its equivalent VW or VW_W form.
12872 /// The supported combines are:
12873 /// add_vl -> vwadd(u) | vwadd(u)_w
12874 /// sub_vl -> vwsub(u) | vwsub(u)_w
12875 /// mul_vl -> vwmul(u) | vwmul_su
12876 /// vwadd(u)_w -> vwadd(u)
12877 /// vwsub(u)_w -> vwsub(u)
12878 static SDValue
12879 combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
12880 SelectionDAG &DAG = DCI.DAG;
12882 assert(NodeExtensionHelper::isSupportedRoot(N) &&
12883 "Shouldn't have called this method");
12884 SmallVector<SDNode *> Worklist;
12885 SmallSet<SDNode *, 8> Inserted;
12886 Worklist.push_back(N);
12887 Inserted.insert(N);
12888 SmallVector<CombineResult> CombinesToApply;
12890 while (!Worklist.empty()) {
12891 SDNode *Root = Worklist.pop_back_val();
12892 if (!NodeExtensionHelper::isSupportedRoot(Root))
12893 return SDValue();
12895 NodeExtensionHelper LHS(N, 0, DAG);
12896 NodeExtensionHelper RHS(N, 1, DAG);
12897 auto AppendUsersIfNeeded = [&Worklist,
12898 &Inserted](const NodeExtensionHelper &Op) {
12899 if (Op.needToPromoteOtherUsers()) {
12900 for (SDNode *TheUse : Op.OrigOperand->uses()) {
12901 if (Inserted.insert(TheUse).second)
12902 Worklist.push_back(TheUse);
12907 // Control the compile time by limiting the number of nodes we look at in
12908 // total.
12909 if (Inserted.size() > ExtensionMaxWebSize)
12910 return SDValue();
12912 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
12913 NodeExtensionHelper::getSupportedFoldings(N);
12915 assert(!FoldingStrategies.empty() && "Nothing to be folded");
12916 bool Matched = false;
12917 for (int Attempt = 0;
12918 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
12919 ++Attempt) {
12921 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
12922 FoldingStrategies) {
12923 std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
12924 if (Res) {
12925 Matched = true;
12926 CombinesToApply.push_back(*Res);
12927 // All the inputs that are extended need to be folded, otherwise
12928 // we would be left with both the old input (since it may still be used)
12929 // and the new one.
12930 if (Res->SExtLHS.has_value())
12931 AppendUsersIfNeeded(LHS);
12932 if (Res->SExtRHS.has_value())
12933 AppendUsersIfNeeded(RHS);
12934 break;
12937 std::swap(LHS, RHS);
12939 // Right now we use an all-or-nothing approach.
12940 if (!Matched)
12941 return SDValue();
12943 // Store the value for the replacement of the input node separately.
12944 SDValue InputRootReplacement;
12945 // We do the RAUW after we materialize all the combines, because some replaced
12946 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
12947 // some of these nodes may appear in the NodeExtensionHelpers of some of the
12948 // yet-to-be-visited CombinesToApply roots.
12949 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
12950 ValuesToReplace.reserve(CombinesToApply.size());
12951 for (CombineResult Res : CombinesToApply) {
12952 SDValue NewValue = Res.materialize(DAG);
12953 if (!InputRootReplacement) {
12954 assert(Res.Root == N &&
12955 "First element is expected to be the current node");
12956 InputRootReplacement = NewValue;
12957 } else {
12958 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
12961 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
12962 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
12963 DCI.AddToWorklist(OldNewValues.second.getNode());
12965 return InputRootReplacement;
12968 // Helper function for performMemPairCombine.
12969 // Try to combine the memory loads/stores LSNode1 and LSNode2
12970 // into a single memory pair operation.
12971 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
12972 LSBaseSDNode *LSNode2, SDValue BasePtr,
12973 uint64_t Imm) {
12974 SmallPtrSet<const SDNode *, 32> Visited;
12975 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
12977 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
12978 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
12979 return SDValue();
12981 MachineFunction &MF = DAG.getMachineFunction();
12982 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
12984 // The new operation has twice the width.
12985 MVT XLenVT = Subtarget.getXLenVT();
12986 EVT MemVT = LSNode1->getMemoryVT();
12987 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
12988 MachineMemOperand *MMO = LSNode1->getMemOperand();
12989 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
12990 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
12992 if (LSNode1->getOpcode() == ISD::LOAD) {
12993 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
12994 unsigned Opcode;
12995 if (MemVT == MVT::i32)
12996 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
12997 else
12998 Opcode = RISCVISD::TH_LDD;
13000 SDValue Res = DAG.getMemIntrinsicNode(
13001 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
13002 {LSNode1->getChain(), BasePtr,
13003 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13004 NewMemVT, NewMMO);
13006 SDValue Node1 =
13007 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
13008 SDValue Node2 =
13009 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
13011 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
13012 return Node1;
13013 } else {
13014 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
13016 SDValue Res = DAG.getMemIntrinsicNode(
13017 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
13018 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
13019 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
13020 NewMemVT, NewMMO);
13022 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
13023 return Res;
13027 // Try to combine two adjacent loads/stores to a single pair instruction from
13028 // the XTHeadMemPair vendor extension.
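// E.g., two i64 loads from (base + 16) and (base + 24) satisfy the offset
// constraints checked below and are merged into a single TH_LDD node.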
13029 static SDValue performMemPairCombine(SDNode *N,
13030 TargetLowering::DAGCombinerInfo &DCI) {
13031 SelectionDAG &DAG = DCI.DAG;
13032 MachineFunction &MF = DAG.getMachineFunction();
13033 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
13035 // Target does not support load/store pair.
13036 if (!Subtarget.hasVendorXTHeadMemPair())
13037 return SDValue();
13039 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
13040 EVT MemVT = LSNode1->getMemoryVT();
13041 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
13043 // No volatile, indexed or atomic loads/stores.
13044 if (!LSNode1->isSimple() || LSNode1->isIndexed())
13045 return SDValue();
13047 // Function to get a base + constant representation from a memory value.
13048 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
13049 if (Ptr->getOpcode() == ISD::ADD)
13050 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
13051 return {Ptr->getOperand(0), C1->getZExtValue()};
13052 return {Ptr, 0};
13055 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
13057 SDValue Chain = N->getOperand(0);
13058 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
13059 UI != UE; ++UI) {
13060 SDUse &Use = UI.getUse();
13061 if (Use.getUser() != N && Use.getResNo() == 0 &&
13062 Use.getUser()->getOpcode() == N->getOpcode()) {
13063 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
13065 // No volatile, indexed or atomic loads/stores.
13066 if (!LSNode2->isSimple() || LSNode2->isIndexed())
13067 continue;
13069 // Check if LSNode1 and LSNode2 have the same type and extension.
13070 if (LSNode1->getOpcode() == ISD::LOAD)
13071 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
13072 cast<LoadSDNode>(LSNode1)->getExtensionType())
13073 continue;
13075 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
13076 continue;
13078 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
13080 // Check if the base pointer is the same for both instructions.
13081 if (Base1 != Base2)
13082 continue;
13084 // Check if the offsets match the XTHeadMemPair encoding constraints.
13085 bool Valid = false;
13086 if (MemVT == MVT::i32) {
13087 // Check for adjacent i32 values and a 2-bit index.
13088 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
13089 Valid = true;
13090 } else if (MemVT == MVT::i64) {
13091 // Check for adjacent i64 values and a 2-bit index.
13092 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
13093 Valid = true;
13096 if (!Valid)
13097 continue;
13099 // Try to combine.
13100 if (SDValue Res =
13101 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
13102 return Res;
13106 return SDValue();
13109 // Fold
13110 // (fp_to_int (froundeven X)) -> fcvt X, rne
13111 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
13112 // (fp_to_int (ffloor X)) -> fcvt X, rdn
13113 // (fp_to_int (fceil X)) -> fcvt X, rup
13114 // (fp_to_int (fround X)) -> fcvt X, rmm
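// For example, on RV64 with F, (fp_to_sint (ffloor X:f32)) producing i64
// becomes an FCVT_X node with rounding mode rdn, which is expected to select
// to a single "fcvt.l.s a0, fa0, rdn" instead of a separate floor followed by
// a conversion (a sketch; the exact selection depends on the enabled
// extensions and types).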
13115 static SDValue performFP_TO_INTCombine(SDNode *N,
13116 TargetLowering::DAGCombinerInfo &DCI,
13117 const RISCVSubtarget &Subtarget) {
13118 SelectionDAG &DAG = DCI.DAG;
13119 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13120 MVT XLenVT = Subtarget.getXLenVT();
13122 SDValue Src = N->getOperand(0);
13124 // Don't do this for strict-fp Src.
13125 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13126 return SDValue();
13128 // Ensure the FP type is legal.
13129 if (!TLI.isTypeLegal(Src.getValueType()))
13130 return SDValue();
13132 // Don't do this for f16 with Zfhmin and not Zfh.
13133 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13134 return SDValue();
13136 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13137 // If the result is invalid, we didn't find a foldable instruction.
13138 // If the result is dynamic, then we found an frint which we don't yet
13139 // support. It will cause 7 to be written to the FRM CSR for vector.
13140 // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below.
13141 if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN)
13142 return SDValue();
13144 SDLoc DL(N);
13145 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13146 EVT VT = N->getValueType(0);
13148 if (VT.isVector() && TLI.isTypeLegal(VT)) {
13149 MVT SrcVT = Src.getSimpleValueType();
13150 MVT SrcContainerVT = SrcVT;
13151 MVT ContainerVT = VT.getSimpleVT();
13152 SDValue XVal = Src.getOperand(0);
13154 // For widening and narrowing conversions we just combine it into a
13155 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
13156 // end up getting lowered to their appropriate pseudo instructions based on
13157 // their operand types
13158 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
13159 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
13160 return SDValue();
13162 // Make fixed-length vectors scalable first
13163 if (SrcVT.isFixedLengthVector()) {
13164 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
13165 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
13166 ContainerVT =
13167 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
13170 auto [Mask, VL] =
13171 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
13173 SDValue FpToInt;
13174 if (FRM == RISCVFPRndMode::RTZ) {
13175 // Use the dedicated trunc static rounding mode if we're truncating so we
13176 // don't need to generate calls to fsrmi/fsrm
13177 unsigned Opc =
13178 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
13179 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
13180 } else {
13181 unsigned Opc =
13182 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
13183 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
13184 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
13187 // If converted from fixed-length to scalable, convert back
13188 if (VT.isFixedLengthVector())
13189 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
13191 return FpToInt;
13194 // Only handle XLen or i32 types. Other types narrower than XLen will
13195 // eventually be legalized to XLenVT.
13196 if (VT != MVT::i32 && VT != XLenVT)
13197 return SDValue();
13199 unsigned Opc;
13200 if (VT == XLenVT)
13201 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13202 else
13203 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13205 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
13206 DAG.getTargetConstant(FRM, DL, XLenVT));
13207 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
13210 // Fold
13211 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
13212 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
13213 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
13214 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
13215 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
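// For example, (fp_to_int_sat (ftrunc X)) with an XLen-sized result becomes
// FCVT_X with rtz, wrapped in a SETUO-based select that returns 0 when X is
// NaN; the conversion itself already saturates to the register size.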
13216 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
13217 TargetLowering::DAGCombinerInfo &DCI,
13218 const RISCVSubtarget &Subtarget) {
13219 SelectionDAG &DAG = DCI.DAG;
13220 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13221 MVT XLenVT = Subtarget.getXLenVT();
13223 // Only handle XLen types. Other types narrower than XLen will eventually be
13224 // legalized to XLenVT.
13225 EVT DstVT = N->getValueType(0);
13226 if (DstVT != XLenVT)
13227 return SDValue();
13229 SDValue Src = N->getOperand(0);
13231 // Don't do this for strict-fp Src.
13232 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
13233 return SDValue();
13235 // Ensure the FP type is also legal.
13236 if (!TLI.isTypeLegal(Src.getValueType()))
13237 return SDValue();
13239 // Don't do this for f16 with Zfhmin and not Zfh.
13240 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
13241 return SDValue();
13243 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13245 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
13246 if (FRM == RISCVFPRndMode::Invalid)
13247 return SDValue();
13249 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
13251 unsigned Opc;
13252 if (SatVT == DstVT)
13253 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
13254 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
13255 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
13256 else
13257 return SDValue();
13258 // FIXME: Support other SatVTs by clamping before or after the conversion.
13260 Src = Src.getOperand(0);
13262 SDLoc DL(N);
13263 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
13264 DAG.getTargetConstant(FRM, DL, XLenVT));
13266 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
13267 // extend.
13268 if (Opc == RISCVISD::FCVT_WU_RV64)
13269 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
13271 // RISC-V FP-to-int conversions saturate to the destination register size, but
13272 // don't produce 0 for nan.
13273 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
13274 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
13277 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
13278 // smaller than XLenVT.
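// For example, for an i16 value, (bitreverse (bswap X)) swaps the two bytes
// and then reverses all 16 bits, which is equivalent to reversing the bits
// within each byte while keeping the byte order, i.e. a single BREV8.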
13279 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
13280 const RISCVSubtarget &Subtarget) {
13281 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
13283 SDValue Src = N->getOperand(0);
13284 if (Src.getOpcode() != ISD::BSWAP)
13285 return SDValue();
13287 EVT VT = N->getValueType(0);
13288 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
13289 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
13290 return SDValue();
13292 SDLoc DL(N);
13293 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
13296 // Convert from one FMA opcode to another based on whether we are negating the
13297 // multiply result and/or the accumulator.
13298 // NOTE: Only supports RVV operations with VL.
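// For example, VFMADD_VL computes (A * B) + C; negating the product gives
// -(A * B) + C, i.e. VFNMSUB_VL, and additionally negating the accumulator
// gives -(A * B) - C, i.e. VFNMADD_VL. The switches below encode exactly this
// mapping (including the STRICT_ variants).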
13299 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
13300 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
13301 if (NegMul) {
13302 // clang-format off
13303 switch (Opcode) {
13304 default: llvm_unreachable("Unexpected opcode");
13305 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
13306 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
13307 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
13308 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
13309 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
13310 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
13311 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
13312 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
13314 // clang-format on
13317 // Negating the accumulator changes ADD<->SUB.
13318 if (NegAcc) {
13319 // clang-format off
13320 switch (Opcode) {
13321 default: llvm_unreachable("Unexpected opcode");
13322 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
13323 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
13324 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
13325 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
13326 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
13327 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
13328 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
13329 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
13331 // clang-format on
13334 return Opcode;
13337 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
13338 // Fold FNEG_VL into FMA opcodes.
13339 // The first operand of strict-fp is chain.
13340 unsigned Offset = N->isTargetStrictFPOpcode();
13341 SDValue A = N->getOperand(0 + Offset);
13342 SDValue B = N->getOperand(1 + Offset);
13343 SDValue C = N->getOperand(2 + Offset);
13344 SDValue Mask = N->getOperand(3 + Offset);
13345 SDValue VL = N->getOperand(4 + Offset);
13347 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
13348 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
13349 V.getOperand(2) == VL) {
13350 // Return the negated input.
13351 V = V.getOperand(0);
13352 return true;
13355 return false;
13358 bool NegA = invertIfNegative(A);
13359 bool NegB = invertIfNegative(B);
13360 bool NegC = invertIfNegative(C);
13362 // If no operands are negated, we're done.
13363 if (!NegA && !NegB && !NegC)
13364 return SDValue();
13366 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
13367 if (N->isTargetStrictFPOpcode())
13368 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
13369 {N->getOperand(0), A, B, C, Mask, VL});
13370 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
13371 VL);
13374 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
13375 const RISCVSubtarget &Subtarget) {
13376 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
13377 return V;
13379 if (N->getValueType(0).isScalableVector() &&
13380 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13381 (Subtarget.hasVInstructionsF16Minimal() &&
13382 !Subtarget.hasVInstructionsF16())) {
13383 return SDValue();
13386 // FIXME: Ignore strict opcodes for now.
13387 if (N->isTargetStrictFPOpcode())
13388 return SDValue();
13390 // Try to form widening FMA.
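// e.g. (vfmadd_vl (fp_extend_vl A), (fp_extend_vl B), C, Mask, VL)
//        -> (vfwmadd_vl A, B, C, Mask, VL)
// provided both extends use the same Mask/VL and have no other uses.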
13391 SDValue Op0 = N->getOperand(0);
13392 SDValue Op1 = N->getOperand(1);
13393 SDValue Mask = N->getOperand(3);
13394 SDValue VL = N->getOperand(4);
13396 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
13397 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
13398 return SDValue();
13400 // TODO: Refactor to handle more complex cases similar to
13401 // combineBinOp_VLToVWBinOp_VL.
13402 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
13403 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
13404 return SDValue();
13406 // Check the mask and VL are the same.
13407 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
13408 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
13409 return SDValue();
13411 unsigned NewOpc;
13412 switch (N->getOpcode()) {
13413 default:
13414 llvm_unreachable("Unexpected opcode");
13415 case RISCVISD::VFMADD_VL:
13416 NewOpc = RISCVISD::VFWMADD_VL;
13417 break;
13418 case RISCVISD::VFNMSUB_VL:
13419 NewOpc = RISCVISD::VFWNMSUB_VL;
13420 break;
13421 case RISCVISD::VFNMADD_VL:
13422 NewOpc = RISCVISD::VFWNMADD_VL;
13423 break;
13424 case RISCVISD::VFMSUB_VL:
13425 NewOpc = RISCVISD::VFWMSUB_VL;
13426 break;
13429 Op0 = Op0.getOperand(0);
13430 Op1 = Op1.getOperand(0);
13432 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
13433 N->getOperand(2), Mask, VL);
13436 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
13437 const RISCVSubtarget &Subtarget) {
13438 if (N->getValueType(0).isScalableVector() &&
13439 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13440 (Subtarget.hasVInstructionsF16Minimal() &&
13441 !Subtarget.hasVInstructionsF16())) {
13442 return SDValue();
13445 // FIXME: Ignore strict opcodes for now.
13446 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
13448 // Try to form widening multiply.
13449 SDValue Op0 = N->getOperand(0);
13450 SDValue Op1 = N->getOperand(1);
13451 SDValue Merge = N->getOperand(2);
13452 SDValue Mask = N->getOperand(3);
13453 SDValue VL = N->getOperand(4);
13455 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
13456 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
13457 return SDValue();
13459 // TODO: Refactor to handle more complex cases similar to
13460 // combineBinOp_VLToVWBinOp_VL.
13461 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
13462 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
13463 return SDValue();
13465 // Check the mask and VL are the same.
13466 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
13467 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
13468 return SDValue();
13470 Op0 = Op0.getOperand(0);
13471 Op1 = Op1.getOperand(0);
13473 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
13474 Op1, Merge, Mask, VL);
13477 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
13478 const RISCVSubtarget &Subtarget) {
13479 if (N->getValueType(0).isScalableVector() &&
13480 N->getValueType(0).getVectorElementType() == MVT::f32 &&
13481 (Subtarget.hasVInstructionsF16Minimal() &&
13482 !Subtarget.hasVInstructionsF16())) {
13483 return SDValue();
13486 SDValue Op0 = N->getOperand(0);
13487 SDValue Op1 = N->getOperand(1);
13488 SDValue Merge = N->getOperand(2);
13489 SDValue Mask = N->getOperand(3);
13490 SDValue VL = N->getOperand(4);
13492 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
13494 // Look for foldable FP_EXTENDS.
13495 bool Op0IsExtend =
13496 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
13497 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
13498 bool Op1IsExtend =
13499 (Op0 == Op1 && Op0IsExtend) ||
13500 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
13502 // Check the mask and VL.
13503 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
13504 Op0IsExtend = false;
13505 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
13506 Op1IsExtend = false;
13508 // Canonicalize.
13509 if (!Op1IsExtend) {
13510 // Sub requires at least operand 1 to be an extend.
13511 if (!IsAdd)
13512 return SDValue();
13514 // Add is commutable; if the other operand is foldable, swap them.
13515 if (!Op0IsExtend)
13516 return SDValue();
13518 std::swap(Op0, Op1);
13519 std::swap(Op0IsExtend, Op1IsExtend);
13522 // Op1 is a foldable extend. Op0 might be foldable.
13523 Op1 = Op1.getOperand(0);
13524 if (Op0IsExtend)
13525 Op0 = Op0.getOperand(0);
13527 unsigned Opc;
13528 if (IsAdd)
13529 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
13530 else
13531 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
13533 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
13534 VL);
13537 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
13538 const RISCVSubtarget &Subtarget) {
13539 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
13541 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
13542 return SDValue();
13544 if (!isa<ConstantSDNode>(N->getOperand(1)))
13545 return SDValue();
13546 uint64_t ShAmt = N->getConstantOperandVal(1);
13547 if (ShAmt > 32)
13548 return SDValue();
13550 SDValue N0 = N->getOperand(0);
13552 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
13553 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
13554 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
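// e.g. (sra (sext_inreg (shl X, 3), i32), 4) -> (sra (shl X, 35), 36), which
// is expected to select to slli+srai (both compressible) rather than
// slliw+sraiw.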
13555 if (ShAmt < 32 &&
13556 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
13557 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
13558 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
13559 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
13560 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
13561 if (LShAmt < 32) {
13562 SDLoc ShlDL(N0.getOperand(0));
13563 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
13564 N0.getOperand(0).getOperand(0),
13565 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
13566 SDLoc DL(N);
13567 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
13568 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
13572 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
13573 // FIXME: Should this be a generic combine? There's a similar combine on X86.
13575 // Also try these folds where an add or sub is in the middle.
13576 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
13577 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
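// e.g. with C == 2: (sra (shl X, 32), 30) -> (shl (sext_inreg X, i32), 2).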
13578 SDValue Shl;
13579 ConstantSDNode *AddC = nullptr;
13581 // We might have an ADD or SUB between the SRA and SHL.
13582 bool IsAdd = N0.getOpcode() == ISD::ADD;
13583 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
13584 // Other operand needs to be a constant we can modify.
13585 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
13586 if (!AddC)
13587 return SDValue();
13589 // AddC needs to have at least 32 trailing zeros.
13590 if (AddC->getAPIntValue().countr_zero() < 32)
13591 return SDValue();
13593 // Every user should be a shift by a constant less than or equal to 32. This
13594 // ensures we'll do this optimization for each of them to produce an
13595 // add/sub+sext_inreg they can all share.
13596 for (SDNode *U : N0->uses()) {
13597 if (U->getOpcode() != ISD::SRA ||
13598 !isa<ConstantSDNode>(U->getOperand(1)) ||
13599 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
13600 return SDValue();
13603 Shl = N0.getOperand(IsAdd ? 0 : 1);
13604 } else {
13605 // Not an ADD or SUB.
13606 Shl = N0;
13609 // Look for a shift left by 32.
13610 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
13611 Shl.getConstantOperandVal(1) != 32)
13612 return SDValue();
13614 // If we didn't look through an add/sub, then the shl should have one use.
13615 // If we did look through an add/sub, the sext_inreg we create is free so
13616 // we're only creating 2 new instructions. It's enough to only remove the
13617 // original sra+add/sub.
13618 if (!AddC && !Shl.hasOneUse())
13619 return SDValue();
13621 SDLoc DL(N);
13622 SDValue In = Shl.getOperand(0);
13624 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
13625 // constant.
13626 if (AddC) {
13627 SDValue ShiftedAddC =
13628 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
13629 if (IsAdd)
13630 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
13631 else
13632 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
13635 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
13636 DAG.getValueType(MVT::i32));
13637 if (ShAmt == 32)
13638 return SExt;
13640 return DAG.getNode(
13641 ISD::SHL, DL, MVT::i64, SExt,
13642 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
13645 // Invert (and/or (setcc X, Y, CC), (xor Z, 1)) to (or/and (setcc X, Y, !CC), Z)
13646 // if the result is used as the condition of a br_cc or select_cc we can invert,
13647 // inverting the setcc is free, and Z is 0/1. The caller will invert the
13648 // br_cc/select_cc.
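// For example, with Z known to be 0/1:
//   (and (setcc X, Y, eq), (xor Z, 1)) -> (or (setcc X, Y, ne), Z)
// and the caller then inverts the condition of the consuming br_cc/select_cc.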
13649 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
13650 bool IsAnd = Cond.getOpcode() == ISD::AND;
13651 if (!IsAnd && Cond.getOpcode() != ISD::OR)
13652 return SDValue();
13654 if (!Cond.hasOneUse())
13655 return SDValue();
13657 SDValue Setcc = Cond.getOperand(0);
13658 SDValue Xor = Cond.getOperand(1);
13659 // Canonicalize setcc to LHS.
13660 if (Setcc.getOpcode() != ISD::SETCC)
13661 std::swap(Setcc, Xor);
13662 // LHS should be a setcc and RHS should be an xor.
13663 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
13664 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
13665 return SDValue();
13667 // If the condition is an And, SimplifyDemandedBits may have changed
13668 // (xor Z, 1) to (not Z).
13669 SDValue Xor1 = Xor.getOperand(1);
13670 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
13671 return SDValue();
13673 EVT VT = Cond.getValueType();
13674 SDValue Xor0 = Xor.getOperand(0);
13676 // The LHS of the xor needs to be 0/1.
13677 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13678 if (!DAG.MaskedValueIsZero(Xor0, Mask))
13679 return SDValue();
13681 // We can only invert integer setccs.
13682 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
13683 if (!SetCCOpVT.isScalarInteger())
13684 return SDValue();
13686 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
13687 if (ISD::isIntEqualitySetCC(CCVal)) {
13688 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13689 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
13690 Setcc.getOperand(1), CCVal);
13691 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
13692 // Invert (setlt 0, X) by converting to (setlt X, 1).
13693 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
13694 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
13695 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
13696 // Invert (setlt X, 1) by converting to (setlt 0, X).
13697 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
13698 DAG.getConstant(0, SDLoc(Setcc), VT),
13699 Setcc.getOperand(0), CCVal);
13700 } else
13701 return SDValue();
13703 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13704 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
13707 // Perform common combines for BR_CC and SELECT_CC conditions.
13708 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
13709 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
13710 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13712 // Since an arithmetic right shift always preserves the sign bit,
13713 // the shift can be omitted:
13714 // Fold (setlt (sra X, N), 0) -> (setlt X, 0) and
13715 // (setge (sra X, N), 0) -> (setge X, 0).
13716 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
13717 LHS.getOpcode() == ISD::SRA) {
13718 LHS = LHS.getOperand(0);
13719 return true;
13722 if (!ISD::isIntEqualitySetCC(CCVal))
13723 return false;
13725 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
13726 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
13727 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
13728 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
13729 // If we're looking for eq 0 instead of ne 0, we need to invert the
13730 // condition.
13731 bool Invert = CCVal == ISD::SETEQ;
13732 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
13733 if (Invert)
13734 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
13736 RHS = LHS.getOperand(1);
13737 LHS = LHS.getOperand(0);
13738 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
13740 CC = DAG.getCondCode(CCVal);
13741 return true;
13744 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
13745 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
13746 RHS = LHS.getOperand(1);
13747 LHS = LHS.getOperand(0);
13748 return true;
13751 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
13752 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
13753 LHS.getOperand(1).getOpcode() == ISD::Constant) {
13754 SDValue LHS0 = LHS.getOperand(0);
13755 if (LHS0.getOpcode() == ISD::AND &&
13756 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
13757 uint64_t Mask = LHS0.getConstantOperandVal(1);
13758 uint64_t ShAmt = LHS.getConstantOperandVal(1);
13759 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
13760 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
13761 CC = DAG.getCondCode(CCVal);
13763 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
13764 LHS = LHS0.getOperand(0);
13765 if (ShAmt != 0)
13766 LHS =
13767 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
13768 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
13769 return true;
13774 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
13775 // This can occur when legalizing some floating point comparisons.
13776 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
13777 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
13778 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
13779 CC = DAG.getCondCode(CCVal);
13780 RHS = DAG.getConstant(0, DL, LHS.getValueType());
13781 return true;
13784 if (isNullConstant(RHS)) {
13785 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
13786 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
13787 CC = DAG.getCondCode(CCVal);
13788 LHS = NewCond;
13789 return true;
13793 return false;
13796 // Fold
13797 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
13798 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
13799 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
13800 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
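// The same idea applies to the shift opcodes handled below, e.g.
// (select C, (shl Y, X), Y) -> (shl Y, (select C, X, 0)), using 0 as the
// identity operand for the non-commutative cases.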
13801 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
13802 SDValue TrueVal, SDValue FalseVal,
13803 bool Swapped) {
13804 bool Commutative = true;
13805 unsigned Opc = TrueVal.getOpcode();
13806 switch (Opc) {
13807 default:
13808 return SDValue();
13809 case ISD::SHL:
13810 case ISD::SRA:
13811 case ISD::SRL:
13812 case ISD::SUB:
13813 Commutative = false;
13814 break;
13815 case ISD::ADD:
13816 case ISD::OR:
13817 case ISD::XOR:
13818 break;
13821 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
13822 return SDValue();
13824 unsigned OpToFold;
13825 if (FalseVal == TrueVal.getOperand(0))
13826 OpToFold = 0;
13827 else if (Commutative && FalseVal == TrueVal.getOperand(1))
13828 OpToFold = 1;
13829 else
13830 return SDValue();
13832 EVT VT = N->getValueType(0);
13833 SDLoc DL(N);
13834 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
13835 EVT OtherOpVT = OtherOp->getValueType(0);
13836 SDValue IdentityOperand =
13837 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
13838 if (!Commutative)
13839 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
13840 assert(IdentityOperand && "No identity operand!");
13842 if (Swapped)
13843 std::swap(OtherOp, IdentityOperand);
13844 SDValue NewSel =
13845 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
13846 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
13849 // This tries to get rid of the `select` and `icmp` that are used to handle
13850 // targets that do not support `cttz(0)`/`ctlz(0)`.
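// For example, (select (seteq X, 0), 0, (cttz X)) becomes
// (and (cttz X), BitWidth - 1); since ISD::CTTZ/ISD::CTLZ of zero is defined
// to be the bit width, the masked result is 0 exactly when X is 0.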
13851 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
13852 SDValue Cond = N->getOperand(0);
13854 // This represents either CTTZ or CTLZ instruction.
13855 SDValue CountZeroes;
13857 SDValue ValOnZero;
13859 if (Cond.getOpcode() != ISD::SETCC)
13860 return SDValue();
13862 if (!isNullConstant(Cond->getOperand(1)))
13863 return SDValue();
13865 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
13866 if (CCVal == ISD::CondCode::SETEQ) {
13867 CountZeroes = N->getOperand(2);
13868 ValOnZero = N->getOperand(1);
13869 } else if (CCVal == ISD::CondCode::SETNE) {
13870 CountZeroes = N->getOperand(1);
13871 ValOnZero = N->getOperand(2);
13872 } else {
13873 return SDValue();
13876 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
13877 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
13878 CountZeroes = CountZeroes.getOperand(0);
13880 if (CountZeroes.getOpcode() != ISD::CTTZ &&
13881 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
13882 CountZeroes.getOpcode() != ISD::CTLZ &&
13883 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
13884 return SDValue();
13886 if (!isNullConstant(ValOnZero))
13887 return SDValue();
13889 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
13890 if (Cond->getOperand(0) != CountZeroesArgument)
13891 return SDValue();
13893 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
13894 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
13895 CountZeroes.getValueType(), CountZeroesArgument);
13896 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
13897 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
13898 CountZeroes.getValueType(), CountZeroesArgument);
13901 unsigned BitWidth = CountZeroes.getValueSizeInBits();
13902 SDValue BitWidthMinusOne =
13903 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
13905 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
13906 CountZeroes, BitWidthMinusOne);
13907 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
13910 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
13911 const RISCVSubtarget &Subtarget) {
13912 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
13913 return Folded;
13915 if (Subtarget.hasShortForwardBranchOpt())
13916 return SDValue();
13918 SDValue TrueVal = N->getOperand(1);
13919 SDValue FalseVal = N->getOperand(2);
13920 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
13921 return V;
13922 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
13925 /// If we have a build_vector where each lane is binop X, C, where C
13926 /// is a constant (but not necessarily the same constant on all lanes),
13927 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ...).
13928 /// We assume that materializing a constant build vector will be no more
13929 /// expensive than performing O(n) binops.
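/// For example:
///   (build_vector (add X0, 1), (add X1, 2), (add X2, 3), (add X3, 4))
///     -> (add (build_vector X0, X1, X2, X3), (build_vector 1, 2, 3, 4))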
13930 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
13931 const RISCVSubtarget &Subtarget,
13932 const RISCVTargetLowering &TLI) {
13933 SDLoc DL(N);
13934 EVT VT = N->getValueType(0);
13936 assert(!VT.isScalableVector() && "unexpected build vector");
13938 if (VT.getVectorNumElements() == 1)
13939 return SDValue();
13941 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
13942 if (!TLI.isBinOp(Opcode))
13943 return SDValue();
13945 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
13946 return SDValue();
13948 SmallVector<SDValue> LHSOps;
13949 SmallVector<SDValue> RHSOps;
13950 for (SDValue Op : N->ops()) {
13951 if (Op.isUndef()) {
13952 // We can't form a divide or remainder from undef.
13953 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
13954 return SDValue();
13956 LHSOps.push_back(Op);
13957 RHSOps.push_back(Op);
13958 continue;
13961 // TODO: We can handle operations which have a neutral rhs value
13962 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
13963 // of profitability in a more explicit manner.
13964 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
13965 return SDValue();
13967 LHSOps.push_back(Op.getOperand(0));
13968 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
13969 !isa<ConstantFPSDNode>(Op.getOperand(1)))
13970 return SDValue();
13971 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
13972 // have different LHS and RHS types.
13973 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
13974 return SDValue();
13975 RHSOps.push_back(Op.getOperand(1));
13978 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
13979 DAG.getBuildVector(VT, DL, RHSOps));
13982 // If we're concatenating a series of vector loads like
13983 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...,
13984 // then we can turn this into a strided load by widening the vector elements:
13985 // vlse32 p, stride=n
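// For example (a sketch): concat_vectors of four one-use v4i8 loads from p,
// p+16, p+32 and p+48 can become a v4i32 strided load from p with stride 16,
// which is then bitcast back to v16i8.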
13986 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
13987 const RISCVSubtarget &Subtarget,
13988 const RISCVTargetLowering &TLI) {
13989 SDLoc DL(N);
13990 EVT VT = N->getValueType(0);
13992 // Only perform this combine on legal MVTs.
13993 if (!TLI.isTypeLegal(VT))
13994 return SDValue();
13996 // TODO: Potentially extend this to scalable vectors
13997 if (VT.isScalableVector())
13998 return SDValue();
14000 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
14001 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
14002 !SDValue(BaseLd, 0).hasOneUse())
14003 return SDValue();
14005 EVT BaseLdVT = BaseLd->getValueType(0);
14007 // Go through the loads and check that they're strided
14008 SmallVector<LoadSDNode *> Lds;
14009 Lds.push_back(BaseLd);
14010 Align Align = BaseLd->getAlign();
14011 for (SDValue Op : N->ops().drop_front()) {
14012 auto *Ld = dyn_cast<LoadSDNode>(Op);
14013 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
14014 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
14015 Ld->getValueType(0) != BaseLdVT)
14016 return SDValue();
14018 Lds.push_back(Ld);
14020 // The common alignment is the most restrictive (smallest) of all the loads
14021 Align = std::min(Align, Ld->getAlign());
14024 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
14025 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
14026 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
14027 // If the load ptrs can be decomposed into a common (Base + Index) with a
14028 // common constant stride, then return the constant stride.
14029 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
14030 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
14031 if (BIO1.equalBaseIndex(BIO2, DAG))
14032 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
14034 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
14035 SDValue P1 = Ld1->getBasePtr();
14036 SDValue P2 = Ld2->getBasePtr();
14037 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
14038 return {{P2.getOperand(1), false}};
14039 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
14040 return {{P1.getOperand(1), true}};
14042 return std::nullopt;
14045 // Get the distance between the first and second loads
14046 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
14047 if (!BaseDiff)
14048 return SDValue();
14050 // Check all the loads are the same distance apart
14051 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
14052 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
14053 return SDValue();
14055 // TODO: At this point, we've successfully matched a generalized gather
14056 // load. Maybe we should emit that, and then move the specialized
14057 // matchers above and below into a DAG combine?
14059 // Get the widened scalar type, e.g. v4i8 -> i32
14060 unsigned WideScalarBitWidth =
14061 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
14062 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
14064 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
14065 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
14066 if (!TLI.isTypeLegal(WideVecVT))
14067 return SDValue();
14069 // Check that the operation is legal
14070 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
14071 return SDValue();
14073 auto [StrideVariant, MustNegateStride] = *BaseDiff;
14074 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
14075 ? std::get<SDValue>(StrideVariant)
14076 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
14077 Lds[0]->getOffset().getValueType());
14078 if (MustNegateStride)
14079 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
14081 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
14082 SDValue IntID =
14083 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
14084 Subtarget.getXLenVT());
14086 SDValue AllOneMask =
14087 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
14088 DAG.getConstant(1, DL, MVT::i1));
14090 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
14091 BaseLd->getBasePtr(), Stride, AllOneMask};
14093 uint64_t MemSize;
14094 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
14095 ConstStride && ConstStride->getSExtValue() >= 0)
14096 // total size = (elsize * n) + (stride - elsize) * (n-1)
14097 // = elsize + stride * (n-1)
14098 MemSize = WideScalarVT.getSizeInBits() +
14099 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
14100 else
14101 // If Stride isn't constant, then we can't know how much it will load
14102 MemSize = MemoryLocation::UnknownSize;
14104 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
14105 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
14106 Align);
14108 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
14109 Ops, WideVecVT, MMO);
14110 for (SDValue Ld : N->ops())
14111 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
14113 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
14116 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
14117 const RISCVSubtarget &Subtarget) {
14118 assert(N->getOpcode() == RISCVISD::ADD_VL);
14119 SDValue Addend = N->getOperand(0);
14120 SDValue MulOp = N->getOperand(1);
14121 SDValue AddMergeOp = N->getOperand(2);
14123 if (!AddMergeOp.isUndef())
14124 return SDValue();
14126 auto IsVWMulOpc = [](unsigned Opc) {
14127 switch (Opc) {
14128 case RISCVISD::VWMUL_VL:
14129 case RISCVISD::VWMULU_VL:
14130 case RISCVISD::VWMULSU_VL:
14131 return true;
14132 default:
14133 return false;
14137 if (!IsVWMulOpc(MulOp.getOpcode()))
14138 std::swap(Addend, MulOp);
14140 if (!IsVWMulOpc(MulOp.getOpcode()))
14141 return SDValue();
14143 SDValue MulMergeOp = MulOp.getOperand(2);
14145 if (!MulMergeOp.isUndef())
14146 return SDValue();
14148 SDValue AddMask = N->getOperand(3);
14149 SDValue AddVL = N->getOperand(4);
14150 SDValue MulMask = MulOp.getOperand(3);
14151 SDValue MulVL = MulOp.getOperand(4);
14153 if (AddMask != MulMask || AddVL != MulVL)
14154 return SDValue();
14156 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
14157 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
14158 "Unexpected opcode after VWMACC_VL");
14159 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
14160 "Unexpected opcode after VWMACC_VL!");
14161 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
14162 "Unexpected opcode after VWMUL_VL!");
14163 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
14164 "Unexpected opcode after VWMUL_VL!");
14166 SDLoc DL(N);
14167 EVT VT = N->getValueType(0);
14168 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
14169 AddVL};
14170 return DAG.getNode(Opc, DL, VT, Ops);
14173 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
14174 ISD::MemIndexType &IndexType,
14175 RISCVTargetLowering::DAGCombinerInfo &DCI) {
14176 if (!DCI.isBeforeLegalize())
14177 return false;
14179 SelectionDAG &DAG = DCI.DAG;
14180 const MVT XLenVT =
14181 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
14183 const EVT IndexVT = Index.getValueType();
14185 // RISC-V indexed loads only support the "unsigned unscaled" addressing
14186 // mode, so anything else must be manually legalized.
14187 if (!isIndexTypeSigned(IndexType))
14188 return false;
14190 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
14191 // Any index legalization should first promote to XLenVT, so we don't lose
14192 // bits when scaling. This may create an illegal index type so we let
14193 // LLVM's legalization take care of the splitting.
14194 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
14195 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
14196 IndexVT.changeVectorElementType(XLenVT), Index);
14198 IndexType = ISD::UNSIGNED_SCALED;
14199 return true;
14202 /// Match the index vector of a scatter or gather node as the shuffle mask
14203 /// which performs the rearrangement if possible. Will only match if
14204 /// all lanes are touched, and thus replacing the scatter or gather with
14205 /// a unit strided access and shuffle is legal.
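/// For example, a gather of v4i32 with an all-ones mask and constant byte
/// indices (4, 0, 12, 8) touches every lane, so it can be rewritten as a unit
/// strided load followed by a shuffle with mask (1, 0, 3, 2).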
14206 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
14207 SmallVector<int> &ShuffleMask) {
14208 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
14209 return false;
14210 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
14211 return false;
14213 const unsigned ElementSize = VT.getScalarStoreSize();
14214 const unsigned NumElems = VT.getVectorNumElements();
14216 // Create the shuffle mask and check all bits active
14217 assert(ShuffleMask.empty());
14218 BitVector ActiveLanes(NumElems);
14219 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
14220 // TODO: We've found an active bit of UB, and could be
14221 // more aggressive here if desired.
14222 if (Index->getOperand(i)->isUndef())
14223 return false;
14224 uint64_t C = Index->getConstantOperandVal(i);
14225 if (C % ElementSize != 0)
14226 return false;
14227 C = C / ElementSize;
14228 if (C >= NumElems)
14229 return false;
14230 ShuffleMask.push_back(C);
14231 ActiveLanes.set(C);
14233 return ActiveLanes.all();
14236 /// Match the index of a gather or scatter operation as an operation
14237 /// with twice the element width and half the number of elements. This is
14238 /// generally profitable (if legal) because these operations are linear
14239 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
14240 /// come out ahead.
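/// For example, an i32 gather with constant byte offsets (0, 4, 16, 20) can
/// be treated as an i64 gather with offsets (0, 16): each even offset is
/// aligned to the wider element and each odd offset is the previous one plus
/// the narrow element size, subject to the ELEN and alignment checks below.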
14241 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
14242 Align BaseAlign, const RISCVSubtarget &ST) {
14243 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
14244 return false;
14245 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
14246 return false;
14248 // Attempt a doubling. If we can use an element type 4x or 8x in
14249 // size, this will happen via multiple iterations of the transform.
14250 const unsigned NumElems = VT.getVectorNumElements();
14251 if (NumElems % 2 != 0)
14252 return false;
14254 const unsigned ElementSize = VT.getScalarStoreSize();
14255 const unsigned WiderElementSize = ElementSize * 2;
14256 if (WiderElementSize > ST.getELen()/8)
14257 return false;
14259 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
14260 return false;
14262 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
14263 // TODO: We've found an active bit of UB, and could be
14264 // more aggressive here if desired.
14265 if (Index->getOperand(i)->isUndef())
14266 return false;
14267 // TODO: This offset check is too strict if we support fully
14268 // misaligned memory operations.
14269 uint64_t C = Index->getConstantOperandVal(i);
14270 if (i % 2 == 0) {
14271 if (C % WiderElementSize != 0)
14272 return false;
14273 continue;
14275 uint64_t Last = Index->getConstantOperandVal(i-1);
14276 if (C != Last + ElementSize)
14277 return false;
14279 return true;
14283 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
14284 DAGCombinerInfo &DCI) const {
14285 SelectionDAG &DAG = DCI.DAG;
14286 const MVT XLenVT = Subtarget.getXLenVT();
14287 SDLoc DL(N);
14289 // Helper to call SimplifyDemandedBits on an operand of N where only some low
14290 // bits are demanded. N will be added to the Worklist if it was not deleted.
14291 // Caller should return SDValue(N, 0) if this returns true.
14292 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
14293 SDValue Op = N->getOperand(OpNo);
14294 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
14295 if (!SimplifyDemandedBits(Op, Mask, DCI))
14296 return false;
14298 if (N->getOpcode() != ISD::DELETED_NODE)
14299 DCI.AddToWorklist(N);
14300 return true;
14303 switch (N->getOpcode()) {
14304 default:
14305 break;
14306 case RISCVISD::SplitF64: {
14307 SDValue Op0 = N->getOperand(0);
14308 // If the input to SplitF64 is just BuildPairF64 then the operation is
14309 // redundant. Instead, use BuildPairF64's operands directly.
14310 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
14311 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
14313 if (Op0->isUndef()) {
14314 SDValue Lo = DAG.getUNDEF(MVT::i32);
14315 SDValue Hi = DAG.getUNDEF(MVT::i32);
14316 return DCI.CombineTo(N, Lo, Hi);
14319 // It's cheaper to materialise two 32-bit integers than to load a double
14320 // from the constant pool and transfer it to integer registers through the
14321 // stack.
14322 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
14323 APInt V = C->getValueAPF().bitcastToAPInt();
14324 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
14325 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
14326 return DCI.CombineTo(N, Lo, Hi);
14329 // This is a target-specific version of a DAGCombine performed in
14330 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14331 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14332 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14333 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
14334 !Op0.getNode()->hasOneUse())
14335 break;
14336 SDValue NewSplitF64 =
14337 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
14338 Op0.getOperand(0));
14339 SDValue Lo = NewSplitF64.getValue(0);
14340 SDValue Hi = NewSplitF64.getValue(1);
14341 APInt SignBit = APInt::getSignMask(32);
14342 if (Op0.getOpcode() == ISD::FNEG) {
14343 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
14344 DAG.getConstant(SignBit, DL, MVT::i32));
14345 return DCI.CombineTo(N, Lo, NewHi);
14347 assert(Op0.getOpcode() == ISD::FABS);
14348 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
14349 DAG.getConstant(~SignBit, DL, MVT::i32));
14350 return DCI.CombineTo(N, Lo, NewHi);
14352 case RISCVISD::SLLW:
14353 case RISCVISD::SRAW:
14354 case RISCVISD::SRLW:
14355 case RISCVISD::RORW:
14356 case RISCVISD::ROLW: {
14357 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
14358 if (SimplifyDemandedLowBitsHelper(0, 32) ||
14359 SimplifyDemandedLowBitsHelper(1, 5))
14360 return SDValue(N, 0);
14362 break;
14364 case RISCVISD::CLZW:
14365 case RISCVISD::CTZW: {
14366 // Only the lower 32 bits of the first operand are read
14367 if (SimplifyDemandedLowBitsHelper(0, 32))
14368 return SDValue(N, 0);
14369 break;
14371 case RISCVISD::FMV_W_X_RV64: {
14372 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
14373 // conversion is unnecessary and can be replaced with the
14374 // FMV_X_ANYEXTW_RV64 operand.
14375 SDValue Op0 = N->getOperand(0);
14376 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
14377 return Op0.getOperand(0);
14378 break;
14380 case RISCVISD::FMV_X_ANYEXTH:
14381 case RISCVISD::FMV_X_ANYEXTW_RV64: {
14382 SDLoc DL(N);
14383 SDValue Op0 = N->getOperand(0);
14384 MVT VT = N->getSimpleValueType(0);
14385 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
14386 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
14387 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
14388 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
14389 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
14390 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14391 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
14392 assert(Op0.getOperand(0).getValueType() == VT &&
14393 "Unexpected value type!");
14394 return Op0.getOperand(0);
14397 // This is a target-specific version of a DAGCombine performed in
14398 // DAGCombiner::visitBITCAST. It performs the equivalent of:
14399 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14400 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14401 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
14402 !Op0.getNode()->hasOneUse())
14403 break;
14404 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
14405 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
14406 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
14407 if (Op0.getOpcode() == ISD::FNEG)
14408 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
14409 DAG.getConstant(SignBit, DL, VT));
14411 assert(Op0.getOpcode() == ISD::FABS);
14412 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
14413 DAG.getConstant(~SignBit, DL, VT));
14415 case ISD::ADD:
14416 return performADDCombine(N, DAG, Subtarget);
14417 case ISD::SUB:
14418 return performSUBCombine(N, DAG, Subtarget);
14419 case ISD::AND:
14420 return performANDCombine(N, DCI, Subtarget);
14421 case ISD::OR:
14422 return performORCombine(N, DCI, Subtarget);
14423 case ISD::XOR:
14424 return performXORCombine(N, DAG, Subtarget);
14425 case ISD::FADD:
14426 case ISD::UMAX:
14427 case ISD::UMIN:
14428 case ISD::SMAX:
14429 case ISD::SMIN:
14430 case ISD::FMAXNUM:
14431 case ISD::FMINNUM: {
14432 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14433 return V;
14434 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14435 return V;
14436 return SDValue();
14438 case ISD::SETCC:
14439 return performSETCCCombine(N, DAG, Subtarget);
14440 case ISD::SIGN_EXTEND_INREG:
14441 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
14442 case ISD::ZERO_EXTEND:
14443 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
14444 // type legalization. This is safe because fp_to_uint produces poison if
14445 // it overflows.
14446 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
14447 SDValue Src = N->getOperand(0);
14448 if (Src.getOpcode() == ISD::FP_TO_UINT &&
14449 isTypeLegal(Src.getOperand(0).getValueType()))
14450 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
14451 Src.getOperand(0));
14452 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
14453 isTypeLegal(Src.getOperand(1).getValueType())) {
14454 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14455 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
14456 Src.getOperand(0), Src.getOperand(1));
14457 DCI.CombineTo(N, Res);
14458 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
14459 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
14460 return SDValue(N, 0); // Return N so it doesn't get rechecked.
14463 return SDValue();
14464 case RISCVISD::TRUNCATE_VECTOR_VL: {
14465 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
14466 // This benefits the cases where X and Y are both low-precision vectors of the
14467 // same value type. Since the truncate would be lowered into n levels of
14468 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such a
14469 // pattern would otherwise be expanded into a series of "vsetvli" and "vnsrl"
14470 // instructions later.
14471 auto IsTruncNode = [](SDValue V) {
14472 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
14473 return false;
14474 SDValue VL = V.getOperand(2);
14475 auto *C = dyn_cast<ConstantSDNode>(VL);
14476 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
14477 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
14478 (isa<RegisterSDNode>(VL) &&
14479 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
14480 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
14481 IsVLMAXForVMSET;
14484 SDValue Op = N->getOperand(0);
14486 // We first need to find the innermost TRUNCATE_VECTOR_VL node
14487 // to recognize such a pattern.
14488 while (IsTruncNode(Op)) {
14489 if (!Op.hasOneUse())
14490 return SDValue();
14491 Op = Op.getOperand(0);
14494 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
14495 SDValue N0 = Op.getOperand(0);
14496 SDValue N1 = Op.getOperand(1);
14497 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
14498 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
14499 SDValue N00 = N0.getOperand(0);
14500 SDValue N10 = N1.getOperand(0);
14501 if (N00.getValueType().isVector() &&
14502 N00.getValueType() == N10.getValueType() &&
14503 N->getValueType(0) == N10.getValueType()) {
14504 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
14505 SDValue SMin = DAG.getNode(
14506 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
14507 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
14508 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
14512 break;
14514 case ISD::TRUNCATE:
14515 return performTRUNCATECombine(N, DAG, Subtarget);
14516 case ISD::SELECT:
14517 return performSELECTCombine(N, DAG, Subtarget);
14518 case RISCVISD::CZERO_EQZ:
14519 case RISCVISD::CZERO_NEZ:
14520 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
14521 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
14522 if (N->getOperand(1).getOpcode() == ISD::XOR &&
14523 isOneConstant(N->getOperand(1).getOperand(1))) {
14524 SDValue Cond = N->getOperand(1).getOperand(0);
14525 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
14526 if (DAG.MaskedValueIsZero(Cond, Mask)) {
14527 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
14528 ? RISCVISD::CZERO_NEZ
14529 : RISCVISD::CZERO_EQZ;
14530 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
14531 N->getOperand(0), Cond);
14534 return SDValue();
14536 case RISCVISD::SELECT_CC: {
14537 // Transform
14538 SDValue LHS = N->getOperand(0);
14539 SDValue RHS = N->getOperand(1);
14540 SDValue CC = N->getOperand(2);
14541 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
14542 SDValue TrueV = N->getOperand(3);
14543 SDValue FalseV = N->getOperand(4);
14544 SDLoc DL(N);
14545 EVT VT = N->getValueType(0);
14547 // If the True and False values are the same, we don't need a select_cc.
14548 if (TrueV == FalseV)
14549 return TrueV;
14551 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
14552 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
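// e.g. on RV64, (select (setlt X, 0), 5, 3) -> (add (and (sra X, 63), 2), 3):
// (sra X, 63) is all-ones exactly when X is negative, giving 2 + 3 = 5, and 0
// otherwise, giving 3 (both constants must fit in simm12, as checked below).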
14553 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
14554 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
14555 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
14556 if (CCVal == ISD::CondCode::SETGE)
14557 std::swap(TrueV, FalseV);
14559 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
14560 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
14561 // Only handle simm12; if it is not in this range, it can be considered as a
14562 // register.
14563 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
14564 isInt<12>(TrueSImm - FalseSImm)) {
14565 SDValue SRA =
14566 DAG.getNode(ISD::SRA, DL, VT, LHS,
14567 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
14568 SDValue AND =
14569 DAG.getNode(ISD::AND, DL, VT, SRA,
14570 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
14571 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
14574 if (CCVal == ISD::CondCode::SETGE)
14575 std::swap(TrueV, FalseV);
14578 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
14579 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
14580 {LHS, RHS, CC, TrueV, FalseV});
14582 if (!Subtarget.hasShortForwardBranchOpt()) {
14583 // (select c, -1, y) -> -c | y
14584 if (isAllOnesConstant(TrueV)) {
14585 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
14586 SDValue Neg = DAG.getNegative(C, DL, VT);
14587 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
14589 // (select c, y, -1) -> -!c | y
14590 if (isAllOnesConstant(FalseV)) {
14591 SDValue C =
14592 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
14593 SDValue Neg = DAG.getNegative(C, DL, VT);
14594 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
14597 // (select c, 0, y) -> -!c & y
14598 if (isNullConstant(TrueV)) {
14599 SDValue C =
14600 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
14601 SDValue Neg = DAG.getNegative(C, DL, VT);
14602 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
14604 // (select c, y, 0) -> -c & y
14605 if (isNullConstant(FalseV)) {
14606 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
14607 SDValue Neg = DAG.getNegative(C, DL, VT);
14608 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
14610 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
14611 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
14612 if (((isOneConstant(FalseV) && LHS == TrueV &&
14613 CCVal == ISD::CondCode::SETNE) ||
14614 (isOneConstant(TrueV) && LHS == FalseV &&
14615 CCVal == ISD::CondCode::SETEQ)) &&
14616 isNullConstant(RHS)) {
14617 // freeze it to be safe.
14618 LHS = DAG.getFreeze(LHS);
14619 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
14620 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
14624 // If both true/false are an xor with 1, pull through the select.
14625 // This can occur after op legalization if both operands are setccs that
14626 // require an xor to invert.
14627 // FIXME: Generalize to other binary ops with identical operand?
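    // That is:
    //   (select_cc lhs, rhs, cc, (xor a, 1), (xor b, 1))
    //     -> (xor (select_cc lhs, rhs, cc, a, b), 1)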
14628 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
14629 TrueV.getOperand(1) == FalseV.getOperand(1) &&
14630 isOneConstant(TrueV.getOperand(1)) &&
14631 TrueV.hasOneUse() && FalseV.hasOneUse()) {
14632 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
14633 TrueV.getOperand(0), FalseV.getOperand(0));
14634 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
14637 return SDValue();
14639 case RISCVISD::BR_CC: {
14640 SDValue LHS = N->getOperand(1);
14641 SDValue RHS = N->getOperand(2);
14642 SDValue CC = N->getOperand(3);
14643 SDLoc DL(N);
14645 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
14646 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
14647 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
14649 return SDValue();
14651 case ISD::BITREVERSE:
14652 return performBITREVERSECombine(N, DAG, Subtarget);
14653 case ISD::FP_TO_SINT:
14654 case ISD::FP_TO_UINT:
14655 return performFP_TO_INTCombine(N, DCI, Subtarget);
14656 case ISD::FP_TO_SINT_SAT:
14657 case ISD::FP_TO_UINT_SAT:
14658 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
14659 case ISD::FCOPYSIGN: {
14660 EVT VT = N->getValueType(0);
14661 if (!VT.isVector())
14662 break;
14663     // There is a form of VFSGNJ which injects the negated sign of its second
14664     // operand. Try to bubble any FNEG up past the extend/round so that this
14665     // optimized pattern can be formed. Avoid modifying cases where the
14666     // FP_ROUND has TRUNC=1.
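    // Illustrative rewrite performed below:
    //   (fcopysign X, (fp_extend (fneg Y)))
    //     -> (fcopysign X, (fneg (fp_extend Y)))
    // which can then be matched as a single vfsgnjn instead of a separate
    // negate feeding a vfsgnj.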
14667 SDValue In2 = N->getOperand(1);
14668 // Avoid cases where the extend/round has multiple uses, as duplicating
14669 // those is typically more expensive than removing a fneg.
14670 if (!In2.hasOneUse())
14671 break;
14672 if (In2.getOpcode() != ISD::FP_EXTEND &&
14673 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
14674 break;
14675 In2 = In2.getOperand(0);
14676 if (In2.getOpcode() != ISD::FNEG)
14677 break;
14678 SDLoc DL(N);
14679 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
14680 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
14681 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
14683 case ISD::MGATHER: {
14684 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
14685 const EVT VT = N->getValueType(0);
14686 SDValue Index = MGN->getIndex();
14687 SDValue ScaleOp = MGN->getScale();
14688 ISD::MemIndexType IndexType = MGN->getIndexType();
14689 assert(!MGN->isIndexScaled() &&
14690 "Scaled gather/scatter should not be formed");
14692 SDLoc DL(N);
14693 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14694 return DAG.getMaskedGather(
14695 N->getVTList(), MGN->getMemoryVT(), DL,
14696 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
14697 MGN->getBasePtr(), Index, ScaleOp},
14698 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
14700 if (narrowIndex(Index, IndexType, DAG))
14701 return DAG.getMaskedGather(
14702 N->getVTList(), MGN->getMemoryVT(), DL,
14703 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
14704 MGN->getBasePtr(), Index, ScaleOp},
14705 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
14707 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
14708 MGN->getExtensionType() == ISD::NON_EXTLOAD) {
14709 if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
14710 SimpleVID && SimpleVID->StepDenominator == 1) {
14711 const int64_t StepNumerator = SimpleVID->StepNumerator;
14712 const int64_t Addend = SimpleVID->Addend;
14714 // Note: We don't need to check alignment here since (by assumption
14715         // from the existence of the gather), our offsets must be sufficiently
14716 // aligned.
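        // Illustrative example: a gather whose index operand is the affine
        // sequence <A, A+S, A+2*S, ...> is rewritten below as a
        // riscv_masked_strided_load from (BasePtr + A) with constant stride S,
        // avoiding materialisation of the index vector.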
14718 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
14719 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
14720 assert(IndexType == ISD::UNSIGNED_SCALED);
14721 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
14722 DAG.getConstant(Addend, DL, PtrVT));
14724 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
14725 SDValue IntID =
14726 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
14727 XLenVT);
14728 SDValue Ops[] =
14729 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
14730 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
14731 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
14732 Ops, VT, MGN->getMemOperand());
14736 SmallVector<int> ShuffleMask;
14737 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
14738 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
14739 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
14740 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
14741 MGN->getMask(), DAG.getUNDEF(VT),
14742 MGN->getMemoryVT(), MGN->getMemOperand(),
14743 ISD::UNINDEXED, ISD::NON_EXTLOAD);
14744 SDValue Shuffle =
14745 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
14746 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
14749 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
14750 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
14751 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
14752 SmallVector<SDValue> NewIndices;
14753 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
14754 NewIndices.push_back(Index.getOperand(i));
14755 EVT IndexVT = Index.getValueType()
14756 .getHalfNumVectorElementsVT(*DAG.getContext());
14757 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
14759 unsigned ElementSize = VT.getScalarStoreSize();
14760 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
14761 auto EltCnt = VT.getVectorElementCount();
14762 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
14763 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
14764 EltCnt.divideCoefficientBy(2));
14765 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
14766 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
14767 EltCnt.divideCoefficientBy(2));
14768 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
14770 SDValue Gather =
14771 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
14772 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
14773 Index, ScaleOp},
14774 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
14775 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
14776 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
14778 break;
14780 case ISD::MSCATTER:{
14781 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
14782 SDValue Index = MSN->getIndex();
14783 SDValue ScaleOp = MSN->getScale();
14784 ISD::MemIndexType IndexType = MSN->getIndexType();
14785 assert(!MSN->isIndexScaled() &&
14786 "Scaled gather/scatter should not be formed");
14788 SDLoc DL(N);
14789 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14790 return DAG.getMaskedScatter(
14791 N->getVTList(), MSN->getMemoryVT(), DL,
14792 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
14793 Index, ScaleOp},
14794 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
14796 if (narrowIndex(Index, IndexType, DAG))
14797 return DAG.getMaskedScatter(
14798 N->getVTList(), MSN->getMemoryVT(), DL,
14799 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
14800 Index, ScaleOp},
14801 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
14803 EVT VT = MSN->getValue()->getValueType(0);
14804 SmallVector<int> ShuffleMask;
14805 if (!MSN->isTruncatingStore() &&
14806 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
14807 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
14808 DAG.getUNDEF(VT), ShuffleMask);
14809 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
14810 DAG.getUNDEF(XLenVT), MSN->getMask(),
14811 MSN->getMemoryVT(), MSN->getMemOperand(),
14812 ISD::UNINDEXED, false);
14814 break;
14816 case ISD::VP_GATHER: {
14817 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
14818 SDValue Index = VPGN->getIndex();
14819 SDValue ScaleOp = VPGN->getScale();
14820 ISD::MemIndexType IndexType = VPGN->getIndexType();
14821 assert(!VPGN->isIndexScaled() &&
14822 "Scaled gather/scatter should not be formed");
14824 SDLoc DL(N);
14825 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14826 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
14827 {VPGN->getChain(), VPGN->getBasePtr(), Index,
14828 ScaleOp, VPGN->getMask(),
14829 VPGN->getVectorLength()},
14830 VPGN->getMemOperand(), IndexType);
14832 if (narrowIndex(Index, IndexType, DAG))
14833 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
14834 {VPGN->getChain(), VPGN->getBasePtr(), Index,
14835 ScaleOp, VPGN->getMask(),
14836 VPGN->getVectorLength()},
14837 VPGN->getMemOperand(), IndexType);
14839 break;
14841 case ISD::VP_SCATTER: {
14842 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
14843 SDValue Index = VPSN->getIndex();
14844 SDValue ScaleOp = VPSN->getScale();
14845 ISD::MemIndexType IndexType = VPSN->getIndexType();
14846 assert(!VPSN->isIndexScaled() &&
14847 "Scaled gather/scatter should not be formed");
14849 SDLoc DL(N);
14850 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
14851 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
14852 {VPSN->getChain(), VPSN->getValue(),
14853 VPSN->getBasePtr(), Index, ScaleOp,
14854 VPSN->getMask(), VPSN->getVectorLength()},
14855 VPSN->getMemOperand(), IndexType);
14857 if (narrowIndex(Index, IndexType, DAG))
14858 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
14859 {VPSN->getChain(), VPSN->getValue(),
14860 VPSN->getBasePtr(), Index, ScaleOp,
14861 VPSN->getMask(), VPSN->getVectorLength()},
14862 VPSN->getMemOperand(), IndexType);
14863 break;
14865 case RISCVISD::SRA_VL:
14866 case RISCVISD::SRL_VL:
14867 case RISCVISD::SHL_VL: {
14868 SDValue ShAmt = N->getOperand(1);
14869 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
14870 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
14871 SDLoc DL(N);
14872 SDValue VL = N->getOperand(4);
14873 EVT VT = N->getValueType(0);
14874 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
14875 ShAmt.getOperand(1), VL);
14876 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
14877 N->getOperand(2), N->getOperand(3), N->getOperand(4));
14879 break;
14881 case ISD::SRA:
14882 if (SDValue V = performSRACombine(N, DAG, Subtarget))
14883 return V;
14884 [[fallthrough]];
14885 case ISD::SRL:
14886 case ISD::SHL: {
14887 SDValue ShAmt = N->getOperand(1);
14888 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
14889 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
14890 SDLoc DL(N);
14891 EVT VT = N->getValueType(0);
14892 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
14893 ShAmt.getOperand(1),
14894 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
14895 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
14897 break;
14899 case RISCVISD::ADD_VL:
14900 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
14901 return V;
14902 return combineToVWMACC(N, DAG, Subtarget);
14903 case RISCVISD::SUB_VL:
14904 case RISCVISD::VWADD_W_VL:
14905 case RISCVISD::VWADDU_W_VL:
14906 case RISCVISD::VWSUB_W_VL:
14907 case RISCVISD::VWSUBU_W_VL:
14908 case RISCVISD::MUL_VL:
14909 return combineBinOp_VLToVWBinOp_VL(N, DCI);
14910 case RISCVISD::VFMADD_VL:
14911 case RISCVISD::VFNMADD_VL:
14912 case RISCVISD::VFMSUB_VL:
14913 case RISCVISD::VFNMSUB_VL:
14914 case RISCVISD::STRICT_VFMADD_VL:
14915 case RISCVISD::STRICT_VFNMADD_VL:
14916 case RISCVISD::STRICT_VFMSUB_VL:
14917 case RISCVISD::STRICT_VFNMSUB_VL:
14918 return performVFMADD_VLCombine(N, DAG, Subtarget);
14919 case RISCVISD::FMUL_VL:
14920 return performVFMUL_VLCombine(N, DAG, Subtarget);
14921 case RISCVISD::FADD_VL:
14922 case RISCVISD::FSUB_VL:
14923 return performFADDSUB_VLCombine(N, DAG, Subtarget);
14924 case ISD::LOAD:
14925 case ISD::STORE: {
14926 if (DCI.isAfterLegalizeDAG())
14927 if (SDValue V = performMemPairCombine(N, DCI))
14928 return V;
14930 if (N->getOpcode() != ISD::STORE)
14931 break;
14933 auto *Store = cast<StoreSDNode>(N);
14934 SDValue Chain = Store->getChain();
14935 EVT MemVT = Store->getMemoryVT();
14936 SDValue Val = Store->getValue();
14937 SDLoc DL(N);
14939 bool IsScalarizable =
14940 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
14941 Store->isSimple() &&
14942 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
14943 isPowerOf2_64(MemVT.getSizeInBits()) &&
14944 MemVT.getSizeInBits() <= Subtarget.getXLen();
14946 // If sufficiently aligned we can scalarize stores of constant vectors of
14947 // any power-of-two size up to XLen bits, provided that they aren't too
14948 // expensive to materialize.
14949 // vsetivli zero, 2, e8, m1, ta, ma
14950 // vmv.v.i v8, 4
14951 // vse64.v v8, (a0)
14952 // ->
14953 // li a1, 1028
14954 // sh a1, 0(a0)
14955 if (DCI.isBeforeLegalize() && IsScalarizable &&
14956 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
14957 // Get the constant vector bits
14958 APInt NewC(Val.getValueSizeInBits(), 0);
14959 uint64_t EltSize = Val.getScalarValueSizeInBits();
14960 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
14961 if (Val.getOperand(i).isUndef())
14962 continue;
14963 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
14964 i * EltSize);
14966 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
14968 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
14969 Subtarget.getFeatureBits(), true) <= 2 &&
14970 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
14971 NewVT, *Store->getMemOperand())) {
14972 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
14973 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
14974 Store->getPointerInfo(), Store->getOriginalAlign(),
14975 Store->getMemOperand()->getFlags());
14979 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
14980 // vsetivli zero, 2, e16, m1, ta, ma
14981 // vle16.v v8, (a0)
14982 // vse16.v v8, (a1)
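    // ->
    // lw   a2, 0(a0)
    // sw   a2, 0(a1)
    // (illustrative scalarized form; register choice and XLEN >= 32 assumed)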
14983 if (auto *L = dyn_cast<LoadSDNode>(Val);
14984 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
14985 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
14986 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
14987 L->getMemoryVT() == MemVT) {
14988 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
14989 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
14990 NewVT, *Store->getMemOperand()) &&
14991 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
14992 NewVT, *L->getMemOperand())) {
14993 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
14994 L->getPointerInfo(), L->getOriginalAlign(),
14995 L->getMemOperand()->getFlags());
14996 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
14997 Store->getPointerInfo(), Store->getOriginalAlign(),
14998 Store->getMemOperand()->getFlags());
15002 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
15003 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
15004 // any illegal types.
15005 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
15006 (DCI.isAfterLegalizeDAG() &&
15007 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15008 isNullConstant(Val.getOperand(1)))) {
15009 SDValue Src = Val.getOperand(0);
15010 MVT VecVT = Src.getSimpleValueType();
15011 // VecVT should be scalable and memory VT should match the element type.
15012 if (VecVT.isScalableVector() &&
15013 MemVT == VecVT.getVectorElementType()) {
15014 SDLoc DL(N);
15015 MVT MaskVT = getMaskTypeFor(VecVT);
15016 return DAG.getStoreVP(
15017 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
15018 DAG.getConstant(1, DL, MaskVT),
15019 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
15020 Store->getMemOperand(), Store->getAddressingMode(),
15021 Store->isTruncatingStore(), /*IsCompress*/ false);
15025 break;
15027 case ISD::SPLAT_VECTOR: {
15028 EVT VT = N->getValueType(0);
15029 // Only perform this combine on legal MVT types.
15030 if (!isTypeLegal(VT))
15031 break;
15032 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
15033 DAG, Subtarget))
15034 return Gather;
15035 break;
15037 case ISD::BUILD_VECTOR:
15038 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
15039 return V;
15040 break;
15041 case ISD::CONCAT_VECTORS:
15042 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
15043 return V;
15044 break;
15045 case RISCVISD::VFMV_V_F_VL: {
15046 const MVT VT = N->getSimpleValueType(0);
15047 SDValue Passthru = N->getOperand(0);
15048 SDValue Scalar = N->getOperand(1);
15049 SDValue VL = N->getOperand(2);
15051 // If VL is 1, we can use vfmv.s.f.
15052 if (isOneConstant(VL))
15053 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
15054 break;
15056 case RISCVISD::VMV_V_X_VL: {
15057 const MVT VT = N->getSimpleValueType(0);
15058 SDValue Passthru = N->getOperand(0);
15059 SDValue Scalar = N->getOperand(1);
15060 SDValue VL = N->getOperand(2);
15062 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
15063 // scalar input.
15064 unsigned ScalarSize = Scalar.getValueSizeInBits();
15065 unsigned EltWidth = VT.getScalarSizeInBits();
15066 if (ScalarSize > EltWidth && Passthru.isUndef())
15067 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
15068 return SDValue(N, 0);
15070     // If VL is 1 and the scalar value won't benefit from an immediate, we can
15071     // use vmv.s.x.
15072 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15073 if (isOneConstant(VL) &&
15074 (!Const || Const->isZero() ||
15075 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
15076 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
15078 break;
15080 case RISCVISD::VFMV_S_F_VL: {
15081 SDValue Src = N->getOperand(1);
15082 // Try to remove vector->scalar->vector if the scalar->vector is inserting
15083 // into an undef vector.
15084 // TODO: Could use a vslide or vmv.v.v for non-undef.
15085 if (N->getOperand(0).isUndef() &&
15086 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15087 isNullConstant(Src.getOperand(1)) &&
15088 Src.getOperand(0).getValueType().isScalableVector()) {
15089 EVT VT = N->getValueType(0);
15090 EVT SrcVT = Src.getOperand(0).getValueType();
15091 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
15092 // Widths match, just return the original vector.
15093 if (SrcVT == VT)
15094 return Src.getOperand(0);
15095 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
15097 [[fallthrough]];
15099 case RISCVISD::VMV_S_X_VL: {
15100 const MVT VT = N->getSimpleValueType(0);
15101 SDValue Passthru = N->getOperand(0);
15102 SDValue Scalar = N->getOperand(1);
15103 SDValue VL = N->getOperand(2);
15105     // Use M1 or smaller to avoid over-constraining register allocation.
15106 const MVT M1VT = getLMUL1VT(VT);
15107 if (M1VT.bitsLT(VT)) {
15108 SDValue M1Passthru =
15109 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
15110 DAG.getVectorIdxConstant(0, DL));
15111 SDValue Result =
15112 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
15113 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
15114 DAG.getConstant(0, DL, XLenVT));
15115 return Result;
15118 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
15119 // higher would involve overly constraining the register allocator for
15120 // no purpose.
15121 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
15122 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
15123 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
15124 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
15126 break;
15128 case ISD::INTRINSIC_VOID:
15129 case ISD::INTRINSIC_W_CHAIN:
15130 case ISD::INTRINSIC_WO_CHAIN: {
15131 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
15132 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
15133 switch (IntNo) {
15134 // By default we do not combine any intrinsic.
15135 default:
15136 return SDValue();
15137 case Intrinsic::riscv_masked_strided_load: {
15138 MVT VT = N->getSimpleValueType(0);
15139 auto *Load = cast<MemIntrinsicSDNode>(N);
15140 SDValue PassThru = N->getOperand(2);
15141 SDValue Base = N->getOperand(3);
15142 SDValue Stride = N->getOperand(4);
15143 SDValue Mask = N->getOperand(5);
15145 // If the stride is equal to the element size in bytes, we can use
15146 // a masked.load.
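      // For example, a strided load of e32 elements with a 4-byte stride reads
      // consecutive words, so it is equivalent to a unit-stride masked load
      // and is rewritten as such below.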
15147 const unsigned ElementSize = VT.getScalarStoreSize();
15148 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
15149 StrideC && StrideC->getZExtValue() == ElementSize)
15150 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
15151 DAG.getUNDEF(XLenVT), Mask, PassThru,
15152 Load->getMemoryVT(), Load->getMemOperand(),
15153 ISD::UNINDEXED, ISD::NON_EXTLOAD);
15154 return SDValue();
15156 case Intrinsic::riscv_masked_strided_store: {
15157 auto *Store = cast<MemIntrinsicSDNode>(N);
15158 SDValue Value = N->getOperand(2);
15159 SDValue Base = N->getOperand(3);
15160 SDValue Stride = N->getOperand(4);
15161 SDValue Mask = N->getOperand(5);
15163 // If the stride is equal to the element size in bytes, we can use
15164 // a masked.store.
15165 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
15166 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
15167 StrideC && StrideC->getZExtValue() == ElementSize)
15168 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
15169 DAG.getUNDEF(XLenVT), Mask,
15170 Store->getMemoryVT(), Store->getMemOperand(),
15171 ISD::UNINDEXED, false);
15172 return SDValue();
15174 case Intrinsic::riscv_vcpop:
15175 case Intrinsic::riscv_vcpop_mask:
15176 case Intrinsic::riscv_vfirst:
15177 case Intrinsic::riscv_vfirst_mask: {
15178 SDValue VL = N->getOperand(2);
15179 if (IntNo == Intrinsic::riscv_vcpop_mask ||
15180 IntNo == Intrinsic::riscv_vfirst_mask)
15181 VL = N->getOperand(3);
15182 if (!isNullConstant(VL))
15183 return SDValue();
15184 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
15185 SDLoc DL(N);
15186 EVT VT = N->getValueType(0);
15187 if (IntNo == Intrinsic::riscv_vfirst ||
15188 IntNo == Intrinsic::riscv_vfirst_mask)
15189 return DAG.getConstant(-1, DL, VT);
15190 return DAG.getConstant(0, DL, VT);
15194 case ISD::BITCAST: {
15195 assert(Subtarget.useRVVForFixedLengthVectors());
15196 SDValue N0 = N->getOperand(0);
15197 EVT VT = N->getValueType(0);
15198 EVT SrcVT = N0.getValueType();
15199 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
15200 // type, widen both sides to avoid a trip through memory.
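    // For example, (i4 (bitcast (v4i1 X))) becomes
    //   (i4 (trunc (i8 (bitcast (v8i1 (concat_vectors X, undef))))))
    // so the cast stays in registers.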
15201 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
15202 VT.isScalarInteger()) {
15203 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
15204 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
15205 Ops[0] = N0;
15206 SDLoc DL(N);
15207 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
15208 N0 = DAG.getBitcast(MVT::i8, N0);
15209 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
15212 return SDValue();
15216 return SDValue();
15219 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
15220 EVT XVT, unsigned KeptBits) const {
15221   // For vectors, we don't have a preference.
15222 if (XVT.isVector())
15223 return false;
15225 if (XVT != MVT::i32 && XVT != MVT::i64)
15226 return false;
15228 // We can use sext.w for RV64 or an srai 31 on RV32.
15229 if (KeptBits == 32 || KeptBits == 64)
15230 return true;
15232 // With Zbb we can use sext.h/sext.b.
15233 return Subtarget.hasStdExtZbb() &&
15234 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
15235 KeptBits == 16);
15238 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
15239 const SDNode *N, CombineLevel Level) const {
15240 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
15241 N->getOpcode() == ISD::SRL) &&
15242 "Expected shift op");
15244 // The following folds are only desirable if `(OP _, c1 << c2)` can be
15245 // materialised in fewer instructions than `(OP _, c1)`:
15247 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
15248 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
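  // Illustrative example (assuming the simm12 addi immediate range):
  //   (shl (add x, 37), 2) -> (add (shl x, 2), 148)
  // is desirable since 148 is still a legal add immediate, whereas
  //   (shl (add x, 2047), 4) -> (add (shl x, 4), 32752)
  // is rejected below because 2047 is free to materialise but 32752 is not.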
15249 SDValue N0 = N->getOperand(0);
15250 EVT Ty = N0.getValueType();
15251 if (Ty.isScalarInteger() &&
15252 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
15253 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15254 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15255 if (C1 && C2) {
15256 const APInt &C1Int = C1->getAPIntValue();
15257 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
15259 // We can materialise `c1 << c2` into an add immediate, so it's "free",
15260 // and the combine should happen, to potentially allow further combines
15261 // later.
15262 if (ShiftedC1Int.getSignificantBits() <= 64 &&
15263 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
15264 return true;
15266 // We can materialise `c1` in an add immediate, so it's "free", and the
15267 // combine should be prevented.
15268 if (C1Int.getSignificantBits() <= 64 &&
15269 isLegalAddImmediate(C1Int.getSExtValue()))
15270 return false;
15272 // Neither constant will fit into an immediate, so find materialisation
15273 // costs.
15274 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
15275 Subtarget.getFeatureBits(),
15276 /*CompressionCost*/true);
15277 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
15278 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
15279 /*CompressionCost*/true);
15281 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
15282 // combine should be prevented.
15283 if (C1Cost < ShiftedC1Cost)
15284 return false;
15287 return true;
15290 bool RISCVTargetLowering::targetShrinkDemandedConstant(
15291 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
15292 TargetLoweringOpt &TLO) const {
15293 // Delay this optimization as late as possible.
15294 if (!TLO.LegalOps)
15295 return false;
15297 EVT VT = Op.getValueType();
15298 if (VT.isVector())
15299 return false;
15301 unsigned Opcode = Op.getOpcode();
15302 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
15303 return false;
15305 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15306 if (!C)
15307 return false;
15309 const APInt &Mask = C->getAPIntValue();
15311 // Clear all non-demanded bits initially.
15312 APInt ShrunkMask = Mask & DemandedBits;
15314 // Try to make a smaller immediate by setting undemanded bits.
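  // Illustrative example: for (and X, 0xFFFC) where only bits 2..15 are
  // demanded, the undemanded bits allow the mask to be widened to 0xFFFF,
  // which can be matched as zext.h (or slli+srli) instead of materialising
  // 0xFFFC with lui+addi.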
15316 APInt ExpandedMask = Mask | ~DemandedBits;
15318 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
15319 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
15321 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
15322 if (NewMask == Mask)
15323 return true;
15324 SDLoc DL(Op);
15325 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
15326 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
15327 Op.getOperand(0), NewC);
15328 return TLO.CombineTo(Op, NewOp);
15331 // If the shrunk mask fits in sign extended 12 bits, let the target
15332 // independent code apply it.
15333 if (ShrunkMask.isSignedIntN(12))
15334 return false;
15336   // AND has a few special cases for zext.
15337 if (Opcode == ISD::AND) {
15338 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
15339 // otherwise use SLLI + SRLI.
15340 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
15341 if (IsLegalMask(NewMask))
15342 return UseMask(NewMask);
15344 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
15345 if (VT == MVT::i64) {
15346 APInt NewMask = APInt(64, 0xffffffff);
15347 if (IsLegalMask(NewMask))
15348 return UseMask(NewMask);
15352 // For the remaining optimizations, we need to be able to make a negative
15353 // number through a combination of mask and undemanded bits.
15354 if (!ExpandedMask.isNegative())
15355 return false;
15357   // Determine the fewest number of bits we need to represent the negative number.
15358 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
15360   // Try to make a 12-bit negative immediate. If that fails, try to make a
15361   // 32-bit negative immediate unless the shrunk immediate already fits in 32 bits.
15362   // If we can't create a simm12, we shouldn't change opaque constants.
15363 APInt NewMask = ShrunkMask;
15364 if (MinSignedBits <= 12)
15365 NewMask.setBitsFrom(11);
15366 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
15367 NewMask.setBitsFrom(31);
15368 else
15369 return false;
15371 // Check that our new mask is a subset of the demanded mask.
15372 assert(IsLegalMask(NewMask));
15373 return UseMask(NewMask);
15376 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
15377 static const uint64_t GREVMasks[] = {
15378 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
15379 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
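  // With ShAmt == 7 the three low stages swap at distances 1, 2 and 4, which
  // reverses the bits within each byte (brev8). The IsGORC form ORs the
  // original value back in at every stage, so ShAmt == 7 instead sets all
  // bits of any byte that contains a set bit (orc.b).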
15381 for (unsigned Stage = 0; Stage != 6; ++Stage) {
15382 unsigned Shift = 1 << Stage;
15383 if (ShAmt & Shift) {
15384 uint64_t Mask = GREVMasks[Stage];
15385 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
15386 if (IsGORC)
15387 Res |= x;
15388 x = Res;
15392 return x;
15395 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15396 KnownBits &Known,
15397 const APInt &DemandedElts,
15398 const SelectionDAG &DAG,
15399 unsigned Depth) const {
15400 unsigned BitWidth = Known.getBitWidth();
15401 unsigned Opc = Op.getOpcode();
15402 assert((Opc >= ISD::BUILTIN_OP_END ||
15403 Opc == ISD::INTRINSIC_WO_CHAIN ||
15404 Opc == ISD::INTRINSIC_W_CHAIN ||
15405 Opc == ISD::INTRINSIC_VOID) &&
15406 "Should use MaskedValueIsZero if you don't know whether Op"
15407 " is a target node!");
15409 Known.resetAll();
15410 switch (Opc) {
15411 default: break;
15412 case RISCVISD::SELECT_CC: {
15413 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
15414 // If we don't know any bits, early out.
15415 if (Known.isUnknown())
15416 break;
15417 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
15419 // Only known if known in both the LHS and RHS.
15420 Known = Known.intersectWith(Known2);
15421 break;
15423 case RISCVISD::CZERO_EQZ:
15424 case RISCVISD::CZERO_NEZ:
15425 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15426 // Result is either all zero or operand 0. We can propagate zeros, but not
15427 // ones.
15428 Known.One.clearAllBits();
15429 break;
15430 case RISCVISD::REMUW: {
15431 KnownBits Known2;
15432 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
15433 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
15434 // We only care about the lower 32 bits.
15435 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
15436 // Restore the original width by sign extending.
15437 Known = Known.sext(BitWidth);
15438 break;
15440 case RISCVISD::DIVUW: {
15441 KnownBits Known2;
15442 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
15443 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
15444 // We only care about the lower 32 bits.
15445 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
15446 // Restore the original width by sign extending.
15447 Known = Known.sext(BitWidth);
15448 break;
15450 case RISCVISD::CTZW: {
15451 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15452 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
15453 unsigned LowBits = llvm::bit_width(PossibleTZ);
15454 Known.Zero.setBitsFrom(LowBits);
15455 break;
15457 case RISCVISD::CLZW: {
15458 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15459 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
15460 unsigned LowBits = llvm::bit_width(PossibleLZ);
15461 Known.Zero.setBitsFrom(LowBits);
15462 break;
15464 case RISCVISD::BREV8:
15465 case RISCVISD::ORC_B: {
15466 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
15467 // control value of 7 is equivalent to brev8 and orc.b.
15468 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15469 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
15470 // To compute zeros, we need to invert the value and invert it back after.
15471 Known.Zero =
15472 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
15473 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
15474 break;
15476 case RISCVISD::READ_VLENB: {
15477 // We can use the minimum and maximum VLEN values to bound VLENB. We
15478 // know VLEN must be a power of two.
15479 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
15480 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
15481 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
15482 Known.Zero.setLowBits(Log2_32(MinVLenB));
15483 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
15484 if (MaxVLenB == MinVLenB)
15485 Known.One.setBit(Log2_32(MinVLenB));
15486 break;
15488 case RISCVISD::FPCLASS: {
15489 // fclass will only set one of the low 10 bits.
15490 Known.Zero.setBitsFrom(10);
15491 break;
15493 case ISD::INTRINSIC_W_CHAIN:
15494 case ISD::INTRINSIC_WO_CHAIN: {
15495 unsigned IntNo =
15496 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
15497 switch (IntNo) {
15498 default:
15499 // We can't do anything for most intrinsics.
15500 break;
15501 case Intrinsic::riscv_vsetvli:
15502 case Intrinsic::riscv_vsetvlimax:
15503       // Assume that VL output is <= 65536.
15504 // TODO: Take SEW and LMUL into account.
15505 if (BitWidth > 17)
15506 Known.Zero.setBitsFrom(17);
15507 break;
15509 break;
15514 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
15515 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
15516 unsigned Depth) const {
15517 switch (Op.getOpcode()) {
15518 default:
15519 break;
15520 case RISCVISD::SELECT_CC: {
15521 unsigned Tmp =
15522 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
15523 if (Tmp == 1) return 1; // Early out.
15524 unsigned Tmp2 =
15525 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
15526 return std::min(Tmp, Tmp2);
15528 case RISCVISD::CZERO_EQZ:
15529 case RISCVISD::CZERO_NEZ:
15530 // Output is either all zero or operand 0. We can propagate sign bit count
15531 // from operand 0.
15532 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
15533 case RISCVISD::ABSW: {
15534 // We expand this at isel to negw+max. The result will have 33 sign bits
15535 // if the input has at least 33 sign bits.
15536 unsigned Tmp =
15537 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
15538 if (Tmp < 33) return 1;
15539 return 33;
15541 case RISCVISD::SLLW:
15542 case RISCVISD::SRAW:
15543 case RISCVISD::SRLW:
15544 case RISCVISD::DIVW:
15545 case RISCVISD::DIVUW:
15546 case RISCVISD::REMUW:
15547 case RISCVISD::ROLW:
15548 case RISCVISD::RORW:
15549 case RISCVISD::FCVT_W_RV64:
15550 case RISCVISD::FCVT_WU_RV64:
15551 case RISCVISD::STRICT_FCVT_W_RV64:
15552 case RISCVISD::STRICT_FCVT_WU_RV64:
15553 // TODO: As the result is sign-extended, this is conservatively correct. A
15554 // more precise answer could be calculated for SRAW depending on known
15555 // bits in the shift amount.
15556 return 33;
15557 case RISCVISD::VMV_X_S: {
15558 // The number of sign bits of the scalar result is computed by obtaining the
15559 // element type of the input vector operand, subtracting its width from the
15560 // XLEN, and then adding one (sign bit within the element type). If the
15561 // element type is wider than XLen, the least-significant XLEN bits are
15562 // taken.
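    // For example, with XLEN == 64 and an i8 element type the result is known
    // to have 64 - 8 + 1 = 57 sign bits.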
15563 unsigned XLen = Subtarget.getXLen();
15564 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
15565 if (EltBits <= XLen)
15566 return XLen - EltBits + 1;
15567 break;
15569 case ISD::INTRINSIC_W_CHAIN: {
15570 unsigned IntNo = Op.getConstantOperandVal(1);
15571 switch (IntNo) {
15572 default:
15573 break;
15574 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
15575 case Intrinsic::riscv_masked_atomicrmw_add_i64:
15576 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
15577 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
15578 case Intrinsic::riscv_masked_atomicrmw_max_i64:
15579 case Intrinsic::riscv_masked_atomicrmw_min_i64:
15580 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
15581 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
15582 case Intrinsic::riscv_masked_cmpxchg_i64:
15583 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
15584 // narrow atomic operation. These are implemented using atomic
15585 // operations at the minimum supported atomicrmw/cmpxchg width whose
15586       // operations at the minimum supported atomicrmw/cmpxchg width whose
15587       // result is then sign-extended to XLEN. With +A, the minimum width is
15588       // 32 for both RV64 and RV32.
15588 assert(Subtarget.getXLen() == 64);
15589 assert(getMinCmpXchgSizeInBits() == 32);
15590 assert(Subtarget.hasStdExtA());
15591 return 33;
15596 return 1;
15599 const Constant *
15600 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
15601 assert(Ld && "Unexpected null LoadSDNode");
15602 if (!ISD::isNormalLoad(Ld))
15603 return nullptr;
15605 SDValue Ptr = Ld->getBasePtr();
15607 // Only constant pools with no offset are supported.
15608 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
15609 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
15610 if (!CNode || CNode->isMachineConstantPoolEntry() ||
15611 CNode->getOffset() != 0)
15612 return nullptr;
15614 return CNode;
15617 // Simple case, LLA.
15618 if (Ptr.getOpcode() == RISCVISD::LLA) {
15619 auto *CNode = GetSupportedConstantPool(Ptr);
15620 if (!CNode || CNode->getTargetFlags() != 0)
15621 return nullptr;
15623 return CNode->getConstVal();
15626 // Look for a HI and ADD_LO pair.
15627 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
15628 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
15629 return nullptr;
15631 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
15632 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
15634 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
15635 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
15636 return nullptr;
15638 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
15639 return nullptr;
15641 return CNodeLo->getConstVal();
15644 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
15645 MachineBasicBlock *BB) {
15646 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
15648 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
15649 // Should the count have wrapped while it was being read, we need to try
15650 // again.
15651 // ...
15652 // read:
15653 // rdcycleh x3 # load high word of cycle
15654 // rdcycle x2 # load low word of cycle
15655 // rdcycleh x4 # load high word of cycle
15656 // bne x3, x4, read # check if high word reads match, otherwise try again
15657 // ...
15659 MachineFunction &MF = *BB->getParent();
15660 const BasicBlock *LLVM_BB = BB->getBasicBlock();
15661 MachineFunction::iterator It = ++BB->getIterator();
15663 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
15664 MF.insert(It, LoopMBB);
15666 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
15667 MF.insert(It, DoneMBB);
15669 // Transfer the remainder of BB and its successor edges to DoneMBB.
15670 DoneMBB->splice(DoneMBB->begin(), BB,
15671 std::next(MachineBasicBlock::iterator(MI)), BB->end());
15672 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
15674 BB->addSuccessor(LoopMBB);
15676 MachineRegisterInfo &RegInfo = MF.getRegInfo();
15677 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
15678 Register LoReg = MI.getOperand(0).getReg();
15679 Register HiReg = MI.getOperand(1).getReg();
15680 DebugLoc DL = MI.getDebugLoc();
15682 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
15683 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
15684 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
15685 .addReg(RISCV::X0);
15686 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
15687 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
15688 .addReg(RISCV::X0);
15689 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
15690 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
15691 .addReg(RISCV::X0);
15693 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
15694 .addReg(HiReg)
15695 .addReg(ReadAgainReg)
15696 .addMBB(LoopMBB);
15698 LoopMBB->addSuccessor(LoopMBB);
15699 LoopMBB->addSuccessor(DoneMBB);
15701 MI.eraseFromParent();
15703 return DoneMBB;
15706 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
15707 MachineBasicBlock *BB,
15708 const RISCVSubtarget &Subtarget) {
15709 assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
15710 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
15711 "Unexpected instruction");
15713 MachineFunction &MF = *BB->getParent();
15714 DebugLoc DL = MI.getDebugLoc();
15715 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
15716 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
15717 Register LoReg = MI.getOperand(0).getReg();
15718 Register HiReg = MI.getOperand(1).getReg();
15719 Register SrcReg = MI.getOperand(2).getReg();
15721 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
15722 ? &RISCV::GPRPF64RegClass
15723 : &RISCV::FPR64RegClass;
15724 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
15726 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
15727 RI, Register());
15728 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
15729 MachineMemOperand *MMOLo =
15730 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
15731 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
15732 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
15733 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
15734 .addFrameIndex(FI)
15735 .addImm(0)
15736 .addMemOperand(MMOLo);
15737 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
15738 .addFrameIndex(FI)
15739 .addImm(4)
15740 .addMemOperand(MMOHi);
15741 MI.eraseFromParent(); // The pseudo instruction is gone now.
15742 return BB;
15745 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
15746 MachineBasicBlock *BB,
15747 const RISCVSubtarget &Subtarget) {
15748 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
15749 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
15750 "Unexpected instruction");
15752 MachineFunction &MF = *BB->getParent();
15753 DebugLoc DL = MI.getDebugLoc();
15754 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
15755 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
15756 Register DstReg = MI.getOperand(0).getReg();
15757 Register LoReg = MI.getOperand(1).getReg();
15758 Register HiReg = MI.getOperand(2).getReg();
15760 const TargetRegisterClass *DstRC =
15761 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
15762 : &RISCV::FPR64RegClass;
15763 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
15765 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
15766 MachineMemOperand *MMOLo =
15767 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
15768 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
15769 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
15770 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
15771 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
15772 .addFrameIndex(FI)
15773 .addImm(0)
15774 .addMemOperand(MMOLo);
15775 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
15776 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
15777 .addFrameIndex(FI)
15778 .addImm(4)
15779 .addMemOperand(MMOHi);
15780 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
15781 MI.eraseFromParent(); // The pseudo instruction is gone now.
15782 return BB;
15785 static bool isSelectPseudo(MachineInstr &MI) {
15786 switch (MI.getOpcode()) {
15787 default:
15788 return false;
15789 case RISCV::Select_GPR_Using_CC_GPR:
15790 case RISCV::Select_FPR16_Using_CC_GPR:
15791 case RISCV::Select_FPR16INX_Using_CC_GPR:
15792 case RISCV::Select_FPR32_Using_CC_GPR:
15793 case RISCV::Select_FPR32INX_Using_CC_GPR:
15794 case RISCV::Select_FPR64_Using_CC_GPR:
15795 case RISCV::Select_FPR64INX_Using_CC_GPR:
15796 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
15797 return true;
15801 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
15802 unsigned RelOpcode, unsigned EqOpcode,
15803 const RISCVSubtarget &Subtarget) {
15804 DebugLoc DL = MI.getDebugLoc();
15805 Register DstReg = MI.getOperand(0).getReg();
15806 Register Src1Reg = MI.getOperand(1).getReg();
15807 Register Src2Reg = MI.getOperand(2).getReg();
15808 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
15809 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
15810 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
15812 // Save the current FFLAGS.
15813 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
15815 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
15816 .addReg(Src1Reg)
15817 .addReg(Src2Reg);
15818 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
15819 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
15821 // Restore the FFLAGS.
15822 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
15823 .addReg(SavedFFlags, RegState::Kill);
15825 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
15826 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
15827 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
15828 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
15829 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
15830 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
15832 // Erase the pseudoinstruction.
15833 MI.eraseFromParent();
15834 return BB;
15837 static MachineBasicBlock *
15838 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
15839 MachineBasicBlock *ThisMBB,
15840 const RISCVSubtarget &Subtarget) {
15841   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
15842 // Without this, custom-inserter would have generated:
15844 // A
15845 // | \
15846 // | B
15847 // | /
15848 // C
15849 // | \
15850 // | D
15851 // | /
15852 // E
15854 // A: X = ...; Y = ...
15855 // B: empty
15856 // C: Z = PHI [X, A], [Y, B]
15857 // D: empty
15858 // E: PHI [X, C], [Z, D]
15860 // If we lower both Select_FPRX_ in a single step, we can instead generate:
15862 // A
15863 // | \
15864 // | C
15865 // | /|
15866 // |/ |
15867 // | |
15868 // | D
15869 // | /
15870 // E
15872 // A: X = ...; Y = ...
15873 // D: empty
15874 // E: PHI [X, A], [X, C], [Y, D]
15876 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
15877 const DebugLoc &DL = First.getDebugLoc();
15878 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
15879 MachineFunction *F = ThisMBB->getParent();
15880 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
15881 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
15882 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
15883 MachineFunction::iterator It = ++ThisMBB->getIterator();
15884 F->insert(It, FirstMBB);
15885 F->insert(It, SecondMBB);
15886 F->insert(It, SinkMBB);
15888 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
15889 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
15890 std::next(MachineBasicBlock::iterator(First)),
15891 ThisMBB->end());
15892 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
15894 // Fallthrough block for ThisMBB.
15895 ThisMBB->addSuccessor(FirstMBB);
15896 // Fallthrough block for FirstMBB.
15897 FirstMBB->addSuccessor(SecondMBB);
15898 ThisMBB->addSuccessor(SinkMBB);
15899 FirstMBB->addSuccessor(SinkMBB);
15900 // This is fallthrough.
15901 SecondMBB->addSuccessor(SinkMBB);
15903 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
15904 Register FLHS = First.getOperand(1).getReg();
15905 Register FRHS = First.getOperand(2).getReg();
15906 // Insert appropriate branch.
15907 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
15908 .addReg(FLHS)
15909 .addReg(FRHS)
15910 .addMBB(SinkMBB);
15912 Register SLHS = Second.getOperand(1).getReg();
15913 Register SRHS = Second.getOperand(2).getReg();
15914 Register Op1Reg4 = First.getOperand(4).getReg();
15915 Register Op1Reg5 = First.getOperand(5).getReg();
15917 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
15918 // Insert appropriate branch.
15919 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
15920 .addReg(SLHS)
15921 .addReg(SRHS)
15922 .addMBB(SinkMBB);
15924 Register DestReg = Second.getOperand(0).getReg();
15925 Register Op2Reg4 = Second.getOperand(4).getReg();
15926 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
15927 .addReg(Op2Reg4)
15928 .addMBB(ThisMBB)
15929 .addReg(Op1Reg4)
15930 .addMBB(FirstMBB)
15931 .addReg(Op1Reg5)
15932 .addMBB(SecondMBB);
15934 // Now remove the Select_FPRX_s.
15935 First.eraseFromParent();
15936 Second.eraseFromParent();
15937 return SinkMBB;
15940 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
15941 MachineBasicBlock *BB,
15942 const RISCVSubtarget &Subtarget) {
15943 // To "insert" Select_* instructions, we actually have to insert the triangle
15944 // control-flow pattern. The incoming instructions know the destination vreg
15945 // to set, the condition code register to branch on, the true/false values to
15946 // select between, and the condcode to use to select the appropriate branch.
15948 // We produce the following control flow:
15949 // HeadMBB
15950 // | \
15951 // | IfFalseMBB
15952 // | /
15953 // TailMBB
15955 // When we find a sequence of selects we attempt to optimize their emission
15956 // by sharing the control flow. Currently we only handle cases where we have
15957 // multiple selects with the exact same condition (same LHS, RHS and CC).
15958 // The selects may be interleaved with other instructions if the other
15959 // instructions meet some requirements we deem safe:
15960   // - They are debug instructions; otherwise,
15961   // - They are not pseudo instructions, and
15962   // - They do not have side-effects, do not access memory, and their inputs do
15963   //   not depend on the results of the select pseudo-instructions.
15964 // The TrueV/FalseV operands of the selects cannot depend on the result of
15965 // previous selects in the sequence.
15966 // These conditions could be further relaxed. See the X86 target for a
15967 // related approach and more information.
15969 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
15970 // is checked here and handled by a separate function -
15971 // EmitLoweredCascadedSelect.
15972 Register LHS = MI.getOperand(1).getReg();
15973 Register RHS = MI.getOperand(2).getReg();
15974 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
15976 SmallVector<MachineInstr *, 4> SelectDebugValues;
15977 SmallSet<Register, 4> SelectDests;
15978 SelectDests.insert(MI.getOperand(0).getReg());
15980 MachineInstr *LastSelectPseudo = &MI;
15981 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
15982 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
15983 Next->getOpcode() == MI.getOpcode() &&
15984 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
15985 Next->getOperand(5).isKill()) {
15986 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
15989 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
15990 SequenceMBBI != E; ++SequenceMBBI) {
15991 if (SequenceMBBI->isDebugInstr())
15992 continue;
15993 if (isSelectPseudo(*SequenceMBBI)) {
15994 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
15995 SequenceMBBI->getOperand(2).getReg() != RHS ||
15996 SequenceMBBI->getOperand(3).getImm() != CC ||
15997 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
15998 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
15999 break;
16000 LastSelectPseudo = &*SequenceMBBI;
16001 SequenceMBBI->collectDebugValues(SelectDebugValues);
16002 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
16003 continue;
16005 if (SequenceMBBI->hasUnmodeledSideEffects() ||
16006 SequenceMBBI->mayLoadOrStore() ||
16007 SequenceMBBI->usesCustomInsertionHook())
16008 break;
16009 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
16010 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
16012 break;
16015 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16016 const BasicBlock *LLVM_BB = BB->getBasicBlock();
16017 DebugLoc DL = MI.getDebugLoc();
16018 MachineFunction::iterator I = ++BB->getIterator();
16020 MachineBasicBlock *HeadMBB = BB;
16021 MachineFunction *F = BB->getParent();
16022 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
16023 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
16025 F->insert(I, IfFalseMBB);
16026 F->insert(I, TailMBB);
16028 // Transfer debug instructions associated with the selects to TailMBB.
16029 for (MachineInstr *DebugInstr : SelectDebugValues) {
16030 TailMBB->push_back(DebugInstr->removeFromParent());
16033 // Move all instructions after the sequence to TailMBB.
16034 TailMBB->splice(TailMBB->end(), HeadMBB,
16035 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
16036 // Update machine-CFG edges by transferring all successors of the current
16037 // block to the new block which will contain the Phi nodes for the selects.
16038 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
16039 // Set the successors for HeadMBB.
16040 HeadMBB->addSuccessor(IfFalseMBB);
16041 HeadMBB->addSuccessor(TailMBB);
16043 // Insert appropriate branch.
16044 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
16045 .addReg(LHS)
16046 .addReg(RHS)
16047 .addMBB(TailMBB);
16049 // IfFalseMBB just falls through to TailMBB.
16050 IfFalseMBB->addSuccessor(TailMBB);
16052 // Create PHIs for all of the select pseudo-instructions.
16053 auto SelectMBBI = MI.getIterator();
16054 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
16055 auto InsertionPoint = TailMBB->begin();
16056 while (SelectMBBI != SelectEnd) {
16057 auto Next = std::next(SelectMBBI);
16058 if (isSelectPseudo(*SelectMBBI)) {
16059 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
16060 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
16061 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
16062 .addReg(SelectMBBI->getOperand(4).getReg())
16063 .addMBB(HeadMBB)
16064 .addReg(SelectMBBI->getOperand(5).getReg())
16065 .addMBB(IfFalseMBB);
16066 SelectMBBI->eraseFromParent();
16068 SelectMBBI = Next;
16071 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
16072 return TailMBB;
16075 static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
16076 unsigned Opcode) {
16077 DebugLoc DL = MI.getDebugLoc();
16079 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16081 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16082 Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16084 assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
16085 unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
16087 // Update FRM and save the old value.
16088 BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
16089 .addImm(MI.getOperand(FRMIdx).getImm());
16091   // Emit a VFCVT with FRM == DYN.
16092 auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
16094 for (unsigned I = 0; I < MI.getNumOperands(); I++)
16095 if (I != FRMIdx)
16096 MIB = MIB.add(MI.getOperand(I));
16097 else
16098 MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
16100 MIB.add(MachineOperand::CreateReg(RISCV::FRM,
16101 /*IsDef*/ false,
16102 /*IsImp*/ true));
16104 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16105 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16107 // Restore FRM.
16108 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
16109 .addReg(SavedFRM, RegState::Kill);
16111 // Erase the pseudoinstruction.
16112 MI.eraseFromParent();
16113 return BB;
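// Expand PseudoVFROUND_NOEXCEPT_*_MASK: save FFLAGS, round by converting the
// masked source to integer (CVTXOpc) and back to FP (CVTFOpc) with frm == DYN,
// then restore FFLAGS so the int<->FP round trip leaves no exception flags
// visible.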
16116 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
16117 MachineBasicBlock *BB,
16118 unsigned CVTXOpc,
16119 unsigned CVTFOpc) {
16120 DebugLoc DL = MI.getDebugLoc();
16122 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
16124 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
16125 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16127 // Save the old value of FFLAGS.
16128 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
16130 assert(MI.getNumOperands() == 7);
16132 // Emit a VFCVT_X_F
16133 const TargetRegisterInfo *TRI =
16134 BB->getParent()->getSubtarget().getRegisterInfo();
16135 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
16136 Register Tmp = MRI.createVirtualRegister(RC);
16137 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
16138 .add(MI.getOperand(1))
16139 .add(MI.getOperand(2))
16140 .add(MI.getOperand(3))
16141 .add(MachineOperand::CreateImm(7)) // frm = DYN
16142 .add(MI.getOperand(4))
16143 .add(MI.getOperand(5))
16144 .add(MI.getOperand(6))
16145 .add(MachineOperand::CreateReg(RISCV::FRM,
16146 /*IsDef*/ false,
16147 /*IsImp*/ true));
16149 // Emit a VFCVT_F_X
16150 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
16151 .add(MI.getOperand(0))
16152 .add(MI.getOperand(1))
16153 .addReg(Tmp)
16154 .add(MI.getOperand(3))
16155 .add(MachineOperand::CreateImm(7)) // frm = DYN
16156 .add(MI.getOperand(4))
16157 .add(MI.getOperand(5))
16158 .add(MI.getOperand(6))
16159 .add(MachineOperand::CreateReg(RISCV::FRM,
16160 /*IsDef*/ false,
16161 /*IsImp*/ true));
16163 // Restore FFLAGS.
16164 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
16165 .addReg(SavedFFLAGS, RegState::Kill);
16167 // Erase the pseudoinstruction.
16168 MI.eraseFromParent();
16169 return BB;
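// Expand a PseudoFROUND_* into a compare-and-branch around a scalar
// int<->FP round trip. Roughly (register names are illustrative only):
//
//   MBB:
//     %fabs = FSGNJXOpc %src, %src       ; |src|
//     %cmp  = CmpOpc    %fabs, %max      ; FLT |src|, max-operand
//     BEQ   %cmp, $x0, DoneMBB           ; compare false (incl. NaN): keep src
//   CvtMBB:
//     %i    = F2IOpc %src, frm
//     %f    = I2FOpc %i, frm
//     %cvt  = FSGNJOpc %f, %src          ; reattach the sign of %src
//   DoneMBB:
//     %dst  = PHI [%src, MBB], [%cvt, CvtMBB]
//
// The opcodes and register class are selected per type by the switch below.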
16172 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
16173 const RISCVSubtarget &Subtarget) {
16174 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
16175 const TargetRegisterClass *RC;
16176 switch (MI.getOpcode()) {
16177 default:
16178 llvm_unreachable("Unexpected opcode");
16179 case RISCV::PseudoFROUND_H:
16180 CmpOpc = RISCV::FLT_H;
16181 F2IOpc = RISCV::FCVT_W_H;
16182 I2FOpc = RISCV::FCVT_H_W;
16183 FSGNJOpc = RISCV::FSGNJ_H;
16184 FSGNJXOpc = RISCV::FSGNJX_H;
16185 RC = &RISCV::FPR16RegClass;
16186 break;
16187 case RISCV::PseudoFROUND_H_INX:
16188 CmpOpc = RISCV::FLT_H_INX;
16189 F2IOpc = RISCV::FCVT_W_H_INX;
16190 I2FOpc = RISCV::FCVT_H_W_INX;
16191 FSGNJOpc = RISCV::FSGNJ_H_INX;
16192 FSGNJXOpc = RISCV::FSGNJX_H_INX;
16193 RC = &RISCV::GPRF16RegClass;
16194 break;
16195 case RISCV::PseudoFROUND_S:
16196 CmpOpc = RISCV::FLT_S;
16197 F2IOpc = RISCV::FCVT_W_S;
16198 I2FOpc = RISCV::FCVT_S_W;
16199 FSGNJOpc = RISCV::FSGNJ_S;
16200 FSGNJXOpc = RISCV::FSGNJX_S;
16201 RC = &RISCV::FPR32RegClass;
16202 break;
16203 case RISCV::PseudoFROUND_S_INX:
16204 CmpOpc = RISCV::FLT_S_INX;
16205 F2IOpc = RISCV::FCVT_W_S_INX;
16206 I2FOpc = RISCV::FCVT_S_W_INX;
16207 FSGNJOpc = RISCV::FSGNJ_S_INX;
16208 FSGNJXOpc = RISCV::FSGNJX_S_INX;
16209 RC = &RISCV::GPRF32RegClass;
16210 break;
16211 case RISCV::PseudoFROUND_D:
16212 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
16213 CmpOpc = RISCV::FLT_D;
16214 F2IOpc = RISCV::FCVT_L_D;
16215 I2FOpc = RISCV::FCVT_D_L;
16216 FSGNJOpc = RISCV::FSGNJ_D;
16217 FSGNJXOpc = RISCV::FSGNJX_D;
16218 RC = &RISCV::FPR64RegClass;
16219 break;
16220 case RISCV::PseudoFROUND_D_INX:
16221 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
16222 CmpOpc = RISCV::FLT_D_INX;
16223 F2IOpc = RISCV::FCVT_L_D_INX;
16224 I2FOpc = RISCV::FCVT_D_L_INX;
16225 FSGNJOpc = RISCV::FSGNJ_D_INX;
16226 FSGNJXOpc = RISCV::FSGNJX_D_INX;
16227 RC = &RISCV::GPRRegClass;
16228 break;
16231 const BasicBlock *BB = MBB->getBasicBlock();
16232 DebugLoc DL = MI.getDebugLoc();
16233 MachineFunction::iterator I = ++MBB->getIterator();
16235 MachineFunction *F = MBB->getParent();
16236 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
16237 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
16239 F->insert(I, CvtMBB);
16240 F->insert(I, DoneMBB);
16241 // Move all instructions after the sequence to DoneMBB.
16242 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
16243 MBB->end());
16244 // Update machine-CFG edges by transferring all successors of the current
16245 // block to the new block which will contain the Phi node for the result.
16246 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
16247 // Set the successors for MBB.
16248 MBB->addSuccessor(CvtMBB);
16249 MBB->addSuccessor(DoneMBB);
16251 Register DstReg = MI.getOperand(0).getReg();
16252 Register SrcReg = MI.getOperand(1).getReg();
16253 Register MaxReg = MI.getOperand(2).getReg();
16254 int64_t FRM = MI.getOperand(3).getImm();
16256 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
16257 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
16259 Register FabsReg = MRI.createVirtualRegister(RC);
16260 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
16262 // Compare the FP value to the max value.
16263 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16264 auto MIB =
16265 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
16266 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16267 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16269 // Insert branch.
16270 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
16271 .addReg(CmpReg)
16272 .addReg(RISCV::X0)
16273 .addMBB(DoneMBB);
16275 CvtMBB->addSuccessor(DoneMBB);
16277 // Convert to integer.
16278 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
16279 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
16280 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16281 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16283 // Convert back to FP.
16284 Register I2FReg = MRI.createVirtualRegister(RC);
16285 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
16286 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
16287 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
16289 // Restore the sign bit.
16290 Register CvtReg = MRI.createVirtualRegister(RC);
16291 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
16293 // Merge the results.
16294 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
16295 .addReg(SrcReg)
16296 .addMBB(MBB)
16297 .addReg(CvtReg)
16298 .addMBB(CvtMBB);
16300 MI.eraseFromParent();
16301 return DoneMBB;
16304 MachineBasicBlock *
16305 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
16306 MachineBasicBlock *BB) const {
16307 switch (MI.getOpcode()) {
16308 default:
16309 llvm_unreachable("Unexpected instr type to insert");
16310 case RISCV::ReadCycleWide:
16311 assert(!Subtarget.is64Bit() &&
16312 "ReadCycleWrite is only to be used on riscv32");
16313 return emitReadCycleWidePseudo(MI, BB);
16314 case RISCV::Select_GPR_Using_CC_GPR:
16315 case RISCV::Select_FPR16_Using_CC_GPR:
16316 case RISCV::Select_FPR16INX_Using_CC_GPR:
16317 case RISCV::Select_FPR32_Using_CC_GPR:
16318 case RISCV::Select_FPR32INX_Using_CC_GPR:
16319 case RISCV::Select_FPR64_Using_CC_GPR:
16320 case RISCV::Select_FPR64INX_Using_CC_GPR:
16321 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
16322 return emitSelectPseudo(MI, BB, Subtarget);
16323 case RISCV::BuildPairF64Pseudo:
16324 case RISCV::BuildPairF64Pseudo_INX:
16325 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
16326 case RISCV::SplitF64Pseudo:
16327 case RISCV::SplitF64Pseudo_INX:
16328 return emitSplitF64Pseudo(MI, BB, Subtarget);
16329 case RISCV::PseudoQuietFLE_H:
16330 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
16331 case RISCV::PseudoQuietFLE_H_INX:
16332 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
16333 case RISCV::PseudoQuietFLT_H:
16334 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
16335 case RISCV::PseudoQuietFLT_H_INX:
16336 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
16337 case RISCV::PseudoQuietFLE_S:
16338 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
16339 case RISCV::PseudoQuietFLE_S_INX:
16340 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
16341 case RISCV::PseudoQuietFLT_S:
16342 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
16343 case RISCV::PseudoQuietFLT_S_INX:
16344 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
16345 case RISCV::PseudoQuietFLE_D:
16346 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
16347 case RISCV::PseudoQuietFLE_D_INX:
16348 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
16349 case RISCV::PseudoQuietFLE_D_IN32X:
16350 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
16351 Subtarget);
16352 case RISCV::PseudoQuietFLT_D:
16353 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
16354 case RISCV::PseudoQuietFLT_D_INX:
16355 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
16356 case RISCV::PseudoQuietFLT_D_IN32X:
16357 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
16358 Subtarget);
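// The macros below expand one switch case per LMUL (plus the corresponding
// _MASK variant), mapping each rounding-mode (RM) VFCVT pseudo onto
// emitVFCVT_RM with the matching non-RM pseudo opcode.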
16360 #define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
16361 case RISCV::RMOpc##_##LMUL: \
16362 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
16363 case RISCV::RMOpc##_##LMUL##_MASK: \
16364 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
16366 #define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16367 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
16368 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
16369 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
16370 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
16371 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
16373 #define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
16374 PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16375 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
16377 #define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
16378 PseudoVFCVT_RM_CASE(RMOpc, Opc) \
16379 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
16381 // VFCVT
16382 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
16383 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
16384 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
16385 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
16387 // VFWCVT
16388 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
16389 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
16391 // VFNCVT
16392 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
16393 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
16394 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
16395 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
16397 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
16398 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
16399 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
16400 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
16401 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
16402 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
16403 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
16404 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
16405 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
16406 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
16407 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
16408 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
16409 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
16410 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
16411 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
16412 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
16413 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
16414 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
16415 case RISCV::PseudoFROUND_H:
16416 case RISCV::PseudoFROUND_H_INX:
16417 case RISCV::PseudoFROUND_S:
16418 case RISCV::PseudoFROUND_S_INX:
16419 case RISCV::PseudoFROUND_D:
16420 case RISCV::PseudoFROUND_D_INX:
16421 case RISCV::PseudoFROUND_D_IN32X:
16422 return emitFROUND(MI, BB, Subtarget);
16423 case TargetOpcode::STATEPOINT:
16424 case TargetOpcode::STACKMAP:
16425 case TargetOpcode::PATCHPOINT:
16426 if (!Subtarget.is64Bit())
16427 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
16428 "supported on 64-bit targets");
16429 return emitPatchPoint(MI, BB);
16433 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
16434 SDNode *Node) const {
16435 // Add FRM dependency to any instructions with dynamic rounding mode.
16436 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
16437 if (Idx < 0) {
16438 // Vector pseudos have FRM index indicated by TSFlags.
16439 Idx = RISCVII::getFRMOpNum(MI.getDesc());
16440 if (Idx < 0)
16441 return;
16443 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
16444 return;
16445 // If the instruction already reads FRM, don't add another read.
16446 if (MI.readsRegister(RISCV::FRM))
16447 return;
16448 MI.addOperand(
16449 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
16452 // Calling Convention Implementation.
16453 // The expectations for frontend ABI lowering vary from target to target.
16454 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
16455 // details, but this is a longer term goal. For now, we simply try to keep the
16456 // role of the frontend as simple and well-defined as possible. The rules can
16457 // be summarised as:
16458 // * Never split up large scalar arguments. We handle them here.
16459 // * If a hardfloat calling convention is being used, and the struct may be
16460 // passed in a pair of registers (fp+fp, int+fp), and both registers are
16461 // available, then pass as two separate arguments. If either the GPRs or FPRs
16462 // are exhausted, then pass according to the rule below.
16463 // * If a struct could never be passed in registers or directly in a stack
16464 // slot (as it is larger than 2*XLEN and the floating point rules don't
16465 // apply), then pass it using a pointer with the byval attribute.
16466 // * If a struct is less than 2*XLEN, then coerce to either a two-element
16467 // word-sized array or a 2*XLEN scalar (depending on alignment).
16468 // * The frontend can determine whether a struct is returned by reference or
16469 // not based on its size and fields. If it will be returned by reference, the
16470 // frontend must modify the prototype so a pointer with the sret annotation is
16471 // passed as the first argument. This is not necessary for large scalar
16472 // returns.
16473 // * Struct return values and varargs should be coerced to structs containing
16474 // register-size fields in the same situations they would be for fixed
16475 // arguments.
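//
// A purely illustrative example of the rules above (the RISC-V psABI is the
// normative reference): on LP64D, an argument of type
//   struct S { double d; int32_t i; };
// is passed as two separate values (one in an FPR, one in a GPR) while both
// register classes still have free argument registers; once the FPRs or GPRs
// are exhausted it is passed according to the size-based rules above, and a
// struct larger than 2*XLEN that does not qualify for the FP rules is passed
// byval via a pointer.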
16477 static const MCPhysReg ArgGPRs[] = {
16478 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
16479 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
16481 static const MCPhysReg ArgFPR16s[] = {
16482 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
16483 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
16485 static const MCPhysReg ArgFPR32s[] = {
16486 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
16487 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
16489 static const MCPhysReg ArgFPR64s[] = {
16490 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
16491 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
16493 // This is an interim calling convention and it may be changed in the future.
16494 static const MCPhysReg ArgVRs[] = {
16495 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
16496 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
16497 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
16498 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
16499 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
16500 RISCV::V20M2, RISCV::V22M2};
16501 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
16502 RISCV::V20M4};
16503 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
16505 // Pass a 2*XLEN argument that has been split into two XLEN values through
16506 // registers or the stack as necessary.
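// (for example an i64 argument on RV32, or an i128 argument on RV64, already
// split into two XLEN-sized halves).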
16507 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
16508 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
16509 MVT ValVT2, MVT LocVT2,
16510 ISD::ArgFlagsTy ArgFlags2) {
16511 unsigned XLenInBytes = XLen / 8;
16512 if (Register Reg = State.AllocateReg(ArgGPRs)) {
16513 // At least one half can be passed via register.
16514 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
16515 VA1.getLocVT(), CCValAssign::Full));
16516 } else {
16517 // Both halves must be passed on the stack, with proper alignment.
16518 Align StackAlign =
16519 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
16520 State.addLoc(
16521 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
16522 State.AllocateStack(XLenInBytes, StackAlign),
16523 VA1.getLocVT(), CCValAssign::Full));
16524 State.addLoc(CCValAssign::getMem(
16525 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
16526 LocVT2, CCValAssign::Full));
16527 return false;
16530 if (Register Reg = State.AllocateReg(ArgGPRs)) {
16531 // The second half can also be passed via register.
16532 State.addLoc(
16533 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
16534 } else {
16535 // The second half is passed via the stack, without additional alignment.
16536 State.addLoc(CCValAssign::getMem(
16537 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
16538 LocVT2, CCValAssign::Full));
16541 return false;
16544 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
16545 std::optional<unsigned> FirstMaskArgument,
16546 CCState &State, const RISCVTargetLowering &TLI) {
16547 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
16548 if (RC == &RISCV::VRRegClass) {
16549 // Assign the first mask argument to V0.
16550 // This is an interim calling convention and it may be changed in the
16551 // future.
16552 if (FirstMaskArgument && ValNo == *FirstMaskArgument)
16553 return State.AllocateReg(RISCV::V0);
16554 return State.AllocateReg(ArgVRs);
16556 if (RC == &RISCV::VRM2RegClass)
16557 return State.AllocateReg(ArgVRM2s);
16558 if (RC == &RISCV::VRM4RegClass)
16559 return State.AllocateReg(ArgVRM4s);
16560 if (RC == &RISCV::VRM8RegClass)
16561 return State.AllocateReg(ArgVRM8s);
16562 llvm_unreachable("Unhandled register class for ValueType");
16565 // Implements the RISC-V calling convention. Returns true upon failure.
16566 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
16567 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
16568 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
16569 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
16570 std::optional<unsigned> FirstMaskArgument) {
16571 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
16572 assert(XLen == 32 || XLen == 64);
16573 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
16575 // Static chain parameter must not be passed in normal argument registers,
16576 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
16577 if (ArgFlags.isNest()) {
16578 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
16579 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
16580 return false;
16584 // Any return value split into more than two values can't be returned
16585 // directly. Vectors are returned via the available vector registers.
16586 if (!LocVT.isVector() && IsRet && ValNo > 1)
16587 return true;
16589 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing
16590 // a variadic argument, or if no F16/F32 argument registers are available.
16591 bool UseGPRForF16_F32 = true;
16592 // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if
16593 // passing a variadic argument, or if no F64 argument registers are available.
16594 bool UseGPRForF64 = true;
16596 switch (ABI) {
16597 default:
16598 llvm_unreachable("Unexpected ABI");
16599 case RISCVABI::ABI_ILP32:
16600 case RISCVABI::ABI_LP64:
16601 break;
16602 case RISCVABI::ABI_ILP32F:
16603 case RISCVABI::ABI_LP64F:
16604 UseGPRForF16_F32 = !IsFixed;
16605 break;
16606 case RISCVABI::ABI_ILP32D:
16607 case RISCVABI::ABI_LP64D:
16608 UseGPRForF16_F32 = !IsFixed;
16609 UseGPRForF64 = !IsFixed;
16610 break;
16613 // FPR16, FPR32, and FPR64 alias each other.
16614 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
16615 UseGPRForF16_F32 = true;
16616 UseGPRForF64 = true;
16619 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
16620 // similar local variables rather than directly checking against the target
16621 // ABI.
16623 if (UseGPRForF16_F32 &&
16624 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
16625 LocVT = XLenVT;
16626 LocInfo = CCValAssign::BCvt;
16627 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
16628 LocVT = MVT::i64;
16629 LocInfo = CCValAssign::BCvt;
16632 // If this is a variadic argument, the RISC-V calling convention requires
16633 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
16634 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
16635 // be used regardless of whether the original argument was split during
16636 // legalisation or not. The argument will not be passed by registers if the
16637 // original type is larger than 2*XLEN, so the register alignment rule does
16638 // not apply.
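// For example, on ILP32 a variadic double is passed in an aligned (even/odd)
// GPR pair such as a2+a3; if the next free register would be a3, it is
// skipped so that the value starts in a4.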
16639 unsigned TwoXLenInBytes = (2 * XLen) / 8;
16640 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
16641 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
16642 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
16643 // Skip 'odd' register if necessary.
16644 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
16645 State.AllocateReg(ArgGPRs);
16648 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
16649 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
16650 State.getPendingArgFlags();
16652 assert(PendingLocs.size() == PendingArgFlags.size() &&
16653 "PendingLocs and PendingArgFlags out of sync");
16655 // Handle passing f64 on RV32D with a soft float ABI or when floating point
16656 // registers are exhausted.
16657 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
16658 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
16659 // Depending on available argument GPRs, f64 may be passed in a pair of
16660 // GPRs, split between a GPR and the stack, or passed completely on the
16661 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
16662 // cases.
16663 Register Reg = State.AllocateReg(ArgGPRs);
16664 if (!Reg) {
16665 unsigned StackOffset = State.AllocateStack(8, Align(8));
16666 State.addLoc(
16667 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
16668 return false;
16670 LocVT = MVT::i32;
16671 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
16672 Register HiReg = State.AllocateReg(ArgGPRs);
16673 if (HiReg) {
16674 State.addLoc(
16675 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
16676 } else {
16677 unsigned StackOffset = State.AllocateStack(4, Align(4));
16678 State.addLoc(
16679 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
16681 return false;
16684 // Fixed-length vectors are located in the corresponding scalable-vector
16685 // container types.
16686 if (ValVT.isFixedLengthVector())
16687 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
16689 // Split arguments might be passed indirectly, so keep track of the pending
16690 // values. Split vectors are passed via a mix of registers and indirectly, so
16691 // treat them as we would any other argument.
16692 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
16693 LocVT = XLenVT;
16694 LocInfo = CCValAssign::Indirect;
16695 PendingLocs.push_back(
16696 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
16697 PendingArgFlags.push_back(ArgFlags);
16698 if (!ArgFlags.isSplitEnd()) {
16699 return false;
16703 // If the split argument only had two elements, it should be passed directly
16704 // in registers or on the stack.
16705 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
16706 PendingLocs.size() <= 2) {
16707 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
16708 // Apply the normal calling convention rules to the first half of the
16709 // split argument.
16710 CCValAssign VA = PendingLocs[0];
16711 ISD::ArgFlagsTy AF = PendingArgFlags[0];
16712 PendingLocs.clear();
16713 PendingArgFlags.clear();
16714 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
16715 ArgFlags);
16718 // Allocate to a register if possible, or else a stack slot.
16719 Register Reg;
16720 unsigned StoreSizeBytes = XLen / 8;
16721 Align StackAlign = Align(XLen / 8);
16723 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
16724 Reg = State.AllocateReg(ArgFPR16s);
16725 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
16726 Reg = State.AllocateReg(ArgFPR32s);
16727 else if (ValVT == MVT::f64 && !UseGPRForF64)
16728 Reg = State.AllocateReg(ArgFPR64s);
16729 else if (ValVT.isVector()) {
16730 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
16731 if (!Reg) {
16732 // For return values, the vector must be passed fully via registers or
16733 // via the stack.
16734 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
16735 // but we're using all of them.
16736 if (IsRet)
16737 return true;
16738 // Try using a GPR to pass the address
16739 if ((Reg = State.AllocateReg(ArgGPRs))) {
16740 LocVT = XLenVT;
16741 LocInfo = CCValAssign::Indirect;
16742 } else if (ValVT.isScalableVector()) {
16743 LocVT = XLenVT;
16744 LocInfo = CCValAssign::Indirect;
16745 } else {
16746 // Pass fixed-length vectors on the stack.
16747 LocVT = ValVT;
16748 StoreSizeBytes = ValVT.getStoreSize();
16749 // Align vectors to their element sizes, being careful for vXi1
16750 // vectors.
16751 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
16754 } else {
16755 Reg = State.AllocateReg(ArgGPRs);
16758 unsigned StackOffset =
16759 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
16761 // If we reach this point and PendingLocs is non-empty, we must be at the
16762 // end of a split argument that must be passed indirectly.
16763 if (!PendingLocs.empty()) {
16764 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
16765 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
16767 for (auto &It : PendingLocs) {
16768 if (Reg)
16769 It.convertToReg(Reg);
16770 else
16771 It.convertToMem(StackOffset);
16772 State.addLoc(It);
16774 PendingLocs.clear();
16775 PendingArgFlags.clear();
16776 return false;
16779 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
16780 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
16781 "Expected an XLenVT or vector types at this stage");
16783 if (Reg) {
16784 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
16785 return false;
16788 // When a scalar floating-point value is passed on the stack, no
16789 // bit-conversion is needed.
16790 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
16791 assert(!ValVT.isVector());
16792 LocVT = ValVT;
16793 LocInfo = CCValAssign::Full;
16795 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
16796 return false;
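// Return the index of the first argument whose type is a vector of i1 (a mask
// vector), if any. The interim vector calling convention pre-assigns that
// argument to V0 (see allocateRVVReg above).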
16799 template <typename ArgTy>
16800 static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
16801 for (const auto &ArgIdx : enumerate(Args)) {
16802 MVT ArgVT = ArgIdx.value().VT;
16803 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
16804 return ArgIdx.index();
16806 return std::nullopt;
16809 void RISCVTargetLowering::analyzeInputArgs(
16810 MachineFunction &MF, CCState &CCInfo,
16811 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
16812 RISCVCCAssignFn Fn) const {
16813 unsigned NumArgs = Ins.size();
16814 FunctionType *FType = MF.getFunction().getFunctionType();
16816 std::optional<unsigned> FirstMaskArgument;
16817 if (Subtarget.hasVInstructions())
16818 FirstMaskArgument = preAssignMask(Ins);
16820 for (unsigned i = 0; i != NumArgs; ++i) {
16821 MVT ArgVT = Ins[i].VT;
16822 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
16824 Type *ArgTy = nullptr;
16825 if (IsRet)
16826 ArgTy = FType->getReturnType();
16827 else if (Ins[i].isOrigArg())
16828 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
16830 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
16831 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
16832 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
16833 FirstMaskArgument)) {
16834 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
16835 << ArgVT << '\n');
16836 llvm_unreachable(nullptr);
16841 void RISCVTargetLowering::analyzeOutputArgs(
16842 MachineFunction &MF, CCState &CCInfo,
16843 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
16844 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
16845 unsigned NumArgs = Outs.size();
16847 std::optional<unsigned> FirstMaskArgument;
16848 if (Subtarget.hasVInstructions())
16849 FirstMaskArgument = preAssignMask(Outs);
16851 for (unsigned i = 0; i != NumArgs; i++) {
16852 MVT ArgVT = Outs[i].VT;
16853 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
16854 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
16856 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
16857 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
16858 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
16859 FirstMaskArgument)) {
16860 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
16861 << ArgVT << "\n");
16862 llvm_unreachable(nullptr);
16867 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
16868 // values.
16869 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
16870 const CCValAssign &VA, const SDLoc &DL,
16871 const RISCVSubtarget &Subtarget) {
16872 switch (VA.getLocInfo()) {
16873 default:
16874 llvm_unreachable("Unexpected CCValAssign::LocInfo");
16875 case CCValAssign::Full:
16876 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
16877 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
16878 break;
16879 case CCValAssign::BCvt:
16880 if (VA.getLocVT().isInteger() &&
16881 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
16882 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
16883 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
16884 if (RV64LegalI32) {
16885 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
16886 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
16887 } else {
16888 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
16890 } else {
16891 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
16893 break;
16895 return Val;
16898 // The caller is responsible for loading the full value if the argument is
16899 // passed with CCValAssign::Indirect.
16900 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
16901 const CCValAssign &VA, const SDLoc &DL,
16902 const ISD::InputArg &In,
16903 const RISCVTargetLowering &TLI) {
16904 MachineFunction &MF = DAG.getMachineFunction();
16905 MachineRegisterInfo &RegInfo = MF.getRegInfo();
16906 EVT LocVT = VA.getLocVT();
16907 SDValue Val;
16908 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
16909 Register VReg = RegInfo.createVirtualRegister(RC);
16910 RegInfo.addLiveIn(VA.getLocReg(), VReg);
16911 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
16913 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
16914 if (In.isOrigArg()) {
16915 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
16916 if (OrigArg->getType()->isIntegerTy()) {
16917 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
16918 // An input zero-extended from a type narrower than i32 (e.g. i31) can also be considered sign-extended.
16919 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
16920 (BitWidth < 32 && In.Flags.isZExt())) {
16921 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
16922 RVFI->addSExt32Register(VReg);
16927 if (VA.getLocInfo() == CCValAssign::Indirect)
16928 return Val;
16930 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
16933 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
16934 const CCValAssign &VA, const SDLoc &DL,
16935 const RISCVSubtarget &Subtarget) {
16936 EVT LocVT = VA.getLocVT();
16938 switch (VA.getLocInfo()) {
16939 default:
16940 llvm_unreachable("Unexpected CCValAssign::LocInfo");
16941 case CCValAssign::Full:
16942 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
16943 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
16944 break;
16945 case CCValAssign::BCvt:
16946 if (LocVT.isInteger() &&
16947 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
16948 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
16949 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
16950 if (RV64LegalI32) {
16951 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
16952 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
16953 } else {
16954 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
16956 } else {
16957 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
16959 break;
16961 return Val;
16964 // The caller is responsible for loading the full value if the argument is
16965 // passed with CCValAssign::Indirect.
16966 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
16967 const CCValAssign &VA, const SDLoc &DL) {
16968 MachineFunction &MF = DAG.getMachineFunction();
16969 MachineFrameInfo &MFI = MF.getFrameInfo();
16970 EVT LocVT = VA.getLocVT();
16971 EVT ValVT = VA.getValVT();
16972 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
16973 if (ValVT.isScalableVector()) {
16974 // When the value is a scalable vector, the stack slot holds a pointer to
16975 // the vector value rather than the value itself, so load using the pointer
16976 // type (LocVT) instead of the scalable vector type.
16977 ValVT = LocVT;
16979 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
16980 /*IsImmutable=*/true);
16981 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
16982 SDValue Val;
16984 ISD::LoadExtType ExtType;
16985 switch (VA.getLocInfo()) {
16986 default:
16987 llvm_unreachable("Unexpected CCValAssign::LocInfo");
16988 case CCValAssign::Full:
16989 case CCValAssign::Indirect:
16990 case CCValAssign::BCvt:
16991 ExtType = ISD::NON_EXTLOAD;
16992 break;
16994 Val = DAG.getExtLoad(
16995 ExtType, DL, LocVT, Chain, FIN,
16996 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
16997 return Val;
17000 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
17001 const CCValAssign &VA,
17002 const CCValAssign &HiVA,
17003 const SDLoc &DL) {
17004 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
17005 "Unexpected VA");
17006 MachineFunction &MF = DAG.getMachineFunction();
17007 MachineFrameInfo &MFI = MF.getFrameInfo();
17008 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17010 assert(VA.isRegLoc() && "Expected register VA assignment");
17012 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17013 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
17014 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
17015 SDValue Hi;
17016 if (HiVA.isMemLoc()) {
17017 // Second half of f64 is passed on the stack.
17018 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
17019 /*IsImmutable=*/true);
17020 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
17021 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
17022 MachinePointerInfo::getFixedStack(MF, FI));
17023 } else {
17024 // Second half of f64 is passed in another GPR.
17025 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17026 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
17027 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
17029 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
17032 // FastCC gives less than a 1% performance improvement on some particular
17033 // benchmarks, but it may theoretically benefit other cases.
17034 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
17035 unsigned ValNo, MVT ValVT, MVT LocVT,
17036 CCValAssign::LocInfo LocInfo,
17037 ISD::ArgFlagsTy ArgFlags, CCState &State,
17038 bool IsFixed, bool IsRet, Type *OrigTy,
17039 const RISCVTargetLowering &TLI,
17040 std::optional<unsigned> FirstMaskArgument) {
17042 // X5 and X6 might be used for save-restore libcall.
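// FastCC therefore extends the normal a0-a7 argument registers with the
// remaining caller-saved temporaries t2 and t3-t6 (X7, X28-X31).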
17043 static const MCPhysReg GPRList[] = {
17044 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
17045 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
17046 RISCV::X29, RISCV::X30, RISCV::X31};
17048 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17049 if (unsigned Reg = State.AllocateReg(GPRList)) {
17050 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17051 return false;
17055 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
17057 if (LocVT == MVT::f16 &&
17058 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
17059 static const MCPhysReg FPR16List[] = {
17060 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
17061 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
17062 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
17063 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
17064 if (unsigned Reg = State.AllocateReg(FPR16List)) {
17065 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17066 return false;
17070 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17071 static const MCPhysReg FPR32List[] = {
17072 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
17073 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
17074 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
17075 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
17076 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17077 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17078 return false;
17082 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17083 static const MCPhysReg FPR64List[] = {
17084 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
17085 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
17086 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
17087 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
17088 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17089 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17090 return false;
17094 // Check if there is an available GPR before hitting the stack.
17095 if ((LocVT == MVT::f16 &&
17096 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
17097 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17098 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
17099 Subtarget.hasStdExtZdinx())) {
17100 if (unsigned Reg = State.AllocateReg(GPRList)) {
17101 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17102 return false;
17106 if (LocVT == MVT::f16) {
17107 unsigned Offset2 = State.AllocateStack(2, Align(2));
17108 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
17109 return false;
17112 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
17113 unsigned Offset4 = State.AllocateStack(4, Align(4));
17114 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
17115 return false;
17118 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
17119 unsigned Offset5 = State.AllocateStack(8, Align(8));
17120 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
17121 return false;
17124 if (LocVT.isVector()) {
17125 if (unsigned Reg =
17126 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
17127 // Fixed-length vectors are located in the corresponding scalable-vector
17128 // container types.
17129 if (ValVT.isFixedLengthVector())
17130 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
17131 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17132 } else {
17133 // Try and pass the address via a "fast" GPR.
17134 if (unsigned GPRReg = State.AllocateReg(GPRList)) {
17135 LocInfo = CCValAssign::Indirect;
17136 LocVT = TLI.getSubtarget().getXLenVT();
17137 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
17138 } else if (ValVT.isFixedLengthVector()) {
17139 auto StackAlign =
17140 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
17141 unsigned StackOffset =
17142 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
17143 State.addLoc(
17144 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
17145 } else {
17146 // Can't pass scalable vectors on the stack.
17147 return true;
17151 return false;
17154 return true; // CC didn't match.
17157 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
17158 CCValAssign::LocInfo LocInfo,
17159 ISD::ArgFlagsTy ArgFlags, CCState &State) {
17160 if (ArgFlags.isNest()) {
17161 report_fatal_error(
17162 "Attribute 'nest' is not supported in GHC calling convention");
17165 static const MCPhysReg GPRList[] = {
17166 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
17167 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
17169 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
17170 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
17171 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
17172 if (unsigned Reg = State.AllocateReg(GPRList)) {
17173 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17174 return false;
17178 const RISCVSubtarget &Subtarget =
17179 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
17181 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
17182 // Pass in STG registers: F1, ..., F6
17183 // fs0 ... fs5
17184 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
17185 RISCV::F18_F, RISCV::F19_F,
17186 RISCV::F20_F, RISCV::F21_F};
17187 if (unsigned Reg = State.AllocateReg(FPR32List)) {
17188 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17189 return false;
17193 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
17194 // Pass in STG registers: D1, ..., D6
17195 // fs6 ... fs11
17196 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
17197 RISCV::F24_D, RISCV::F25_D,
17198 RISCV::F26_D, RISCV::F27_D};
17199 if (unsigned Reg = State.AllocateReg(FPR64List)) {
17200 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17201 return false;
17205 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
17206 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
17207 Subtarget.is64Bit())) {
17208 if (unsigned Reg = State.AllocateReg(GPRList)) {
17209 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
17210 return false;
17214 report_fatal_error("No registers left in GHC calling convention");
17215 return true;
17218 // Transform physical registers into virtual registers.
17219 SDValue RISCVTargetLowering::LowerFormalArguments(
17220 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
17221 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
17222 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
17224 MachineFunction &MF = DAG.getMachineFunction();
17226 switch (CallConv) {
17227 default:
17228 report_fatal_error("Unsupported calling convention");
17229 case CallingConv::C:
17230 case CallingConv::Fast:
17231 case CallingConv::SPIR_KERNEL:
17232 break;
17233 case CallingConv::GHC:
17234 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
17235 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
17236 "(Zdinx/D) instruction set extensions");
17239 const Function &Func = MF.getFunction();
17240 if (Func.hasFnAttribute("interrupt")) {
17241 if (!Func.arg_empty())
17242 report_fatal_error(
17243 "Functions with the interrupt attribute cannot have arguments!");
17245 StringRef Kind =
17246 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
17248 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
17249 report_fatal_error(
17250 "Function interrupt attribute argument not supported!");
17253 EVT PtrVT = getPointerTy(DAG.getDataLayout());
17254 MVT XLenVT = Subtarget.getXLenVT();
17255 unsigned XLenInBytes = Subtarget.getXLen() / 8;
17256 // Used with varargs to accumulate store chains.
17257 std::vector<SDValue> OutChains;
17259 // Assign locations to all of the incoming arguments.
17260 SmallVector<CCValAssign, 16> ArgLocs;
17261 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
17263 if (CallConv == CallingConv::GHC)
17264 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
17265 else
17266 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
17267 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
17268 : RISCV::CC_RISCV);
17270 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
17271 CCValAssign &VA = ArgLocs[i];
17272 SDValue ArgValue;
17273 // Passing f64 on RV32D with a soft float ABI must be handled as a special
17274 // case.
17275 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17276 assert(VA.needsCustom());
17277 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
17278 } else if (VA.isRegLoc())
17279 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
17280 else
17281 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
17283 if (VA.getLocInfo() == CCValAssign::Indirect) {
17284 // If the original argument was split and passed by reference (e.g. i128
17285 // on RV32), we need to load all parts of it here (using the same
17286 // address). Vectors may be partly split to registers and partly to the
17287 // stack, in which case the base address is partly offset and subsequent
17288 // loads are relative to that.
17289 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
17290 MachinePointerInfo()));
17291 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
17292 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
17293 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
17294 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
17295 CCValAssign &PartVA = ArgLocs[i + 1];
17296 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
17297 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
17298 if (PartVA.getValVT().isScalableVector())
17299 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
17300 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
17301 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
17302 MachinePointerInfo()));
17303 ++i;
17304 ++InsIdx;
17306 continue;
17308 InVals.push_back(ArgValue);
17311 if (any_of(ArgLocs,
17312 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
17313 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
17315 if (IsVarArg) {
17316 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
17317 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
17318 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
17319 MachineFrameInfo &MFI = MF.getFrameInfo();
17320 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17321 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
17323 // Offset of the first variable argument from stack pointer, and size of
17324 // the vararg save area. For now, the varargs save area is either zero or
17325 // large enough to hold a0-a7.
17326 int VaArgOffset, VarArgsSaveSize;
17328 // If all registers are allocated, then all varargs must be passed on the
17329 // stack and we don't need to save any argregs.
17330 if (ArgRegs.size() == Idx) {
17331 VaArgOffset = CCInfo.getStackSize();
17332 VarArgsSaveSize = 0;
17333 } else {
17334 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
17335 VaArgOffset = -VarArgsSaveSize;
17338 // Record the frame index of the first variable argument,
17339 // which is needed when lowering VASTART.
17340 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
17341 RVFI->setVarArgsFrameIndex(FI);
17343 // If saving an odd number of registers then create an extra stack slot to
17344 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
17345 // offsets to even-numbered registers remain 2*XLEN-aligned.
17346 if (Idx % 2) {
17347 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
17348 VarArgsSaveSize += XLenInBytes;
17351 // Copy the integer registers that may have been used for passing varargs
17352 // to the vararg save area.
17353 for (unsigned I = Idx; I < ArgRegs.size();
17354 ++I, VaArgOffset += XLenInBytes) {
17355 const Register Reg = RegInfo.createVirtualRegister(RC);
17356 RegInfo.addLiveIn(ArgRegs[I], Reg);
17357 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
17358 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
17359 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
17360 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
17361 MachinePointerInfo::getFixedStack(MF, FI));
17362 cast<StoreSDNode>(Store.getNode())
17363 ->getMemOperand()
17364 ->setValue((Value *)nullptr);
17365 OutChains.push_back(Store);
17367 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
17370 // All stores are grouped in one node to allow the matching between
17371 // the size of Ins and InVals. This only happens for vararg functions.
17372 if (!OutChains.empty()) {
17373 OutChains.push_back(Chain);
17374 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
17377 return Chain;
17380 /// isEligibleForTailCallOptimization - Check whether the call is eligible
17381 /// for tail call optimization.
17382 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
17383 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
17384 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
17385 const SmallVector<CCValAssign, 16> &ArgLocs) const {
17387 auto CalleeCC = CLI.CallConv;
17388 auto &Outs = CLI.Outs;
17389 auto &Caller = MF.getFunction();
17390 auto CallerCC = Caller.getCallingConv();
17392 // Exception-handling functions need a special set of instructions to
17393 // indicate a return to the hardware. Tail-calling another function would
17394 // probably break this.
17395 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
17396 // should be expanded as new function attributes are introduced.
17397 if (Caller.hasFnAttribute("interrupt"))
17398 return false;
17400 // Do not tail call opt if the stack is used to pass parameters.
17401 if (CCInfo.getStackSize() != 0)
17402 return false;
17404 // Do not tail call opt if any parameters need to be passed indirectly.
17405 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
17406 // passed indirectly. So the address of the value will be passed in a
17407 // register, or if not available, then the address is put on the stack. In
17408 // order to pass indirectly, space on the stack often needs to be allocated
17409 // to store the value. In that case the CCInfo.getStackSize() != 0 check
17410 // above is not enough, and we also need to check whether any of the
17411 // CCValAssign entries in ArgLocs are passed CCValAssign::Indirect.
17412 for (auto &VA : ArgLocs)
17413 if (VA.getLocInfo() == CCValAssign::Indirect)
17414 return false;
17416 // Do not tail call opt if either caller or callee uses struct return
17417 // semantics.
17418 auto IsCallerStructRet = Caller.hasStructRetAttr();
17419 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
17420 if (IsCallerStructRet || IsCalleeStructRet)
17421 return false;
17423 // The callee has to preserve all registers the caller needs to preserve.
17424 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
17425 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
17426 if (CalleeCC != CallerCC) {
17427 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
17428 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
17429 return false;
17432 // Byval parameters hand the function a pointer directly into the stack area
17433 // we want to reuse during a tail call. Working around this *is* possible
17434 // but less efficient and uglier in LowerCall.
17435 for (auto &Arg : Outs)
17436 if (Arg.Flags.isByVal())
17437 return false;
17439 return true;
17442 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
17443 return DAG.getDataLayout().getPrefTypeAlign(
17444 VT.getTypeForEVT(*DAG.getContext()));
17447 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
17448 // and output parameter nodes.
17449 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
17450 SmallVectorImpl<SDValue> &InVals) const {
17451 SelectionDAG &DAG = CLI.DAG;
17452 SDLoc &DL = CLI.DL;
17453 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
17454 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
17455 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
17456 SDValue Chain = CLI.Chain;
17457 SDValue Callee = CLI.Callee;
17458 bool &IsTailCall = CLI.IsTailCall;
17459 CallingConv::ID CallConv = CLI.CallConv;
17460 bool IsVarArg = CLI.IsVarArg;
17461 EVT PtrVT = getPointerTy(DAG.getDataLayout());
17462 MVT XLenVT = Subtarget.getXLenVT();
17464 MachineFunction &MF = DAG.getMachineFunction();
17466 // Analyze the operands of the call, assigning locations to each operand.
17467 SmallVector<CCValAssign, 16> ArgLocs;
17468 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
17470 if (CallConv == CallingConv::GHC)
17471 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
17472 else
17473 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
17474 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
17475 : RISCV::CC_RISCV);
17477 // Check if it's really possible to do a tail call.
17478 if (IsTailCall)
17479 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
17481 if (IsTailCall)
17482 ++NumTailCalls;
17483 else if (CLI.CB && CLI.CB->isMustTailCall())
17484 report_fatal_error("failed to perform tail call elimination on a call "
17485 "site marked musttail");
17487 // Get a count of how many bytes are to be pushed on the stack.
17488 unsigned NumBytes = ArgCCInfo.getStackSize();
17490 // Create local copies for byval args
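// (a byval argument is passed by copying the pointee: a stack object is
// created in the caller's frame, the data is memcpy'd into it, and the
// address of that copy is what is passed in place of the original pointer).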
17491 SmallVector<SDValue, 8> ByValArgs;
17492 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
17493 ISD::ArgFlagsTy Flags = Outs[i].Flags;
17494 if (!Flags.isByVal())
17495 continue;
17497 SDValue Arg = OutVals[i];
17498 unsigned Size = Flags.getByValSize();
17499 Align Alignment = Flags.getNonZeroByValAlign();
17501 int FI =
17502 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
17503 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
17504 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
17506 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
17507 /*IsVolatile=*/false,
17508 /*AlwaysInline=*/false, IsTailCall,
17509 MachinePointerInfo(), MachinePointerInfo());
17510 ByValArgs.push_back(FIPtr);
17513 if (!IsTailCall)
17514 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
17516 // Copy argument values to their designated locations.
17517 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
17518 SmallVector<SDValue, 8> MemOpChains;
17519 SDValue StackPtr;
17520 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
17521 ++i, ++OutIdx) {
17522 CCValAssign &VA = ArgLocs[i];
17523 SDValue ArgValue = OutVals[OutIdx];
17524 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
17526 // Handle passing f64 on RV32D with a soft float ABI as a special case.
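// (Illustrative) The f64 value is split with RISCVISD::SplitF64 into two i32
// halves: the low half goes into the GPR chosen by the CCValAssign, while the
// high half goes either into the next assigned GPR or, if no register is
// left, into the outgoing argument area on the stack.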
17527 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17528 assert(VA.isRegLoc() && "Expected register VA assignment");
17529 assert(VA.needsCustom());
17530 SDValue SplitF64 = DAG.getNode(
17531 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
17532 SDValue Lo = SplitF64.getValue(0);
17533 SDValue Hi = SplitF64.getValue(1);
17535 Register RegLo = VA.getLocReg();
17536 RegsToPass.push_back(std::make_pair(RegLo, Lo));
17538 // Get the CCValAssign for the Hi part.
17539 CCValAssign &HiVA = ArgLocs[++i];
17541 if (HiVA.isMemLoc()) {
17542 // Second half of f64 is passed on the stack.
17543 if (!StackPtr.getNode())
17544 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
17545 SDValue Address =
17546 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
17547 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
17548 // Emit the store.
17549 MemOpChains.push_back(
17550 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
17551 } else {
17552 // Second half of f64 is passed in another GPR.
17553 Register RegHigh = HiVA.getLocReg();
17554 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
17556 continue;
17559 // Promote the value if needed.
17560 // For now, only handle fully promoted and indirect arguments.
17561 if (VA.getLocInfo() == CCValAssign::Indirect) {
17562 // Store the argument in a stack slot and pass its address.
17563 Align StackAlign =
17564 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
17565 getPrefTypeAlign(ArgValue.getValueType(), DAG));
17566 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
17567 // If the original argument was split (e.g. i128), we need
17568 // to store the required parts of it here (and pass just one address).
17569 // Vectors may be partly split to registers and partly to the stack, in
17570 // which case the base address is partly offset and subsequent stores are
17571 // relative to that.
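// (Illustrative) For example, when a large integer such as i128 is passed
// indirectly, each of its XLen-sized parts is stored into the single stack
// temporary created below, and only the address of that slot is passed.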
17572 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
17573 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
17574 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
17575 // Calculate the total size to store. We don't know this up front, so
17576 // walk the remaining parts in the loop below and accumulate their sizes
17577 // and preferred alignments.
17578 SmallVector<std::pair<SDValue, SDValue>> Parts;
17579 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
17580 SDValue PartValue = OutVals[OutIdx + 1];
17581 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
17582 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
17583 EVT PartVT = PartValue.getValueType();
17584 if (PartVT.isScalableVector())
17585 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
17586 StoredSize += PartVT.getStoreSize();
17587 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
17588 Parts.push_back(std::make_pair(PartValue, Offset));
17589 ++i;
17590 ++OutIdx;
17592 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
17593 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
17594 MemOpChains.push_back(
17595 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
17596 MachinePointerInfo::getFixedStack(MF, FI)));
17597 for (const auto &Part : Parts) {
17598 SDValue PartValue = Part.first;
17599 SDValue PartOffset = Part.second;
17600 SDValue Address =
17601 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
17602 MemOpChains.push_back(
17603 DAG.getStore(Chain, DL, PartValue, Address,
17604 MachinePointerInfo::getFixedStack(MF, FI)));
17606 ArgValue = SpillSlot;
17607 } else {
17608 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
17611 // Use local copy if it is a byval arg.
17612 if (Flags.isByVal())
17613 ArgValue = ByValArgs[j++];
17615 if (VA.isRegLoc()) {
17616 // Queue up the argument copies and emit them at the end.
17617 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
17618 } else {
17619 assert(VA.isMemLoc() && "Argument not register or memory");
17620 assert(!IsTailCall && "Tail call not allowed if stack is used "
17621 "for passing parameters");
17623 // Work out the address of the stack slot.
17624 if (!StackPtr.getNode())
17625 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
17626 SDValue Address =
17627 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
17628 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
17630 // Emit the store.
17631 MemOpChains.push_back(
17632 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
17636 // Join the stores, which are independent of one another.
17637 if (!MemOpChains.empty())
17638 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
17640 SDValue Glue;
17642 // Build a sequence of copy-to-reg nodes, chained and glued together.
17643 for (auto &Reg : RegsToPass) {
17644 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
17645 Glue = Chain.getValue(1);
17648 // Validate that none of the argument registers have been marked as
17649 // reserved; if so, report an error. Do the same for the return address
17650 // register if this is not a tail call.
17651 validateCCReservedRegs(RegsToPass, MF);
17652 if (!IsTailCall &&
17653 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
17654 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
17655 MF.getFunction(),
17656 "Return address register required, but has been reserved."});
17658 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
17659 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
17660 // split it, and so that the direct call can be matched by PseudoCALL.
17661 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
17662 const GlobalValue *GV = S->getGlobal();
17664 unsigned OpFlags = RISCVII::MO_CALL;
17665 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
17666 OpFlags = RISCVII::MO_PLT;
17668 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
17669 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
17670 unsigned OpFlags = RISCVII::MO_CALL;
17672 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
17673 nullptr))
17674 OpFlags = RISCVII::MO_PLT;
17676 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
17679 // The first call operand is the chain and the second is the target address.
17680 SmallVector<SDValue, 8> Ops;
17681 Ops.push_back(Chain);
17682 Ops.push_back(Callee);
17684 // Add argument registers to the end of the list so that they are
17685 // known live into the call.
17686 for (auto &Reg : RegsToPass)
17687 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
17689 if (!IsTailCall) {
17690 // Add a register mask operand representing the call-preserved registers.
17691 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
17692 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
17693 assert(Mask && "Missing call preserved mask for calling convention");
17694 Ops.push_back(DAG.getRegisterMask(Mask));
17697 // Glue the call to the argument copies, if any.
17698 if (Glue.getNode())
17699 Ops.push_back(Glue);
17701 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
17702 "Unexpected CFI type for a direct call");
17704 // Emit the call.
17705 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
17707 if (IsTailCall) {
17708 MF.getFrameInfo().setHasTailCall();
17709 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
17710 if (CLI.CFIType)
17711 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
17712 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
17713 return Ret;
17716 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
17717 if (CLI.CFIType)
17718 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
17719 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
17720 Glue = Chain.getValue(1);
17722 // Mark the end of the call, which is glued to the call itself.
17723 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
17724 Glue = Chain.getValue(1);
17726 // Assign locations to each value returned by this call.
17727 SmallVector<CCValAssign, 16> RVLocs;
17728 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
17729 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
17731 // Copy all of the result registers out of their specified physreg.
17732 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
17733 auto &VA = RVLocs[i];
17734 // Copy the value out
17735 SDValue RetValue =
17736 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
17737 // Glue the RetValue to the end of the call sequence
17738 Chain = RetValue.getValue(1);
17739 Glue = RetValue.getValue(2);
17741 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17742 assert(VA.needsCustom());
17743 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
17744 MVT::i32, Glue);
17745 Chain = RetValue2.getValue(1);
17746 Glue = RetValue2.getValue(2);
17747 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
17748 RetValue2);
17751 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
17753 InVals.push_back(RetValue);
17756 return Chain;
17759 bool RISCVTargetLowering::CanLowerReturn(
17760 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
17761 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
17762 SmallVector<CCValAssign, 16> RVLocs;
17763 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
17765 std::optional<unsigned> FirstMaskArgument;
17766 if (Subtarget.hasVInstructions())
17767 FirstMaskArgument = preAssignMask(Outs);
17769 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
17770 MVT VT = Outs[i].VT;
17771 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
17772 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
17773 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
17774 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
17775 *this, FirstMaskArgument))
17776 return false;
17778 return true;
17781 SDValue
17782 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
17783 bool IsVarArg,
17784 const SmallVectorImpl<ISD::OutputArg> &Outs,
17785 const SmallVectorImpl<SDValue> &OutVals,
17786 const SDLoc &DL, SelectionDAG &DAG) const {
17787 MachineFunction &MF = DAG.getMachineFunction();
17788 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
17790 // Stores the assignment of the return value to a location.
17791 SmallVector<CCValAssign, 16> RVLocs;
17793 // Info about the registers and stack slot.
17794 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
17795 *DAG.getContext());
17797 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
17798 nullptr, RISCV::CC_RISCV);
17800 if (CallConv == CallingConv::GHC && !RVLocs.empty())
17801 report_fatal_error("GHC functions return void only");
17803 SDValue Glue;
17804 SmallVector<SDValue, 4> RetOps(1, Chain);
17806 // Copy the result values into the output registers.
17807 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
17808 SDValue Val = OutVals[OutIdx];
17809 CCValAssign &VA = RVLocs[i];
17810 assert(VA.isRegLoc() && "Can only return in registers!");
17812 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
17813 // Handle returning f64 on RV32D with a soft float ABI.
17814 assert(VA.isRegLoc() && "Expected return via registers");
17815 assert(VA.needsCustom());
17816 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
17817 DAG.getVTList(MVT::i32, MVT::i32), Val);
17818 SDValue Lo = SplitF64.getValue(0);
17819 SDValue Hi = SplitF64.getValue(1);
17820 Register RegLo = VA.getLocReg();
17821 Register RegHi = RVLocs[++i].getLocReg();
17823 if (STI.isRegisterReservedByUser(RegLo) ||
17824 STI.isRegisterReservedByUser(RegHi))
17825 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
17826 MF.getFunction(),
17827 "Return value register required, but has been reserved."});
17829 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
17830 Glue = Chain.getValue(1);
17831 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
17832 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
17833 Glue = Chain.getValue(1);
17834 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
17835 } else {
17836 // Handle a 'normal' return.
17837 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
17838 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
17840 if (STI.isRegisterReservedByUser(VA.getLocReg()))
17841 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
17842 MF.getFunction(),
17843 "Return value register required, but has been reserved."});
17845 // Guarantee that all emitted copies are stuck together.
17846 Glue = Chain.getValue(1);
17847 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
17851 RetOps[0] = Chain; // Update chain.
17853 // Add the glue node if we have it.
17854 if (Glue.getNode()) {
17855 RetOps.push_back(Glue);
17858 if (any_of(RVLocs,
17859 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
17860 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
17862 unsigned RetOpc = RISCVISD::RET_GLUE;
17863 // Interrupt service routines use different return instructions.
17864 const Function &Func = DAG.getMachineFunction().getFunction();
17865 if (Func.hasFnAttribute("interrupt")) {
17866 if (!Func.getReturnType()->isVoidTy())
17867 report_fatal_error(
17868 "Functions with the interrupt attribute must have void return type!");
17870 MachineFunction &MF = DAG.getMachineFunction();
17871 StringRef Kind =
17872 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
17874 if (Kind == "supervisor")
17875 RetOpc = RISCVISD::SRET_GLUE;
17876 else
17877 RetOpc = RISCVISD::MRET_GLUE;
17880 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
17883 void RISCVTargetLowering::validateCCReservedRegs(
17884 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
17885 MachineFunction &MF) const {
17886 const Function &F = MF.getFunction();
17887 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
17889 if (llvm::any_of(Regs, [&STI](auto Reg) {
17890 return STI.isRegisterReservedByUser(Reg.first);
17892 F.getContext().diagnose(DiagnosticInfoUnsupported{
17893 F, "Argument register required, but has been reserved."});
17896 // Check if the result of the node is only used as a return value, as
17897 // otherwise we can't perform a tail-call.
17898 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
17899 if (N->getNumValues() != 1)
17900 return false;
17901 if (!N->hasNUsesOfValue(1, 0))
17902 return false;
17904 SDNode *Copy = *N->use_begin();
17906 if (Copy->getOpcode() == ISD::BITCAST) {
17907 return isUsedByReturnOnly(Copy, Chain);
17910 // TODO: Handle additional opcodes in order to support tail-calling libcalls
17911 // with soft float ABIs.
17912 if (Copy->getOpcode() != ISD::CopyToReg) {
17913 return false;
17916 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
17917 // isn't safe to perform a tail call.
17918 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
17919 return false;
17921 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
17922 bool HasRet = false;
17923 for (SDNode *Node : Copy->uses()) {
17924 if (Node->getOpcode() != RISCVISD::RET_GLUE)
17925 return false;
17926 HasRet = true;
17928 if (!HasRet)
17929 return false;
17931 Chain = Copy->getOperand(0);
17932 return true;
17935 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17936 return CI->isTailCall();
17939 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
17940 #define NODE_NAME_CASE(NODE) \
17941 case RISCVISD::NODE: \
17942 return "RISCVISD::" #NODE;
17943 // clang-format off
17944 switch ((RISCVISD::NodeType)Opcode) {
17945 case RISCVISD::FIRST_NUMBER:
17946 break;
17947 NODE_NAME_CASE(RET_GLUE)
17948 NODE_NAME_CASE(SRET_GLUE)
17949 NODE_NAME_CASE(MRET_GLUE)
17950 NODE_NAME_CASE(CALL)
17951 NODE_NAME_CASE(SELECT_CC)
17952 NODE_NAME_CASE(BR_CC)
17953 NODE_NAME_CASE(BuildPairF64)
17954 NODE_NAME_CASE(SplitF64)
17955 NODE_NAME_CASE(TAIL)
17956 NODE_NAME_CASE(ADD_LO)
17957 NODE_NAME_CASE(HI)
17958 NODE_NAME_CASE(LLA)
17959 NODE_NAME_CASE(ADD_TPREL)
17960 NODE_NAME_CASE(MULHSU)
17961 NODE_NAME_CASE(SLLW)
17962 NODE_NAME_CASE(SRAW)
17963 NODE_NAME_CASE(SRLW)
17964 NODE_NAME_CASE(DIVW)
17965 NODE_NAME_CASE(DIVUW)
17966 NODE_NAME_CASE(REMUW)
17967 NODE_NAME_CASE(ROLW)
17968 NODE_NAME_CASE(RORW)
17969 NODE_NAME_CASE(CLZW)
17970 NODE_NAME_CASE(CTZW)
17971 NODE_NAME_CASE(ABSW)
17972 NODE_NAME_CASE(FMV_H_X)
17973 NODE_NAME_CASE(FMV_X_ANYEXTH)
17974 NODE_NAME_CASE(FMV_X_SIGNEXTH)
17975 NODE_NAME_CASE(FMV_W_X_RV64)
17976 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
17977 NODE_NAME_CASE(FCVT_X)
17978 NODE_NAME_CASE(FCVT_XU)
17979 NODE_NAME_CASE(FCVT_W_RV64)
17980 NODE_NAME_CASE(FCVT_WU_RV64)
17981 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
17982 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
17983 NODE_NAME_CASE(FP_ROUND_BF16)
17984 NODE_NAME_CASE(FP_EXTEND_BF16)
17985 NODE_NAME_CASE(FROUND)
17986 NODE_NAME_CASE(FPCLASS)
17987 NODE_NAME_CASE(FMAX)
17988 NODE_NAME_CASE(FMIN)
17989 NODE_NAME_CASE(READ_CYCLE_WIDE)
17990 NODE_NAME_CASE(BREV8)
17991 NODE_NAME_CASE(ORC_B)
17992 NODE_NAME_CASE(ZIP)
17993 NODE_NAME_CASE(UNZIP)
17994 NODE_NAME_CASE(CLMUL)
17995 NODE_NAME_CASE(CLMULH)
17996 NODE_NAME_CASE(CLMULR)
17997 NODE_NAME_CASE(SHA256SIG0)
17998 NODE_NAME_CASE(SHA256SIG1)
17999 NODE_NAME_CASE(SHA256SUM0)
18000 NODE_NAME_CASE(SHA256SUM1)
18001 NODE_NAME_CASE(SM4KS)
18002 NODE_NAME_CASE(SM4ED)
18003 NODE_NAME_CASE(SM3P0)
18004 NODE_NAME_CASE(SM3P1)
18005 NODE_NAME_CASE(TH_LWD)
18006 NODE_NAME_CASE(TH_LWUD)
18007 NODE_NAME_CASE(TH_LDD)
18008 NODE_NAME_CASE(TH_SWD)
18009 NODE_NAME_CASE(TH_SDD)
18010 NODE_NAME_CASE(VMV_V_V_VL)
18011 NODE_NAME_CASE(VMV_V_X_VL)
18012 NODE_NAME_CASE(VFMV_V_F_VL)
18013 NODE_NAME_CASE(VMV_X_S)
18014 NODE_NAME_CASE(VMV_S_X_VL)
18015 NODE_NAME_CASE(VFMV_S_F_VL)
18016 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
18017 NODE_NAME_CASE(READ_VLENB)
18018 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
18019 NODE_NAME_CASE(VSLIDEUP_VL)
18020 NODE_NAME_CASE(VSLIDE1UP_VL)
18021 NODE_NAME_CASE(VSLIDEDOWN_VL)
18022 NODE_NAME_CASE(VSLIDE1DOWN_VL)
18023 NODE_NAME_CASE(VFSLIDE1UP_VL)
18024 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
18025 NODE_NAME_CASE(VID_VL)
18026 NODE_NAME_CASE(VFNCVT_ROD_VL)
18027 NODE_NAME_CASE(VECREDUCE_ADD_VL)
18028 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
18029 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
18030 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
18031 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
18032 NODE_NAME_CASE(VECREDUCE_AND_VL)
18033 NODE_NAME_CASE(VECREDUCE_OR_VL)
18034 NODE_NAME_CASE(VECREDUCE_XOR_VL)
18035 NODE_NAME_CASE(VECREDUCE_FADD_VL)
18036 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
18037 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
18038 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
18039 NODE_NAME_CASE(ADD_VL)
18040 NODE_NAME_CASE(AND_VL)
18041 NODE_NAME_CASE(MUL_VL)
18042 NODE_NAME_CASE(OR_VL)
18043 NODE_NAME_CASE(SDIV_VL)
18044 NODE_NAME_CASE(SHL_VL)
18045 NODE_NAME_CASE(SREM_VL)
18046 NODE_NAME_CASE(SRA_VL)
18047 NODE_NAME_CASE(SRL_VL)
18048 NODE_NAME_CASE(ROTL_VL)
18049 NODE_NAME_CASE(ROTR_VL)
18050 NODE_NAME_CASE(SUB_VL)
18051 NODE_NAME_CASE(UDIV_VL)
18052 NODE_NAME_CASE(UREM_VL)
18053 NODE_NAME_CASE(XOR_VL)
18054 NODE_NAME_CASE(SADDSAT_VL)
18055 NODE_NAME_CASE(UADDSAT_VL)
18056 NODE_NAME_CASE(SSUBSAT_VL)
18057 NODE_NAME_CASE(USUBSAT_VL)
18058 NODE_NAME_CASE(FADD_VL)
18059 NODE_NAME_CASE(FSUB_VL)
18060 NODE_NAME_CASE(FMUL_VL)
18061 NODE_NAME_CASE(FDIV_VL)
18062 NODE_NAME_CASE(FNEG_VL)
18063 NODE_NAME_CASE(FABS_VL)
18064 NODE_NAME_CASE(FSQRT_VL)
18065 NODE_NAME_CASE(FCLASS_VL)
18066 NODE_NAME_CASE(VFMADD_VL)
18067 NODE_NAME_CASE(VFNMADD_VL)
18068 NODE_NAME_CASE(VFMSUB_VL)
18069 NODE_NAME_CASE(VFNMSUB_VL)
18070 NODE_NAME_CASE(VFWMADD_VL)
18071 NODE_NAME_CASE(VFWNMADD_VL)
18072 NODE_NAME_CASE(VFWMSUB_VL)
18073 NODE_NAME_CASE(VFWNMSUB_VL)
18074 NODE_NAME_CASE(FCOPYSIGN_VL)
18075 NODE_NAME_CASE(SMIN_VL)
18076 NODE_NAME_CASE(SMAX_VL)
18077 NODE_NAME_CASE(UMIN_VL)
18078 NODE_NAME_CASE(UMAX_VL)
18079 NODE_NAME_CASE(BITREVERSE_VL)
18080 NODE_NAME_CASE(BSWAP_VL)
18081 NODE_NAME_CASE(CTLZ_VL)
18082 NODE_NAME_CASE(CTTZ_VL)
18083 NODE_NAME_CASE(CTPOP_VL)
18084 NODE_NAME_CASE(VFMIN_VL)
18085 NODE_NAME_CASE(VFMAX_VL)
18086 NODE_NAME_CASE(MULHS_VL)
18087 NODE_NAME_CASE(MULHU_VL)
18088 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
18089 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
18090 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
18091 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
18092 NODE_NAME_CASE(VFCVT_X_F_VL)
18093 NODE_NAME_CASE(VFCVT_XU_F_VL)
18094 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
18095 NODE_NAME_CASE(SINT_TO_FP_VL)
18096 NODE_NAME_CASE(UINT_TO_FP_VL)
18097 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
18098 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
18099 NODE_NAME_CASE(FP_EXTEND_VL)
18100 NODE_NAME_CASE(FP_ROUND_VL)
18101 NODE_NAME_CASE(STRICT_FADD_VL)
18102 NODE_NAME_CASE(STRICT_FSUB_VL)
18103 NODE_NAME_CASE(STRICT_FMUL_VL)
18104 NODE_NAME_CASE(STRICT_FDIV_VL)
18105 NODE_NAME_CASE(STRICT_FSQRT_VL)
18106 NODE_NAME_CASE(STRICT_VFMADD_VL)
18107 NODE_NAME_CASE(STRICT_VFNMADD_VL)
18108 NODE_NAME_CASE(STRICT_VFMSUB_VL)
18109 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
18110 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
18111 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
18112 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
18113 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
18114 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
18115 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
18116 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
18117 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
18118 NODE_NAME_CASE(STRICT_FSETCC_VL)
18119 NODE_NAME_CASE(STRICT_FSETCCS_VL)
18120 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
18121 NODE_NAME_CASE(VWMUL_VL)
18122 NODE_NAME_CASE(VWMULU_VL)
18123 NODE_NAME_CASE(VWMULSU_VL)
18124 NODE_NAME_CASE(VWADD_VL)
18125 NODE_NAME_CASE(VWADDU_VL)
18126 NODE_NAME_CASE(VWSUB_VL)
18127 NODE_NAME_CASE(VWSUBU_VL)
18128 NODE_NAME_CASE(VWADD_W_VL)
18129 NODE_NAME_CASE(VWADDU_W_VL)
18130 NODE_NAME_CASE(VWSUB_W_VL)
18131 NODE_NAME_CASE(VWSUBU_W_VL)
18132 NODE_NAME_CASE(VWSLL_VL)
18133 NODE_NAME_CASE(VFWMUL_VL)
18134 NODE_NAME_CASE(VFWADD_VL)
18135 NODE_NAME_CASE(VFWSUB_VL)
18136 NODE_NAME_CASE(VFWADD_W_VL)
18137 NODE_NAME_CASE(VFWSUB_W_VL)
18138 NODE_NAME_CASE(VWMACC_VL)
18139 NODE_NAME_CASE(VWMACCU_VL)
18140 NODE_NAME_CASE(VWMACCSU_VL)
18141 NODE_NAME_CASE(VNSRL_VL)
18142 NODE_NAME_CASE(SETCC_VL)
18143 NODE_NAME_CASE(VSELECT_VL)
18144 NODE_NAME_CASE(VP_MERGE_VL)
18145 NODE_NAME_CASE(VMAND_VL)
18146 NODE_NAME_CASE(VMOR_VL)
18147 NODE_NAME_CASE(VMXOR_VL)
18148 NODE_NAME_CASE(VMCLR_VL)
18149 NODE_NAME_CASE(VMSET_VL)
18150 NODE_NAME_CASE(VRGATHER_VX_VL)
18151 NODE_NAME_CASE(VRGATHER_VV_VL)
18152 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
18153 NODE_NAME_CASE(VSEXT_VL)
18154 NODE_NAME_CASE(VZEXT_VL)
18155 NODE_NAME_CASE(VCPOP_VL)
18156 NODE_NAME_CASE(VFIRST_VL)
18157 NODE_NAME_CASE(READ_CSR)
18158 NODE_NAME_CASE(WRITE_CSR)
18159 NODE_NAME_CASE(SWAP_CSR)
18160 NODE_NAME_CASE(CZERO_EQZ)
18161 NODE_NAME_CASE(CZERO_NEZ)
18163 // clang-format on
18164 return nullptr;
18165 #undef NODE_NAME_CASE
18168 /// getConstraintType - Given a constraint letter, return the type of
18169 /// constraint it is for this target.
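/// The single-letter constraints handled below are 'f' (an FP register),
/// 'I'/'J'/'K' (immediates: 12-bit signed, zero, and 5-bit unsigned,
/// respectively), 'A' (a memory operand addressed by a register) and 'S'
/// (a symbolic address); the multi-letter "vr" and "vm" constraints select
/// vector register and vector mask operands.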
18170 RISCVTargetLowering::ConstraintType
18171 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
18172 if (Constraint.size() == 1) {
18173 switch (Constraint[0]) {
18174 default:
18175 break;
18176 case 'f':
18177 return C_RegisterClass;
18178 case 'I':
18179 case 'J':
18180 case 'K':
18181 return C_Immediate;
18182 case 'A':
18183 return C_Memory;
18184 case 'S': // A symbolic address
18185 return C_Other;
18187 } else {
18188 if (Constraint == "vr" || Constraint == "vm")
18189 return C_RegisterClass;
18191 return TargetLowering::getConstraintType(Constraint);
18194 std::pair<unsigned, const TargetRegisterClass *>
18195 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18196 StringRef Constraint,
18197 MVT VT) const {
18198 // First, see if this is a constraint that directly corresponds to a RISC-V
18199 // register class.
18200 if (Constraint.size() == 1) {
18201 switch (Constraint[0]) {
18202 case 'r':
18203 // TODO: Support fixed vectors up to XLen for P extension?
18204 if (VT.isVector())
18205 break;
18206 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
18207 case 'f':
18208 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
18209 return std::make_pair(0U, &RISCV::FPR16RegClass);
18210 if (Subtarget.hasStdExtF() && VT == MVT::f32)
18211 return std::make_pair(0U, &RISCV::FPR32RegClass);
18212 if (Subtarget.hasStdExtD() && VT == MVT::f64)
18213 return std::make_pair(0U, &RISCV::FPR64RegClass);
18214 break;
18215 default:
18216 break;
18218 } else if (Constraint == "vr") {
18219 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
18220 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
18221 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
18222 return std::make_pair(0U, RC);
18224 } else if (Constraint == "vm") {
18225 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
18226 return std::make_pair(0U, &RISCV::VMV0RegClass);
18229 // Clang will correctly decode the usage of register name aliases into their
18230 // official names. However, other frontends like `rustc` do not. This allows
18231 // users of these frontends to use the ABI names for registers in LLVM-style
18232 // register constraints.
18233 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
18234 .Case("{zero}", RISCV::X0)
18235 .Case("{ra}", RISCV::X1)
18236 .Case("{sp}", RISCV::X2)
18237 .Case("{gp}", RISCV::X3)
18238 .Case("{tp}", RISCV::X4)
18239 .Case("{t0}", RISCV::X5)
18240 .Case("{t1}", RISCV::X6)
18241 .Case("{t2}", RISCV::X7)
18242 .Cases("{s0}", "{fp}", RISCV::X8)
18243 .Case("{s1}", RISCV::X9)
18244 .Case("{a0}", RISCV::X10)
18245 .Case("{a1}", RISCV::X11)
18246 .Case("{a2}", RISCV::X12)
18247 .Case("{a3}", RISCV::X13)
18248 .Case("{a4}", RISCV::X14)
18249 .Case("{a5}", RISCV::X15)
18250 .Case("{a6}", RISCV::X16)
18251 .Case("{a7}", RISCV::X17)
18252 .Case("{s2}", RISCV::X18)
18253 .Case("{s3}", RISCV::X19)
18254 .Case("{s4}", RISCV::X20)
18255 .Case("{s5}", RISCV::X21)
18256 .Case("{s6}", RISCV::X22)
18257 .Case("{s7}", RISCV::X23)
18258 .Case("{s8}", RISCV::X24)
18259 .Case("{s9}", RISCV::X25)
18260 .Case("{s10}", RISCV::X26)
18261 .Case("{s11}", RISCV::X27)
18262 .Case("{t3}", RISCV::X28)
18263 .Case("{t4}", RISCV::X29)
18264 .Case("{t5}", RISCV::X30)
18265 .Case("{t6}", RISCV::X31)
18266 .Default(RISCV::NoRegister);
18267 if (XRegFromAlias != RISCV::NoRegister)
18268 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
18270 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
18271 // TableGen record rather than the AsmName to choose registers for InlineAsm
18272 // constraints, and we want to match those names to the widest floating-point
18273 // register type available, manually select floating-point registers here.
18275 // The second case in each entry below is the ABI name of the register, so
18276 // that frontends can also use the ABI names in register constraint lists.
18277 if (Subtarget.hasStdExtF()) {
18278 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
18279 .Cases("{f0}", "{ft0}", RISCV::F0_F)
18280 .Cases("{f1}", "{ft1}", RISCV::F1_F)
18281 .Cases("{f2}", "{ft2}", RISCV::F2_F)
18282 .Cases("{f3}", "{ft3}", RISCV::F3_F)
18283 .Cases("{f4}", "{ft4}", RISCV::F4_F)
18284 .Cases("{f5}", "{ft5}", RISCV::F5_F)
18285 .Cases("{f6}", "{ft6}", RISCV::F6_F)
18286 .Cases("{f7}", "{ft7}", RISCV::F7_F)
18287 .Cases("{f8}", "{fs0}", RISCV::F8_F)
18288 .Cases("{f9}", "{fs1}", RISCV::F9_F)
18289 .Cases("{f10}", "{fa0}", RISCV::F10_F)
18290 .Cases("{f11}", "{fa1}", RISCV::F11_F)
18291 .Cases("{f12}", "{fa2}", RISCV::F12_F)
18292 .Cases("{f13}", "{fa3}", RISCV::F13_F)
18293 .Cases("{f14}", "{fa4}", RISCV::F14_F)
18294 .Cases("{f15}", "{fa5}", RISCV::F15_F)
18295 .Cases("{f16}", "{fa6}", RISCV::F16_F)
18296 .Cases("{f17}", "{fa7}", RISCV::F17_F)
18297 .Cases("{f18}", "{fs2}", RISCV::F18_F)
18298 .Cases("{f19}", "{fs3}", RISCV::F19_F)
18299 .Cases("{f20}", "{fs4}", RISCV::F20_F)
18300 .Cases("{f21}", "{fs5}", RISCV::F21_F)
18301 .Cases("{f22}", "{fs6}", RISCV::F22_F)
18302 .Cases("{f23}", "{fs7}", RISCV::F23_F)
18303 .Cases("{f24}", "{fs8}", RISCV::F24_F)
18304 .Cases("{f25}", "{fs9}", RISCV::F25_F)
18305 .Cases("{f26}", "{fs10}", RISCV::F26_F)
18306 .Cases("{f27}", "{fs11}", RISCV::F27_F)
18307 .Cases("{f28}", "{ft8}", RISCV::F28_F)
18308 .Cases("{f29}", "{ft9}", RISCV::F29_F)
18309 .Cases("{f30}", "{ft10}", RISCV::F30_F)
18310 .Cases("{f31}", "{ft11}", RISCV::F31_F)
18311 .Default(RISCV::NoRegister);
18312 if (FReg != RISCV::NoRegister) {
18313 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
18314 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
18315 unsigned RegNo = FReg - RISCV::F0_F;
18316 unsigned DReg = RISCV::F0_D + RegNo;
18317 return std::make_pair(DReg, &RISCV::FPR64RegClass);
18319 if (VT == MVT::f32 || VT == MVT::Other)
18320 return std::make_pair(FReg, &RISCV::FPR32RegClass);
18321 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
18322 unsigned RegNo = FReg - RISCV::F0_F;
18323 unsigned HReg = RISCV::F0_H + RegNo;
18324 return std::make_pair(HReg, &RISCV::FPR16RegClass);
18329 if (Subtarget.hasVInstructions()) {
18330 Register VReg = StringSwitch<Register>(Constraint.lower())
18331 .Case("{v0}", RISCV::V0)
18332 .Case("{v1}", RISCV::V1)
18333 .Case("{v2}", RISCV::V2)
18334 .Case("{v3}", RISCV::V3)
18335 .Case("{v4}", RISCV::V4)
18336 .Case("{v5}", RISCV::V5)
18337 .Case("{v6}", RISCV::V6)
18338 .Case("{v7}", RISCV::V7)
18339 .Case("{v8}", RISCV::V8)
18340 .Case("{v9}", RISCV::V9)
18341 .Case("{v10}", RISCV::V10)
18342 .Case("{v11}", RISCV::V11)
18343 .Case("{v12}", RISCV::V12)
18344 .Case("{v13}", RISCV::V13)
18345 .Case("{v14}", RISCV::V14)
18346 .Case("{v15}", RISCV::V15)
18347 .Case("{v16}", RISCV::V16)
18348 .Case("{v17}", RISCV::V17)
18349 .Case("{v18}", RISCV::V18)
18350 .Case("{v19}", RISCV::V19)
18351 .Case("{v20}", RISCV::V20)
18352 .Case("{v21}", RISCV::V21)
18353 .Case("{v22}", RISCV::V22)
18354 .Case("{v23}", RISCV::V23)
18355 .Case("{v24}", RISCV::V24)
18356 .Case("{v25}", RISCV::V25)
18357 .Case("{v26}", RISCV::V26)
18358 .Case("{v27}", RISCV::V27)
18359 .Case("{v28}", RISCV::V28)
18360 .Case("{v29}", RISCV::V29)
18361 .Case("{v30}", RISCV::V30)
18362 .Case("{v31}", RISCV::V31)
18363 .Default(RISCV::NoRegister);
18364 if (VReg != RISCV::NoRegister) {
18365 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
18366 return std::make_pair(VReg, &RISCV::VMRegClass);
18367 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
18368 return std::make_pair(VReg, &RISCV::VRRegClass);
18369 for (const auto *RC :
18370 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
18371 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
18372 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
18373 return std::make_pair(VReg, RC);
18379 std::pair<Register, const TargetRegisterClass *> Res =
18380 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18382 // If we picked one of the Zfinx register classes, remap it to the GPR class.
18383 // FIXME: When Zfinx is supported in CodeGen this will need to take the
18384 // Subtarget into account.
18385 if (Res.second == &RISCV::GPRF16RegClass ||
18386 Res.second == &RISCV::GPRF32RegClass ||
18387 Res.second == &RISCV::GPRPF64RegClass)
18388 return std::make_pair(Res.first, &RISCV::GPRRegClass);
18390 return Res;
18393 InlineAsm::ConstraintCode
18394 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
18395 // Currently only support length 1 constraints.
18396 if (ConstraintCode.size() == 1) {
18397 switch (ConstraintCode[0]) {
18398 case 'A':
18399 return InlineAsm::ConstraintCode::A;
18400 default:
18401 break;
18405 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
18408 void RISCVTargetLowering::LowerAsmOperandForConstraint(
18409 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
18410 SelectionDAG &DAG) const {
18411 // Currently only support length 1 constraints.
18412 if (Constraint.size() == 1) {
18413 switch (Constraint[0]) {
18414 case 'I':
18415 // Validate & create a 12-bit signed immediate operand.
18416 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
18417 uint64_t CVal = C->getSExtValue();
18418 if (isInt<12>(CVal))
18419 Ops.push_back(
18420 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
18422 return;
18423 case 'J':
18424 // Validate & create an integer zero operand.
18425 if (isNullConstant(Op))
18426 Ops.push_back(
18427 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
18428 return;
18429 case 'K':
18430 // Validate & create a 5-bit unsigned immediate operand.
18431 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
18432 uint64_t CVal = C->getZExtValue();
18433 if (isUInt<5>(CVal))
18434 Ops.push_back(
18435 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
18437 return;
18438 case 'S':
18439 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
18440 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
18441 GA->getValueType(0)));
18442 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
18443 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
18444 BA->getValueType(0)));
18446 return;
18447 default:
18448 break;
18451 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18454 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
18455 Instruction *Inst,
18456 AtomicOrdering Ord) const {
18457 if (Subtarget.hasStdExtZtso()) {
18458 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
18459 return Builder.CreateFence(Ord);
18460 return nullptr;
18463 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
18464 return Builder.CreateFence(Ord);
18465 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
18466 return Builder.CreateFence(AtomicOrdering::Release);
18467 return nullptr;
18470 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
18471 Instruction *Inst,
18472 AtomicOrdering Ord) const {
18473 if (Subtarget.hasStdExtZtso()) {
18474 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
18475 return Builder.CreateFence(Ord);
18476 return nullptr;
18479 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
18480 return Builder.CreateFence(AtomicOrdering::Acquire);
18481 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
18482 Ord == AtomicOrdering::SequentiallyConsistent)
18483 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
18484 return nullptr;
18487 TargetLowering::AtomicExpansionKind
18488 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18489 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
18490 // point operations can't be used in an lr/sc sequence without breaking the
18491 // forward-progress guarantee.
18492 if (AI->isFloatingPointOperation() ||
18493 AI->getOperation() == AtomicRMWInst::UIncWrap ||
18494 AI->getOperation() == AtomicRMWInst::UDecWrap)
18495 return AtomicExpansionKind::CmpXChg;
18497 // Don't expand forced atomics, we want to have __sync libcalls instead.
18498 if (Subtarget.hasForcedAtomics())
18499 return AtomicExpansionKind::None;
18501 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18502 if (Size == 8 || Size == 16)
18503 return AtomicExpansionKind::MaskedIntrinsic;
18504 return AtomicExpansionKind::None;
18507 static Intrinsic::ID
18508 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
18509 if (XLen == 32) {
18510 switch (BinOp) {
18511 default:
18512 llvm_unreachable("Unexpected AtomicRMW BinOp");
18513 case AtomicRMWInst::Xchg:
18514 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
18515 case AtomicRMWInst::Add:
18516 return Intrinsic::riscv_masked_atomicrmw_add_i32;
18517 case AtomicRMWInst::Sub:
18518 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
18519 case AtomicRMWInst::Nand:
18520 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
18521 case AtomicRMWInst::Max:
18522 return Intrinsic::riscv_masked_atomicrmw_max_i32;
18523 case AtomicRMWInst::Min:
18524 return Intrinsic::riscv_masked_atomicrmw_min_i32;
18525 case AtomicRMWInst::UMax:
18526 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
18527 case AtomicRMWInst::UMin:
18528 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
18532 if (XLen == 64) {
18533 switch (BinOp) {
18534 default:
18535 llvm_unreachable("Unexpected AtomicRMW BinOp");
18536 case AtomicRMWInst::Xchg:
18537 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
18538 case AtomicRMWInst::Add:
18539 return Intrinsic::riscv_masked_atomicrmw_add_i64;
18540 case AtomicRMWInst::Sub:
18541 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
18542 case AtomicRMWInst::Nand:
18543 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
18544 case AtomicRMWInst::Max:
18545 return Intrinsic::riscv_masked_atomicrmw_max_i64;
18546 case AtomicRMWInst::Min:
18547 return Intrinsic::riscv_masked_atomicrmw_min_i64;
18548 case AtomicRMWInst::UMax:
18549 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
18550 case AtomicRMWInst::UMin:
18551 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
18555 llvm_unreachable("Unexpected XLen\n");
18558 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
18559 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18560 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18561 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
18562 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
18563 // mask, as this produces better code than the LR/SC loop emitted by
18564 // int_riscv_masked_atomicrmw_xchg.
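// For example, an atomicrmw xchg with constant 0 becomes an AND with the
// inverted mask (clearing just the selected bytes within the aligned word),
// and an xchg with -1 becomes an OR with the mask (setting them).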
18565 if (AI->getOperation() == AtomicRMWInst::Xchg &&
18566 isa<ConstantInt>(AI->getValOperand())) {
18567 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
18568 if (CVal->isZero())
18569 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
18570 Builder.CreateNot(Mask, "Inv_Mask"),
18571 AI->getAlign(), Ord);
18572 if (CVal->isMinusOne())
18573 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
18574 AI->getAlign(), Ord);
18577 unsigned XLen = Subtarget.getXLen();
18578 Value *Ordering =
18579 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
18580 Type *Tys[] = {AlignedAddr->getType()};
18581 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
18582 AI->getModule(),
18583 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
18585 if (XLen == 64) {
18586 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
18587 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
18588 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
18591 Value *Result;
18593 // Must pass the shift amount needed to sign extend the loaded value prior
18594 // to performing a signed comparison for min/max. ShiftAmt is the number of
18595 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
18596 // is the number of bits to left+right shift the value in order to
18597 // sign-extend.
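// For example (illustrative), for an i8 atomic min on RV32 whose byte sits
// at bit offset 8 (ShiftAmt = 8), SextShamt = 32 - 8 - 8 = 16.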
18598 if (AI->getOperation() == AtomicRMWInst::Min ||
18599 AI->getOperation() == AtomicRMWInst::Max) {
18600 const DataLayout &DL = AI->getModule()->getDataLayout();
18601 unsigned ValWidth =
18602 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
18603 Value *SextShamt =
18604 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
18605 Result = Builder.CreateCall(LrwOpScwLoop,
18606 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
18607 } else {
18608 Result =
18609 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
18612 if (XLen == 64)
18613 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
18614 return Result;
18617 TargetLowering::AtomicExpansionKind
18618 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
18619 AtomicCmpXchgInst *CI) const {
18620 // Don't expand forced atomics, we want to have __sync libcalls instead.
18621 if (Subtarget.hasForcedAtomics())
18622 return AtomicExpansionKind::None;
18624 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
18625 if (Size == 8 || Size == 16)
18626 return AtomicExpansionKind::MaskedIntrinsic;
18627 return AtomicExpansionKind::None;
18630 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18631 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18632 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18633 unsigned XLen = Subtarget.getXLen();
18634 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
18635 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
18636 if (XLen == 64) {
18637 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
18638 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
18639 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
18640 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
18642 Type *Tys[] = {AlignedAddr->getType()};
18643 Function *MaskedCmpXchg =
18644 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
18645 Value *Result = Builder.CreateCall(
18646 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
18647 if (XLen == 64)
18648 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
18649 return Result;
18652 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
18653 EVT DataVT) const {
18654 // We have indexed loads for all legal index types. Indices are always
18655 // zero extended.
18656 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
18657 isTypeLegal(Extend.getValueType()) &&
18658 isTypeLegal(Extend.getOperand(0).getValueType());
18661 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
18662 EVT VT) const {
18663 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
18664 return false;
18666 switch (FPVT.getSimpleVT().SimpleTy) {
18667 case MVT::f16:
18668 return Subtarget.hasStdExtZfhOrZfhmin();
18669 case MVT::f32:
18670 return Subtarget.hasStdExtF();
18671 case MVT::f64:
18672 return Subtarget.hasStdExtD();
18673 default:
18674 return false;
18678 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
18679 // If we are using the small code model, we can reduce size of jump table
18680 // entry to 4 bytes.
18681 if (Subtarget.is64Bit() && !isPositionIndependent() &&
18682 getTargetMachine().getCodeModel() == CodeModel::Small) {
18683 return MachineJumpTableInfo::EK_Custom32;
18685 return TargetLowering::getJumpTableEncoding();
18688 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
18689 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
18690 unsigned uid, MCContext &Ctx) const {
18691 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
18692 getTargetMachine().getCodeModel() == CodeModel::Small);
18693 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
18696 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
18697 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
18698 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
18699 // a power of two as well.
18700 // FIXME: This doesn't work for zve32, but that's already broken
18701 // elsewhere for the same reason.
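// For example, VLEN = 128 gives vscale = 128 / 64 = 2, and VLEN = 256 gives
// vscale = 4.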
18702 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
18703 static_assert(RISCV::RVVBitsPerBlock == 64,
18704 "RVVBitsPerBlock changed, audit needed");
18705 return true;
18708 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
18709 SDValue &Offset,
18710 ISD::MemIndexedMode &AM,
18711 bool &IsInc,
18712 SelectionDAG &DAG) const {
18713 // Target does not support indexed loads.
18714 if (!Subtarget.hasVendorXTHeadMemIdx())
18715 return false;
18717 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
18718 return false;
18720 Base = Op->getOperand(0);
18721 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
18722 int64_t RHSC = RHS->getSExtValue();
18723 if (Op->getOpcode() == ISD::SUB)
18724 RHSC = -(uint64_t)RHSC;
18726 // The constants that can be encoded in the THeadMemIdx instructions
18727 // are of the form (sign_extend(imm5) << imm2).
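// For example (illustrative), an offset of 120 (= 15 << 3) is encodable,
// while 17 is not, since 17 is odd and does not fit in a signed 5-bit
// immediate.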
18728 bool isLegalIndexedOffset = false;
18729 for (unsigned i = 0; i < 4; i++)
18730 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
18731 isLegalIndexedOffset = true;
18732 break;
18735 if (!isLegalIndexedOffset)
18736 return false;
18738 IsInc = (Op->getOpcode() == ISD::ADD);
18739 Offset = Op->getOperand(1);
18740 return true;
18743 return false;
18746 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
18747 SDValue &Offset,
18748 ISD::MemIndexedMode &AM,
18749 SelectionDAG &DAG) const {
18750 EVT VT;
18751 SDValue Ptr;
18752 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18753 VT = LD->getMemoryVT();
18754 Ptr = LD->getBasePtr();
18755 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18756 VT = ST->getMemoryVT();
18757 Ptr = ST->getBasePtr();
18758 } else
18759 return false;
18761 bool IsInc;
18762 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
18763 return false;
18765 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
18766 return true;
18769 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
18770 SDValue &Base,
18771 SDValue &Offset,
18772 ISD::MemIndexedMode &AM,
18773 SelectionDAG &DAG) const {
18774 EVT VT;
18775 SDValue Ptr;
18776 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18777 VT = LD->getMemoryVT();
18778 Ptr = LD->getBasePtr();
18779 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18780 VT = ST->getMemoryVT();
18781 Ptr = ST->getBasePtr();
18782 } else
18783 return false;
18785 bool IsInc;
18786 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
18787 return false;
18788 // Post-indexing updates the base, so it's not a valid transform
18789 // if that's not the same as the load's pointer.
18790 if (Ptr != Base)
18791 return false;
18793 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
18794 return true;
18797 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18798 EVT VT) const {
18799 EVT SVT = VT.getScalarType();
18801 if (!SVT.isSimple())
18802 return false;
18804 switch (SVT.getSimpleVT().SimpleTy) {
18805 case MVT::f16:
18806 return VT.isVector() ? Subtarget.hasVInstructionsF16()
18807 : Subtarget.hasStdExtZfhOrZhinx();
18808 case MVT::f32:
18809 return Subtarget.hasStdExtFOrZfinx();
18810 case MVT::f64:
18811 return Subtarget.hasStdExtDOrZdinx();
18812 default:
18813 break;
18816 return false;
18819 Register RISCVTargetLowering::getExceptionPointerRegister(
18820 const Constant *PersonalityFn) const {
18821 return RISCV::X10;
18824 Register RISCVTargetLowering::getExceptionSelectorRegister(
18825 const Constant *PersonalityFn) const {
18826 return RISCV::X11;
18829 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
18830 // Return false to suppress the unnecessary extensions if a LibCall
18831 // argument or return value is a float narrower than XLEN on a soft FP ABI.
18832 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
18833 Type.getSizeInBits() < Subtarget.getXLen()))
18834 return false;
18836 return true;
18839 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
18840 if (Subtarget.is64Bit() && Type == MVT::i32)
18841 return true;
18843 return IsSigned;
18846 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18847 SDValue C) const {
18848 // Check integral scalar types.
18849 const bool HasExtMOrZmmul =
18850 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
18851 if (!VT.isScalarInteger())
18852 return false;
18854 // Omit the optimization if the subtarget has the M extension and the data
18855 // size exceeds XLen.
18856 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
18857 return false;
18859 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18860 // Break the MUL to a SLLI and an ADD/SUB.
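// For example, x * 17 becomes (x << 4) + x and x * 15 becomes (x << 4) - x.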
18861 const APInt &Imm = ConstNode->getAPIntValue();
18862 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
18863 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
18864 return true;
18866 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
18867 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
18868 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
18869 (Imm - 8).isPowerOf2()))
18870 return true;
18872 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
18873 // a pair of LUI/ADDI.
18874 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
18875 ConstNode->hasOneUse()) {
18876 APInt ImmS = Imm.ashr(Imm.countr_zero());
18877 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
18878 (1 - ImmS).isPowerOf2())
18879 return true;
18883 return false;
18886 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
18887 SDValue ConstNode) const {
18888 // Let the DAGCombiner decide for vectors.
18889 EVT VT = AddNode.getValueType();
18890 if (VT.isVector())
18891 return true;
18893 // Let the DAGCombiner decide for larger types.
18894 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
18895 return true;
18897 // It is worse if c1 is simm12 while c1*c2 is not.
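// For example (illustrative), folding (x + 1000) * 5 into (x * 5) + 5000
// would replace an ADDI-encodable 1000 with 5000, which no longer fits in a
// 12-bit signed immediate and must be materialized separately.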
18898 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
18899 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
18900 const APInt &C1 = C1Node->getAPIntValue();
18901 const APInt &C2 = C2Node->getAPIntValue();
18902 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
18903 return false;
18905 // Default to true and let the DAGCombiner decide.
18906 return true;
18909 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
18910 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
18911 unsigned *Fast) const {
18912 if (!VT.isVector()) {
18913 if (Fast)
18914 *Fast = Subtarget.enableUnalignedScalarMem();
18915 return Subtarget.enableUnalignedScalarMem();
18918 // All vector implementations must support element alignment
18919 EVT ElemVT = VT.getVectorElementType();
18920 if (Alignment >= ElemVT.getStoreSize()) {
18921 if (Fast)
18922 *Fast = 1;
18923 return true;
18926 // Note: We lower an unmasked unaligned vector access to an equally sized
18927 // e8 element type access. Given this, we effectively support all unmasked
18928 // misaligned accesses. TODO: Work through the codegen implications of
18929 // allowing such accesses to be formed and of considering them fast.
18930 if (Fast)
18931 *Fast = Subtarget.enableUnalignedVectorMem();
18932 return Subtarget.enableUnalignedVectorMem();
18936 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
18937 const AttributeList &FuncAttributes) const {
18938 if (!Subtarget.hasVInstructions())
18939 return MVT::Other;
18941 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
18942 return MVT::Other;
18944 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
18945 // has an expansion threshold, and we want the number of hardware memory
18946 // operations to correspond roughly to that threshold. LMUL>1 operations
18947 // are typically expanded linearly internally, and thus correspond to more
18948 // than one actual memory operation. Note that store merging and load
18949 // combining will typically form larger LMUL operations from the LMUL1
18950 // operations emitted here, and that's okay because combining isn't
18951 // introducing new memory operations; it's just merging existing ones.
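// For example (illustrative), assuming a minimum VLEN of 128 and an ELEN of
// 64, MinVLenInBytes is 16, so a suitably aligned memcpy prefers v2i64 while
// a non-zero memset prefers v16i8.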
18952 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
18953 if (Op.size() < MinVLenInBytes)
18954 // TODO: Figure out short memops. For the moment, do the default thing
18955 // which ends up using scalar sequences.
18956 return MVT::Other;
18958 // Prefer i8 for non-zero memset as it allows us to avoid materializing
18959 // a large scalar constant and instead use vmv.v.x/i to do the
18960 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
18961 // maximize the chance we can encode the size in the vsetvli.
18962 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
18963 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
18965 // Do we have sufficient alignment for our preferred VT? If not, revert
18966 // to the largest size allowed by our alignment criteria.
18967 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
18968 Align RequiredAlign(PreferredVT.getStoreSize());
18969 if (Op.isFixedDstAlign())
18970 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
18971 if (Op.isMemcpy())
18972 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
18973 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
18975 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
18978 bool RISCVTargetLowering::splitValueIntoRegisterParts(
18979 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18980 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18981 bool IsABIRegCopy = CC.has_value();
18982 EVT ValueVT = Val.getValueType();
18983 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
18984 PartVT == MVT::f32) {
18985 // Cast the [b]f16 to i16, extend to i32, pad the upper bits with ones to
18986 // make a float NaN, and cast to f32.
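// For example, an f16 with bit pattern 0x3C00 (1.0) is passed in an f32
// register as 0xFFFF3C00, i.e. NaN-boxed in the upper 16 bits.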
18987 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
18988 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
18989 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
18990 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
18991 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18992 Parts[0] = Val;
18993 return true;
18996 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
18997 LLVMContext &Context = *DAG.getContext();
18998 EVT ValueEltVT = ValueVT.getVectorElementType();
18999 EVT PartEltVT = PartVT.getVectorElementType();
19000 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
19001 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
19002 if (PartVTBitSize % ValueVTBitSize == 0) {
19003 assert(PartVTBitSize >= ValueVTBitSize);
19004 // If the element types are different, first widen the value with an
19005 // insert_subvector (if needed) so that it has the same total size as
19006 // PartVT, and then bitcast it to PartVT.
19007 // For example, to copy a <vscale x 1 x i8> value into a <vscale x 4 x i16>
19008 // part, we widen <vscale x 1 x i8> to <vscale x 8 x i8> via
19009 // insert_subvector and then bitcast the result to <vscale x 4 x i16>.
19010 if (ValueEltVT != PartEltVT) {
19011 if (PartVTBitSize > ValueVTBitSize) {
19012 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
19013 assert(Count != 0 && "The number of elements should not be zero.");
19014 EVT SameEltTypeVT =
19015 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
19016 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
19017 DAG.getUNDEF(SameEltTypeVT), Val,
19018 DAG.getVectorIdxConstant(0, DL));
19020 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
19021 } else {
19022 Val =
19023 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
19024 Val, DAG.getVectorIdxConstant(0, DL));
19026 Parts[0] = Val;
19027 return true;
19030 return false;
19033 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
19034 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
19035 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
19036 bool IsABIRegCopy = CC.has_value();
19037 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
19038 PartVT == MVT::f32) {
19039 SDValue Val = Parts[0];
19041 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8> and can then extract the <vscale x 1 x i8>
      // subvector.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
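  // For example (illustrative), an unsigned divide by a non-power-of-two
  // constant would otherwise be expanded into a multiply-by-magic-constant
  // sequence of several instructions, whereas divu is a single (if slower)
  // instruction.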
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing zero_ext and sign_ext can prevent them from being matched
  // into widening instructions in some situations.
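  // For example (illustrative): a vector sign_extend feeding an add can be
  // combined into a widening add (vwadd); scalarizing the extension of a
  // splat first may block that combine.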
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
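  // The IR emitted here is roughly (illustrative):
  //   %tp = call ptr @llvm.thread.pointer()
  //   %guard.slot = getelementptr i8, ptr %tp, i32 -16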
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}

bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  }

  // Need to make sure that EMUL * NFIELDS ≤ 8
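  // For example (illustrative): with a factor-4 interleave and a container
  // type at LMUL=2, EMUL * NFIELDS = 2 * 4 = 8, which is allowed; at LMUL=4
  // it would be 16 and the access is rejected.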
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}

bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.enableUnalignedVectorMem() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}

static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};

/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                        %ptr, i64 4)
/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  IRBuilder<> Builder(LI);

  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());

  Function *VlsegNFunc =
      Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
                                {VTy, LI->getPointerOperandType(), XLenTy});

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());

  CallInst *VlsegN =
      Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});

  for (unsigned i = 0; i < Shuffles.size(); i++) {
    Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }

  return true;
}

static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};

/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                              %ptr, i32 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  IRBuilder<> Builder(SI);
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  Function *VssegNFunc =
      Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
                                {VTy, SI->getPointerOperandType(), XLenTy});

  auto Mask = SVI->getShuffleMask();
  SmallVector<Value *, 10> Ops;

  for (unsigned i = 0; i < Factor; i++) {
    Value *Shuffle = Builder.CreateShuffleVector(
        SVI->getOperand(0), SVI->getOperand(1),
        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
    Ops.push_back(Shuffle);
  }

  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType above.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});

  Builder.CreateCall(VssegNFunc, Ops);

  return true;
}

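/// Lower a deinterleave2 intrinsic applied to a load into a vlsegN intrinsic.
///
/// Illustrative sketch (exact intrinsic mangling omitted), Factor = 2:
/// %wide = load <vscale x 8 x i32>, ptr %ptr
/// %de = call {<vscale x 4 x i32>, <vscale x 4 x i32>}
///           @llvm.experimental.vector.deinterleave2(<vscale x 8 x i32> %wide)
///
/// Into a call to @llvm.riscv.vlseg2 on %ptr with VL of all-ones (i.e. VLMAX),
/// whose two results replace the results of the deinterleave.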
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                           LoadInst *LI) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

  // Only deinterleave2 supported at present.
  if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
    return false;

  Function *VlsegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
  SmallVector<Value *, 10> Ops;

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
  }

  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);

  return true;
}

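/// Lower an interleave2 intrinsic feeding a store into a vssegN intrinsic.
///
/// Illustrative sketch (exact intrinsic mangling omitted), Factor = 2:
/// %iv = call <vscale x 8 x i32>
///           @llvm.experimental.vector.interleave2(<vscale x 4 x i32> %a,
///                                                 <vscale x 4 x i32> %b)
/// store <vscale x 8 x i32> %iv, ptr %ptr
///
/// Into a call to @llvm.riscv.vsseg2 passing %a, %b, %ptr and VL of all-ones
/// (i.e. VLMAX).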
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                                          StoreInst *SI) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

  // Only interleave2 supported at present.
  if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}

MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);
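
  // Emit a KCFI_CHECK pseudo carrying the register that holds the call target
  // and the expected type id; the pseudo is expanded into the actual check
  // sequence later, when pseudo instructions are lowered.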
  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 is the default value and works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
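  // The level is remapped to two MMO flag bits below (level minus 2): e.g.
  // level 3 (__RISCV_NTLH_ALL_PRIVATE) becomes 1 and sets only
  // MONontemporalBit0, while level 5 (__RISCV_NTLH_ALL) becomes 3 and sets
  // both nontemporal bits.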
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);

  return TargetFlags;
}

bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}

bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}

unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}

bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
  // We don't support scalable vectors in GISel.
  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy())
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable