1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISC-V target.
11 //===----------------------------------------------------------------------===//
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "MCTargetDesc/RISCVMCTargetDesc.h"
16 #include "MCTargetDesc/RISCVMatInt.h"
17 #include "RISCVISelLowering.h"
18 #include "RISCVInstrInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/IR/IntrinsicsRISCV.h"
21 #include "llvm/Support/Alignment.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
26 using namespace llvm;
28 #define DEBUG_TYPE "riscv-isel"
29 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
31 static cl::opt<bool> UsePseudoMovImm(
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
37 namespace llvm::RISCV {
38 #define GET_RISCVVSSEGTable_IMPL
39 #define GET_RISCVVLSEGTable_IMPL
40 #define GET_RISCVVLXSEGTable_IMPL
41 #define GET_RISCVVSXSEGTable_IMPL
42 #define GET_RISCVVLETable_IMPL
43 #define GET_RISCVVSETable_IMPL
44 #define GET_RISCVVLXTable_IMPL
45 #define GET_RISCVVSXTable_IMPL
46 #include "RISCVGenSearchableTables.inc"
47 } // namespace llvm::RISCV
49 void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
88 MachineFunction &MF = CurDAG->getMachineFunction();
89 SDLoc DL(N);
91 // Create a temporary stack slot for each node being expanded.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
140 void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
151 MadeChange |= doPeepholeSExtW(N);
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
159 CurDAG->setRoot(Dummy.getValue());
161 MadeChange |= doPeepholeMergeVVMFold();
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to work around
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
175 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
201 return SrcReg;
204 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bit 31 and 63 are set.
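// For example (illustrative): Imm = 0x0001234500012345 has identical low
// and high halves, and the low half 0x12345 is a plain LUI+ADDI value, so
// the whole constant can be built as
//   X = LUI/ADDI 0x12345;  result = ADD X, (SLLI X, 32)
// i.e. ShiftAmt = 32 and AddOpc = ADD, instead of a longer
// single-register sequence.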
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
239 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
240 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
241 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
242 bool IsLoad, MVT *IndexVT) {
243 SDValue Chain = Node->getOperand(0);
244 SDValue Glue;
246 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
248 if (IsStridedOrIndexed) {
249 Operands.push_back(Node->getOperand(CurOp++)); // Index.
250 if (IndexVT)
251 *IndexVT = Operands.back()->getSimpleValueType(0);
254 if (IsMasked) {
255 // Mask needs to be copied to V0.
256 SDValue Mask = Node->getOperand(CurOp++);
257 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
258 Glue = Chain.getValue(1);
259 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
261 SDValue VL;
262 selectVLOp(Node->getOperand(CurOp++), VL);
263 Operands.push_back(VL);
265 MVT XLenVT = Subtarget->getXLenVT();
266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
267 Operands.push_back(SEWOp);
269 // At the IR layer, all the masked load intrinsics have policy operands,
270 // none of the others do. All have passthru operands. For our pseudos,
271 // all loads have policy operands.
272 if (IsLoad) {
273 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
274 if (IsMasked)
275 Policy = Node->getConstantOperandVal(CurOp++);
276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
277 Operands.push_back(PolicyOp);
280 Operands.push_back(Chain); // Chain.
281 if (Glue)
282 Operands.push_back(Glue);
285 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
290 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
295 Operands.push_back(Node->getOperand(CurOp++));
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
306 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
307 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
309 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
310 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
311 CurDAG->RemoveDeadNode(Node);
314 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
315 bool IsMasked) {
316 SDLoc DL(Node);
317 MVT VT = Node->getSimpleValueType(0);
318 MVT XLenVT = Subtarget->getXLenVT();
319 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
322 unsigned CurOp = 2;
323 SmallVector<SDValue, 7> Operands;
325 Operands.push_back(Node->getOperand(CurOp++));
327 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
328 /*IsStridedOrIndexed*/ false, Operands,
329 /*IsLoad=*/true);
331 const RISCV::VLSEGPseudo *P =
332 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
333 Log2SEW, static_cast<unsigned>(LMUL));
334 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
335 XLenVT, MVT::Other, Operands);
337 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
338 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
340 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
341 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
342 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
343 CurDAG->RemoveDeadNode(Node);
346 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
347 bool IsOrdered) {
348 SDLoc DL(Node);
349 MVT VT = Node->getSimpleValueType(0);
350 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
351 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
353 unsigned CurOp = 2;
354 SmallVector<SDValue, 8> Operands;
356 Operands.push_back(Node->getOperand(CurOp++));
358 MVT IndexVT;
359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
360 /*IsStridedOrIndexed*/ true, Operands,
361 /*IsLoad=*/true, &IndexVT);
363 #ifndef NDEBUG
364 // Number of elements = RVVBitsPerBlock * LMUL / SEW
365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
367 if (DecodedLMUL.second)
368 ContainedTyNumElts /= DecodedLMUL.first;
369 else
370 ContainedTyNumElts *= DecodedLMUL.first;
371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
372 "Element count mismatch");
373 #endif
375 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
376 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
378 report_fatal_error("The V extension does not support EEW=64 for index "
379 "values when XLEN=32");
381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
382 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
383 static_cast<unsigned>(IndexLMUL));
384 MachineSDNode *Load =
385 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
387 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
388 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
390 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
391 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
392 CurDAG->RemoveDeadNode(Node);
395 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
396 bool IsStrided) {
397 SDLoc DL(Node);
398 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
399 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
400 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
402 unsigned CurOp = 2;
403 SmallVector<SDValue, 8> Operands;
405 Operands.push_back(Node->getOperand(CurOp++));
407 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
408 Operands);
410 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
411 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
412 MachineSDNode *Store =
413 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
415 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
416 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
418 ReplaceNode(Node, Store);
421 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
425 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
426 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428 unsigned CurOp = 2;
429 SmallVector<SDValue, 8> Operands;
431 Operands.push_back(Node->getOperand(CurOp++));
433 MVT IndexVT;
434 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
435 /*IsStridedOrIndexed*/ true, Operands,
436 /*IsLoad=*/false, &IndexVT);
438 #ifndef NDEBUG
439 // Number of elements = RVVBitsPerBlock * LMUL / SEW
440 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
441 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
442 if (DecodedLMUL.second)
443 ContainedTyNumElts /= DecodedLMUL.first;
444 else
445 ContainedTyNumElts *= DecodedLMUL.first;
446 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
447 "Element count mismatch");
448 #endif
450 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
451 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
452 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
453 report_fatal_error("The V extension does not support EEW=64 for index "
454 "values when XLEN=32");
456 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
457 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
458 static_cast<unsigned>(IndexLMUL));
459 MachineSDNode *Store =
460 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
462 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
463 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
465 ReplaceNode(Node, Store);
468 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
469 if (!Subtarget->hasVInstructions())
470 return;
472 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
474 SDLoc DL(Node);
475 MVT XLenVT = Subtarget->getXLenVT();
477 unsigned IntNo = Node->getConstantOperandVal(0);
479 assert((IntNo == Intrinsic::riscv_vsetvli ||
480 IntNo == Intrinsic::riscv_vsetvlimax) &&
481 "Unexpected vsetvli intrinsic");
483 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
484 unsigned Offset = (VLMax ? 1 : 2);
486 assert(Node->getNumOperands() == Offset + 2 &&
487 "Unexpected number of operands");
489 unsigned SEW =
490 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
491 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
492 Node->getConstantOperandVal(Offset + 1) & 0x7);
494 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
495 /*MaskAgnostic*/ true);
496 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
498 SDValue VLOperand;
499 unsigned Opcode = RISCV::PseudoVSETVLI;
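// If the AVL is a constant that equals VLMAX for the requested SEW/LMUL
// and the exact VLEN is known, prefer the X0 form below. For example,
// with VLEN=128, SEW=32, LMUL=1 the SEW/LMUL ratio is 32, so
// VLMAX = 128/32 = 4 and a constant AVL of 4 selects PseudoVSETVLIX0.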
500 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
501 if (auto VLEN = Subtarget->getRealVLen())
502 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
503 VLMax = true;
505 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
506 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
507 Opcode = RISCV::PseudoVSETVLIX0;
508 } else {
509 VLOperand = Node->getOperand(1);
511 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
512 uint64_t AVL = C->getZExtValue();
513 if (isUInt<5>(AVL)) {
514 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
515 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
516 XLenVT, VLImm, VTypeIOp));
517 return;
522 ReplaceNode(Node,
523 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
526 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
527 MVT VT = Node->getSimpleValueType(0);
528 unsigned Opcode = Node->getOpcode();
529 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
530 "Unexpected opcode");
531 SDLoc DL(Node);
533 // For operations of the form (x << C1) op C2, check if we can use
534 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
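// For example, (or (shl x, 8), 0x5500): 0x5500 does not fit in a simm12,
// but 0x5500 >> 8 = 0x55 does and no set bits are shifted out
// (0x5500 & 0xff == 0), so this becomes (slli (ori x, 0x55), 8).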
535 SDValue N0 = Node->getOperand(0);
536 SDValue N1 = Node->getOperand(1);
538 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
539 if (!Cst)
540 return false;
542 int64_t Val = Cst->getSExtValue();
544 // Check if immediate can already use ANDI/ORI/XORI.
545 if (isInt<12>(Val))
546 return false;
548 SDValue Shift = N0;
550 // If Val is simm32 and we have a sext_inreg from i32, then the binop
551 // produces at least 33 sign bits. We can peek through the sext_inreg and use
552 // a SLLIW at the end.
553 bool SignExt = false;
554 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
555 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
556 SignExt = true;
557 Shift = N0.getOperand(0);
560 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
561 return false;
563 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
564 if (!ShlCst)
565 return false;
567 uint64_t ShAmt = ShlCst->getZExtValue();
569 // Make sure that we don't change the operation by removing bits.
570 // This only matters for OR and XOR, AND is unaffected.
571 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
572 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
573 return false;
575 int64_t ShiftedVal = Val >> ShAmt;
576 if (!isInt<12>(ShiftedVal))
577 return false;
579 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
580 if (SignExt && ShAmt >= 32)
581 return false;
583 // Ok, we can reorder to get a smaller immediate.
584 unsigned BinOpc;
585 switch (Opcode) {
586 default: llvm_unreachable("Unexpected opcode");
587 case ISD::AND: BinOpc = RISCV::ANDI; break;
588 case ISD::OR: BinOpc = RISCV::ORI; break;
589 case ISD::XOR: BinOpc = RISCV::XORI; break;
592 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
594 SDNode *BinOp = CurDAG->getMachineNode(
595 BinOpc, DL, VT, Shift.getOperand(0),
596 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
597 SDNode *SLLI =
598 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
599 CurDAG->getTargetConstant(ShAmt, DL, VT));
600 ReplaceNode(Node, SLLI);
601 return true;
604 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
605 // Only supported with XTHeadBb at the moment.
606 if (!Subtarget->hasVendorXTHeadBb())
607 return false;
609 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
610 if (!N1C)
611 return false;
613 SDValue N0 = Node->getOperand(0);
614 if (!N0.hasOneUse())
615 return false;
617 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
618 MVT VT) {
619 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
620 CurDAG->getTargetConstant(Msb, DL, VT),
621 CurDAG->getTargetConstant(Lsb, DL, VT));
624 SDLoc DL(Node);
625 MVT VT = Node->getSimpleValueType(0);
626 const unsigned RightShAmt = N1C->getZExtValue();
628 // Transform (sra (shl X, C1) C2) with C1 < C2
629 // -> (TH.EXT X, msb, lsb)
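// For example, on RV64 (sra (shl X, 48), 52) sign-extends bits [15:4]
// of X, so it becomes TH.EXT X, 15, 4
// (msb = 64 - 48 - 1 = 15, lsb = 52 - 48 = 4).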
630 if (N0.getOpcode() == ISD::SHL) {
631 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
632 if (!N01C)
633 return false;
635 const unsigned LeftShAmt = N01C->getZExtValue();
636 // Make sure that this is a bitfield extraction (i.e., the shift-right
637 // amount cannot be less than the left-shift).
638 if (LeftShAmt > RightShAmt)
639 return false;
641 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
642 const unsigned Msb = MsbPlusOne - 1;
643 const unsigned Lsb = RightShAmt - LeftShAmt;
645 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
646 ReplaceNode(Node, TH_EXT);
647 return true;
650 // Transform (sra (sext_inreg X, _), C) ->
651 // (TH.EXT X, msb, lsb)
652 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
653 unsigned ExtSize =
654 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
656 // ExtSize of 32 should use sraiw via tablegen pattern.
657 if (ExtSize == 32)
658 return false;
660 const unsigned Msb = ExtSize - 1;
661 const unsigned Lsb = RightShAmt;
663 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
664 ReplaceNode(Node, TH_EXT);
665 return true;
668 return false;
671 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
672 // Target does not support indexed loads.
673 if (!Subtarget->hasVendorXTHeadMemIdx())
674 return false;
676 LoadSDNode *Ld = cast<LoadSDNode>(Node);
677 ISD::MemIndexedMode AM = Ld->getAddressingMode();
678 if (AM == ISD::UNINDEXED)
679 return false;
681 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
682 if (!C)
683 return false;
685 EVT LoadVT = Ld->getMemoryVT();
686 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
687 "Unexpected addressing mode");
688 bool IsPre = AM == ISD::PRE_INC;
689 bool IsPost = AM == ISD::POST_INC;
690 int64_t Offset = C->getSExtValue();
692 // The constants that can be encoded in the THeadMemIdx instructions
693 // are of the form (sign_extend(imm5) << imm2).
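// For example, Offset = 96 encodes as imm5 = 12, imm2 = 3
// (96 >> 3 = 12 fits in a signed 5-bit field and 96 % 8 == 0), while
// Offset = 100 has no valid (imm5, imm2) pair, so the loop below exits
// with Shift == 4 and we bail out.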
694 unsigned Shift;
695 for (Shift = 0; Shift < 4; Shift++)
696 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
697 break;
699 // Constant cannot be encoded.
700 if (Shift == 4)
701 return false;
703 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
704 unsigned Opcode;
705 if (LoadVT == MVT::i8 && IsPre)
706 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
707 else if (LoadVT == MVT::i8 && IsPost)
708 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
709 else if (LoadVT == MVT::i16 && IsPre)
710 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
711 else if (LoadVT == MVT::i16 && IsPost)
712 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
713 else if (LoadVT == MVT::i32 && IsPre)
714 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
715 else if (LoadVT == MVT::i32 && IsPost)
716 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
717 else if (LoadVT == MVT::i64 && IsPre)
718 Opcode = RISCV::TH_LDIB;
719 else if (LoadVT == MVT::i64 && IsPost)
720 Opcode = RISCV::TH_LDIA;
721 else
722 return false;
724 EVT Ty = Ld->getOffset().getValueType();
725 SDValue Ops[] = {
726 Ld->getBasePtr(),
727 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
728 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
729 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
730 Ld->getValueType(1), MVT::Other, Ops);
732 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
735 ReplaceNode(Node, New);
737 return true;
740 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
741 if (!Subtarget->hasVInstructions())
742 return;
744 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
746 SDLoc DL(Node);
747 unsigned IntNo = Node->getConstantOperandVal(1);
749 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
750 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
751 "Unexpected vsetvli intrinsic");
753 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
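// The log2lmul operand uses the vtype vlmul encoding, which the switch
// below maps onto pseudos: values 0..3 select LMUL 1, 2, 4, 8 and
// values 5..7 select LMUL 1/8, 1/4, 1/2.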
754 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
755 SDValue SEWOp =
756 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
757 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
758 Node->getOperand(4), Node->getOperand(5),
759 Node->getOperand(8), SEWOp,
760 Node->getOperand(0)};
762 unsigned Opcode;
763 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
764 switch (LMulSDNode->getSExtValue()) {
765 case 5:
766 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
767 : RISCV::PseudoVC_I_SE_MF8;
768 break;
769 case 6:
770 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
771 : RISCV::PseudoVC_I_SE_MF4;
772 break;
773 case 7:
774 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
775 : RISCV::PseudoVC_I_SE_MF2;
776 break;
777 case 0:
778 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
779 : RISCV::PseudoVC_I_SE_M1;
780 break;
781 case 1:
782 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
783 : RISCV::PseudoVC_I_SE_M2;
784 break;
785 case 2:
786 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
787 : RISCV::PseudoVC_I_SE_M4;
788 break;
789 case 3:
790 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
791 : RISCV::PseudoVC_I_SE_M8;
792 break;
795 ReplaceNode(Node, CurDAG->getMachineNode(
796 Opcode, DL, Node->getSimpleValueType(0), Operands));
799 static unsigned getSegInstNF(unsigned Intrinsic) {
800 #define INST_NF_CASE(NAME, NF) \
801 case Intrinsic::riscv_##NAME##NF: \
802 return NF;
803 #define INST_NF_CASE_MASK(NAME, NF) \
804 case Intrinsic::riscv_##NAME##NF##_mask: \
805 return NF;
806 #define INST_NF_CASE_FF(NAME, NF) \
807 case Intrinsic::riscv_##NAME##NF##ff: \
808 return NF;
809 #define INST_NF_CASE_FF_MASK(NAME, NF) \
810 case Intrinsic::riscv_##NAME##NF##ff_mask: \
811 return NF;
812 #define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
813 MACRO_NAME(NAME, 2) \
814 MACRO_NAME(NAME, 3) \
815 MACRO_NAME(NAME, 4) \
816 MACRO_NAME(NAME, 5) \
817 MACRO_NAME(NAME, 6) \
818 MACRO_NAME(NAME, 7) \
819 MACRO_NAME(NAME, 8)
820 #define INST_ALL_NF_CASE(NAME) \
821 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
822 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
823 #define INST_ALL_NF_CASE_WITH_FF(NAME) \
824 INST_ALL_NF_CASE(NAME) \
825 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
826 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
827 switch (Intrinsic) {
828 default:
829 llvm_unreachable("Unexpected segment load/store intrinsic");
830 INST_ALL_NF_CASE_WITH_FF(vlseg)
831 INST_ALL_NF_CASE(vlsseg)
832 INST_ALL_NF_CASE(vloxseg)
833 INST_ALL_NF_CASE(vluxseg)
834 INST_ALL_NF_CASE(vsseg)
835 INST_ALL_NF_CASE(vssseg)
836 INST_ALL_NF_CASE(vsoxseg)
837 INST_ALL_NF_CASE(vsuxseg)
841 void RISCVDAGToDAGISel::Select(SDNode *Node) {
842 // If we have a custom node, we have already selected.
843 if (Node->isMachineOpcode()) {
844 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
845 Node->setNodeId(-1);
846 return;
849 // Instruction Selection not handled by the auto-generated tablegen selection
850 // should be handled here.
851 unsigned Opcode = Node->getOpcode();
852 MVT XLenVT = Subtarget->getXLenVT();
853 SDLoc DL(Node);
854 MVT VT = Node->getSimpleValueType(0);
856 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
858 switch (Opcode) {
859 case ISD::Constant: {
860 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
861 auto *ConstNode = cast<ConstantSDNode>(Node);
862 if (ConstNode->isZero()) {
863 SDValue New =
864 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
865 ReplaceNode(Node, New.getNode());
866 return;
868 int64_t Imm = ConstNode->getSExtValue();
869 // If only the lower 8 bits are used, try to convert this to a simm6 by
870 // sign-extending bit 7. This is neutral without the C extension, and
871 // allows C.LI to be used if C is present.
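// For example, Imm = 0xF0 (240) is not a simm6, but
// SignExtend64<8>(0xF0) = -16 is; if every user only reads the low
// 8 bits, materializing -16 instead is equivalent and compressible.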
872 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
873 Imm = SignExtend64<8>(Imm);
874 // If the upper XLen-16 bits are not used, try to convert this to a simm12
875 // by sign extending bit 15.
876 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
877 hasAllHUsers(Node))
878 Imm = SignExtend64<16>(Imm);
879 // If the upper 32-bits are not used try to convert this into a simm32 by
880 // sign extending bit 32.
881 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
882 Imm = SignExtend64<32>(Imm);
884 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
885 return;
887 case ISD::ConstantFP: {
888 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
890 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
891 SDValue Imm;
892 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
893 // create an integer immediate.
894 if (APF.isPosZero() || NegZeroF64)
895 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
896 else
897 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
898 *Subtarget);
900 bool HasZdinx = Subtarget->hasStdExtZdinx();
901 bool Is64Bit = Subtarget->is64Bit();
902 unsigned Opc;
903 switch (VT.SimpleTy) {
904 default:
905 llvm_unreachable("Unexpected size");
906 case MVT::bf16:
907 assert(Subtarget->hasStdExtZfbfmin());
908 Opc = RISCV::FMV_H_X;
909 break;
910 case MVT::f16:
911 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
912 break;
913 case MVT::f32:
914 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
915 break;
916 case MVT::f64:
917 // For RV32, we can't move from a GPR, we need to convert instead. This
918 // should only happen for +0.0 and -0.0.
919 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
920 if (Is64Bit)
921 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
922 else
923 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
924 break;
927 SDNode *Res;
928 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
929 Res =
930 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
931 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
932 Res =
933 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
934 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
935 Res = CurDAG->getMachineNode(
936 Opc, DL, VT, Imm,
937 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
938 else
939 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
941 // For f64 -0.0, we need to insert a fneg.d idiom.
942 if (NegZeroF64) {
943 Opc = RISCV::FSGNJN_D;
944 if (HasZdinx)
945 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
946 Res =
947 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
950 ReplaceNode(Node, Res);
951 return;
953 case RISCVISD::BuildGPRPair:
954 case RISCVISD::BuildPairF64: {
955 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
956 break;
958 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
959 "BuildPairF64 only handled here on rv32i_zdinx");
961 SDValue Ops[] = {
962 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
963 Node->getOperand(0),
964 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
965 Node->getOperand(1),
966 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
968 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
969 ReplaceNode(Node, N);
970 return;
972 case RISCVISD::SplitGPRPair:
973 case RISCVISD::SplitF64: {
974 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
975 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
976 "SplitF64 only handled here on rv32i_zdinx");
978 if (!SDValue(Node, 0).use_empty()) {
979 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
980 Node->getValueType(0),
981 Node->getOperand(0));
982 ReplaceUses(SDValue(Node, 0), Lo);
985 if (!SDValue(Node, 1).use_empty()) {
986 SDValue Hi = CurDAG->getTargetExtractSubreg(
987 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
988 ReplaceUses(SDValue(Node, 1), Hi);
991 CurDAG->RemoveDeadNode(Node);
992 return;
995 assert(Opcode != RISCVISD::SplitGPRPair &&
996 "SplitGPRPair should already be handled");
998 if (!Subtarget->hasStdExtZfa())
999 break;
1000 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1001 "Unexpected subtarget");
1003 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1004 if (!SDValue(Node, 0).use_empty()) {
1005 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1006 Node->getOperand(0));
1007 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1009 if (!SDValue(Node, 1).use_empty()) {
1010 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1011 Node->getOperand(0));
1012 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1015 CurDAG->RemoveDeadNode(Node);
1016 return;
1018 case ISD::SHL: {
1019 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1020 if (!N1C)
1021 break;
1022 SDValue N0 = Node->getOperand(0);
1023 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1024 !isa<ConstantSDNode>(N0.getOperand(1)))
1025 break;
1026 unsigned ShAmt = N1C->getZExtValue();
1027 uint64_t Mask = N0.getConstantOperandVal(1);
1029 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1030 unsigned XLen = Subtarget->getXLen();
1031 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1032 unsigned TrailingZeros = llvm::countr_zero(Mask);
1033 if (TrailingZeros > 0 && LeadingZeros == 32) {
1034 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1035 // where C2 has 32 leading zeros and C3 trailing zeros.
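// For example, on RV64 (shl (and X, 0xFFFFFF00), 4) has
// C2 = 0xFFFFFF00 (32 leading zeros, 8 trailing zeros), so it is
// selected as (slli (srliw X, 8), 12).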
1036 SDNode *SRLIW = CurDAG->getMachineNode(
1037 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1038 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1039 SDNode *SLLI = CurDAG->getMachineNode(
1040 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1041 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1042 ReplaceNode(Node, SLLI);
1043 return;
1045 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1046 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1047 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1048 // where C2 has C4 leading zeros and no trailing zeros.
1049 // This is profitable if the "and" was to be lowered to
1050 // (srli (slli X, C4), C4) and not (andi X, C2).
1051 // For "LeadingZeros == 32":
1052 // - with Zba it's just (slli.uw X, C)
1053 // - without Zba a tablegen pattern applies the very same
1054 // transform as we would have done here
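// For example, on RV64 (shl (and X, 0xFFFFF), 8): the mask has 44
// leading zeros, no trailing zeros, and needs more than 11 bits, so
// this becomes (srli (slli X, 44), 36) instead of materializing
// 0xFFFFF for an AND.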
1055 SDNode *SLLI = CurDAG->getMachineNode(
1056 RISCV::SLLI, DL, VT, N0->getOperand(0),
1057 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1058 SDNode *SRLI = CurDAG->getMachineNode(
1059 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1060 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1061 ReplaceNode(Node, SRLI);
1062 return;
1065 break;
1067 case ISD::SRL: {
1068 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1069 if (!N1C)
1070 break;
1071 SDValue N0 = Node->getOperand(0);
1072 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1073 break;
1074 unsigned ShAmt = N1C->getZExtValue();
1075 uint64_t Mask = N0.getConstantOperandVal(1);
1077 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1078 // 32 leading zeros and C3 trailing zeros.
1079 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1080 unsigned XLen = Subtarget->getXLen();
1081 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1082 unsigned TrailingZeros = llvm::countr_zero(Mask);
1083 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1084 SDNode *SRLIW = CurDAG->getMachineNode(
1085 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1086 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1087 SDNode *SLLI = CurDAG->getMachineNode(
1088 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1089 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1090 ReplaceNode(Node, SLLI);
1091 return;
1095 // Optimize (srl (and X, C2), C) ->
1096 //   (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1097 // Where C2 is a mask with C3 trailing ones.
1098 // Take into account that C2 may have had lower bits unset by
1099 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1100 // This pattern occurs when type legalizing right shifts for types with
1101 // less than XLen bits.
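// For example, on RV64 (srl (and X, 0xFFFF), 4): after restoring the
// low ShAmt bits the mask has 16 trailing ones, so (assuming the AND
// has a single use and no bit-test/bitfield-extract instruction
// applies) this is selected as (srli (slli X, 48), 52).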
1102 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1103 if (!isMask_64(Mask))
1104 break;
1105 unsigned TrailingOnes = llvm::countr_one(Mask);
1106 if (ShAmt >= TrailingOnes)
1107 break;
1108 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1109 if (TrailingOnes == 32) {
1110 SDNode *SRLI = CurDAG->getMachineNode(
1111 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1112 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1113 ReplaceNode(Node, SRLI);
1114 return;
1117 // Only do the remaining transforms if the AND has one use.
1118 if (!N0.hasOneUse())
1119 break;
1121 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1122 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1123 SDNode *BEXTI = CurDAG->getMachineNode(
1124 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1125 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1126 ReplaceNode(Node, BEXTI);
1127 return;
1130 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1131 if (Subtarget->hasVendorXTHeadBb()) {
1132 SDNode *THEXTU = CurDAG->getMachineNode(
1133 RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
1134 CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
1135 CurDAG->getTargetConstant(ShAmt, DL, VT));
1136 ReplaceNode(Node, THEXTU);
1137 return;
1140 SDNode *SLLI =
1141 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1142 CurDAG->getTargetConstant(LShAmt, DL, VT));
1143 SDNode *SRLI = CurDAG->getMachineNode(
1144 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1145 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1146 ReplaceNode(Node, SRLI);
1147 return;
1149 case ISD::SRA: {
1150 if (trySignedBitfieldExtract(Node))
1151 return;
1153 // Optimize (sra (sext_inreg X, i16), C) ->
1154 //          (srai (slli X, (XLen-16)), (XLen-16) + C)
1155 // And (sra (sext_inreg X, i8), C) ->
1156 //          (srai (slli X, (XLen-8)), (XLen-8) + C)
1157 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1158 // This transform matches the code we get without Zbb. The shifts are more
1159 // compressible, and this can help expose CSE opportunities in the sdiv by
1160 // constant optimization.
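// For example, on RV64 (sra (sext_inreg X, i8), 3) becomes
// (srai (slli X, 56), 59): LShAmt = 64 - 8 = 56 and the final shift
// amount is 56 + 3 = 59.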
1161 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1162 if (!N1C)
1163 break;
1164 SDValue N0 = Node->getOperand(0);
1165 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1166 break;
1167 unsigned ShAmt = N1C->getZExtValue();
1168 unsigned ExtSize =
1169 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1170 // ExtSize of 32 should use sraiw via tablegen pattern.
1171 if (ExtSize >= 32 || ShAmt >= ExtSize)
1172 break;
1173 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1174 SDNode *SLLI =
1175 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1176 CurDAG->getTargetConstant(LShAmt, DL, VT));
1177 SDNode *SRAI = CurDAG->getMachineNode(
1178 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1179 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1180 ReplaceNode(Node, SRAI);
1181 return;
1183 case ISD::OR:
1184 case ISD::XOR:
1185 if (tryShrinkShlLogicImm(Node))
1186 return;
1188 break;
1189 case ISD::AND: {
1190 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1191 if (!N1C)
1192 break;
1194 SDValue N0 = Node->getOperand(0);
1196 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1197 SDValue X, unsigned Msb,
1198 unsigned Lsb) {
1199 if (!Subtarget->hasVendorXTHeadBb())
1200 return false;
1202 SDNode *TH_EXTU = CurDAG->getMachineNode(
1203 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1204 CurDAG->getTargetConstant(Lsb, DL, VT));
1205 ReplaceNode(Node, TH_EXTU);
1206 return true;
1209 bool LeftShift = N0.getOpcode() == ISD::SHL;
1210 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1211 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1212 if (!C)
1213 break;
1214 unsigned C2 = C->getZExtValue();
1215 unsigned XLen = Subtarget->getXLen();
1216 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1218 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1219 // shift pair might offer more compression opportunities.
1220 // TODO: We could check for C extension here, but we don't have many lit
1221 // tests with the C extension enabled so not checking gets better
1222 // coverage.
1223 // TODO: What if ANDI is faster than the shift?
1224 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1226 uint64_t C1 = N1C->getZExtValue();
1228 // Clear irrelevant bits in the mask.
1229 if (LeftShift)
1230 C1 &= maskTrailingZeros<uint64_t>(C2);
1231 else
1232 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1234 // Some transforms should only be done if the shift has a single use or
1235 // the AND would become (srli (slli X, 32), 32)
1236 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1238 SDValue X = N0.getOperand(0);
1240 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1241 // with c3 leading zeros.
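// For example, on RV64 (and (srl x, 8), 0xFFFF) has a mask with
// c3 = 48 leading zeros; when none of the more specific cases below
// (SRLIW, SRAIW+SRLIW, th.extu) apply and the shift has a single use,
// it is selected as (srli (slli x, 40), 48).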
1242 if (!LeftShift && isMask_64(C1)) {
1243 unsigned Leading = XLen - llvm::bit_width(C1);
1244 if (C2 < Leading) {
1245 // If the number of leading zeros is C2+32 this can be SRLIW.
1246 if (C2 + 32 == Leading) {
1247 SDNode *SRLIW = CurDAG->getMachineNode(
1248 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1249 ReplaceNode(Node, SRLIW);
1250 return;
1253 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1254 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1256 // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
1257 // legalized and goes through DAG combine.
1258 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1259 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1260 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1261 SDNode *SRAIW =
1262 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1263 CurDAG->getTargetConstant(31, DL, VT));
1264 SDNode *SRLIW = CurDAG->getMachineNode(
1265 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1266 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1267 ReplaceNode(Node, SRLIW);
1268 return;
1271 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1272 // available.
1273 // Transform (and (srl x, C2), C1)
1274 // -> (<bfextract> x, msb, lsb)
1276 // Make sure to keep this below the SRLIW cases, as we always want to
1277 // prefer the more common instruction.
1278 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1279 const unsigned Lsb = C2;
1280 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1281 return;
1283 // Otherwise, fall back to (srli (slli x, c3-c2), c3).
1284 // Skip if we could use (zext.w (sraiw X, C2)).
1285 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1288 // Also Skip if we can use bexti or th.tst.
1289 Skip |= HasBitTest && Leading == XLen - 1;
1290 if (OneUseOrZExtW && !Skip) {
1291 SDNode *SLLI = CurDAG->getMachineNode(
1292 RISCV::SLLI, DL, VT, X,
1293 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1294 SDNode *SRLI = CurDAG->getMachineNode(
1295 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1296 CurDAG->getTargetConstant(Leading, DL, VT));
1297 ReplaceNode(Node, SRLI);
1298 return;
1303 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1304 // shifted by c2 bits with c3 leading zeros.
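// For example, on RV64 (and (shl x, 8), 0xFFFFFF00) keeps a 24-bit
// field, so (assuming the shift has a single use) it is selected as
// (srli (slli x, 40), 32), i.e. slli by c2 + c3 = 8 + 32 and srli by
// c3 = 32.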
1305 if (LeftShift && isShiftedMask_64(C1)) {
1306 unsigned Leading = XLen - llvm::bit_width(C1);
1308 if (C2 + Leading < XLen &&
1309 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1310 // Use slli.uw when possible.
1311 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1312 SDNode *SLLI_UW =
1313 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1314 CurDAG->getTargetConstant(C2, DL, VT));
1315 ReplaceNode(Node, SLLI_UW);
1316 return;
1319 // (srli (slli c2+c3), c3)
1320 if (OneUseOrZExtW && !IsCANDI) {
1321 SDNode *SLLI = CurDAG->getMachineNode(
1322 RISCV::SLLI, DL, VT, X,
1323 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1324 SDNode *SRLI = CurDAG->getMachineNode(
1325 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1326 CurDAG->getTargetConstant(Leading, DL, VT));
1327 ReplaceNode(Node, SRLI);
1328 return;
1333 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1334 // shifted mask with c2 leading zeros and c3 trailing zeros.
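// For example, on RV64 (and (srl x, 8), 0x00FFFFFFFFFFFF00) has a mask
// with 8 leading and 8 trailing zeros, matching c2 = 8, so (assuming
// the shift has a single use) it is selected as (slli (srli x, 16), 8).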
1335 if (!LeftShift && isShiftedMask_64(C1)) {
1336 unsigned Leading = XLen - llvm::bit_width(C1);
1337 unsigned Trailing = llvm::countr_zero(C1);
1338 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1339 !IsCANDI) {
1340 unsigned SrliOpc = RISCV::SRLI;
1341 // If the input is zexti32 we should use SRLIW.
1342 if (X.getOpcode() == ISD::AND &&
1343 isa<ConstantSDNode>(X.getOperand(1)) &&
1344 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1345 SrliOpc = RISCV::SRLIW;
1346 X = X.getOperand(0);
1348 SDNode *SRLI = CurDAG->getMachineNode(
1349 SrliOpc, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1353 CurDAG->getTargetConstant(Trailing, DL, VT));
1354 ReplaceNode(Node, SLLI);
1355 return;
1357 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1358 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1359 OneUseOrZExtW && !IsCANDI) {
1360 SDNode *SRLIW = CurDAG->getMachineNode(
1361 RISCV::SRLIW, DL, VT, X,
1362 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1363 SDNode *SLLI = CurDAG->getMachineNode(
1364 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1365 CurDAG->getTargetConstant(Trailing, DL, VT));
1366 ReplaceNode(Node, SLLI);
1367 return;
1369 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1370 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1371 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1372 SDNode *SRLI = CurDAG->getMachineNode(
1373 RISCV::SRLI, DL, VT, X,
1374 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1375 SDNode *SLLI_UW = CurDAG->getMachineNode(
1376 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1377 CurDAG->getTargetConstant(Trailing, DL, VT));
1378 ReplaceNode(Node, SLLI_UW);
1379 return;
1383 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1384 // shifted mask with no leading zeros and c3 trailing zeros.
1385 if (LeftShift && isShiftedMask_64(C1)) {
1386 unsigned Leading = XLen - llvm::bit_width(C1);
1387 unsigned Trailing = llvm::countr_zero(C1);
1388 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1389 SDNode *SRLI = CurDAG->getMachineNode(
1390 RISCV::SRLI, DL, VT, X,
1391 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1392 SDNode *SLLI = CurDAG->getMachineNode(
1393 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1394 CurDAG->getTargetConstant(Trailing, DL, VT));
1395 ReplaceNode(Node, SLLI);
1396 return;
1398 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1399 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1400 SDNode *SRLIW = CurDAG->getMachineNode(
1401 RISCV::SRLIW, DL, VT, X,
1402 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1403 SDNode *SLLI = CurDAG->getMachineNode(
1404 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1405 CurDAG->getTargetConstant(Trailing, DL, VT));
1406 ReplaceNode(Node, SLLI);
1407 return;
1410 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1411 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1412 Subtarget->hasStdExtZba()) {
1413 SDNode *SRLI = CurDAG->getMachineNode(
1414 RISCV::SRLI, DL, VT, X,
1415 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1416 SDNode *SLLI_UW = CurDAG->getMachineNode(
1417 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1418 CurDAG->getTargetConstant(Trailing, DL, VT));
1419 ReplaceNode(Node, SLLI_UW);
1420 return;
1425 const uint64_t C1 = N1C->getZExtValue();
1427 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1428 N0.hasOneUse()) {
1429 unsigned C2 = N0.getConstantOperandVal(1);
1430 unsigned XLen = Subtarget->getXLen();
1431 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1433 SDValue X = N0.getOperand(0);
1435 // Prefer SRAIW + ANDI when possible.
1436 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1437 X.getOpcode() == ISD::SHL &&
1438 isa<ConstantSDNode>(X.getOperand(1)) &&
1439 X.getConstantOperandVal(1) == 32;
1440 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1441 // mask with c3 leading zeros and c2 is larger than c3.
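// For example, on RV64 (and (sra x, 40), 0xFFFFFFFF) has a mask with
// c3 = 32 leading zeros and c2 = 40 > 32, so it is selected as
// (srli (srai x, 8), 32).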
1442 if (isMask_64(C1) && !Skip) {
1443 unsigned Leading = XLen - llvm::bit_width(C1);
1444 if (C2 > Leading) {
1445 SDNode *SRAI = CurDAG->getMachineNode(
1446 RISCV::SRAI, DL, VT, X,
1447 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1448 SDNode *SRLI = CurDAG->getMachineNode(
1449 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1450 CurDAG->getTargetConstant(Leading, DL, VT));
1451 ReplaceNode(Node, SRLI);
1452 return;
1456 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1457 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1458 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1459 if (isShiftedMask_64(C1) && !Skip) {
1460 unsigned Leading = XLen - llvm::bit_width(C1);
1461 unsigned Trailing = llvm::countr_zero(C1);
1462 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1463 SDNode *SRAI = CurDAG->getMachineNode(
1464 RISCV::SRAI, DL, VT, N0.getOperand(0),
1465 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1466 SDNode *SRLI = CurDAG->getMachineNode(
1467 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1468 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1469 SDNode *SLLI = CurDAG->getMachineNode(
1470 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1471 CurDAG->getTargetConstant(Trailing, DL, VT));
1472 ReplaceNode(Node, SLLI);
1473 return;
1478 // If C1 masks off the upper bits only (but can't be formed as an
1479 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1480 // available.
1481 // Transform (and x, C1)
1482 // -> (<bfextract> x, msb, lsb)
1483 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1484 const unsigned Msb = llvm::bit_width(C1) - 1;
1485 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1486 return;
1489 if (tryShrinkShlLogicImm(Node))
1490 return;
1492 break;
1494 case ISD::MUL: {
1495 // Special case for calculating (mul (and X, C2), C1) where the full product
1496 // fits in XLen bits. We can shift X left by the number of leading zeros in
1497 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1498 // product has XLen trailing zeros, putting it in the output of MULHU. This
1499 // can avoid materializing a constant in a register for C2.
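// For example, on RV64 (mul (and X, 0xFFFFFF), 100): the mask has 40
// leading zeros, so we emit (mulhu (slli X, 40), 100 << 24). The
// 128-bit product is (X & 0xFFFFFF) * 100 * 2^64, so its high half is
// exactly the desired result and 0xFFFFFF is never materialized.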
1501 // RHS should be a constant.
1502 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1503 if (!N1C || !N1C->hasOneUse())
1504 break;
1506 // LHS should be an AND with constant.
1507 SDValue N0 = Node->getOperand(0);
1508 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1509 break;
1511 uint64_t C2 = N0.getConstantOperandVal(1);
1513 // Constant should be a mask.
1514 if (!isMask_64(C2))
1515 break;
1517 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1518 // multiple users or the constant is a simm12. This prevents inserting a
1519 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1520 // make it more costly to materialize. Otherwise, using a SLLI might allow
1521 // it to be compressed.
1522 bool IsANDIOrZExt =
1523 isInt<12>(C2) ||
1524 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1525 // With XTHeadBb, we can use TH.EXTU.
1526 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1527 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1528 break;
1529 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1530 // the constant is a simm32.
1531 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1532 // With XTHeadBb, we can use TH.EXTU.
1533 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1534 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1535 break;
1537 // We need to shift left the AND input and C1 by a total of XLen bits.
1539 // How far left do we need to shift the AND input?
1540 unsigned XLen = Subtarget->getXLen();
1541 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1543 // The constant gets shifted by the remaining amount unless that would
1544 // shift bits out.
1545 uint64_t C1 = N1C->getZExtValue();
1546 unsigned ConstantShift = XLen - LeadingZeros;
1547 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1548 break;
1550 uint64_t ShiftedC1 = C1 << ConstantShift;
1551 // If this is RV32, we need to sign extend the constant.
1552 if (XLen == 32)
1553 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1555 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1556 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1557 SDNode *SLLI =
1558 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1559 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1560 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1561 SDValue(SLLI, 0), SDValue(Imm, 0));
1562 ReplaceNode(Node, MULHU);
1563 return;
1565 case ISD::LOAD: {
1566 if (tryIndexedLoad(Node))
1567 return;
1569 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1570 // We match post-incrementing load here
1571 LoadSDNode *Load = cast<LoadSDNode>(Node);
1572 if (Load->getAddressingMode() != ISD::POST_INC)
1573 break;
1575 SDValue Chain = Node->getOperand(0);
1576 SDValue Base = Node->getOperand(1);
1577 SDValue Offset = Node->getOperand(2);
1579 bool Simm12 = false;
1580 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1582 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1583 int ConstantVal = ConstantOffset->getSExtValue();
1584 Simm12 = isInt<12>(ConstantVal);
1585 if (Simm12)
1586 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1587 Offset.getValueType());
1590 unsigned Opcode = 0;
1591 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1592 case MVT::i8:
1593 if (Simm12 && SignExtend)
1594 Opcode = RISCV::CV_LB_ri_inc;
1595 else if (Simm12 && !SignExtend)
1596 Opcode = RISCV::CV_LBU_ri_inc;
1597 else if (!Simm12 && SignExtend)
1598 Opcode = RISCV::CV_LB_rr_inc;
1599 else
1600 Opcode = RISCV::CV_LBU_rr_inc;
1601 break;
1602 case MVT::i16:
1603 if (Simm12 && SignExtend)
1604 Opcode = RISCV::CV_LH_ri_inc;
1605 else if (Simm12 && !SignExtend)
1606 Opcode = RISCV::CV_LHU_ri_inc;
1607 else if (!Simm12 && SignExtend)
1608 Opcode = RISCV::CV_LH_rr_inc;
1609 else
1610 Opcode = RISCV::CV_LHU_rr_inc;
1611 break;
1612 case MVT::i32:
1613 if (Simm12)
1614 Opcode = RISCV::CV_LW_ri_inc;
1615 else
1616 Opcode = RISCV::CV_LW_rr_inc;
1617 break;
1618 default:
1619 break;
1621 if (!Opcode)
1622 break;
1624 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1625 Chain.getSimpleValueType(), Base,
1626 Offset, Chain));
1627 return;
1629 break;
1631 case ISD::INTRINSIC_WO_CHAIN: {
1632 unsigned IntNo = Node->getConstantOperandVal(0);
1633 switch (IntNo) {
1634 // By default we do not custom select any intrinsic.
1635 default:
1636 break;
1637 case Intrinsic::riscv_vmsgeu:
1638 case Intrinsic::riscv_vmsge: {
1639 SDValue Src1 = Node->getOperand(1);
1640 SDValue Src2 = Node->getOperand(2);
1641 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1642 bool IsCmpConstant = false;
1643 bool IsCmpMinimum = false;
1644 // Only custom select scalar second operand.
1645 if (Src2.getValueType() != XLenVT)
1646 break;
1647 // Small constants are handled with patterns.
1648 int64_t CVal = 0;
1649 MVT Src1VT = Src1.getSimpleValueType();
1650 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1651 IsCmpConstant = true;
1652 CVal = C->getSExtValue();
1653 if (CVal >= -15 && CVal <= 16) {
1654 if (!IsUnsigned || CVal != 0)
1655 break;
1656 IsCmpMinimum = true;
1657 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1658 Src1VT.getScalarSizeInBits())
1659 .getSExtValue()) {
1660 IsCmpMinimum = true;
1663 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1664 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1665 default:
1666 llvm_unreachable("Unexpected LMUL!");
1667 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1668 case RISCVII::VLMUL::lmulenum: \
1669 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1670 : RISCV::PseudoVMSLT_VX_##suffix; \
1671 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1672 : RISCV::PseudoVMSGT_VX_##suffix; \
1673 break;
1674 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1675 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1676 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1677 CASE_VMSLT_OPCODES(LMUL_1, M1)
1678 CASE_VMSLT_OPCODES(LMUL_2, M2)
1679 CASE_VMSLT_OPCODES(LMUL_4, M4)
1680 CASE_VMSLT_OPCODES(LMUL_8, M8)
1681 #undef CASE_VMSLT_OPCODES
1683 // Mask operations use the LMUL from the mask type.
1684 switch (RISCVTargetLowering::getLMUL(VT)) {
1685 default:
1686 llvm_unreachable("Unexpected LMUL!");
1687 #define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1688 case RISCVII::VLMUL::lmulenum: \
1689 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1690 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1691 break;
1692 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1693 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1694 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1695 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1696 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1697 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1698 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1699 #undef CASE_VMNAND_VMSET_OPCODES
1701 SDValue SEW = CurDAG->getTargetConstant(
1702 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1703 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1704 SDValue VL;
1705 selectVLOp(Node->getOperand(3), VL);
1707 // If vmsge(u) with minimum value, expand it to vmset.
1708 if (IsCmpMinimum) {
1709 ReplaceNode(Node,
1710 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1711 return;
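// For a constant operand, vmsge(u).vx vd, va, C is equivalent to
// vmsgt(u).vx vd, va, C-1 (the C == 0 unsigned case and C == INT_MIN were
// already handled as IsCmpMinimum above), so materialize C-1 and use the
// vmsgt(u) pseudo directly.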
1714 if (IsCmpConstant) {
1715 SDValue Imm =
1716 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1718 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1719 {Src1, Imm, VL, SEW}));
1720 return;
1723 // Expand to
1724 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1725 SDValue Cmp = SDValue(
1726 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1728 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1729 {Cmp, Cmp, VL, MaskSEW}));
1730 return;
1732 case Intrinsic::riscv_vmsgeu_mask:
1733 case Intrinsic::riscv_vmsge_mask: {
1734 SDValue Src1 = Node->getOperand(2);
1735 SDValue Src2 = Node->getOperand(3);
1736 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1737 bool IsCmpConstant = false;
1738 bool IsCmpMinimum = false;
1739 // Only custom select scalar second operand.
1740 if (Src2.getValueType() != XLenVT)
1741 break;
1742 // Small constants are handled with patterns.
1743 MVT Src1VT = Src1.getSimpleValueType();
1744 int64_t CVal = 0;
1745 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1746 IsCmpConstant = true;
1747 CVal = C->getSExtValue();
1748 if (CVal >= -15 && CVal <= 16) {
1749 if (!IsUnsigned || CVal != 0)
1750 break;
1751 IsCmpMinimum = true;
1752 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1753 Src1VT.getScalarSizeInBits())
1754 .getSExtValue()) {
1755 IsCmpMinimum = true;
1758 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1759 VMOROpcode, VMSGTMaskOpcode;
1760 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1761 default:
1762 llvm_unreachable("Unexpected LMUL!");
1763 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1764 case RISCVII::VLMUL::lmulenum: \
1765 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1766 : RISCV::PseudoVMSLT_VX_##suffix; \
1767 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1768 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1769 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
1770 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
1771 break;
1772 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1773 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1774 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1775 CASE_VMSLT_OPCODES(LMUL_1, M1)
1776 CASE_VMSLT_OPCODES(LMUL_2, M2)
1777 CASE_VMSLT_OPCODES(LMUL_4, M4)
1778 CASE_VMSLT_OPCODES(LMUL_8, M8)
1779 #undef CASE_VMSLT_OPCODES
1781 // Mask operations use the LMUL from the mask type.
1782 switch (RISCVTargetLowering::getLMUL(VT)) {
1783 default:
1784 llvm_unreachable("Unexpected LMUL!");
1785 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1786 case RISCVII::VLMUL::lmulenum: \
1787 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1788 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1789 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1790 break;
1791 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
1792 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
1793 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
1794 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
1795 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
1796 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
1797 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
1798 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1800 SDValue SEW = CurDAG->getTargetConstant(
1801 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1802 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1803 SDValue VL;
1804 selectVLOp(Node->getOperand(5), VL);
1805 SDValue MaskedOff = Node->getOperand(1);
1806 SDValue Mask = Node->getOperand(4);
1808 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
1809 if (IsCmpMinimum) {
1810 // We don't need vmor if the MaskedOff and the Mask are the same
1811 // value.
1812 if (Mask == MaskedOff) {
1813 ReplaceUses(Node, Mask.getNode());
1814 return;
1816 ReplaceNode(Node,
1817 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1818 {Mask, MaskedOff, VL, MaskSEW}));
1819 return;
1822 // If the MaskedOff value and the Mask are the same value use
1823 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1824 // This avoids needing to copy v0 to vd before starting the next sequence.
1825 if (Mask == MaskedOff) {
1826 SDValue Cmp = SDValue(
1827 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1829 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1830 {Mask, Cmp, VL, MaskSEW}));
1831 return;
1834 // Mask needs to be copied to V0.
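// The CopyToReg produces a glue result so that the masked pseudo selected
// below reads V0 directly from this copy.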
1835 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1836 RISCV::V0, Mask, SDValue());
1837 SDValue Glue = Chain.getValue(1);
1838 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1840 if (IsCmpConstant) {
1841 SDValue Imm =
1842 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1844 ReplaceNode(Node, CurDAG->getMachineNode(
1845 VMSGTMaskOpcode, DL, VT,
1846 {MaskedOff, Src1, Imm, V0, VL, SEW, Glue}));
1847 return;
1850 // Otherwise use
1851 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1852 // The result is mask undisturbed.
1853 // We use the same instructions to emulate mask agnostic behavior, because
1854 // the agnostic result can be either undisturbed or all 1.
1855 SDValue Cmp = SDValue(
1856 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1857 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1859 // vmxor.mm vd, vd, v0 is used to update the active elements.
1860 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1861 {Cmp, Mask, VL, MaskSEW}));
1862 return;
1864 case Intrinsic::riscv_vsetvli:
1865 case Intrinsic::riscv_vsetvlimax:
1866 return selectVSETVLI(Node);
1868 break;
1870 case ISD::INTRINSIC_W_CHAIN: {
1871 unsigned IntNo = Node->getConstantOperandVal(1);
1872 switch (IntNo) {
1873 // By default we do not custom select any intrinsic.
1874 default:
1875 break;
1876 case Intrinsic::riscv_vlseg2:
1877 case Intrinsic::riscv_vlseg3:
1878 case Intrinsic::riscv_vlseg4:
1879 case Intrinsic::riscv_vlseg5:
1880 case Intrinsic::riscv_vlseg6:
1881 case Intrinsic::riscv_vlseg7:
1882 case Intrinsic::riscv_vlseg8: {
1883 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1884 /*IsStrided*/ false);
1885 return;
1887 case Intrinsic::riscv_vlseg2_mask:
1888 case Intrinsic::riscv_vlseg3_mask:
1889 case Intrinsic::riscv_vlseg4_mask:
1890 case Intrinsic::riscv_vlseg5_mask:
1891 case Intrinsic::riscv_vlseg6_mask:
1892 case Intrinsic::riscv_vlseg7_mask:
1893 case Intrinsic::riscv_vlseg8_mask: {
1894 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1895 /*IsStrided*/ false);
1896 return;
1898 case Intrinsic::riscv_vlsseg2:
1899 case Intrinsic::riscv_vlsseg3:
1900 case Intrinsic::riscv_vlsseg4:
1901 case Intrinsic::riscv_vlsseg5:
1902 case Intrinsic::riscv_vlsseg6:
1903 case Intrinsic::riscv_vlsseg7:
1904 case Intrinsic::riscv_vlsseg8: {
1905 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1906 /*IsStrided*/ true);
1907 return;
1909 case Intrinsic::riscv_vlsseg2_mask:
1910 case Intrinsic::riscv_vlsseg3_mask:
1911 case Intrinsic::riscv_vlsseg4_mask:
1912 case Intrinsic::riscv_vlsseg5_mask:
1913 case Intrinsic::riscv_vlsseg6_mask:
1914 case Intrinsic::riscv_vlsseg7_mask:
1915 case Intrinsic::riscv_vlsseg8_mask: {
1916 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1917 /*IsStrided*/ true);
1918 return;
1920 case Intrinsic::riscv_vloxseg2:
1921 case Intrinsic::riscv_vloxseg3:
1922 case Intrinsic::riscv_vloxseg4:
1923 case Intrinsic::riscv_vloxseg5:
1924 case Intrinsic::riscv_vloxseg6:
1925 case Intrinsic::riscv_vloxseg7:
1926 case Intrinsic::riscv_vloxseg8:
1927 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1928 /*IsOrdered*/ true);
1929 return;
1930 case Intrinsic::riscv_vluxseg2:
1931 case Intrinsic::riscv_vluxseg3:
1932 case Intrinsic::riscv_vluxseg4:
1933 case Intrinsic::riscv_vluxseg5:
1934 case Intrinsic::riscv_vluxseg6:
1935 case Intrinsic::riscv_vluxseg7:
1936 case Intrinsic::riscv_vluxseg8:
1937 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1938 /*IsOrdered*/ false);
1939 return;
1940 case Intrinsic::riscv_vloxseg2_mask:
1941 case Intrinsic::riscv_vloxseg3_mask:
1942 case Intrinsic::riscv_vloxseg4_mask:
1943 case Intrinsic::riscv_vloxseg5_mask:
1944 case Intrinsic::riscv_vloxseg6_mask:
1945 case Intrinsic::riscv_vloxseg7_mask:
1946 case Intrinsic::riscv_vloxseg8_mask:
1947 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1948 /*IsOrdered*/ true);
1949 return;
1950 case Intrinsic::riscv_vluxseg2_mask:
1951 case Intrinsic::riscv_vluxseg3_mask:
1952 case Intrinsic::riscv_vluxseg4_mask:
1953 case Intrinsic::riscv_vluxseg5_mask:
1954 case Intrinsic::riscv_vluxseg6_mask:
1955 case Intrinsic::riscv_vluxseg7_mask:
1956 case Intrinsic::riscv_vluxseg8_mask:
1957 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1958 /*IsOrdered*/ false);
1959 return;
1960 case Intrinsic::riscv_vlseg8ff:
1961 case Intrinsic::riscv_vlseg7ff:
1962 case Intrinsic::riscv_vlseg6ff:
1963 case Intrinsic::riscv_vlseg5ff:
1964 case Intrinsic::riscv_vlseg4ff:
1965 case Intrinsic::riscv_vlseg3ff:
1966 case Intrinsic::riscv_vlseg2ff: {
1967 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
1968 return;
1970 case Intrinsic::riscv_vlseg8ff_mask:
1971 case Intrinsic::riscv_vlseg7ff_mask:
1972 case Intrinsic::riscv_vlseg6ff_mask:
1973 case Intrinsic::riscv_vlseg5ff_mask:
1974 case Intrinsic::riscv_vlseg4ff_mask:
1975 case Intrinsic::riscv_vlseg3ff_mask:
1976 case Intrinsic::riscv_vlseg2ff_mask: {
1977 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
1978 return;
1980 case Intrinsic::riscv_vloxei:
1981 case Intrinsic::riscv_vloxei_mask:
1982 case Intrinsic::riscv_vluxei:
1983 case Intrinsic::riscv_vluxei_mask: {
1984 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1985 IntNo == Intrinsic::riscv_vluxei_mask;
1986 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1987 IntNo == Intrinsic::riscv_vloxei_mask;
1989 MVT VT = Node->getSimpleValueType(0);
1990 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1992 unsigned CurOp = 2;
1993 SmallVector<SDValue, 8> Operands;
1994 Operands.push_back(Node->getOperand(CurOp++));
1996 MVT IndexVT;
1997 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1998 /*IsStridedOrIndexed*/ true, Operands,
1999 /*IsLoad=*/true, &IndexVT);
2001 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2002 "Element count mismatch");
2004 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2005 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2006 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2007 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2008 report_fatal_error("The V extension does not support EEW=64 for index "
2009 "values when XLEN=32");
2011 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2012 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2013 static_cast<unsigned>(IndexLMUL));
2014 MachineSDNode *Load =
2015 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2017 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2018 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2020 ReplaceNode(Node, Load);
2021 return;
2023 case Intrinsic::riscv_vlm:
2024 case Intrinsic::riscv_vle:
2025 case Intrinsic::riscv_vle_mask:
2026 case Intrinsic::riscv_vlse:
2027 case Intrinsic::riscv_vlse_mask: {
2028 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2029 IntNo == Intrinsic::riscv_vlse_mask;
2030 bool IsStrided =
2031 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2033 MVT VT = Node->getSimpleValueType(0);
2034 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2036 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2037 // operand at the IR level. In pseudos, it has both a policy and a
2038 // passthru operand. The passthru operand is needed to track the
2039 // "tail undefined" state, and the policy is there just for
2040 // consistency - it will always be "don't care" for the
2041 // unmasked form.
2042 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2043 unsigned CurOp = 2;
2044 SmallVector<SDValue, 8> Operands;
2045 if (HasPassthruOperand)
2046 Operands.push_back(Node->getOperand(CurOp++));
2047 else {
2048 // We eagerly lower to implicit_def (instead of undef), as we
2049 // otherwise fail to select nodes such as: nxv1i1 = undef
2050 SDNode *Passthru =
2051 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2052 Operands.push_back(SDValue(Passthru, 0));
2054 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2055 Operands, /*IsLoad=*/true);
2057 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2058 const RISCV::VLEPseudo *P =
2059 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2060 static_cast<unsigned>(LMUL));
2061 MachineSDNode *Load =
2062 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2064 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2065 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2067 ReplaceNode(Node, Load);
2068 return;
2070 case Intrinsic::riscv_vleff:
2071 case Intrinsic::riscv_vleff_mask: {
2072 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2074 MVT VT = Node->getSimpleValueType(0);
2075 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2077 unsigned CurOp = 2;
2078 SmallVector<SDValue, 7> Operands;
2079 Operands.push_back(Node->getOperand(CurOp++));
2080 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2081 /*IsStridedOrIndexed*/ false, Operands,
2082 /*IsLoad=*/true);
2084 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2085 const RISCV::VLEPseudo *P =
2086 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2087 Log2SEW, static_cast<unsigned>(LMUL));
2088 MachineSDNode *Load = CurDAG->getMachineNode(
2089 P->Pseudo, DL, Node->getVTList(), Operands);
2090 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2091 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2093 ReplaceNode(Node, Load);
2094 return;
2097 break;
2099 case ISD::INTRINSIC_VOID: {
2100 unsigned IntNo = Node->getConstantOperandVal(1);
2101 switch (IntNo) {
2102 case Intrinsic::riscv_vsseg2:
2103 case Intrinsic::riscv_vsseg3:
2104 case Intrinsic::riscv_vsseg4:
2105 case Intrinsic::riscv_vsseg5:
2106 case Intrinsic::riscv_vsseg6:
2107 case Intrinsic::riscv_vsseg7:
2108 case Intrinsic::riscv_vsseg8: {
2109 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2110 /*IsStrided*/ false);
2111 return;
2113 case Intrinsic::riscv_vsseg2_mask:
2114 case Intrinsic::riscv_vsseg3_mask:
2115 case Intrinsic::riscv_vsseg4_mask:
2116 case Intrinsic::riscv_vsseg5_mask:
2117 case Intrinsic::riscv_vsseg6_mask:
2118 case Intrinsic::riscv_vsseg7_mask:
2119 case Intrinsic::riscv_vsseg8_mask: {
2120 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2121 /*IsStrided*/ false);
2122 return;
2124 case Intrinsic::riscv_vssseg2:
2125 case Intrinsic::riscv_vssseg3:
2126 case Intrinsic::riscv_vssseg4:
2127 case Intrinsic::riscv_vssseg5:
2128 case Intrinsic::riscv_vssseg6:
2129 case Intrinsic::riscv_vssseg7:
2130 case Intrinsic::riscv_vssseg8: {
2131 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2132 /*IsStrided*/ true);
2133 return;
2135 case Intrinsic::riscv_vssseg2_mask:
2136 case Intrinsic::riscv_vssseg3_mask:
2137 case Intrinsic::riscv_vssseg4_mask:
2138 case Intrinsic::riscv_vssseg5_mask:
2139 case Intrinsic::riscv_vssseg6_mask:
2140 case Intrinsic::riscv_vssseg7_mask:
2141 case Intrinsic::riscv_vssseg8_mask: {
2142 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2143 /*IsStrided*/ true);
2144 return;
2146 case Intrinsic::riscv_vsoxseg2:
2147 case Intrinsic::riscv_vsoxseg3:
2148 case Intrinsic::riscv_vsoxseg4:
2149 case Intrinsic::riscv_vsoxseg5:
2150 case Intrinsic::riscv_vsoxseg6:
2151 case Intrinsic::riscv_vsoxseg7:
2152 case Intrinsic::riscv_vsoxseg8:
2153 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2154 /*IsOrdered*/ true);
2155 return;
2156 case Intrinsic::riscv_vsuxseg2:
2157 case Intrinsic::riscv_vsuxseg3:
2158 case Intrinsic::riscv_vsuxseg4:
2159 case Intrinsic::riscv_vsuxseg5:
2160 case Intrinsic::riscv_vsuxseg6:
2161 case Intrinsic::riscv_vsuxseg7:
2162 case Intrinsic::riscv_vsuxseg8:
2163 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2164 /*IsOrdered*/ false);
2165 return;
2166 case Intrinsic::riscv_vsoxseg2_mask:
2167 case Intrinsic::riscv_vsoxseg3_mask:
2168 case Intrinsic::riscv_vsoxseg4_mask:
2169 case Intrinsic::riscv_vsoxseg5_mask:
2170 case Intrinsic::riscv_vsoxseg6_mask:
2171 case Intrinsic::riscv_vsoxseg7_mask:
2172 case Intrinsic::riscv_vsoxseg8_mask:
2173 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2174 /*IsOrdered*/ true);
2175 return;
2176 case Intrinsic::riscv_vsuxseg2_mask:
2177 case Intrinsic::riscv_vsuxseg3_mask:
2178 case Intrinsic::riscv_vsuxseg4_mask:
2179 case Intrinsic::riscv_vsuxseg5_mask:
2180 case Intrinsic::riscv_vsuxseg6_mask:
2181 case Intrinsic::riscv_vsuxseg7_mask:
2182 case Intrinsic::riscv_vsuxseg8_mask:
2183 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2184 /*IsOrdered*/ false);
2185 return;
2186 case Intrinsic::riscv_vsoxei:
2187 case Intrinsic::riscv_vsoxei_mask:
2188 case Intrinsic::riscv_vsuxei:
2189 case Intrinsic::riscv_vsuxei_mask: {
2190 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2191 IntNo == Intrinsic::riscv_vsuxei_mask;
2192 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2193 IntNo == Intrinsic::riscv_vsoxei_mask;
2195 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2196 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2198 unsigned CurOp = 2;
2199 SmallVector<SDValue, 8> Operands;
2200 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2202 MVT IndexVT;
2203 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2204 /*IsStridedOrIndexed*/ true, Operands,
2205 /*IsLoad=*/false, &IndexVT);
2207 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2208 "Element count mismatch");
2210 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2211 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2212 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2213 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2214 report_fatal_error("The V extension does not support EEW=64 for index "
2215 "values when XLEN=32");
2217 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2218 IsMasked, IsOrdered, IndexLog2EEW,
2219 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2220 MachineSDNode *Store =
2221 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2223 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2224 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2226 ReplaceNode(Node, Store);
2227 return;
2229 case Intrinsic::riscv_vsm:
2230 case Intrinsic::riscv_vse:
2231 case Intrinsic::riscv_vse_mask:
2232 case Intrinsic::riscv_vsse:
2233 case Intrinsic::riscv_vsse_mask: {
2234 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2235 IntNo == Intrinsic::riscv_vsse_mask;
2236 bool IsStrided =
2237 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2239 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2240 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2242 unsigned CurOp = 2;
2243 SmallVector<SDValue, 8> Operands;
2244 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2246 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2247 Operands);
2249 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2250 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2251 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2252 MachineSDNode *Store =
2253 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2254 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2255 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2257 ReplaceNode(Node, Store);
2258 return;
2260 case Intrinsic::riscv_sf_vc_x_se:
2261 case Intrinsic::riscv_sf_vc_i_se:
2262 selectSF_VC_X_SE(Node);
2263 return;
2265 break;
2267 case ISD::BITCAST: {
2268 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2269 // Just drop bitcasts between vectors if both are fixed or both are
2270 // scalable.
2271 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2272 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2273 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2274 CurDAG->RemoveDeadNode(Node);
2275 return;
2277 break;
2279 case ISD::INSERT_SUBVECTOR:
2280 case RISCVISD::TUPLE_INSERT: {
2281 SDValue V = Node->getOperand(0);
2282 SDValue SubV = Node->getOperand(1);
2283 SDLoc DL(SubV);
2284 auto Idx = Node->getConstantOperandVal(2);
2285 MVT SubVecVT = SubV.getSimpleValueType();
2287 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2288 MVT SubVecContainerVT = SubVecVT;
2289 // Establish the correct scalable-vector types for any fixed-length type.
2290 if (SubVecVT.isFixedLengthVector()) {
2291 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2292 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2293 [[maybe_unused]] bool ExactlyVecRegSized =
2294 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2295 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2296 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2297 .getKnownMinValue()));
2298 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2300 MVT ContainerVT = VT;
2301 if (VT.isFixedLengthVector())
2302 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2304 const auto *TRI = Subtarget->getRegisterInfo();
2305 unsigned SubRegIdx;
2306 std::tie(SubRegIdx, Idx) =
2307 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2308 ContainerVT, SubVecContainerVT, Idx, TRI);
2310 // If the Idx hasn't been completely eliminated then this is a subvector
2311 // insert which doesn't naturally align to a vector register. These must
2312 // be handled using instructions to manipulate the vector registers.
2313 if (Idx != 0)
2314 break;
2316 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2317 [[maybe_unused]] bool IsSubVecPartReg =
2318 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2319 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2320 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2321 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2322 V.isUndef()) &&
2323 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2324 "the subvector is smaller than a full-sized register");
2326 // If we haven't set a SubRegIdx, then we must be going between
2327 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2328 if (SubRegIdx == RISCV::NoSubRegister) {
2329 unsigned InRegClassID =
2330 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2331 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2332 InRegClassID &&
2333 "Unexpected subvector extraction");
2334 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2335 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2336 DL, VT, SubV, RC);
2337 ReplaceNode(Node, NewNode);
2338 return;
2341 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2342 ReplaceNode(Node, Insert.getNode());
2343 return;
2345 case ISD::EXTRACT_SUBVECTOR:
2346 case RISCVISD::TUPLE_EXTRACT: {
2347 SDValue V = Node->getOperand(0);
2348 auto Idx = Node->getConstantOperandVal(1);
2349 MVT InVT = V.getSimpleValueType();
2350 SDLoc DL(V);
2352 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2353 MVT SubVecContainerVT = VT;
2354 // Establish the correct scalable-vector types for any fixed-length type.
2355 if (VT.isFixedLengthVector()) {
2356 assert(Idx == 0);
2357 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2359 if (InVT.isFixedLengthVector())
2360 InVT = TLI.getContainerForFixedLengthVector(InVT);
2362 const auto *TRI = Subtarget->getRegisterInfo();
2363 unsigned SubRegIdx;
2364 std::tie(SubRegIdx, Idx) =
2365 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2366 InVT, SubVecContainerVT, Idx, TRI);
2368 // If the Idx hasn't been completely eliminated then this is a subvector
2369 // extract which doesn't naturally align to a vector register. These must
2370 // be handled using instructions to manipulate the vector registers.
2371 if (Idx != 0)
2372 break;
2374 // If we haven't set a SubRegIdx, then we must be going between
2375 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2376 if (SubRegIdx == RISCV::NoSubRegister) {
2377 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2378 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2379 InRegClassID &&
2380 "Unexpected subvector extraction");
2381 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2382 SDNode *NewNode =
2383 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2384 ReplaceNode(Node, NewNode);
2385 return;
2388 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2389 ReplaceNode(Node, Extract.getNode());
2390 return;
2392 case RISCVISD::VMV_S_X_VL:
2393 case RISCVISD::VFMV_S_F_VL:
2394 case RISCVISD::VMV_V_X_VL:
2395 case RISCVISD::VFMV_V_F_VL: {
2396 // Try to match splat of a scalar load to a strided load with stride of x0.
2397 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2398 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2399 if (!Node->getOperand(0).isUndef())
2400 break;
2401 SDValue Src = Node->getOperand(1);
2402 auto *Ld = dyn_cast<LoadSDNode>(Src);
2403 // Can't fold an indexed load: its second result (the updated address) is
2404 // used, so the load node can't be removed.
2405 if (!Ld || Ld->isIndexed())
2406 break;
2407 EVT MemVT = Ld->getMemoryVT();
2408 // The memory VT should be the same size as the element type.
2409 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2410 break;
2411 if (!IsProfitableToFold(Src, Node, Node) ||
2412 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2413 break;
2415 SDValue VL;
2416 if (IsScalarMove) {
2417 // We could deal with more VL if we update the VSETVLI insert pass to
2418 // avoid introducing more VSETVLI.
2419 if (!isOneConstant(Node->getOperand(2)))
2420 break;
2421 selectVLOp(Node->getOperand(2), VL);
2422 } else
2423 selectVLOp(Node->getOperand(2), VL);
2425 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2426 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2428 // If VL=1, then we don't need to do a strided load and can just do a
2429 // regular load.
2430 bool IsStrided = !isOneConstant(VL);
2432 // Only do a strided load if the subtarget has an optimized zero-stride vector load.
2433 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2434 break;
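// With a stride of x0 every element is loaded from the same address, so the
// strided load broadcasts the scalar memory value across the vector.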
2436 SmallVector<SDValue> Operands = {
2437 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2438 Ld->getBasePtr()};
2439 if (IsStrided)
2440 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2441 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2442 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2443 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2445 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2446 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2447 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2448 Log2SEW, static_cast<unsigned>(LMUL));
2449 MachineSDNode *Load =
2450 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2451 // Update the chain.
2452 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2453 // Record the mem-refs
2454 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2455 // Replace the splat with the vlse.
2456 ReplaceNode(Node, Load);
2457 return;
2459 case ISD::PREFETCH:
2460 unsigned Locality = Node->getConstantOperandVal(3);
2461 if (Locality > 2)
2462 break;
2464 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2465 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2466 MMO->setFlags(MachineMemOperand::MONonTemporal);
2468 int NontemporalLevel = 0;
2469 switch (Locality) {
2470 case 0:
2471 NontemporalLevel = 3; // NTL.ALL
2472 break;
2473 case 1:
2474 NontemporalLevel = 1; // NTL.PALL
2475 break;
2476 case 2:
2477 NontemporalLevel = 0; // NTL.P1
2478 break;
2479 default:
2480 llvm_unreachable("unexpected locality value.");
2483 if (NontemporalLevel & 0b1)
2484 MMO->setFlags(MONontemporalBit0);
2485 if (NontemporalLevel & 0b10)
2486 MMO->setFlags(MONontemporalBit1);
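// The two MMO flag bits together encode the nontemporal level (0-3) chosen
// above; later stages use them to pick the matching Zihintntl (ntl.*) hint
// for this access.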
2488 break;
2491 // Select the default instruction.
2492 SelectCode(Node);
2495 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2496 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2497 std::vector<SDValue> &OutOps) {
2498 // Always produce a register and immediate operand, as expected by
2499 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2500 switch (ConstraintID) {
2501 case InlineAsm::ConstraintCode::o:
2502 case InlineAsm::ConstraintCode::m: {
2503 SDValue Op0, Op1;
2504 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2505 assert(Found && "SelectAddrRegImm should always succeed");
2506 OutOps.push_back(Op0);
2507 OutOps.push_back(Op1);
2508 return false;
2510 case InlineAsm::ConstraintCode::A:
2511 OutOps.push_back(Op);
2512 OutOps.push_back(
2513 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2514 return false;
2515 default:
2516 report_fatal_error("Unexpected asm memory constraint " +
2517 InlineAsm::getMemConstraintName(ConstraintID));
2520 return true;
2523 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2524 SDValue &Offset) {
2525 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2526 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2527 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2528 return true;
2531 return false;
2534 // Fold constant addresses.
2535 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2536 const MVT VT, const RISCVSubtarget *Subtarget,
2537 SDValue Addr, SDValue &Base, SDValue &Offset,
2538 bool IsPrefetch = false,
2539 bool IsRV32Zdinx = false) {
2540 if (!isa<ConstantSDNode>(Addr))
2541 return false;
2543 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2545 // If the constant is a simm12, we can fold the whole constant and use X0 as
2546 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2547 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
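// For example, 0x12345678 splits into Lo12 = 0x678 and Hi = 0x12345000, so
// the base becomes LUI 0x12345 and the folded offset is 0x678.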
2548 int64_t Lo12 = SignExtend64<12>(CVal);
2549 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2550 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2551 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2552 return false;
2553 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2554 return false;
2556 if (Hi) {
2557 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2558 Base = SDValue(
2559 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2560 CurDAG->getTargetConstant(Hi20, DL, VT)),
2562 } else {
2563 Base = CurDAG->getRegister(RISCV::X0, VT);
2565 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2566 return true;
2569 // Ask how constant materialization would handle this constant.
2570 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2572 // If the last instruction would be an ADDI, we can fold its immediate and
2573 // emit the rest of the sequence as the base.
2574 if (Seq.back().getOpcode() != RISCV::ADDI)
2575 return false;
2576 Lo12 = Seq.back().getImm();
2577 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2578 return false;
2579 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2580 return false;
2582 // Drop the last instruction.
2583 Seq.pop_back();
2584 assert(!Seq.empty() && "Expected more instructions in sequence");
2586 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2587 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2588 return true;
2591 // Is this ADD instruction only used as the base pointer of scalar loads and
2592 // stores?
2593 static bool isWorthFoldingAdd(SDValue Add) {
2594 for (auto *User : Add->users()) {
2595 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2596 User->getOpcode() != ISD::ATOMIC_LOAD &&
2597 User->getOpcode() != ISD::ATOMIC_STORE)
2598 return false;
2599 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2600 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2601 VT != MVT::f64)
2602 return false;
2603 // Don't allow stores of the value. It must be used as the address.
2604 if (User->getOpcode() == ISD::STORE &&
2605 cast<StoreSDNode>(User)->getValue() == Add)
2606 return false;
2607 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2608 cast<AtomicSDNode>(User)->getVal() == Add)
2609 return false;
2612 return true;
2615 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2616 unsigned MaxShiftAmount,
2617 SDValue &Base, SDValue &Index,
2618 SDValue &Scale) {
2619 EVT VT = Addr.getSimpleValueType();
2620 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2621 SDValue &Shift) {
2622 uint64_t ShiftAmt = 0;
2623 Index = N;
2625 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2626 // Only match shifts by a value in range [0, MaxShiftAmount].
2627 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2628 Index = N.getOperand(0);
2629 ShiftAmt = N.getConstantOperandVal(1);
2633 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2634 return ShiftAmt != 0;
2637 if (Addr.getOpcode() == ISD::ADD) {
2638 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2639 SDValue AddrB = Addr.getOperand(0);
2640 if (AddrB.getOpcode() == ISD::ADD &&
2641 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2642 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2643 isInt<12>(C1->getSExtValue())) {
2644 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2645 SDValue C1Val =
2646 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2647 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2648 AddrB.getOperand(1), C1Val),
2650 return true;
2652 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2653 Base = Addr.getOperand(1);
2654 return true;
2655 } else {
2656 UnwrapShl(Addr.getOperand(1), Index, Scale);
2657 Base = Addr.getOperand(0);
2658 return true;
2660 } else if (UnwrapShl(Addr, Index, Scale)) {
2661 EVT VT = Addr.getValueType();
2662 Base = CurDAG->getRegister(RISCV::X0, VT);
2663 return true;
2666 return false;
2669 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2670 SDValue &Offset, bool IsRV32Zdinx) {
2671 if (SelectAddrFrameIndex(Addr, Base, Offset))
2672 return true;
2674 SDLoc DL(Addr);
2675 MVT VT = Addr.getSimpleValueType();
2677 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2678 // If this is non RV32Zdinx we can always fold.
2679 if (!IsRV32Zdinx) {
2680 Base = Addr.getOperand(0);
2681 Offset = Addr.getOperand(1);
2682 return true;
2685 // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4
2686 // to the offset when we expand in RISCVExpandPseudoInsts.
2687 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
2688 const DataLayout &DL = CurDAG->getDataLayout();
2689 Align Alignment = commonAlignment(
2690 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2691 if (Alignment > 4) {
2692 Base = Addr.getOperand(0);
2693 Offset = Addr.getOperand(1);
2694 return true;
2697 if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
2698 Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
2699 if (Alignment > 4) {
2700 Base = Addr.getOperand(0);
2701 Offset = Addr.getOperand(1);
2702 return true;
2707 int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
2708 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2709 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2710 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2711 Base = Addr.getOperand(0);
2712 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2713 SDValue LoOperand = Base.getOperand(1);
2714 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2715 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2716 // (its low part, really), then we can rely on the alignment of that
2717 // variable to provide a margin of safety before the low part can overflow
2718 // the 12 bits of the load/store offset. Check if CVal falls within
2719 // that margin; if so (low part + CVal) can't overflow.
2720 const DataLayout &DL = CurDAG->getDataLayout();
2721 Align Alignment = commonAlignment(
2722 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2723 if ((CVal == 0 || Alignment > CVal) &&
2724 (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
2725 int64_t CombinedOffset = CVal + GA->getOffset();
2726 Base = Base.getOperand(0);
2727 Offset = CurDAG->getTargetGlobalAddress(
2728 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2729 CombinedOffset, GA->getTargetFlags());
2730 return true;
2735 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2736 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2737 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2738 return true;
2742 // Handle ADD with large immediates.
2743 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2744 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2745 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2746 "simm12 not already handled?");
2748 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2749 // an ADDI for part of the offset and fold the rest into the load/store.
2750 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
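// For example, an offset of 3000 becomes ADDI base, 2047 with the remaining
// 953 folded into the memory access.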
2751 if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
2752 int64_t Adj = CVal < 0 ? -2048 : 2047;
2753 Base = SDValue(
2754 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2755 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
2757 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
2758 return true;
2761 // For larger immediates, we might be able to save one instruction from
2762 // constant materialization by folding the Lo12 bits of the immediate into
2763 // the address. We should only do this if the ADD is only used by loads and
2764 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2765 // separately with the full materialized immediate, creating extra
2766 // instructions.
2767 if (isWorthFoldingAdd(Addr) &&
2768 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2769 Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
2770 // Insert an ADD instruction with the materialized Hi52 bits.
2771 Base = SDValue(
2772 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2774 return true;
2778 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2779 /*IsPrefetch=*/false, RV32ZdinxRange))
2780 return true;
2782 Base = Addr;
2783 Offset = CurDAG->getTargetConstant(0, DL, VT);
2784 return true;
2787 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2788 /// Offset should be all zeros.
2789 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2790 SDValue &Offset) {
2791 if (SelectAddrFrameIndex(Addr, Base, Offset))
2792 return true;
2794 SDLoc DL(Addr);
2795 MVT VT = Addr.getSimpleValueType();
2797 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2798 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2799 if (isInt<12>(CVal)) {
2800 Base = Addr.getOperand(0);
2802 // Early-out if not a valid offset.
2803 if ((CVal & 0b11111) != 0) {
2804 Base = Addr;
2805 Offset = CurDAG->getTargetConstant(0, DL, VT);
2806 return true;
2809 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2810 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2811 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2812 return true;
2816 // Handle ADD with large immediates.
2817 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2818 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2819 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2820 "simm12 not already handled?");
2822 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2823 // one instruction by folding the adjustment (-2048 or 2016) into the address.
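// For example, an offset of 3000 becomes ADDI base, 984 with 2016 (whose low
// five bits are zero) left as the folded offset.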
2824 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2825 int64_t Adj = CVal < 0 ? -2048 : 2016;
2826 int64_t AdjustedOffset = CVal - Adj;
2827 Base =
2828 SDValue(CurDAG->getMachineNode(
2829 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2830 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
2832 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
2833 return true;
2836 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2837 Offset, /*IsPrefetch=*/true)) {
2838 // Insert an ADD instruction with the materialized Hi52 bits.
2839 Base = SDValue(
2840 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2842 return true;
2846 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2847 /*IsPrefetch=*/true))
2848 return true;
2850 Base = Addr;
2851 Offset = CurDAG->getTargetConstant(0, DL, VT);
2852 return true;
2855 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2856 SDValue &Offset) {
2857 if (Addr.getOpcode() != ISD::ADD)
2858 return false;
2860 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2861 return false;
2863 Base = Addr.getOperand(1);
2864 Offset = Addr.getOperand(0);
2865 return true;
2868 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2869 SDValue &ShAmt) {
2870 ShAmt = N;
2872 // Peek through zext.
2873 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2874 ShAmt = ShAmt.getOperand(0);
2876 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2877 // amount. If there is an AND on the shift amount, we can bypass it if it
2878 // doesn't affect any of those bits.
2879 if (ShAmt.getOpcode() == ISD::AND &&
2880 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2881 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2883 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2884 // mask that covers the bits needed to represent all shift amounts.
2885 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2886 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
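// For example, with ShiftWidth == 64 ShMask is 0x3f, so an AND whose mask
// includes bits [5:0] can be looked through.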
2888 if (ShMask.isSubsetOf(AndMask)) {
2889 ShAmt = ShAmt.getOperand(0);
2890 } else {
2891 // SimplifyDemandedBits may have optimized the mask so try restoring any
2892 // bits that are known zero.
2893 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2894 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2895 return true;
2896 ShAmt = ShAmt.getOperand(0);
2900 if (ShAmt.getOpcode() == ISD::ADD &&
2901 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2902 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2903 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2904 // to avoid the ADD.
2905 if (Imm != 0 && Imm % ShiftWidth == 0) {
2906 ShAmt = ShAmt.getOperand(0);
2907 return true;
2909 } else if (ShAmt.getOpcode() == ISD::SUB &&
2910 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2911 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2912 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2913 // generate a NEG instead of a SUB of a constant.
2914 if (Imm != 0 && Imm % ShiftWidth == 0) {
2915 SDLoc DL(ShAmt);
2916 EVT VT = ShAmt.getValueType();
2917 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2918 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2919 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2920 ShAmt.getOperand(1));
2921 ShAmt = SDValue(Neg, 0);
2922 return true;
2924 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2925 // to generate a NOT instead of a SUB of a constant.
2926 if (Imm % ShiftWidth == ShiftWidth - 1) {
2927 SDLoc DL(ShAmt);
2928 EVT VT = ShAmt.getValueType();
2929 MachineSDNode *Not = CurDAG->getMachineNode(
2930 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2931 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
2932 ShAmt = SDValue(Not, 0);
2933 return true;
2937 return true;
2940 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2941 /// check for equality with 0. This function emits instructions that convert the
2942 /// seteq/setne into something that can be compared with 0.
2943 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2944 /// ISD::SETNE).
2945 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2946 SDValue &Val) {
2947 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2948 "Unexpected condition code!");
2950 // We're looking for a setcc.
2951 if (N->getOpcode() != ISD::SETCC)
2952 return false;
2954 // Must be an equality comparison.
2955 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2956 if (CCVal != ExpectedCCVal)
2957 return false;
2959 SDValue LHS = N->getOperand(0);
2960 SDValue RHS = N->getOperand(1);
2962 if (!LHS.getValueType().isScalarInteger())
2963 return false;
2965 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2966 if (isNullConstant(RHS)) {
2967 Val = LHS;
2968 return true;
2971 SDLoc DL(N);
2973 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2974 int64_t CVal = C->getSExtValue();
2975 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2976 // non-zero otherwise.
2977 if (CVal == -2048) {
2978 Val = SDValue(
2979 CurDAG->getMachineNode(
2980 RISCV::XORI, DL, N->getValueType(0), LHS,
2981 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
2983 return true;
2985 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2986 // LHS is equal to the RHS and non-zero otherwise.
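// For example, (seteq X, 100) becomes (ADDI X, -100), which is zero exactly
// when X == 100.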
2987 if (isInt<12>(CVal) || CVal == 2048) {
2988 Val = SDValue(
2989 CurDAG->getMachineNode(
2990 RISCV::ADDI, DL, N->getValueType(0), LHS,
2991 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
2993 return true;
2995 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
2996 Val = SDValue(
2997 CurDAG->getMachineNode(
2998 RISCV::BINVI, DL, N->getValueType(0), LHS,
2999 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3001 return true;
3005 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3006 // equal and a non-zero value if they aren't.
3007 Val = SDValue(
3008 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3009 return true;
3012 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3013 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3014 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3015 Val = N.getOperand(0);
3016 return true;
3019 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3020 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3021 return N;
3023 SDValue N0 = N.getOperand(0);
3024 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3025 N.getConstantOperandVal(1) == ShiftAmt &&
3026 N0.getConstantOperandVal(1) == ShiftAmt)
3027 return N0.getOperand(0);
3029 return N;
3032 MVT VT = N.getSimpleValueType();
3033 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3034 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3035 return true;
3038 return false;
3041 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3042 if (N.getOpcode() == ISD::AND) {
3043 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3044 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3045 Val = N.getOperand(0);
3046 return true;
3049 MVT VT = N.getSimpleValueType();
3050 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3051 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3052 Val = N;
3053 return true;
3056 return false;
3059 /// Look for various patterns that can be done with a SHL that can be folded
3060 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3061 /// SHXADD we are trying to match.
3062 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3063 SDValue &Val) {
3064 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3065 SDValue N0 = N.getOperand(0);
3067 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3068 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3069 isa<ConstantSDNode>(N0.getOperand(1))) {
3070 uint64_t Mask = N.getConstantOperandVal(1);
3071 unsigned C2 = N0.getConstantOperandVal(1);
3073 unsigned XLen = Subtarget->getXLen();
3074 if (LeftShift)
3075 Mask &= maskTrailingZeros<uint64_t>(C2);
3076 else
3077 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3079 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3080 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3081 // followed by a SHXADD with c3 for the X amount.
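// For example, for SH2ADD (ShAmt == 2): (and (shl y, 1), ~0x3) becomes
// (srli y, 1), which sh2add then shifts back left by 2 and adds.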
3082 if (isShiftedMask_64(Mask)) {
3083 unsigned Leading = XLen - llvm::bit_width(Mask);
3084 unsigned Trailing = llvm::countr_zero(Mask);
3085 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3086 SDLoc DL(N);
3087 EVT VT = N.getValueType();
3088 Val = SDValue(CurDAG->getMachineNode(
3089 RISCV::SRLI, DL, VT, N0.getOperand(0),
3090 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3092 return true;
3094 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3095 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3096 // followed by a SHXADD using c3 for the X amount.
3097 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3098 SDLoc DL(N);
3099 EVT VT = N.getValueType();
3100 Val = SDValue(
3101 CurDAG->getMachineNode(
3102 RISCV::SRLI, DL, VT, N0.getOperand(0),
3103 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3105 return true;
3108 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3109 isa<ConstantSDNode>(N0.getOperand(1))) {
3110 uint64_t Mask = N.getConstantOperandVal(1);
3111 unsigned C2 = N0.getConstantOperandVal(1);
3113 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3114 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3115 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3116 // the X amount.
3117 if (isShiftedMask_64(Mask)) {
3118 unsigned XLen = Subtarget->getXLen();
3119 unsigned Leading = XLen - llvm::bit_width(Mask);
3120 unsigned Trailing = llvm::countr_zero(Mask);
3121 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3122 SDLoc DL(N);
3123 EVT VT = N.getValueType();
3124 Val = SDValue(CurDAG->getMachineNode(
3125 RISCV::SRAI, DL, VT, N0.getOperand(0),
3126 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3128 Val = SDValue(CurDAG->getMachineNode(
3129 RISCV::SRLI, DL, VT, Val,
3130 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3132 return true;
3136 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3137 (LeftShift || N.getOpcode() == ISD::SRL) &&
3138 isa<ConstantSDNode>(N.getOperand(1))) {
3139 SDValue N0 = N.getOperand(0);
3140 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3141 isa<ConstantSDNode>(N0.getOperand(1))) {
3142 uint64_t Mask = N0.getConstantOperandVal(1);
3143 if (isShiftedMask_64(Mask)) {
3144 unsigned C1 = N.getConstantOperandVal(1);
3145 unsigned XLen = Subtarget->getXLen();
3146 unsigned Leading = XLen - llvm::bit_width(Mask);
3147 unsigned Trailing = llvm::countr_zero(Mask);
3148 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3149 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3150 if (LeftShift && Leading == 32 && Trailing > 0 &&
3151 (Trailing + C1) == ShAmt) {
3152 SDLoc DL(N);
3153 EVT VT = N.getValueType();
3154 Val = SDValue(CurDAG->getMachineNode(
3155 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3156 CurDAG->getTargetConstant(Trailing, DL, VT)),
3158 return true;
3160 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3161 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3162 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3163 (Trailing - C1) == ShAmt) {
3164 SDLoc DL(N);
3165 EVT VT = N.getValueType();
3166 Val = SDValue(CurDAG->getMachineNode(
3167 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3168 CurDAG->getTargetConstant(Trailing, DL, VT)),
3170 return true;
3176 return false;
3179 /// Look for various patterns that can be done with a SHL that can be folded
3180 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3181 /// SHXADD_UW we are trying to match.
3182 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3183 SDValue &Val) {
3184 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3185 N.hasOneUse()) {
3186 SDValue N0 = N.getOperand(0);
3187 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3188 N0.hasOneUse()) {
3189 uint64_t Mask = N.getConstantOperandVal(1);
3190 unsigned C2 = N0.getConstantOperandVal(1);
3192 Mask &= maskTrailingZeros<uint64_t>(C2);
3194 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3195 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3196 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
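// For example (illustrative constants), with SH2ADD_UW (ShAmt == 2), c2 == 4
// and c1 == 0x3FFFFFFF0 (30 leading zeros, 4 trailing zeros):
//   (and (shl y, 4), 0x3FFFFFFF0) == (shl (zext32 (shl y, 2)), 2)
// so we can emit (slli y, 2) and let sh2add.uw provide both the zero-extension
// from 32 bits and the final shift by 2.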
3197 if (isShiftedMask_64(Mask)) {
3198 unsigned Leading = llvm::countl_zero(Mask);
3199 unsigned Trailing = llvm::countr_zero(Mask);
3200 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3201 SDLoc DL(N);
3202 EVT VT = N.getValueType();
3203 Val = SDValue(CurDAG->getMachineNode(
3204 RISCV::SLLI, DL, VT, N0.getOperand(0),
3205 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3206 0);
3207 return true;
3213 return false;
3216 bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3217 if (!isa<ConstantSDNode>(N))
3218 return false;
3219 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3221 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
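// For example (illustrative, assuming Zbb's andn is available): for
// Imm == 0x12345FFF the original constant needs LUI+ADDI, but
// ~Imm == 0xFFFFFFFFEDCBA000 is a single LUI, so (and X, 0x12345FFF) can
// instead be selected as (andn X, (lui 0xEDCBA)).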
3222 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3223 return false;
3225 // Abandon this transform if the constant is needed elsewhere.
3226 for (const SDNode *U : N->users()) {
3227 if (!ISD::isBitwiseLogicOp(U->getOpcode()))
3228 return false;
3231 // For 64-bit constants, the materialization sequences get more complex,
3232 // so only use the inverted constant if it is strictly cheaper to build.
3233 if (!isInt<32>(Imm)) {
3234 int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3235 /*CompressionCost=*/true);
3236 int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget,
3237 /*CompressionCost=*/true);
3238 if (OrigImmCost <= NegImmCost)
3239 return false;
3242 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3243 return true;
3246 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3247 unsigned Bits,
3248 const TargetInstrInfo *TII) {
3249 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3251 if (!MCOpcode)
3252 return false;
3254 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3255 const uint64_t TSFlags = MCID.TSFlags;
3256 if (!RISCVII::hasSEWOp(TSFlags))
3257 return false;
3258 assert(RISCVII::hasVLOp(TSFlags));
3260 bool HasGlueOp = User->getGluedNode() != nullptr;
3261 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3262 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3263 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3264 unsigned VLIdx =
3265 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3266 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3268 if (UserOpNo == VLIdx)
3269 return false;
3271 auto NumDemandedBits =
3272 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3273 return NumDemandedBits && Bits >= *NumDemandedBits;
3276 // Return true if all users of this SDNode* only consume the lower \p Bits.
3277 // This can be used to form W instructions for add/sub/mul/shl even when the
3278 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3279 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
3280 // don't. The sext_inreg+add/sub/mul/shl will get selected, but that still
3281 // leaves the add/sub/mul/shl as a non-W instruction. By checking the users we
3282 // may be able to use a W instruction and CSE with the other instruction if
3283 // this has happened. We could try to detect that the CSE opportunity exists
3284 // before doing this, but that would be more complicated.
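// For example, an i64 (add X, Y) whose only users are an SW store (which only
// reads the low 32 bits of the stored value) and an ADDW can itself be
// selected as ADDW, which may then CSE with the ADDW produced for a
// sext_inreg user.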
3285 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3286 const unsigned Depth) const {
3287 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3288 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3289 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3290 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3291 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3292 isa<ConstantSDNode>(Node) || Depth != 0) &&
3293 "Unexpected opcode");
3295 if (Depth >= SelectionDAG::MaxRecursionDepth)
3296 return false;
3298 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3299 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3300 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3301 return false;
3303 for (SDUse &Use : Node->uses()) {
3304 SDNode *User = Use.getUser();
3305 // Users of this node should have already been instruction selected
3306 if (!User->isMachineOpcode())
3307 return false;
3309 // TODO: Add more opcodes?
3310 switch (User->getMachineOpcode()) {
3311 default:
3312 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
3313 break;
3314 return false;
3315 case RISCV::ADDW:
3316 case RISCV::ADDIW:
3317 case RISCV::SUBW:
3318 case RISCV::MULW:
3319 case RISCV::SLLW:
3320 case RISCV::SLLIW:
3321 case RISCV::SRAW:
3322 case RISCV::SRAIW:
3323 case RISCV::SRLW:
3324 case RISCV::SRLIW:
3325 case RISCV::DIVW:
3326 case RISCV::DIVUW:
3327 case RISCV::REMW:
3328 case RISCV::REMUW:
3329 case RISCV::ROLW:
3330 case RISCV::RORW:
3331 case RISCV::RORIW:
3332 case RISCV::CLZW:
3333 case RISCV::CTZW:
3334 case RISCV::CPOPW:
3335 case RISCV::SLLI_UW:
3336 case RISCV::FMV_W_X:
3337 case RISCV::FCVT_H_W:
3338 case RISCV::FCVT_H_W_INX:
3339 case RISCV::FCVT_H_WU:
3340 case RISCV::FCVT_H_WU_INX:
3341 case RISCV::FCVT_S_W:
3342 case RISCV::FCVT_S_W_INX:
3343 case RISCV::FCVT_S_WU:
3344 case RISCV::FCVT_S_WU_INX:
3345 case RISCV::FCVT_D_W:
3346 case RISCV::FCVT_D_W_INX:
3347 case RISCV::FCVT_D_WU:
3348 case RISCV::FCVT_D_WU_INX:
3349 case RISCV::TH_REVW:
3350 case RISCV::TH_SRRIW:
3351 if (Bits >= 32)
3352 break;
3353 return false;
3354 case RISCV::SLL:
3355 case RISCV::SRA:
3356 case RISCV::SRL:
3357 case RISCV::ROL:
3358 case RISCV::ROR:
3359 case RISCV::BSET:
3360 case RISCV::BCLR:
3361 case RISCV::BINV:
3362 // Shift amount operands only use log2(XLen) bits.
3363 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3364 break;
3365 return false;
3366 case RISCV::SLLI:
3367 // SLLI only uses the lower (XLen - ShAmt) bits.
3368 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3369 break;
3370 return false;
3371 case RISCV::ANDI:
3372 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3373 break;
3374 goto RecCheck;
3375 case RISCV::ORI: {
3376 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3377 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3378 break;
3379 [[fallthrough]];
3380 }
3381 case RISCV::AND:
3382 case RISCV::OR:
3383 case RISCV::XOR:
3384 case RISCV::XORI:
3385 case RISCV::ANDN:
3386 case RISCV::ORN:
3387 case RISCV::XNOR:
3388 case RISCV::SH1ADD:
3389 case RISCV::SH2ADD:
3390 case RISCV::SH3ADD:
3391 RecCheck:
3392 if (hasAllNBitUsers(User, Bits, Depth + 1))
3393 break;
3394 return false;
3395 case RISCV::SRLI: {
3396 unsigned ShAmt = User->getConstantOperandVal(1);
3397 // If we are shifting right by less than Bits, and users don't demand any
3398 // bits that were shifted into [Bits-1:0], then we can consider this as an
3399 // N-Bit user.
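// For example, with Bits == 32, a (srli X, 8) whose users only demand the low
// 24 bits of the shift result only reads bits [31:0] of X, so X still
// qualifies as having all 32-bit users.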
3400 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3401 break;
3402 return false;
3403 }
3404 case RISCV::SEXT_B:
3405 case RISCV::PACKH:
3406 if (Bits >= 8)
3407 break;
3408 return false;
3409 case RISCV::SEXT_H:
3410 case RISCV::FMV_H_X:
3411 case RISCV::ZEXT_H_RV32:
3412 case RISCV::ZEXT_H_RV64:
3413 case RISCV::PACKW:
3414 if (Bits >= 16)
3415 break;
3416 return false;
3417 case RISCV::PACK:
3418 if (Bits >= (Subtarget->getXLen() / 2))
3419 break;
3420 return false;
3421 case RISCV::ADD_UW:
3422 case RISCV::SH1ADD_UW:
3423 case RISCV::SH2ADD_UW:
3424 case RISCV::SH3ADD_UW:
3425 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3426 // 32 bits.
3427 if (Use.getOperandNo() == 0 && Bits >= 32)
3428 break;
3429 return false;
3430 case RISCV::SB:
3431 if (Use.getOperandNo() == 0 && Bits >= 8)
3432 break;
3433 return false;
3434 case RISCV::SH:
3435 if (Use.getOperandNo() == 0 && Bits >= 16)
3436 break;
3437 return false;
3438 case RISCV::SW:
3439 if (Use.getOperandNo() == 0 && Bits >= 32)
3440 break;
3441 return false;
3442 }
3443 }
3445 return true;
3448 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
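// For example, 96 == 12 << 3 gives Simm5 == 12 and Shl2 == 3, while 100 cannot
// be encoded (none of 100, 50, 25 fit in simm5, and 100 is not a multiple
// of 8).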
3449 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3450 SDValue &Shl2) {
3451 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3452 int64_t Offset = C->getSExtValue();
3453 unsigned Shift;
3454 for (Shift = 0; Shift < 4; Shift++)
3455 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3456 break;
3458 // Constant cannot be encoded.
3459 if (Shift == 4)
3460 return false;
3462 EVT Ty = N->getValueType(0);
3463 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3464 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3465 return true;
3468 return false;
3471 // Select VL as a 5-bit immediate or a value that will become a register. This
3472 // allows us to choose between VSETIVLI or VSETVLI later.
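// For example, a constant VL of 17 stays as the immediate 17 (a candidate for
// VSETIVLI), an all-ones constant or the X0 register becomes the VLMax
// sentinel, and any other value is passed through unchanged (typically
// becoming a register operand).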
3473 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3474 auto *C = dyn_cast<ConstantSDNode>(N);
3475 if (C && isUInt<5>(C->getZExtValue())) {
3476 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3477 N->getValueType(0));
3478 } else if (C && C->isAllOnes()) {
3479 // Treat all ones as VLMax.
3480 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3481 N->getValueType(0));
3482 } else if (isa<RegisterSDNode>(N) &&
3483 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3484 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3485 // as the register class. Convert X0 to a special immediate to pass the
3486 // MachineVerifier. This is recognized specially by the vsetvli insertion
3487 // pass.
3488 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3489 N->getValueType(0));
3490 } else {
3491 VL = N;
3494 return true;
3497 static SDValue findVSplat(SDValue N) {
3498 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3499 if (!N.getOperand(0).isUndef())
3500 return SDValue();
3501 N = N.getOperand(1);
3503 SDValue Splat = N;
3504 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3505 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3506 !Splat.getOperand(0).isUndef())
3507 return SDValue();
3508 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3509 return Splat;
3512 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3513 SDValue Splat = findVSplat(N);
3514 if (!Splat)
3515 return false;
3517 SplatVal = Splat.getOperand(1);
3518 return true;
3521 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3522 SelectionDAG &DAG,
3523 const RISCVSubtarget &Subtarget,
3524 std::function<bool(int64_t)> ValidateImm) {
3525 SDValue Splat = findVSplat(N);
3526 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3527 return false;
3529 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3530 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3531 "Unexpected splat operand type");
3533 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3534 // type is wider than the resulting vector element type: an implicit
3535 // truncation first takes place. Therefore, perform a manual
3536 // truncation/sign-extension in order to ignore any truncated bits and catch
3537 // any zero-extended immediate.
3538 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3539 // sign-extending to (XLenVT -1).
3540 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3542 int64_t SplatImm = SplatConst.getSExtValue();
3544 if (!ValidateImm(SplatImm))
3545 return false;
3547 SplatVal =
3548 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3549 return true;
3552 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3553 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3554 [](int64_t Imm) { return isInt<5>(Imm); });
3557 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
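// Accept exactly those immediates Imm for which Imm - 1 fits in a signed
// 5-bit field, i.e. Imm in [-15, 16].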
3558 return selectVSplatImmHelper(
3559 N, SplatVal, *CurDAG, *Subtarget,
3560 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3563 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3564 SDValue &SplatVal) {
3565 return selectVSplatImmHelper(
3566 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3567 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3571 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3572 SDValue &SplatVal) {
3573 return selectVSplatImmHelper(
3574 N, SplatVal, *CurDAG, *Subtarget,
3575 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3578 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3579 auto IsExtOrTrunc = [](SDValue N) {
3580 switch (N->getOpcode()) {
3581 case ISD::SIGN_EXTEND:
3582 case ISD::ZERO_EXTEND:
3583 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3584 // inactive elements will be undef.
3585 case RISCVISD::TRUNCATE_VECTOR_VL:
3586 case RISCVISD::VSEXT_VL:
3587 case RISCVISD::VZEXT_VL:
3588 return true;
3589 default:
3590 return false;
3594 // We can have multiple nested nodes, so unravel them all if needed.
3595 while (IsExtOrTrunc(N)) {
3596 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3597 return false;
3598 N = N->getOperand(0);
3601 return selectVSplat(N, SplatVal);
3604 bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
3605 // Allow bitcasts from XLenVT -> FP.
3606 if (N.getOpcode() == ISD::BITCAST &&
3607 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
3608 Imm = N.getOperand(0);
3609 return true;
3611 // Allow moves from XLenVT to FP.
3612 if (N.getOpcode() == RISCVISD::FMV_H_X ||
3613 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
3614 Imm = N.getOperand(0);
3615 return true;
3618 // Otherwise, look for FP constants that can be materialized with a scalar integer.
3619 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3620 if (!CFP)
3621 return false;
3622 const APFloat &APF = CFP->getValueAPF();
3623 // td can handle +0.0 already.
3624 if (APF.isPosZero())
3625 return false;
3627 MVT VT = CFP->getSimpleValueType(0);
3629 MVT XLenVT = Subtarget->getXLenVT();
3630 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3631 assert(APF.isNegZero() && "Unexpected constant.");
3632 return false;
3634 SDLoc DL(N);
3635 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3636 *Subtarget);
3637 return true;
3640 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3641 SDValue &Imm) {
3642 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3643 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3645 if (!isInt<5>(ImmVal))
3646 return false;
3648 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
3649 Subtarget->getXLenVT());
3650 return true;
3653 return false;
3656 // Try to remove sext.w if the input is a W instruction or can be made into
3657 // a W instruction cheaply.
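// For example, for
//   add a1, a2, a3
//   sext.w a0, a1      (i.e. addiw a0, a1, 0)
// we can emit addw a0, a2, a3 in place of the sext.w; the original add is
// left alone if it still has other users.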
3658 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3659 // Look for the sext.w pattern, addiw rd, rs1, 0.
3660 if (N->getMachineOpcode() != RISCV::ADDIW ||
3661 !isNullConstant(N->getOperand(1)))
3662 return false;
3664 SDValue N0 = N->getOperand(0);
3665 if (!N0.isMachineOpcode())
3666 return false;
3668 switch (N0.getMachineOpcode()) {
3669 default:
3670 break;
3671 case RISCV::ADD:
3672 case RISCV::ADDI:
3673 case RISCV::SUB:
3674 case RISCV::MUL:
3675 case RISCV::SLLI: {
3676 // Convert sext.w+add/sub/mul to their W instructions. This will create
3677 // a new independent instruction. This improves latency.
3678 unsigned Opc;
3679 switch (N0.getMachineOpcode()) {
3680 default:
3681 llvm_unreachable("Unexpected opcode!");
3682 case RISCV::ADD: Opc = RISCV::ADDW; break;
3683 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3684 case RISCV::SUB: Opc = RISCV::SUBW; break;
3685 case RISCV::MUL: Opc = RISCV::MULW; break;
3686 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3689 SDValue N00 = N0.getOperand(0);
3690 SDValue N01 = N0.getOperand(1);
3692 // Shift amount needs to be uimm5.
3693 if (N0.getMachineOpcode() == RISCV::SLLI &&
3694 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3695 break;
3697 SDNode *Result =
3698 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3699 N00, N01);
3700 ReplaceUses(N, Result);
3701 return true;
3703 case RISCV::ADDW:
3704 case RISCV::ADDIW:
3705 case RISCV::SUBW:
3706 case RISCV::MULW:
3707 case RISCV::SLLIW:
3708 case RISCV::PACKW:
3709 case RISCV::TH_MULAW:
3710 case RISCV::TH_MULAH:
3711 case RISCV::TH_MULSW:
3712 case RISCV::TH_MULSH:
3713 if (N0.getValueType() == MVT::i32)
3714 break;
3716 // Result is already sign-extended; just remove the sext.w.
3717 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3718 ReplaceUses(N, N0.getNode());
3719 return true;
3722 return false;
3725 // After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3726 // that's glued to the pseudo. This tries to look up the value that was copied
3727 // to V0.
3728 static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3729 // Check that we're using V0 as a mask register.
3730 if (!isa<RegisterSDNode>(MaskOp) ||
3731 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3732 return SDValue();
3734 // The glued user defines V0.
3735 const auto *Glued = GlueOp.getNode();
3737 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3738 return SDValue();
3740 // Check that we're defining V0 as a mask register.
3741 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3742 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3743 return SDValue();
3745 SDValue MaskSetter = Glued->getOperand(2);
3747 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3748 // from an extract_subvector or insert_subvector.
3749 if (MaskSetter->isMachineOpcode() &&
3750 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3751 MaskSetter = MaskSetter->getOperand(0);
3753 return MaskSetter;
3756 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3757 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3758 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3759 if (!MaskSetter)
3760 return false;
3762 const auto IsVMSet = [](unsigned Opc) {
3763 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3764 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3765 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3766 Opc == RISCV::PseudoVMSET_M_B8;
3769 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3770 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3771 // assume that it's all-ones? Same applies to its VL.
3772 return MaskSetter->isMachineOpcode() &&
3773 IsVMSet(MaskSetter.getMachineOpcode());
3776 // Return true if we can make sure mask of N is all-ones mask.
3777 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3778 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3779 N->getOperand(N->getNumOperands() - 1));
3782 static bool isImplicitDef(SDValue V) {
3783 if (!V.isMachineOpcode())
3784 return false;
3785 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3786 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3787 if (!isImplicitDef(V.getOperand(I)))
3788 return false;
3789 return true;
3791 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3794 static bool hasGPROut(unsigned Opc) {
3795 switch (RISCV::getRVVMCOpcode(Opc)) {
3796 case RISCV::VCPOP_M:
3797 case RISCV::VFIRST_M:
3798 return true;
3800 return false;
3803 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
3804 // corresponding "unmasked" pseudo versions. The mask we're interested in will
3805 // take the form of a V0 physical register operand, with a glued
3806 // register-setting instruction.
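// For example, a PseudoVADD_VV_M1_MASK whose V0 operand is defined (through
// the glued CopyToReg) by a PseudoVMSET_M_* can be rewritten to the unmasked
// PseudoVADD_VV_M1 form.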
3807 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3808 const RISCV::RISCVMaskedPseudoInfo *I =
3809 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3810 if (!I)
3811 return false;
3813 unsigned MaskOpIdx = I->MaskOpIdx;
3814 if (!usesAllOnesMask(N, MaskOpIdx))
3815 return false;
3817 // There are two classes of pseudos in the table - compares and
3818 // everything else. See the comment on RISCVMaskedPseudo for details.
3819 const unsigned Opc = I->UnmaskedPseudo;
3820 const MCInstrDesc &MCID = TII->get(Opc);
3821 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3822 #ifndef NDEBUG
3823 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3824 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3825 RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3826 "Masked and unmasked pseudos are inconsistent");
3827 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3828 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3829 #endif
3831 SmallVector<SDValue, 8> Ops;
3832 // Skip the passthru operand at index 0 if !UseTUPseudo and no GPR out.
3833 bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc);
3834 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
3835 // Skip the mask, and the Glue.
3836 SDValue Op = N->getOperand(I);
3837 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3838 continue;
3839 Ops.push_back(Op);
3842 // Transitively apply any node glued to our new node.
3843 const auto *Glued = N->getGluedNode();
3844 if (auto *TGlued = Glued->getGluedNode())
3845 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3847 MachineSDNode *Result =
3848 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3850 if (!N->memoperands_empty())
3851 CurDAG->setNodeMemRefs(Result, N->memoperands());
3853 Result->setFlags(N->getFlags());
3854 ReplaceUses(N, Result);
3856 return true;
3859 static bool IsVMerge(SDNode *N) {
3860 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3863 // Try to fold away VMERGE_VVM instructions into their true operands:
3865 // %true = PseudoVADD_VV ...
3866 // %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3867 // ->
3868 // %x = PseudoVADD_VV_MASK %false, ..., %mask
3870 // We can only fold if vmerge's passthru operand, vmerge's false operand and
3871 // %true's passthru operand (if it has one) are the same. This is because we
3872 // have to consolidate them into one passthru operand in the result.
3874 // If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3875 // mask is all ones.
3877 // The resulting VL is the minimum of the two VLs.
3879 // The resulting policy is the effective policy the vmerge would have had,
3880 // i.e. whether or not its passthru operand was implicit-def.
3881 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3882 SDValue Passthru, False, True, VL, Mask, Glue;
3883 assert(IsVMerge(N));
3884 Passthru = N->getOperand(0);
3885 False = N->getOperand(1);
3886 True = N->getOperand(2);
3887 Mask = N->getOperand(3);
3888 VL = N->getOperand(4);
3889 // We always have a glue node for the mask at v0.
3890 Glue = N->getOperand(N->getNumOperands() - 1);
3891 assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3892 assert(Glue.getValueType() == MVT::Glue);
3894 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3895 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3896 return false;
3898 // We require that either passthru and false are the same, or that passthru
3899 // is undefined.
3900 if (Passthru != False && !isImplicitDef(Passthru))
3901 return false;
3903 assert(True.getResNo() == 0 &&
3904 "Expect True is the first output of an instruction.");
3906 // N must be the only user of True.
3907 if (!True.hasOneUse())
3908 return false;
3910 if (!True.isMachineOpcode())
3911 return false;
3913 unsigned TrueOpc = True.getMachineOpcode();
3914 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3915 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3916 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3918 const RISCV::RISCVMaskedPseudoInfo *Info =
3919 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3920 if (!Info)
3921 return false;
3923 // If True has a passthru operand then it needs to be the same as vmerge's
3924 // False, since False will be used for the result's passthru operand.
3925 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3926 SDValue PassthruOpTrue = True->getOperand(0);
3927 if (False != PassthruOpTrue)
3928 return false;
3931 // Skip if True has side effect.
3932 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3933 return false;
3935 // The last operand of a masked instruction may be glued.
3936 bool HasGlueOp = True->getGluedNode() != nullptr;
3938 // The chain operand may exist either before the glued operands or in the last
3939 // position.
3940 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3941 bool HasChainOp =
3942 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3944 if (HasChainOp) {
3945 // Avoid creating cycles in the DAG. We must ensure that none of the other
3946 // operands depend on True through its Chain.
3947 SmallVector<const SDNode *, 4> LoopWorklist;
3948 SmallPtrSet<const SDNode *, 16> Visited;
3949 LoopWorklist.push_back(False.getNode());
3950 LoopWorklist.push_back(Mask.getNode());
3951 LoopWorklist.push_back(VL.getNode());
3952 LoopWorklist.push_back(Glue.getNode());
3953 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3954 return false;
3957 // The vector policy operand may be present for masked intrinsics
3958 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3959 unsigned TrueVLIndex =
3960 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3961 SDValue TrueVL = True.getOperand(TrueVLIndex);
3962 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3964 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3965 if (LHS == RHS)
3966 return LHS;
3967 if (isAllOnesConstant(LHS))
3968 return RHS;
3969 if (isAllOnesConstant(RHS))
3970 return LHS;
3971 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3972 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3973 if (!CLHS || !CRHS)
3974 return SDValue();
3975 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3978 // Because N and True must have the same passthru operand (or True's operand
3979 // is implicit_def), the "effective" body is the minimum of their VLs.
3980 SDValue OrigVL = VL;
3981 VL = GetMinVL(TrueVL, VL);
3982 if (!VL)
3983 return false;
3985 // Some operations produce different elementwise results depending on the
3986 // active elements, like viota.m or vredsum. This transformation is illegal
3987 // for these if we change the active elements (i.e. mask or VL).
3988 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
3989 if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
3990 return false;
3991 if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
3992 (Mask && !usesAllOnesMask(Mask, Glue)))
3993 return false;
3995 // Make sure it doesn't raise any observable fp exceptions, since changing the
3996 // active elements will affect how fflags is set.
3997 if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
3998 return false;
4000 SDLoc DL(N);
4002 unsigned MaskedOpc = Info->MaskedPseudo;
4003 #ifndef NDEBUG
4004 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
4005 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
4006 "Expected instructions with mask have policy operand.");
4007 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
4008 MCOI::TIED_TO) == 0 &&
4009 "Expected instructions with mask have a tied dest.");
4010 #endif
4012 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4013 // operand is undefined.
4015 // However, if the VL became smaller than what the vmerge had originally, then
4016 // elements past VL that were previously in the vmerge's body will have moved
4017 // to the tail. In that case we always need to use tail undisturbed to
4018 // preserve them.
4019 bool MergeVLShrunk = VL != OrigVL;
4020 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
4021 ? RISCVII::TAIL_AGNOSTIC
4022 : /*TUMU*/ 0;
4023 SDValue PolicyOp =
4024 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
4027 SmallVector<SDValue, 8> Ops;
4028 Ops.push_back(False);
4030 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
4031 const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4032 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
4034 Ops.push_back(Mask);
4036 // For an unmasked "VOp" with a rounding mode operand, the operand list looks
4037 // like (..., rm, vl) or (..., rm, vl, policy).
4038 // Its masked version is (..., vm, rm, vl, policy).
4039 // See the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td.
4040 if (HasRoundingMode)
4041 Ops.push_back(True->getOperand(TrueVLIndex - 1));
4043 Ops.append({VL, SEW, PolicyOp});
4045 // Result node should have chain operand of True.
4046 if (HasChainOp)
4047 Ops.push_back(True.getOperand(TrueChainOpIdx));
4049 // Add the glue for the CopyToReg of mask->v0.
4050 Ops.push_back(Glue);
4052 MachineSDNode *Result =
4053 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
4054 Result->setFlags(True->getFlags());
4056 if (!cast<MachineSDNode>(True)->memoperands_empty())
4057 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
4059 // Replace vmerge.vvm node by Result.
4060 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
4062 // Replace the other values of True, e.g. the chain and VL outputs.
4063 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4064 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
4066 return true;
4069 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4070 bool MadeChange = false;
4071 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4073 while (Position != CurDAG->allnodes_begin()) {
4074 SDNode *N = &*--Position;
4075 if (N->use_empty() || !N->isMachineOpcode())
4076 continue;
4078 if (IsVMerge(N))
4079 MadeChange |= performCombineVMergeAndVOps(N);
4081 return MadeChange;
4084 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
4085 /// issues with MachineCSE not being able to CSE expressions with
4086 /// IMPLICIT_DEF operands while preserving the semantic intent. See
4087 /// pr64282 for context. Note that this transform is the last one
4088 /// performed during DAG-to-DAG instruction selection.
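/// For example, two otherwise-identical vector pseudos whose tied passthru
/// operands come from different IMPLICIT_DEF nodes would not CSE; giving both
/// a NoRegister passthru makes them identical for MachineCSE.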
4089 bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4090 bool MadeChange = false;
4091 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4093 while (Position != CurDAG->allnodes_begin()) {
4094 SDNode *N = &*--Position;
4095 if (N->use_empty() || !N->isMachineOpcode())
4096 continue;
4098 const unsigned Opc = N->getMachineOpcode();
4099 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4100 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4101 !isImplicitDef(N->getOperand(0)))
4102 continue;
4104 SmallVector<SDValue> Ops;
4105 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4106 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4107 SDValue Op = N->getOperand(I);
4108 Ops.push_back(Op);
4111 MachineSDNode *Result =
4112 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4113 Result->setFlags(N->getFlags());
4114 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4115 ReplaceUses(N, Result);
4116 MadeChange = true;
4118 return MadeChange;
4122 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
4123 // for instruction scheduling.
4124 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4125 CodeGenOptLevel OptLevel) {
4126 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4129 char RISCVDAGToDAGISelLegacy::ID = 0;
4131 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4132 CodeGenOptLevel OptLevel)
4133 : SelectionDAGISelLegacy(
4134 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4136 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)