//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));

namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Src = N->getOperand(0);
      if (VT.isInteger())
        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
                              N->getOperand(0));
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create temporary stack for each expanding node.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
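// Illustrative sketch (hypothetical registers and types, not from the original
// file): on RV32, (splat_vector_split_i64_vl passthru, lo, hi, vl) producing
// an nxv1i64 value becomes roughly
//   sw lo, 0(tmp); sw hi, 4(tmp)   ; spill the two 32-bit halves to the slot
//   vlse64.v vd, (tmp), zero       ; stride-0 load re-splats the 64-bit value
// where tmp is the 8-byte stack temporary created above.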
void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs. Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
  }

  CurDAG->setRoot(Dummy.getValue());

  MadeChange |= doPeepholeMergeVVMFold();

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to workaround
  // an optimization deficiency in MachineCSE. This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (const RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
    SDNode *Result = nullptr;
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return SrcReg;
}
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bit 31 and 63 are set.
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    RISCVMatInt::InstSeq SeqLo =
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}
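// Worked example (assumed RV64, register names hypothetical): for
// Imm = 0x12345678, generateInstSeq typically yields a two-instruction
// sequence, which selectImmSeq emits as
//   LUI   a0, 0x12345
//   ADDIW a0, a0, 0x678
// With -riscv-use-rematerializable-movimm the same constant would instead stay
// as a single PseudoMovImm so it can be rematerialized after register
// allocation.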
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }

  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVII::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  if (Glue)
    Operands.push_back(Glue);
}
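// For reference, the operand list built up here for a masked strided load
// pseudo ends up as (sketch):
//   {passthru (pushed by the caller), base, [stride/index], v0-mask, vl,
//    sew, [policy], chain, [glue]}
// which is the operand order the RVV load/store pseudos expect.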
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
  ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  // Number of element = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  // Number of element = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}
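// Example of the AVL handling above (hypothetical operands): a constant AVL of
// 8 with e32/m2 selects PseudoVSETIVLI with the 5-bit immediate 8 and a vtype
// encoding e32, m2, ta, ma, because 8 fits in uimm5; vsetvlimax or an all-ones
// AVL instead selects PseudoVSETVLIX0 with x0 as the AVL register.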
bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR:  BinOpc = RISCV::ORI;  break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp = CurDAG->getMachineNode(
      BinOpc, DL, VT, Shift.getOperand(0),
      CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}
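// Illustrative transform (constants chosen for the example, not from the
// source): (and (shl X, 4), 0x3FF0) cannot use ANDI directly because 0x3FF0 is
// not a simm12, but after reordering it becomes
//   ANDI t0, X, 0x3FF
//   SLLI t0, t0, 4
// i.e. (shl (and X, 0x3FF), 4), which needs no constant materialization.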
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  // Only supported with XTHeadBb at the moment.
  if (!Subtarget->hasVendorXTHeadBb())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
                             MVT VT) {
    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 < C2
  //        -> (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount can not be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    const unsigned Lsb = RightShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  return false;
}
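// Example of the XTHeadBb signed bitfield extract above (hypothetical shift
// amounts, XLen = 64): (sra (shl X, 48), 56) sign-extends bits [15:8] of X, so
// it is selected as a single
//   th.ext rd, rs, 15, 8
// instead of a shift pair.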
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
      CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);
  return true;
}
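// Offset encoding sketch (example values): an offset of 96 is representable
// because 96 == 12 << 3 with 12 fitting in a signed 5-bit field, so Shift = 3
// and the instruction encodes (imm5 = 12, imm2 = 3); an offset of 17 is
// rejected since no shift in [0,3] makes it a multiple of 2^shift with a simm5
// quotient.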
void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  unsigned IntNo = Node->getConstantOperandVal(1);

  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected vsetvli intrinsic");

  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
                                      Node->getOperand(4), Node->getOperand(5),
                                      Node->getOperand(8), SEWOp,
                                      Node->getOperand(0)};

  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
  switch (LMulSDNode->getSExtValue()) {
  case -3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
                                                  : RISCV::PseudoVC_I_SE_MF8;
    break;
  case -2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
                                                  : RISCV::PseudoVC_I_SE_MF4;
    break;
  case -1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
                                                  : RISCV::PseudoVC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
                                                  : RISCV::PseudoVC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
                                                  : RISCV::PseudoVC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
                                                  : RISCV::PseudoVC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
                                                  : RISCV::PseudoVC_I_SE_M8;
    break;
  }

  ReplaceNode(Node, CurDAG->getMachineNode(
                        Opcode, DL, Node->getSimpleValueType(0), Operands));
}
static unsigned getSegInstNF(unsigned Intrinsic) {
#define INST_NF_CASE(NAME, NF)                                                 \
  case Intrinsic::riscv_##NAME##NF:                                            \
    return NF;
#define INST_NF_CASE_MASK(NAME, NF)                                            \
  case Intrinsic::riscv_##NAME##NF##_mask:                                     \
    return NF;
#define INST_NF_CASE_FF(NAME, NF)                                              \
  case Intrinsic::riscv_##NAME##NF##ff:                                        \
    return NF;
#define INST_NF_CASE_FF_MASK(NAME, NF)                                         \
  case Intrinsic::riscv_##NAME##NF##ff_mask:                                   \
    return NF;
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME)                                \
  MACRO_NAME(NAME, 2)                                                          \
  MACRO_NAME(NAME, 3)                                                          \
  MACRO_NAME(NAME, 4)                                                          \
  MACRO_NAME(NAME, 5)                                                          \
  MACRO_NAME(NAME, 6)                                                          \
  MACRO_NAME(NAME, 7)                                                          \
  MACRO_NAME(NAME, 8)
#define INST_ALL_NF_CASE(NAME)                                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME)                                    \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
#define INST_ALL_NF_CASE_WITH_FF(NAME)                                         \
  INST_ALL_NF_CASE(NAME)                                                       \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME)                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}
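// Macro expansion sketch: INST_ALL_NF_CASE(vlsseg) expands to cases such as
//   case Intrinsic::riscv_vlsseg2: return 2;
//   ...
//   case Intrinsic::riscv_vlsseg8_mask: return 8;
// so the helper recovers the NF field directly from the intrinsic ID.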
void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();

  switch (Opcode) {
  case ISD::Constant: {
    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If only the lower 8 bits are used, try to convert this to a simm6 by
    // sign-extending bit 7. This is neutral without the C extension, and
    // allows C.LI to be used if C is present.
    if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
      Imm = SignExtend64<8>(Imm);
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32-bits are not used try to convert this into a simm32 by
    // sign extending bit 32.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
    return;
  }
  case ISD::ConstantFP: {
    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();

    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
    SDValue Imm;
    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
    // create an integer immediate.
    if (APF.isPosZero() || NegZeroF64)
      Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
    else
      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                      *Subtarget);

    bool HasZdinx = Subtarget->hasStdExtZdinx();
    bool Is64Bit = Subtarget->is64Bit();
    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected size");
    case MVT::bf16:
      assert(Subtarget->hasStdExtZfbfmin());
      Opc = RISCV::FMV_H_X;
      break;
    case MVT::f16:
      Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
      break;
    case MVT::f32:
      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
      break;
    case MVT::f64:
      // For RV32, we can't move from a GPR, we need to convert instead. This
      // should only happen for +0.0 and -0.0.
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
      if (Is64Bit)
        Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
      else
        Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
      break;
    }

    SDNode *Res;
    if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
    } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
    } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
      Res = CurDAG->getMachineNode(
          Opc, DL, VT, Imm,
          CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
    else
      Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);

    // For f64 -0.0, we need to insert a fneg.d idiom.
    if (NegZeroF64) {
      Opc = RISCV::FSGNJN_D;
      if (HasZdinx)
        Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
      Res =
          CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
    }

    ReplaceNode(Node, Res);
    return;
  }
  case RISCVISD::BuildGPRPair:
  case RISCVISD::BuildPairF64: {
    if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
      break;

    assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
           "BuildPairF64 only handled here on rv32i_zdinx");

    SDValue Ops[] = {
        CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
        Node->getOperand(0),
        CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
        Node->getOperand(1),
        CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

    SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
    ReplaceNode(Node, N);
    return;
  }
  case RISCVISD::SplitGPRPair:
  case RISCVISD::SplitF64: {
    if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
      assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
             "SplitF64 only handled here on rv32i_zdinx");

      if (!SDValue(Node, 0).use_empty()) {
        SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
                                                    Node->getValueType(0),
                                                    Node->getOperand(0));
        ReplaceUses(SDValue(Node, 0), Lo);
      }

      if (!SDValue(Node, 1).use_empty()) {
        SDValue Hi = CurDAG->getTargetExtractSubreg(
            RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
        ReplaceUses(SDValue(Node, 1), Hi);
      }

      CurDAG->RemoveDeadNode(Node);
      return;
    }

    assert(Opcode != RISCVISD::SplitGPRPair &&
           "SplitGPRPair should already be handled");

    if (!Subtarget->hasStdExtZfa())
      break;
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");

    // With Zfa, lower to fmv.x.w and fmvh.x.d.
    if (!SDValue(Node, 0).use_empty()) {
      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
    }
    if (!SDValue(Node, 1).use_empty()) {
      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
    }

    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (TrailingZeros > 0 && LeadingZeros == 32) {
        // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
        // where C2 has 32 leading zeros and C3 trailing zeros.
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
      if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
          XLen - LeadingZeros > 11 && LeadingZeros != 32) {
        // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
        // where C2 has C4 leading zeros and no trailing zeros.
        // This is profitable if the "and" was to be lowered to
        // (srli (slli X, C4), C4) and not (andi X, C2).
        // For "LeadingZeros == 32":
        // - with Zba it's just (slli.uw X, C)
        // - without Zba a tablegen pattern applies the very same
        //   transform as we would have done here
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(LeadingZeros, DL, VT));
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
            CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SRLI);
        return;
      }
    }
    break;
  }
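  // Example for the first transform above (hypothetical constants, RV64): with
  // C2 = 0x00000000FFFFFF00 (32 leading zeros, 8 trailing zeros) and C = 3,
  // (shl (and X, C2), 3) becomes
  //   SRLIW t0, X, 8
  //   SLLI  t0, t0, 11
  // avoiding materialization of the mask constant.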
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = llvm::countr_one(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
    if (TrailingOnes == 32) {
      SDNode *SRLI = CurDAG->getMachineNode(
          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLI);
      return;
    }

    // Only do the remaining transforms if the AND has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI = CurDAG->getMachineNode(
          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }

    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    if (Subtarget->hasVendorXTHeadBb()) {
      SDNode *THEXTU = CurDAG->getMachineNode(
          RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
          CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
          CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, THEXTU);
      return;
    }

    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    if (trySignedBitfieldExtract(Node))
      return;

    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16), (XLen-16) + C)
    // And      (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
))
1190 auto *N1C
= dyn_cast
<ConstantSDNode
>(Node
->getOperand(1));
1194 SDValue N0
= Node
->getOperand(0);
1196 auto tryUnsignedBitfieldExtract
= [&](SDNode
*Node
, SDLoc DL
, MVT VT
,
1197 SDValue X
, unsigned Msb
,
1199 if (!Subtarget
->hasVendorXTHeadBb())
1202 SDNode
*TH_EXTU
= CurDAG
->getMachineNode(
1203 RISCV::TH_EXTU
, DL
, VT
, X
, CurDAG
->getTargetConstant(Msb
, DL
, VT
),
1204 CurDAG
->getTargetConstant(Lsb
, DL
, VT
));
1205 ReplaceNode(Node
, TH_EXTU
);
1209 bool LeftShift
= N0
.getOpcode() == ISD::SHL
;
1210 if (LeftShift
|| N0
.getOpcode() == ISD::SRL
) {
1211 auto *C
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(1));
1214 unsigned C2
= C
->getZExtValue();
1215 unsigned XLen
= Subtarget
->getXLen();
1216 assert((C2
> 0 && C2
< XLen
) && "Unexpected shift amount!");
1218 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1219 // shift pair might offer more compression opportunities.
1220 // TODO: We could check for C extension here, but we don't have many lit
1221 // tests with the C extension enabled so not checking gets better
1223 // TODO: What if ANDI faster than shift?
1224 bool IsCANDI
= isInt
<6>(N1C
->getSExtValue());
1226 uint64_t C1
= N1C
->getZExtValue();
1228 // Clear irrelevant bits in the mask.
1230 C1
&= maskTrailingZeros
<uint64_t>(C2
);
1232 C1
&= maskTrailingOnes
<uint64_t>(XLen
- C2
);
1234 // Some transforms should only be done if the shift has a single use or
1235 // the AND would become (srli (slli X, 32), 32)
1236 bool OneUseOrZExtW
= N0
.hasOneUse() || C1
== UINT64_C(0xFFFFFFFF);
1238 SDValue X
= N0
.getOperand(0);
1240 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1241 // with c3 leading zeros.
1242 if (!LeftShift
&& isMask_64(C1
)) {
1243 unsigned Leading
= XLen
- llvm::bit_width(C1
);
1245 // If the number of leading zeros is C2+32 this can be SRLIW.
1246 if (C2
+ 32 == Leading
) {
1247 SDNode
*SRLIW
= CurDAG
->getMachineNode(
1248 RISCV::SRLIW
, DL
, VT
, X
, CurDAG
->getTargetConstant(C2
, DL
, VT
));
1249 ReplaceNode(Node
, SRLIW
);
1253 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1254 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1256 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1257 // legalized and goes through DAG combine.
1258 if (C2
>= 32 && (Leading
- C2
) == 1 && N0
.hasOneUse() &&
1259 X
.getOpcode() == ISD::SIGN_EXTEND_INREG
&&
1260 cast
<VTSDNode
>(X
.getOperand(1))->getVT() == MVT::i32
) {
1262 CurDAG
->getMachineNode(RISCV::SRAIW
, DL
, VT
, X
.getOperand(0),
1263 CurDAG
->getTargetConstant(31, DL
, VT
));
1264 SDNode
*SRLIW
= CurDAG
->getMachineNode(
1265 RISCV::SRLIW
, DL
, VT
, SDValue(SRAIW
, 0),
1266 CurDAG
->getTargetConstant(Leading
- 32, DL
, VT
));
1267 ReplaceNode(Node
, SRLIW
);
1271 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1273 // Transform (and (srl x, C2), C1)
1274 // -> (<bfextract> x, msb, lsb)
1276 // Make sure to keep this below the SRLIW cases, as we always want to
1277 // prefer the more common instruction.
1278 const unsigned Msb
= llvm::bit_width(C1
) + C2
- 1;
1279 const unsigned Lsb
= C2
;
1280 if (tryUnsignedBitfieldExtract(Node
, DL
, VT
, X
, Msb
, Lsb
))
1283 // (srli (slli x, c3-c2), c3).
1284 // Skip if we could use (zext.w (sraiw X, C2)).
1285 bool Skip
= Subtarget
->hasStdExtZba() && Leading
== 32 &&
1286 X
.getOpcode() == ISD::SIGN_EXTEND_INREG
&&
1287 cast
<VTSDNode
>(X
.getOperand(1))->getVT() == MVT::i32
;
1288 // Also Skip if we can use bexti or th.tst.
1289 Skip
|= HasBitTest
&& Leading
== XLen
- 1;
1290 if (OneUseOrZExtW
&& !Skip
) {
1291 SDNode
*SLLI
= CurDAG
->getMachineNode(
1292 RISCV::SLLI
, DL
, VT
, X
,
1293 CurDAG
->getTargetConstant(Leading
- C2
, DL
, VT
));
1294 SDNode
*SRLI
= CurDAG
->getMachineNode(
1295 RISCV::SRLI
, DL
, VT
, SDValue(SLLI
, 0),
1296 CurDAG
->getTargetConstant(Leading
, DL
, VT
));
1297 ReplaceNode(Node
, SRLI
);
      // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
      // shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // (srli (slli c2+c3), c3)
          if (OneUseOrZExtW && !IsCANDI) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(C2 + Leading, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }
      // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
      // shifted mask with c2 leading zeros and c3 trailing zeros.
      if (!LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
            !IsCANDI) {
          unsigned SrliOpc = RISCV::SRLI;
          // If the input is zexti32 we should use SRLIW.
          if (X.getOpcode() == ISD::AND &&
              isa<ConstantSDNode>(X.getOperand(1)) &&
              X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
            SrliOpc = RISCV::SRLIW;
            X = X.getOperand(0);
          }
          SDNode *SRLI = CurDAG->getMachineNode(
              SrliOpc, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
        if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
            OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }

        // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
        if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
            OneUseOrZExtW && Subtarget->hasStdExtZba()) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI_UW = CurDAG->getMachineNode(
              RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }
      }
      // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
      // shifted mask with no leading zeros and c3 trailing zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
        if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }

        // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
        if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
            Subtarget->hasStdExtZba()) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI_UW = CurDAG->getMachineNode(
              RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }
      }
    }
    const uint64_t C1 = N1C->getZExtValue();

    if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.hasOneUse()) {
      unsigned C2 = N0.getConstantOperandVal(1);
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      SDValue X = N0.getOperand(0);

      // Prefer SRAIW + ANDI when possible.
      bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
                  X.getOpcode() == ISD::SHL &&
                  isa<ConstantSDNode>(X.getOperand(1)) &&
                  X.getConstantOperandVal(1) == 32;
      // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
      // mask with c3 leading zeros and c2 is larger than c3.
      if (isMask_64(C1) && !Skip) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 > Leading) {
          SDNode *SRAI = CurDAG->getMachineNode(
              RISCV::SRAI, DL, VT, X,
              CurDAG->getTargetConstant(C2 - Leading, DL, VT));
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
              CurDAG->getTargetConstant(Leading, DL, VT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
      if (isShiftedMask_64(C1) && !Skip) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (C2 > Leading && Leading > 0 && Trailing > 0) {
          SDNode *SRAI = CurDAG->getMachineNode(
              RISCV::SRAI, DL, VT, N0.getOperand(0),
              CurDAG->getTargetConstant(C2 - Leading, DL, VT));
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
              CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }
    }

    // If C1 masks off the upper bits only (but can't be formed as an
    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
    // available.
    // Transform (and x, C1)
    //        -> (<bfextract> x, msb, lsb)
    if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
      const unsigned Msb = llvm::bit_width(C1) - 1;
      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
        return;
    }

    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full product
    // fits in XLen bits. We can shift X left by the number of leading zeros in
    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
    // product has XLen trailing zeros, putting it in the output of MULHU. This
    // can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = N0.getConstantOperandVal(1);

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
    // multiple users or the constant is a simm12. This prevents inserting a
    // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
    // make it more costly to materialize. Otherwise, using a SLLI might allow
    // it to be compressed.
    bool IsANDIOrZExt =
        isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
    // With XTHeadBb, we can use TH.EXTU.
    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;
    // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
    // the constant is a simm32.
    bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
    // With XTHeadBb, we can use TH.EXTU.
    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - llvm::bit_width(C2);

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - llvm::bit_width(C1)))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
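  // Worked example (hypothetical constants, RV64): for (mul (and X, 0xFF), C1),
  // assuming the AND has a single use and C1 is not a simm12, the code above
  // emits roughly
  //   SLLI  t0, X, 56           ; lzcnt(0xFF) == 56
  //   MULHU rd, t0, (C1 << 8)   ; constant pre-shifted by XLen - 56 == 8
  // so the low byte of X is multiplied without materializing the 0xFF mask,
  // provided C1 << 8 does not overflow 64 bits.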
  case ISD::LOAD: {
    if (tryIndexedLoad(Node))
      return;

    if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
      // We match post-incrementing load here
      LoadSDNode *Load = cast<LoadSDNode>(Node);
      if (Load->getAddressingMode() != ISD::POST_INC)
        break;

      SDValue Chain = Node->getOperand(0);
      SDValue Base = Node->getOperand(1);
      SDValue Offset = Node->getOperand(2);

      bool Simm12 = false;
      bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;

      if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
        int ConstantVal = ConstantOffset->getSExtValue();
        Simm12 = isInt<12>(ConstantVal);
        if (Simm12)
          Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
                                             Offset.getValueType());
      }

      unsigned Opcode = 0;
      switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
      case MVT::i8:
        if (Simm12 && SignExtend)
          Opcode = RISCV::CV_LB_ri_inc;
        else if (Simm12 && !SignExtend)
          Opcode = RISCV::CV_LBU_ri_inc;
        else if (!Simm12 && SignExtend)
          Opcode = RISCV::CV_LB_rr_inc;
        else
          Opcode = RISCV::CV_LBU_rr_inc;
        break;
      case MVT::i16:
        if (Simm12 && SignExtend)
          Opcode = RISCV::CV_LH_ri_inc;
        else if (Simm12 && !SignExtend)
          Opcode = RISCV::CV_LHU_ri_inc;
        else if (!Simm12 && SignExtend)
          Opcode = RISCV::CV_LH_rr_inc;
        else
          Opcode = RISCV::CV_LHU_rr_inc;
        break;
      case MVT::i32:
        if (Simm12)
          Opcode = RISCV::CV_LW_ri_inc;
        else
          Opcode = RISCV::CV_LW_rr_inc;
        break;
      default:
        break;
      }
      if (!Opcode)
        break;

      ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
                                               Chain.getSimpleValueType(), Base,
                                               Offset, Chain));
      return;
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
      // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpConstant = false;
      bool IsCmpMinimum = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      int64_t CVal = 0;
      MVT Src1VT = Src1.getSimpleValueType();
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        IsCmpConstant = true;
        CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpMinimum = true;
        } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
                                              Src1VT.getScalarSizeInBits())
                                      .getSExtValue()) {
          IsCmpMinimum = true;
        }
      }
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix                 \
                             : RISCV::PseudoVMSGT_VX_##suffix;                 \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2)
        CASE_VMSLT_OPCODES(LMUL_1, M1)
        CASE_VMSLT_OPCODES(LMUL_2, M2)
        CASE_VMSLT_OPCODES(LMUL_4, M4)
        CASE_VMSLT_OPCODES(LMUL_8, M8)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)                            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix;                               \
    break;
        CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
        CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
        CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
        CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
        CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
        CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
        CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
#undef CASE_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsge(u) with minimum value, expand it to vmset.
      if (IsCmpMinimum) {
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
        return;
      }

      if (IsCmpConstant) {
        SDValue Imm =
            selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);

        ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
                                                 {Src1, Imm, VL, SEW}));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpConstant = false;
      bool IsCmpMinimum = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      MVT Src1VT = Src1.getSimpleValueType();
      int64_t CVal = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        IsCmpConstant = true;
        CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpMinimum = true;
        } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
                                              Src1VT.getScalarSizeInBits())
                                      .getSExtValue()) {
          IsCmpMinimum = true;
        }
      }
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode, VMSGTMaskOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSGT_VX_##suffix##_MASK;      \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2)
        CASE_VMSLT_OPCODES(LMUL_1, M1)
        CASE_VMSLT_OPCODES(LMUL_2, M2)
        CASE_VMSLT_OPCODES(LMUL_4, M4)
        CASE_VMSLT_OPCODES(LMUL_8, M8)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
      if (IsCmpMinimum) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0.
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      if (IsCmpConstant) {
        SDValue Imm =
            selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);

        ReplaceNode(Node, CurDAG->getMachineNode(
                              VMSGTMaskOpcode, DL, VT,
                              {MaskedOff, Src1, Imm, V0, VL, SEW, Glue}));
        return;
      }

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      // vmxor.mm vd, vd, v0 is used to update active value.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
      // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      // The riscv_vlm intrinsic are always tail agnostic and no passthru
      // operand at the IR level. In pseudos, they have both policy and
      // passthru operand. The passthru operand is needed to track the
      // "tail undefined" state, and the policy is there just for
      // consistency - it will always be "don't care" for the
      // unmasked ones.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (HasPassthruOperand)
        Operands.push_back(Node->getOperand(CurOp++));
      else {
        // We eagerly lower to implicit_def (instead of undef), as we
        // otherwise fail to select nodes such as: nxv1i1 = undef
        SDNode *Passthru =
            CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
        Operands.push_back(SDValue(Passthru, 0));
      }

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 7> Operands;
      Operands.push_back(Node->getOperand(CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load = CurDAG->getMachineNode(
          P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
    case Intrinsic::riscv_vssseg8_mask: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vsoxseg2:
    case Intrinsic::riscv_vsoxseg3:
    case Intrinsic::riscv_vsoxseg4:
    case Intrinsic::riscv_vsoxseg5:
    case Intrinsic::riscv_vsoxseg6:
    case Intrinsic::riscv_vsoxseg7:
    case Intrinsic::riscv_vsoxseg8:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2:
    case Intrinsic::riscv_vsuxseg3:
    case Intrinsic::riscv_vsuxseg4:
    case Intrinsic::riscv_vsuxseg5:
    case Intrinsic::riscv_vsuxseg6:
    case Intrinsic::riscv_vsuxseg7:
    case Intrinsic::riscv_vsuxseg8:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxseg2_mask:
    case Intrinsic::riscv_vsoxseg3_mask:
    case Intrinsic::riscv_vsoxseg4_mask:
    case Intrinsic::riscv_vsoxseg5_mask:
    case Intrinsic::riscv_vsoxseg6_mask:
    case Intrinsic::riscv_vsoxseg7_mask:
    case Intrinsic::riscv_vsoxseg8_mask:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2_mask:
    case Intrinsic::riscv_vsuxseg3_mask:
    case Intrinsic::riscv_vsuxseg4_mask:
    case Intrinsic::riscv_vsuxseg5_mask:
    case Intrinsic::riscv_vsuxseg6_mask:
    case Intrinsic::riscv_vsuxseg7_mask:
    case Intrinsic::riscv_vsuxseg8_mask:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxei:
    case Intrinsic::riscv_vsoxei_mask:
    case Intrinsic::riscv_vsuxei:
    case Intrinsic::riscv_vsuxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
                      IntNo == Intrinsic::riscv_vsuxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
                       IntNo == Intrinsic::riscv_vsoxei_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/false, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW,
          static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    case Intrinsic::riscv_vsm:
    case Intrinsic::riscv_vse:
    case Intrinsic::riscv_vse_mask:
    case Intrinsic::riscv_vsse:
    case Intrinsic::riscv_vsse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
                      IntNo == Intrinsic::riscv_vsse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
          IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    case Intrinsic::riscv_sf_vc_x_se:
    case Intrinsic::riscv_sf_vc_i_se:
      selectSF_VC_X_SE(Node);
      return;
    }
    break;
  }
  case ISD::BITCAST: {
    MVT SrcVT = Node->getOperand(0).getSimpleValueType();
    // Just drop bitcasts between vectors if both are fixed or both are
    // scalable.
    if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
        (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR:
  case RISCVISD::TUPLE_INSERT: {
    SDValue V = Node->getOperand(0);
    SDValue SubV = Node->getOperand(1);
    auto Idx = Node->getConstantOperandVal(2);
    MVT SubVecVT = SubV.getSimpleValueType();

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = SubVecVT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (SubVecVT.isFixedLengthVector()) {
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
      TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
      [[maybe_unused]] bool ExactlyVecRegSized =
          Subtarget->expandVScale(SubVecVT.getSizeInBits())
              .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
      assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
                               .getKnownMinValue()));
      assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
    }
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = TLI.getContainerForFixedLengthVector(VT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            ContainerVT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // insert which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
    [[maybe_unused]] bool IsSubVecPartReg =
        SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
        SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
        SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
    assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
            V.isUndef()) &&
           "Expecting lowering to have created legal INSERT_SUBVECTORs when "
           "the subvector is smaller than a full-sized register");

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID =
          RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                               DL, VT, SubV, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
    ReplaceNode(Node, Insert.getNode());
    return;
  }
  case ISD::EXTRACT_SUBVECTOR:
  case RISCVISD::TUPLE_EXTRACT: {
    SDValue V = Node->getOperand(0);
    auto Idx = Node->getConstantOperandVal(1);
    MVT InVT = V.getSimpleValueType();

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = VT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (VT.isFixedLengthVector()) {
      assert(Idx == 0);
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
    }
    if (InVT.isFixedLengthVector())
      InVT = TLI.getContainerForFixedLengthVector(InVT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            InVT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // extract which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode =
          CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
    ReplaceNode(Node, Extract.getNode());
    return;
  }
  case RISCVISD::VMV_S_X_VL:
  case RISCVISD::VFMV_S_F_VL:
  case RISCVISD::VMV_V_X_VL:
  case RISCVISD::VFMV_V_F_VL: {
    // Try to match splat of a scalar load to a strided load with stride of x0.
    bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                        Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
    if (!Node->getOperand(0).isUndef())
      break;
    SDValue Src = Node->getOperand(1);
    auto *Ld = dyn_cast<LoadSDNode>(Src);
    // Can't fold load update node because the second
    // output is used so that load update node can't be removed.
    if (!Ld || Ld->isIndexed())
      break;
    EVT MemVT = Ld->getMemoryVT();
    // The memory VT should be the same size as the element type.
    if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
      break;
    if (!IsProfitableToFold(Src, Node, Node) ||
        !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
      break;

    SDValue VL;
    if (IsScalarMove) {
      // We could deal with more VL if we update the VSETVLI insert pass to
      // avoid introducing more VSETVLI.
      if (!isOneConstant(Node->getOperand(2)))
        break;
      selectVLOp(Node->getOperand(2), VL);
    } else
      selectVLOp(Node->getOperand(2), VL);

    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
    SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);

    // If VL=1, then we don't need to do a strided load and can just do a
    // regular load.
    bool IsStrided = !isOneConstant(VL);

    // Only do a strided load if we have optimized zero-stride vector load.
    if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
      break;

    SmallVector<SDValue> Operands = {
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
        Ld->getBasePtr()};
    if (IsStrided)
      Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
    uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.append({VL, SEW, PolicyOp, Ld->getChain()});

    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
        /*IsMasked*/ false, IsStrided, /*FF*/ false,
        Log2SEW, static_cast<unsigned>(LMUL));
    MachineSDNode *Load =
        CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
    // Update the chain.
    ReplaceUses(Src.getValue(1), SDValue(Load, 1));
    // Record the mem-refs
    CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
    // Replace the splat with the vlse.
    ReplaceNode(Node, Load);
    return;
  }
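  // ISD::PREFETCH carries the locality hint in operand 3. It is mapped below
  // onto the Zihintntl non-temporal levels: locality 0 (no temporal locality)
  // -> NTL.ALL, 1 -> NTL.PALL, 2 -> NTL.P1, while locality 3 is left as an
  // ordinary (temporal) prefetch.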
  case ISD::PREFETCH: {
    unsigned Locality = Node->getConstantOperandVal(3);
    if (Locality > 2)
      break;

    if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
      MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
      MMO->setFlags(MachineMemOperand::MONonTemporal);

      int NontemporalLevel = 0;
      switch (Locality) {
      case 0:
        NontemporalLevel = 3; // NTL.ALL
        break;
      case 1:
        NontemporalLevel = 1; // NTL.PALL
        break;
      case 2:
        NontemporalLevel = 0; // NTL.P1
        break;
      default:
        llvm_unreachable("unexpected locality value.");
      }

      if (NontemporalLevel & 0b1)
        MMO->setFlags(MONontemporalBit0);
      if (NontemporalLevel & 0b10)
        MMO->setFlags(MONontemporalBit1);
    }
    break;
  }
  }

  // Select the default instruction.
  SelectCode(Node);
}
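
// Select the operands for an inline-asm memory constraint. 'm' and 'o' are
// lowered to a (base register, simm12 offset) pair; the 'A' constraint only
// permits a bare register, so a zero offset is appended for the asm printer.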
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  // Always produce a register and immediate operand, as expected by
  // RISCVAsmPrinter::PrintAsmMemoryOperand.
  switch (ConstraintID) {
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::m: {
    SDValue Op0, Op1;
    [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
    assert(Found && "SelectAddrRegImm should always succeed");
    OutOps.push_back(Op0);
    OutOps.push_back(Op1);
    return false;
  }
  case InlineAsm::ConstraintCode::A:
    OutOps.push_back(Op);
    OutOps.push_back(
        CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
    return false;
  default:
    report_fatal_error("Unexpected asm memory constraint " +
                       InlineAsm::getMemConstraintName(ConstraintID));
  }

  return true;
}
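
// If the address is a frame index, return it as the base with a zero offset
// so that the frame index can later be resolved into a register + offset.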
bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
                                             SDValue &Offset) {
  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Fold constant addresses.
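// For example, a load from the constant address 0x12345678 can be selected as
//   lui  rd, 0x12345
//   lw   rX, 0x678(rd)
// with the LUI as the base and the low 12 bits folded into the load offset.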
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
                               const MVT VT, const RISCVSubtarget *Subtarget,
                               SDValue Addr, SDValue &Base, SDValue &Offset,
                               bool IsPrefetch = false,
                               bool IsRV32Zdinx = false) {
  if (!isa<ConstantSDNode>(Addr))
    return false;

  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();

  // If the constant is a simm12, we can fold the whole constant and use X0 as
  // the base. If the constant can be materialized with LUI+simm12, use LUI as
  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
  int64_t Lo12 = SignExtend64<12>(CVal);
  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
    if (IsPrefetch && (Lo12 & 0b11111) != 0)
      return false;
    if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
      return false;

    if (Hi) {
      int64_t Hi20 = (Hi >> 12) & 0xfffff;
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
          0);
    } else {
      Base = CurDAG->getRegister(RISCV::X0, VT);
    }
    Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
    return true;
  }

  // Ask how constant materialization would handle this constant.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);

  // If the last instruction would be an ADDI, we can fold its immediate and
  // emit the rest of the sequence as the base.
  if (Seq.back().getOpcode() != RISCV::ADDI)
    return false;
  Lo12 = Seq.back().getImm();
  if (IsPrefetch && (Lo12 & 0b11111) != 0)
    return false;
  if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
    return false;

  // Drop the last instruction.
  Seq.pop_back();
  assert(!Seq.empty() && "Expected more instructions in sequence");

  Base = selectImmSeq(CurDAG, DL, VT, Seq);
  Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
  return true;
}

// Is this ADD instruction only used as the base pointer of scalar loads and
// stores?
static bool isWorthFoldingAdd(SDValue Add) {
  for (auto *User : Add->users()) {
    if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
        User->getOpcode() != ISD::ATOMIC_LOAD &&
        User->getOpcode() != ISD::ATOMIC_STORE)
      return false;
    EVT VT = cast<MemSDNode>(User)->getMemoryVT();
    if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
        VT != MVT::f64)
      return false;
    // Don't allow stores of the value. It must be used as the address.
    if (User->getOpcode() == ISD::STORE &&
        cast<StoreSDNode>(User)->getValue() == Add)
      return false;
    if (User->getOpcode() == ISD::ATOMIC_STORE &&
        cast<AtomicSDNode>(User)->getVal() == Add)
      return false;
  }

  return true;
}
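
// Match an address of the form (add Base, (shl Index, Scale)) with Scale in
// [0, MaxShiftAmount], producing separate Base, Index and Scale operands for
// scaled register-register addressing.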
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                              unsigned MaxShiftAmount,
                                              SDValue &Base, SDValue &Index,
                                              SDValue &Scale) {
  EVT VT = Addr.getSimpleValueType();
  auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
                                              SDValue &Shift) {
    uint64_t ShiftAmt = 0;
    Index = N;

    if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
      // Only match shifts by a value in range [0, MaxShiftAmount].
      if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
        Index = N.getOperand(0);
        ShiftAmt = N.getConstantOperandVal(1);
      }
    }

    Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
    return ShiftAmt != 0;
  };

  if (Addr.getOpcode() == ISD::ADD) {
    if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue AddrB = Addr.getOperand(0);
      if (AddrB.getOpcode() == ISD::ADD &&
          UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
          !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
          isInt<12>(C1->getSExtValue())) {
        // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
        SDValue C1Val =
            CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
                                              AddrB.getOperand(1), C1Val),
                       0);
        return true;
      }
    } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
      Base = Addr.getOperand(1);
      return true;
    } else {
      UnwrapShl(Addr.getOperand(1), Index, Scale);
      Base = Addr.getOperand(0);
      return true;
    }
  } else if (UnwrapShl(Addr, Index, Scale)) {
    EVT VT = Addr.getValueType();
    Base = CurDAG->getRegister(RISCV::X0, VT);
    return true;
  }

  return false;
}
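
// Select a base register and a simm12 offset for a load/store address,
// folding frame indexes, ADD_LO (lui/auipc + lo12) operands and constant
// addresses where profitable.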
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
                                         SDValue &Offset, bool IsRV32Zdinx) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
    // If this is non RV32Zdinx we can always fold.
    if (!IsRV32Zdinx) {
      Base = Addr.getOperand(0);
      Offset = Addr.getOperand(1);
      return true;
    }

    // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4
    // to the offset when we expand in RISCVExpandPseudoInsts.
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
      const DataLayout &DL = CurDAG->getDataLayout();
      Align Alignment = commonAlignment(
          GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
      if (Alignment > 4) {
        Base = Addr.getOperand(0);
        Offset = Addr.getOperand(1);
        return true;
      }
    }
    if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
      Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
      if (Alignment > 4) {
        Base = Addr.getOperand(0);
        Offset = Addr.getOperand(1);
        return true;
      }
    }
  }

  int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
      Base = Addr.getOperand(0);
      if (Base.getOpcode() == RISCVISD::ADD_LO) {
        SDValue LoOperand = Base.getOperand(1);
        if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
          // If the Lo in (ADD_LO hi, lo) is a global variable's address
          // (its low part, really), then we can rely on the alignment of that
          // variable to provide a margin of safety before low part can overflow
          // the 12 bits of the load/store offset. Check if CVal falls within
          // that margin; if so (low part + CVal) can't overflow.
          const DataLayout &DL = CurDAG->getDataLayout();
          Align Alignment = commonAlignment(
              GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
          if ((CVal == 0 || Alignment > CVal) &&
              (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
            int64_t CombinedOffset = CVal + GA->getOffset();
            Base = Base.getOperand(0);
            Offset = CurDAG->getTargetGlobalAddress(
                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
                CombinedOffset, GA->getTargetFlags());
            return true;
          }
        }
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
           "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
    // an ADDI for part of the offset and fold the rest into the load/store.
    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
    if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
      int64_t Adj = CVal < 0 ? -2048 : 2047;
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
                                 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
          0);
      Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
      return true;
    }

    // For larger immediates, we might be able to save one instruction from
    // constant materialization by folding the Lo12 bits of the immediate into
    // the address. We should only do this if the ADD is only used by loads and
    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
    // separately with the full materialized immediate creating extra
    // instructions.
    if (isWorthFoldingAdd(Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/false, RV32ZdinxRange))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);

      // Early-out if not a valid offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
           "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
    // one instruction by folding adjustment (-2048 or 2016) into the address.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      Base =
          SDValue(CurDAG->getMachineNode(
                      RISCV::ADDI, DL, VT, Addr.getOperand(0),
                      CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
                  0);
      Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, /*IsPrefetch=*/true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/true))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}
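
// Match a plain (add Base, Offset) of two registers; constant offsets are
// handled by SelectAddrRegImm instead.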
bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;

  if (isa<ConstantSDNode>(Addr.getOperand(1)))
    return false;

  Base = Addr.getOperand(1);
  Offset = Addr.getOperand(0);
  return true;
}
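
// Select a shift-amount operand, looking through a zext and through an AND,
// ADD or SUB that cannot change the low log2(ShiftWidth) bits that the shift
// instruction actually reads.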
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  ShAmt = N;

  // Peek through zext.
  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
    ShAmt = ShAmt.getOperand(0);

  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
  // amount. If there is an AND on the shift amount, we can bypass it if it
  // doesn't affect any of those bits.
  if (ShAmt.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
    const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    if (ShMask.isSubsetOf(AndMask)) {
      ShAmt = ShAmt.getOperand(0);
    } else {
      // SimplifyDemandedBits may have optimized the mask so try restoring any
      // bits that are known zero.
      KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
      if (!ShMask.isSubsetOf(AndMask | Known.Zero))
        return true;
      ShAmt = ShAmt.getOperand(0);
    }
  }

  if (ShAmt.getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(1);
    // If we are shifting by X+N where N == 0 mod Size, then just shift by X
    // to avoid the ADD.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      ShAmt = ShAmt.getOperand(0);
      return true;
    }
  } else if (ShAmt.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(ShAmt.getOperand(0))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
    // generate a NEG instead of a SUB of a constant.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
                                                  ShAmt.getOperand(1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
    // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
    // to generate a NOT instead of a SUB of a constant.
    if (Imm % ShiftWidth == ShiftWidth - 1) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      MachineSDNode *Not = CurDAG->getMachineNode(
          RISCV::XORI, DL, VT, ShAmt.getOperand(1),
          CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
      ShAmt = SDValue(Not, 0);
      return true;
    }
  }

  return true;
}

/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
/// check for equality with 0. This function emits instructions that convert the
/// seteq/setne into something that can be compared with 0.
/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
/// SETNE).
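/// For example, (seteq X, 5) is selected as (ADDI X, -5): the result is zero
/// exactly when X == 5, and the caller then compares that value against zero.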
bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
                                    SDValue &Val) {
  assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
         "Unexpected condition code!");

  // We're looking for a setcc.
  if (N->getOpcode() != ISD::SETCC)
    return false;

  // Must be an equality comparison.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CCVal != ExpectedCCVal)
    return false;

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (!LHS.getValueType().isScalarInteger())
    return false;

  // If the RHS side is 0, we don't need any extra instructions, return the LHS.
  if (isNullConstant(RHS)) {
    Val = LHS;
    return true;
  }

  SDLoc DL(N);

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t CVal = C->getSExtValue();
    // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
    // non-zero otherwise.
    if (CVal == -2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::XORI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
    // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
    // LHS is equal to the RHS and non-zero otherwise.
    if (isInt<12>(CVal) || CVal == 2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::ADDI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
    if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::BINVI, DL, N->getValueType(0), LHS,
              CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
          0);
      return true;
    }
  }

  // If nothing else we can XOR the LHS and RHS to produce zero if they are
  // equal and a non-zero value if they aren't.
  Val = SDValue(
      CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
  return true;
}

bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
    Val = N.getOperand(0);
    return true;
  }

  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
      return N;

    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N.getConstantOperandVal(1) == ShiftAmt &&
        N0.getConstantOperandVal(1) == ShiftAmt)
      return N0.getOperand(0);

    return N;
  };

  MVT VT = N.getSimpleValueType();
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
      Val = N.getOperand(0);
      return true;
    }
  }
  MVT VT = N.getSimpleValueType();
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
  if (CurDAG->MaskedValueIsZero(N, Mask)) {
    Val = N;
    return true;
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);

    if (bool LeftShift = N0.getOpcode() == ISD::SHL;
        (LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      unsigned XLen = Subtarget->getXLen();
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
      // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
      // followed by a SHXADD with c3 for the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
                        0);
          return true;
        }
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by C3
        // followed by a SHXADD using c3 for the X amount.
        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(
              CurDAG->getMachineNode(
                  RISCV::SRLI, DL, VT, N0.getOperand(0),
                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
              0);
          return true;
        }
      }
    } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
               isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
      // the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRAI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
                        0);
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, Val,
                            CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
             (LeftShift || N.getOpcode() == ISD::SRL) &&
             isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N0.getConstantOperandVal(1);
      if (isShiftedMask_64(Mask)) {
        unsigned C1 = N.getConstantOperandVal(1);
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
        if (LeftShift && Leading == 32 && Trailing > 0 &&
            (Trailing + C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
        if (!LeftShift && Leading == 32 && Trailing > C1 &&
            (Trailing - C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD_UW we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
                                          SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
      N.hasOneUse()) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.hasOneUse()) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      Mask &= maskTrailingZeros<uint64_t>(C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
      // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
      // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = llvm::countl_zero(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SLLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
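
// Select the constant operand of a bitwise-logic node in inverted form so
// that ANDN/ORN/XNOR can be used, but only when materializing ~Imm is no more
// expensive than materializing Imm itself.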
bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();

  // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
  if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
    return false;

  // Abandon this transform if the constant is needed elsewhere.
  for (const SDNode *U : N->users()) {
    if (!ISD::isBitwiseLogicOp(U->getOpcode()))
      return false;
  }

  // For 64-bit constants, the instruction sequences get complex,
  // so we select inverted only if it's cheaper.
  if (!isInt<32>(Imm)) {
    int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
                                                 /*CompressionCost=*/true);
    int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget,
                                                /*CompressionCost=*/true);
    if (OrigImmCost <= NegImmCost)
      return false;
  }

  Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
  return true;
}
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
                                        unsigned Bits,
                                        const TargetInstrInfo *TII) {
  unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());

  if (!MCOpcode)
    return false;

  const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
  const uint64_t TSFlags = MCID.TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return false;
  assert(RISCVII::hasVLOp(TSFlags));

  bool HasGlueOp = User->getGluedNode() != nullptr;
  unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
  bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  unsigned VLIdx =
      User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);

  if (UserOpNo == VLIdx)
    return false;

  auto NumDemandedBits =
      RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
  return NumDemandedBits && Bits >= *NumDemandedBits;
}

// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
    return false;

  for (SDUse &Use : Node->uses()) {
    SDNode *User = Use.getUser();
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
        break;
      return false;
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_W_INX:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_H_WU_INX:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_W_INX:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_S_WU_INX:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_W_INX:
    case RISCV::FCVT_D_WU:
    case RISCV::FCVT_D_WU_INX:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      if (Bits >= 32)
        break;
      return false;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(Xlen) bits.
      if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
        break;
      return false;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
        break;
      return false;
    case RISCV::ANDI:
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits >= 8)
        break;
      return false;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits >= 16)
        break;
      return false;
    case RISCV::PACK:
      if (Bits >= (Subtarget->getXLen() / 2))
        break;
      return false;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    case RISCV::SB:
      if (Use.getOperandNo() == 0 && Bits >= 8)
        break;
      return false;
    case RISCV::SH:
      if (Use.getOperandNo() == 0 && Bits >= 16)
        break;
      return false;
    case RISCV::SW:
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t Offset = C->getSExtValue();
    int64_t Shift;
    for (Shift = 0; Shift < 4; Shift++)
      if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
        break;

    // Constant cannot be encoded.
    if (Shift == 4)
      return false;

    EVT Ty = N->getValueType(0);
    Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty);
    Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
    return true;
  }

  return false;
}
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
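// For example (illustrative): a constant VL of 12 fits VSETIVLI's uimm5, while
// a VL of 64 or a non-constant VL has to be moved into a register for VSETVLI.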
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                         N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                         N->getValueType(0));
  } else {
    VL = N;
  }

  return true;
}

static SDValue findVSplat(SDValue N) {
  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
    if (!N.getOperand(0).isUndef())
      return SDValue();
    N = N.getOperand(1);
  }

  SDValue Splat = N;
  if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
       Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
      !Splat.getOperand(0).isUndef())
    return SDValue();

  assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
  return Splat;
}

bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  SDValue Splat = findVSplat(N);
  if (!Splat)
    return false;

  SplatVal = Splat.getOperand(1);
  return true;
}

static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget,
                                  std::function<bool(int64_t)> ValidateImm) {
  SDValue Splat = findVSplat(N);
  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
    return false;

  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand type is
  // wider than the resulting vector element type, an implicit truncation first
  // takes place. Therefore, perform a manual truncation/sign-extension in
  // order to ignore any truncated bits and catch any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);

  int64_t SplatImm = SplatConst.getSExtValue();

  if (!ValidateImm(SplatImm))
    return false;

  SplatVal =
      DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
  return true;
}

bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
                               [](int64_t Imm) { return isInt<5>(Imm); });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
                                         SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}

bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
  auto IsExtOrTrunc = [](SDValue N) {
    switch (N->getOpcode()) {
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    // There's no passthru on these _VL nodes so any VL/mask is ok, since any
    // inactive elements will be undef.
    case RISCVISD::TRUNCATE_VECTOR_VL:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return true;
    default:
      return false;
    }
  };

  // We can have multiple nested nodes, so unravel them all if needed.
  while (IsExtOrTrunc(N)) {
    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
  // Allow bitcasts from XLenVT -> FP.
  if (N.getOpcode() == ISD::BITCAST &&
      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
    Imm = N.getOperand(0);
    return true;
  }

  // Allow moves from XLenVT to FP.
  if (N.getOpcode() == RISCVISD::FMV_H_X ||
      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
    Imm = N.getOperand(0);
    return true;
  }

  // Otherwise, look for FP constants that can be materialized with a scalar
  // integer.
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;

  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
                                          Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
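// For example (illustrative, assuming the ADD has no other users):
//   add   a0, a1, a2
//   addiw a0, a0, 0        ; sext.w of the add result
// is replaced by
//   addw  a0, a1, a2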
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
// that's glued to the pseudo. This tries to look up the value that was copied
// to V0.
static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(MaskOp) ||
      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
    return SDValue();

  // The glued user defines V0.
  const auto *Glued = GlueOp.getNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return SDValue();

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return SDValue();

  SDValue MaskSetter = Glued->getOperand(2);

  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
  // from an extract_subvector or insert_subvector.
  if (MaskSetter->isMachineOpcode() &&
      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
    MaskSetter = MaskSetter->getOperand(0);

  return MaskSetter;
}

static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
  if (!MaskSetter)
    return false;

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskSetter->isMachineOpcode() &&
         IsVMSet(MaskSetter.getMachineOpcode());
}

// Return true if we can be sure that the mask of N is an all-ones mask.
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
  return usesAllOnesMask(N->getOperand(MaskOpIdx),
                         N->getOperand(N->getNumOperands() - 1));
}

static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

static bool hasGPROut(unsigned Opc) {
  switch (RISCV::getRVVMCOpcode(Opc)) {
  case RISCV::VCPOP_M:
  case RISCV::VFIRST_M:
    return true;
  }
  return false;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
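//
// Illustrative shape of the input (operand lists are sketches, not exact
// pseudo signatures):
//   %ones = PseudoVMSET_M_B8 %vl, %sew
//   %glue = CopyToReg %chain, $v0, %ones
//   %res  = PseudoVADD_VV_M1_MASK %pt, %a, %b, $v0, %vl, %sew, %policy, %glue
// which this peephole rewrites to the unmasked form:
//   %res  = PseudoVADD_VV_M1 %pt, %a, %b, %vl, %sew, %policy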
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
         "Masked and unmasked pseudos are inconsistent");
  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");

  SmallVector<SDValue, 8> Ops;
  // Skip the passthru operand at index 0 if !UseTUPseudo and no GPR out.
  bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc);
  for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}

// Try to fold away VMERGE_VVM instructions into their true operands:
//
// %true = PseudoVADD_VV ...
// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
// ->
// %x = PseudoVADD_VV_MASK %false, ..., %mask
//
// We can only fold if vmerge's passthru operand, vmerge's false operand and
// %true's passthru operand (if it has one) are the same. This is because we
// have to consolidate them into one passthru operand in the result.
//
// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
// mask is all ones.
//
// The resulting VL is the minimum of the two VLs.
//
// The resulting policy is the effective policy the vmerge would have had,
// i.e. whether or not its passthru operand was implicit-def.
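//
// For example (illustrative): if the vmerge has VL 8 but %true was computed
// with VL 4, the folded instruction uses VL 4; elements 4..7, which were in
// the vmerge's body, move into the tail, so a tail-undisturbed policy must be
// used even when the passthru is implicit-def.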
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Passthru, False, True, VL, Mask, Glue;
  assert(IsVMerge(N));
  Passthru = N->getOperand(0);
  False = N->getOperand(1);
  True = N->getOperand(2);
  Mask = N->getOperand(3);
  VL = N->getOperand(4);
  // We always have a glue node for the mask at v0.
  Glue = N->getOperand(N->getNumOperands() - 1);
  assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(Glue.getValueType() == MVT::Glue);

  // If the EEW of True is different from vmerge's SEW, then we can't fold.
  if (True.getSimpleValueType() != N->getSimpleValueType(0))
    return false;

  // We require that either passthru and false are the same, or that passthru
  // is undefined.
  if (Passthru != False && !isImplicitDef(Passthru))
    return false;

  assert(True.getResNo() == 0 &&
         "Expect True is the first output of an instruction.");

  // N must be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info)
    return false;

  // If True has a passthru operand then it needs to be the same as vmerge's
  // False, since False will be used for the result's passthru operand.
  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    SDValue PassthruOpTrue = True->getOperand(0);
    if (False != PassthruOpTrue)
      return false;
  }

  // Skip if True has side effects.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may exist either before the glued operands or in the
  // last position.
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its Chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics.
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same passthru operand (or True's operand
  // is implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // Some operations produce different elementwise results depending on the
  // active elements, like viota.m or vredsum. This transformation is illegal
  // for these if we change the active elements (i.e. mask or VL).
  const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
  if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
    return false;
  if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
      (Mask && !usesAllOnesMask(Mask, Glue)))
    return false;

  // Make sure it doesn't raise any observable fp exceptions, since changing the
  // active elements will affect how fflags is set.
  if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
    return false;

  SDLoc DL(N);

  unsigned MaskedOpc = Info->MaskedPseudo;

  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with mask have policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with mask have a tied dest.");

  // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally, then
  // elements past VL that were previously in the vmerge's body will have moved
  // to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For unmasked "VOp" with rounding mode operand, that is interfaces like
  // (..., rm, vl) or (..., rm, vl, policy).
  // Its masked version is (..., vm, rm, vl, policy).
  // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td.
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // Result node should have chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  MachineSDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  if (!cast<MachineSDNode>(True)->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());

  // Replace vmerge.vvm node by Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace other values of True, e.g. the chain and VL outputs.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N))
      MadeChange |= performCombineVMergeAndVOps(N);
  }
  return MadeChange;
}

/// If our passthru is an implicit_def, use noreg instead. This sidesteps
/// issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
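///
/// Illustrative sketch of the rewrite (operand list is not an exact pseudo
/// signature):
///   %pt = IMPLICIT_DEF
///   %v  = PseudoVADD_VV_M1 %pt, %a, %b, %vl, %sew, %policy
/// becomes
///   %v  = PseudoVADD_VV_M1 $noreg, %a, %b, %vl, %sew, %policy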
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}

char RISCVDAGToDAGISelLegacy::ID = 0;

RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)