//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);

  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE};

  ISD::NodeType FPOpToExtend[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f32, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f64, Expand);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  }
}
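// Note (explanatory, not from the original source): the masked atomic
// intrinsics handled above operate on the naturally aligned 32-bit word that
// contains the addressed sub-word location, which is why the memory info is
// recorded as an aligned 4-byte access that both loads and stores and is
// marked volatile.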
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false; // disallow anything else with a scale.
  }

  return true;
}
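// Illustrative note: the only addressing mode accepted above is a base
// register plus a 12-bit signed immediate (the `simm12(rs1)` form used by
// LW/SW and friends); register+register and scaled-index forms are rejected.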
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}
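// Note: preferring sign-extension on RV64 (isSExtCheaperThanZExt) reflects
// that RV64 naturally keeps 32-bit values sign-extended in registers -- LW
// and ADDIW, for example, sign-extend their i32 results -- so extending i32
// to i64 by sign is usually free, while zero-extension needs extra masking.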
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}
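// Illustrative example: (setcc a, b, setgt) has no direct branch form, so
// normaliseSetCC rewrites it as (setcc b, a, setlt), which the helper above
// can then map onto BLT.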
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
      return SDValue();
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
    return FPConv;
  }
  }
}
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
                                   N->getOffset(), Flags);
}
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
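// For reference, the CodeModel::Small sequence above materialises as
// `lui rd, %hi(sym); addi rd, rd, %lo(sym)`, while the PC-relative forms
// expand from PseudoLLA/PseudoLA into an AUIPC-based pair as described in
// the comments above.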
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));

  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT
  // address. This generates the pattern (PseudoLA_TLS_GD sym), which expands
  // to (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
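// For reference, the local-exec path above ultimately emits something like
// `lui rd, %tprel_hi(sym); add rd, rd, tp, %tprel_add(sym);
// addi rd, rd, %tprel_lo(sym)`, matching the pattern documented in
// getStaticTLSAddr.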
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  // Non-PIC TLS lowering should always use the LocalExec model.
  TLSModel::Model Model = isPositionIndependent()
                              ? getTargetMachine().getTLSModel(N->getGlobal())
                              : TLSModel::LocalExec;

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));

  return Addr;
}
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
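// Worked example (illustrative): for a 64-bit shift-left by 40 on RV32,
// Shamt-XLEN is 8, so the "else" arm of the expansion above is taken: the
// result Lo becomes 0 and the result Hi becomes the original Lo shifted left
// by 8.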
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on, because the fact that the operation was originally of type i32 is
// forgotten.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, in order to reduce the number of sign-extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
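// Illustrative note: with this helper an i32 add on RV64 becomes
// (sext_inreg (add (any_extend a), (any_extend b)), i32), a shape that
// instruction selection can match to a *W instruction such as ADDW while
// keeping the result correctly sign-extended.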
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(RCW);
    Results.push_back(RCW.getValue(1));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
    SDValue Op0 = N->getOperand(0);
    if (Op0.getValueType() != MVT::f32)
      return;
    SDValue FPConv =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    break;
  }
  }
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
      return SDValue();
    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      SDValue AExtOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
      return DCI.CombineTo(N, AExtOp);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG) {
      return DCI.CombineTo(N,
                           DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                                       DAG.getConstant(SignBit, DL, MVT::i64)));
    }
    assert(Op0.getOpcode() == ISD::FABS);
    return DCI.CombineTo(N,
                         DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                                     DAG.getConstant(~SignBit, DL, MVT::i64)));
  }
  }

  return SDValue();
}
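// Note on the combines above: because an f64 split into a GPR pair (or an f32
// moved into a GPR) exposes its raw bits, fneg/fabs reduce to flipping or
// clearing the sign bit of the high word with XOR/AND, avoiding a round trip
// through the FP register file.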
bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      APInt C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.is64Bit());
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}
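// Illustrative example: with c1 = 2047 and c2 = 4, c1 fits a 12-bit add
// immediate but c1 << c2 = 32752 does not, so the transform is blocked; with
// c1 = 63 and c2 = 2 the shifted constant (252) still fits, so the combine is
// allowed.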
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  }

  return 1;
}
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again:
  //
  // read:
  //   rdcycleh x3 # load high word of cycle
  //   rdcycle  x2 # load low word of cycle
  //   rdcycleh x4 # load high word of cycle
  //   bne x3, x4, read # check if high word reads match, otherwise try again

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}

static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOLoad, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
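// Note: SplitF64Pseudo/BuildPairF64Pseudo go through a stack slot because
// RV32D has no instruction that moves a full 64-bit value directly between an
// FPR and a pair of 32-bit GPRs, hence the store/load pair emitted above.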
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or FPRs
//   are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference, the
//   frontend must modify the prototype so a pointer with the sret annotation is
//   passed as the first argument. This is not necessary for large scalar
//   returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.
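// Illustrative example (not from the original comment): under a hard-float
// ABI such as ilp32d, a struct containing two doubles may be passed as two
// separate f64 arguments in FPRs while argument FPRs remain available; once
// FPRs (or the GPR for an int+fp pair) run out, the struct falls back to the
// integer rules described above.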
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
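// Note: X10-X17 and F10-F17 are the a0-a7 and fa0-fa7 argument registers
// defined by the RISC-V calling convention.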
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
  }

  return false;
}
// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F32 argument registers are available.
  bool UseGPRForF32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
    UseGPRForF32 = true;
  if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
    UseGPRForF64 = true;

  // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
  // variables rather than directly checking against the target ABI.

  if (UseGPRForF32 && ValVT == MVT::f32) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f32 && !UseGPRForF32)
    Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
  else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
         "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC;

  switch (LocVT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected register type");
  case MVT::i32:
  case MVT::i64:
    RC = &RISCV::GPRRegClass;
    break;
  case MVT::f32:
    RC = &RISCV::FPR32RegClass;
    break;
  case MVT::f64:
    RC = &RISCV::FPR64RegClass;
    break;
  }

  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL);
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }
  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
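// Illustrative sizing of the vararg save area created above (a sketch, not
// taken from this file): for an RV32 function such as int sum(int n, ...),
// a0 holds the fixed argument, so a1-a7 (28 bytes) are spilled; Idx is odd,
// so one extra 4-byte slot keeps the save area 2*XLEN-aligned and
// VarArgsSaveSize ends up as 32 bytes.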
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt functions with "disable-tail-calls" attribute.
  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.
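    // Illustrative case for the split-f64 path above (a hypothetical call,
    // not from this file): passing seven ints followed by a double on
    // soft-float RV32 fills a0-a6 with the ints, places the low word of the
    // double in a7 (X17), and stores its high word at the bottom of the
    // outgoing argument area through the StackPtr copy made above.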
    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
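  // Illustrative case for the indirect path above (a sketch, not from this
  // file): an i128 argument on RV32 is larger than 2*XLEN, so its four i32
  // parts are all stored to a single stack temporary and only that
  // temporary's address is handed to the callee.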
  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    } else {
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
    }

    InVals.push_back(RetValue);
  }

  return Chain;
}
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
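// Usage note for the interrupt handling above (illustrative, not from this
// file): a handler declared in C as
//   void __attribute__((interrupt("machine"))) trap_handler(void);
// takes the RISCVISD::MRET_FLAG path and is matched to an mret, while the
// "supervisor" and "user" kinds select sret and uret respectively.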
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  case RISCVISD::SLLW:
    return "RISCVISD::SLLW";
  case RISCVISD::SRAW:
    return "RISCVISD::SRAW";
  case RISCVISD::SRLW:
    return "RISCVISD::SRLW";
  case RISCVISD::DIVW:
    return "RISCVISD::DIVW";
  case RISCVISD::DIVUW:
    return "RISCVISD::DIVUW";
  case RISCVISD::REMUW:
    return "RISCVISD::REMUW";
  case RISCVISD::FMV_W_X_RV64:
    return "RISCVISD::FMV_W_X_RV64";
  case RISCVISD::FMV_X_ANYEXTW_RV64:
    return "RISCVISD::FMV_X_ANYEXTW_RV64";
  case RISCVISD::READ_CYCLE_WIDE:
    return "RISCVISD::READ_CYCLE_WIDE";
  }
  return nullptr;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }
  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  Register XRegFromAlias = StringSwitch<Register>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
    std::pair<Register, Register> FReg =
        StringSwitch<std::pair<Register, Register>>(Constraint.lower())
            .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D})
            .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D})
            .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D})
            .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D})
            .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D})
            .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D})
            .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D})
            .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D})
            .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D})
            .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D})
            .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D})
            .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D})
            .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D})
            .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D})
            .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D})
            .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D})
            .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D})
            .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D})
            .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D})
            .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D})
            .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D})
            .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D})
            .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D})
            .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D})
            .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D})
            .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D})
            .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D})
            .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D})
            .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D})
            .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D})
            .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D})
            .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D})
            .Default({RISCV::NoRegister, RISCV::NoRegister});
    if (FReg.first != RISCV::NoRegister)
      return Subtarget.hasStdExtD()
                 ? std::make_pair(FReg.second, &RISCV::FPR64RegClass)
                 : std::make_pair(FReg.first, &RISCV::FPR32RegClass);
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
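// Illustrative constraint resolutions for the two tables above (assumed
// examples, not from this file): "{sp}" resolves to X2 in the GPR class,
// while "{fa0}" or "{f10}" resolves to F10_D when the D extension is present
// and to F10_F otherwise.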
unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
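// Illustrative use of the 'I' constraint handled above (hypothetical inline
// asm, not from this file):
//   int y; asm("addi %0, %1, %2" : "=r"(y) : "r"(x), "I"(42));
// 42 fits in a signed 12-bit immediate, so a target constant is emitted;
// 'J' only accepts the constant zero and 'K' a 5-bit unsigned immediate.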
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}
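// Illustrative effect of the two hooks above (a sketch of the expected
// lowering, not from this file): a seq_cst atomic load is bracketed as
// "fence rw,rw; lw; fence r,rw", while a release atomic store becomes
// "fence rw,w; sw" with no trailing fence.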
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
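// Illustrative end-to-end case for the masked path above (a sketch, not from
// this file): an i8 "atomicrmw add" on RV64 is expanded by AtomicExpand into
// a call to llvm.riscv.masked.atomicrmw.add.i64 on the containing aligned
// word; the i32 operands are sign-extended to i64 above and the i64 result
// is truncated back to i32 afterwards.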
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
unsigned RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

unsigned RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}