//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);

  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE};

  ISD::NodeType FPOpToExtend[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
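
  // Note: FPCCToExtend lists the FP condition codes with no single matching
  // F/D compare instruction, and FPOpToExtend lists math operations with no
  // hardware support; both are expanded (to swapped/inverted compares and to
  // libcalls respectively) for f32 and f64 below when the relevant extension
  // is present.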

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f32, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f64, Expand);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments (log2).
  unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false; // disallow "r+r" or "r+r+i".
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}
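
// RV64 keeps 32-bit values in registers in sign-extended form, so extending
// an i32 to i64 is typically free (at most a single sext.w), whereas zero
// extension generally needs extra masking; hence sign extension is reported
// as the cheaper choice here.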
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
      return SDValue();
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
    return FPConv;
  }
  }
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
                                   N->getOffset(), Flags);
}
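
// For reference, with the small code model and no PIC, an access to a global
// `g` lowered by getAddr below ends up as something like:
//   lui  a0, %hi(g)
//   addi a0, a0, %lo(g)
// while PIC non-local accesses instead go through the GOT via auipc+ld.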
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));

  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  // Non-PIC TLS lowering should always use the LocalExec model.
  TLSModel::Model Model = isPositionIndependent()
                              ? getTargetMachine().getTLSModel(N->getGlobal())
                              : TLSModel::LocalExec;

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));

  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  unsigned FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
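
// Worked example for the SHL_PARTS expansion below, with XLEN = 32: shifting
// the 64-bit pair {Hi,Lo} left by Shamt = 40 takes the "else" branch, giving
// Lo = 0 and Hi = Lo << 8; for Shamt = 8 it gives Lo = Lo << 8 and
// Hi = (Hi << 8) | (Lo >>u 24).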
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on because the fact the operation was originally of type i32 is
// forgotten.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(RCW);
    Results.push_back(RCW.getValue(1));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
    SDValue Op0 = N->getOperand(0);
    if (Op0.getValueType() != MVT::f32)
      return;
    SDValue FPConv =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    break;
  }
  }
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
      return SDValue(N, 0);
    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      SDValue AExtOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
      return DCI.CombineTo(N, AExtOp);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG) {
      return DCI.CombineTo(N,
                           DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                                       DAG.getConstant(SignBit, DL, MVT::i64)));
    }
    assert(Op0.getOpcode() == ISD::FABS);
    return DCI.CombineTo(N,
                         DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                                     DAG.getConstant(~SignBit, DL, MVT::i64)));
  }
  }

  return SDValue();
}
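
// Example of the cost reasoning below: for (shl (add x, 3), 2), c1 << c2 = 12
// still fits an ADDI immediate, so the fold is allowed; for
// (shl (add x, 2047), 4), c1 = 2047 is a legal add immediate but
// c1 << c2 = 32752 is not, so the fold is rejected.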
bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      APInt C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines.
      if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.is64Bit());
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}

unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  }

  return 1;
}

static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  unsigned LoReg = MI.getOperand(0).getReg();
  unsigned HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
*emitSplitF64Pseudo(MachineInstr
&MI
,
1118 MachineBasicBlock
*BB
) {
1119 assert(MI
.getOpcode() == RISCV::SplitF64Pseudo
&& "Unexpected instruction");
1121 MachineFunction
&MF
= *BB
->getParent();
1122 DebugLoc DL
= MI
.getDebugLoc();
1123 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1124 const TargetRegisterInfo
*RI
= MF
.getSubtarget().getRegisterInfo();
1125 unsigned LoReg
= MI
.getOperand(0).getReg();
1126 unsigned HiReg
= MI
.getOperand(1).getReg();
1127 unsigned SrcReg
= MI
.getOperand(2).getReg();
1128 const TargetRegisterClass
*SrcRC
= &RISCV::FPR64RegClass
;
1129 int FI
= MF
.getInfo
<RISCVMachineFunctionInfo
>()->getMoveF64FrameIndex();
1131 TII
.storeRegToStackSlot(*BB
, MI
, SrcReg
, MI
.getOperand(2).isKill(), FI
, SrcRC
,
1133 MachineMemOperand
*MMO
=
1134 MF
.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF
, FI
),
1135 MachineMemOperand::MOLoad
, 8, 8);
1136 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::LW
), LoReg
)
1139 .addMemOperand(MMO
);
1140 BuildMI(*BB
, MI
, DL
, TII
.get(RISCV::LW
), HiReg
)
1143 .addMemOperand(MMO
);
1144 MI
.eraseFromParent(); // The pseudo instruction is gone now.

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned LoReg = MI.getOperand(1).getReg();
  unsigned HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
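
// Note: both pseudos above go through a stack slot because RV32D has no
// direct move instructions between a 64-bit FPR and a pair of 32-bit GPRs;
// the f64 value is stored and reloaded as two 32-bit words instead.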

static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  unsigned LHS = MI.getOperand(1).getReg();
  unsigned RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<unsigned, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  return TailMBB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.

static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32,
  RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32
};
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64,
  RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64
};
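
// These are the a0-a7 integer and fa0-fa7 floating-point argument registers
// defined by the standard RISC-V calling convention; anything beyond them is
// passed on the stack.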

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
    return false;
  }

  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F32 argument registers are available.
  bool UseGPRForF32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
    UseGPRForF32 = true;
  if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
    UseGPRForF64 = true;

  // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
  // variables rather than directly checking against the target ABI.

  if (UseGPRForF32 && ValVT == MVT::f32) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    unsigned Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  unsigned Reg;
  if (ValVT == MVT::f32 && !UseGPRForF32)
    Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
  else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
         "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
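
// Example of the BCvt case above (editor's sketch): with the lp64 ABI on
// RV64, an f32 value travels in the low 32 bits of an i64 GPR, so the
// incoming i64 is moved into an FPR with FMV_W_X_RV64 rather than bitcast.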
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC;

  switch (LocVT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected register type");
  case MVT::i32:
  case MVT::i64:
    RC = &RISCV::GPRRegClass;
    break;
  case MVT::f32:
    RC = &RISCV::FPR32RegClass;
    break;
  case MVT::f64:
    RC = &RISCV::FPR64RegClass;
    break;
  }

  unsigned VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL);
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
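
// Worked example (editor's addition, derived from the logic above): with the
// ilp32 ABI, if CC_RISCV assigned the f64's low half to a7 (X17), the high
// half did not fit in a GPR, so it is reloaded here from the first incoming
// stack slot (offset 0) and the two i32 halves are rejoined with
// RISCVISD::BuildPairF64.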
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
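
// Vararg save area, worked example (editor's sketch of the code above): for a
// variadic callee on RV32 whose single fixed argument occupies a0, Idx == 1,
// so a1-a7 are spilled: VarArgsSaveSize = 7 * 4 = 28 and VaArgOffset = -28.
// Because Idx is odd, one extra 4-byte slot is created at offset -32 to keep
// the area 2*XLEN-aligned, giving a final save size of 32 bytes.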
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto IsVarArg = CLI.IsVarArg;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt functions with "disable-tail-calls" attribute.
  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt functions with varargs.
  if (IsVarArg)
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}
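
// Example of the indirect-argument check above (editor's addition): an i128
// argument on RV32 is larger than 2*XLEN, so CC_RISCV marks it
// CCValAssign::Indirect and the caller must materialise a stack temporary for
// it; such calls are rejected for tail-call optimisation even though
// getNextStackOffset() may still be zero.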
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    } else {
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
    }

    InVals.push_back(RetValue);
  }

  return Chain;
}
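
// Overall shape of the DAG built above for a non-tail call (editor's sketch):
//   callseq_start
//     -> glued CopyToReg nodes for the argument registers
//     -> RISCVISD::CALL (chain + glue)
//     -> callseq_end
//     -> CopyFromReg for each returned value location
// A tail call skips the callseq nodes and the result copies, emitting a
// single RISCVISD::TAIL node instead.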
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
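
// Example (editor's addition): a function carrying the Clang attribute
// __attribute__((interrupt("machine"))) reaches the "interrupt" branch above
// and returns through RISCVISD::MRET_FLAG, intended to be matched to the
// `mret` instruction; "user" and "supervisor" map to uret/sret analogously.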
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  case RISCVISD::SLLW:
    return "RISCVISD::SLLW";
  case RISCVISD::SRAW:
    return "RISCVISD::SRAW";
  case RISCVISD::SRLW:
    return "RISCVISD::SRLW";
  case RISCVISD::DIVW:
    return "RISCVISD::DIVW";
  case RISCVISD::DIVUW:
    return "RISCVISD::DIVUW";
  case RISCVISD::REMUW:
    return "RISCVISD::REMUW";
  case RISCVISD::FMV_W_X_RV64:
    return "RISCVISD::FMV_W_X_RV64";
  case RISCVISD::FMV_X_ANYEXTW_RV64:
    return "RISCVISD::FMV_X_ANYEXTW_RV64";
  case RISCVISD::READ_CYCLE_WIDE:
    return "RISCVISD::READ_CYCLE_WIDE";
  }
  return nullptr;
}
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
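
// Example use of the 'r' constraint handled above (editor's addition):
//   int sum(int a, int b) {
//     int c;
//     asm("add %0, %1, %2" : "=r"(c) : "r"(a), "r"(b));
//     return c;
//   }
// All three operands are allocated from GPRRegClass.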
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
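
// Example of the immediate constraints above (editor's addition): 'I' accepts
// a signed 12-bit immediate, 'J' only the constant zero, and 'K' an unsigned
// 5-bit immediate, e.g.
//   asm("addi %0, %1, %2" : "=r"(out) : "r"(in), "I"(42));
// A constant outside the accepted range is not converted into an operand and
// falls back to the generic handling.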
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}
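
// Editor's note on the mapping implemented above: a seq_cst load gets a
// leading seq_cst fence and a trailing acquire fence, an acquire load gets
// only the trailing acquire fence, and a release (or stronger) store gets a
// leading release fence; all other cases emit no fence from these hooks.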
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
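
// Examples (editor's addition): `atomicrmw add i8`/`i16` operations are
// expanded by AtomicExpandPass into the masked-intrinsic form handled below,
// `atomicrmw fadd float` is expanded into a compare-exchange loop, and 32/64
// bit integer operations are not expanded here.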
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
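
// Sketch of the expansion that ends up calling these intrinsics (editor's
// addition, call shape abbreviated): for `atomicrmw add i8* %p, i8 %v` on
// RV32, AtomicExpandPass aligns %p down to a word boundary, computes the
// in-word shift amount and mask, and the hook below then emits a call along
// the lines of
//   @llvm.riscv.masked.atomicrmw.add.i32(aligned addr, incr, mask, ordering)
// which is later lowered to an LR/SC loop operating on the containing word.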
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
unsigned RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

unsigned RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}