//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}

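// Only void returns are handled below; a return from an entry function is
// lowered to a bare S_ENDPGM, and anything else is reported as unsupported so
// the GlobalISel fallback path can take over.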
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val, unsigned VReg) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);

  return true;
}

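// Computes the address of one kernel argument: a constant-address-space
// pointer formed by a G_GEP of the byte Offset from the preloaded kernarg
// segment pointer.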
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

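// Loads one kernel argument value of type ParamTy from the kernarg segment
// into DstReg, using an invariant, non-temporal load through the pointer
// computed above.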
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

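// Formal argument lowering splits into two very different paths: AMDGPU_KERNEL
// arguments are materialized as loads from the kernarg segment, while shader
// (VS/PS) arguments go through normal calling-convention assignment and are
// copied out of their incoming registers.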
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

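  // Reserve any preloaded special input SGPRs (private segment buffer,
  // dispatch/queue pointers, kernarg segment pointer, dispatch ID, flat
  // scratch init) before assigning user arguments, so the calling convention
  // cannot hand those registers out again.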
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

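  // Non-kernel (shader) path: walk the IR arguments, set up their flags, and
  // let the calling-convention assignment function place each one. Unused PS
  // inputs that have not yet been allocated are skipped so the input enable
  // mask stays minimal.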
  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             F.isVarArg());

    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }

    return true;
  }

  return false;
}