//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}

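// Only void returns are handled below; a return from an entry function is
// lowered to a bare S_ENDPGM, and anything else is reported as unsupported so
// the GlobalISel fallback path can take over.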
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val, unsigned VReg) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);

  return true;
}

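// Computes the address of one kernel argument: a constant-address-space
// pointer formed by a G_GEP of the byte Offset from the preloaded kernarg
// segment pointer.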
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

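// Loads one kernel argument value of type ParamTy from the kernarg segment
// into DstReg, using an invariant, non-temporal load through the pointer
// computed above.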
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

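// Formal argument lowering splits into two very different paths: AMDGPU_KERNEL
// arguments are materialized as loads from the kernarg segment, while shader
// (VS/PS) arguments go through normal calling-convention assignment and are
// copied out of their incoming registers.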
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

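  // Reserve any preloaded special input SGPRs (private segment buffer,
  // dispatch/queue pointers, kernarg segment pointer, dispatch ID, flat
  // scratch init) before assigning user arguments, so the calling convention
  // cannot hand those registers out again.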
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

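  // Non-kernel (shader) path: walk the IR arguments, set up their flags, and
  // let the calling-convention assignment function place each one. Unused PS
  // inputs that have not yet been allocated are skipped so the input enable
  // mask stays minimal.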
  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             F.isVarArg());

    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }

    return true;
  }

  return false;
}