1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPUMachineFunction.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
13 #include "Utils/AMDGPUBaseInfo.h"
14 #include "llvm/CodeGen/MachineModuleInfo.h"
15 #include "llvm/IR/ConstantRange.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/Metadata.h"
18 #include "llvm/Target/TargetMachine.h"
22 static const GlobalVariable
*
23 getKernelDynLDSGlobalFromFunction(const Function
&F
) {
24 const Module
*M
= F
.getParent();
25 SmallString
<64> KernelDynLDSName("llvm.amdgcn.");
26 KernelDynLDSName
+= F
.getName();
27 KernelDynLDSName
+= ".dynlds";
28 return M
->getNamedGlobal(KernelDynLDSName
);
31 static bool hasLDSKernelArgument(const Function
&F
) {
32 for (const Argument
&Arg
: F
.args()) {
33 Type
*ArgTy
= Arg
.getType();
34 if (auto PtrTy
= dyn_cast
<PointerType
>(ArgTy
)) {
35 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
)
42 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function
&F
,
43 const AMDGPUSubtarget
&ST
)
44 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F
.getCallingConv())),
45 IsModuleEntryFunction(
46 AMDGPU::isModuleEntryFunctionCC(F
.getCallingConv())),
47 IsChainFunction(AMDGPU::isChainCC(F
.getCallingConv())) {
49 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
50 // except reserved size is not correctly aligned.
52 Attribute MemBoundAttr
= F
.getFnAttribute("amdgpu-memory-bound");
53 MemoryBound
= MemBoundAttr
.getValueAsBool();
55 Attribute WaveLimitAttr
= F
.getFnAttribute("amdgpu-wave-limiter");
56 WaveLimiter
= WaveLimitAttr
.getValueAsBool();
58 // FIXME: How is this attribute supposed to interact with statically known
60 StringRef S
= F
.getFnAttribute("amdgpu-gds-size").getValueAsString();
62 S
.consumeInteger(0, GDSSize
);
64 // Assume the attribute allocates before any known GDS globals.
65 StaticGDSSize
= GDSSize
;
67 // Second value, if present, is the maximum value that can be assigned.
68 // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
70 std::pair
<unsigned, unsigned> LDSSizeRange
= AMDGPU::getIntegerPairAttribute(
71 F
, "amdgpu-lds-size", {0, UINT32_MAX
}, true);
73 // The two separate variables are only profitable when the LDS module lowering
74 // pass is disabled. If graphics does not use dynamic LDS, this is never
75 // profitable. Leaving cleanup for a later change.
76 LDSSize
= LDSSizeRange
.first
;
77 StaticLDSSize
= LDSSize
;
79 CallingConv::ID CC
= F
.getCallingConv();
80 if (CC
== CallingConv::AMDGPU_KERNEL
|| CC
== CallingConv::SPIR_KERNEL
)
81 ExplicitKernArgSize
= ST
.getExplicitKernArgSize(F
, MaxKernArgAlign
);
83 // FIXME: Shouldn't be target specific
84 Attribute NSZAttr
= F
.getFnAttribute("no-signed-zeros-fp-math");
86 NSZAttr
.isStringAttribute() && NSZAttr
.getValueAsString() == "true";
88 const GlobalVariable
*DynLdsGlobal
= getKernelDynLDSGlobalFromFunction(F
);
89 if (DynLdsGlobal
|| hasLDSKernelArgument(F
))
90 UsesDynamicLDS
= true;
93 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout
&DL
,
94 const GlobalVariable
&GV
,
96 auto Entry
= LocalMemoryObjects
.insert(std::pair(&GV
, 0));
98 return Entry
.first
->second
;
101 DL
.getValueOrABITypeAlignment(GV
.getAlign(), GV
.getValueType());
104 if (GV
.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
106 std::optional
<uint32_t> MaybeAbs
= getLDSAbsoluteAddress(GV
);
108 // Absolute address LDS variables that exist prior to the LDS lowering
109 // pass raise a fatal error in that pass. These failure modes are only
110 // reachable if that lowering pass is disabled or broken. If/when adding
111 // support for absolute addresses on user specified variables, the
112 // alignment check moves to the lowering pass and the frame calculation
113 // needs to take the user variables into consideration.
115 uint32_t ObjectStart
= *MaybeAbs
;
117 if (ObjectStart
!= alignTo(ObjectStart
, Alignment
)) {
118 report_fatal_error("Absolute address LDS variable inconsistent with "
119 "variable alignment");
122 if (isModuleEntryFunction()) {
123 // If this is a module entry function, we can also sanity check against
124 // the static frame. Strictly it would be better to check against the
125 // attribute, i.e. that the variable is within the always-allocated
126 // section, and not within some other non-absolute-address object
127 // allocated here, but the extra error detection is minimal and we would
128 // have to pass the Function around or cache the attribute value.
130 ObjectStart
+ DL
.getTypeAllocSize(GV
.getValueType());
131 if (ObjectEnd
> StaticLDSSize
) {
133 "Absolute address LDS variable outside of static frame");
137 Entry
.first
->second
= ObjectStart
;
141 /// TODO: We should sort these to minimize wasted space due to alignment
142 /// padding. Currently the padding is decided by the first encountered use
144 Offset
= StaticLDSSize
= alignTo(StaticLDSSize
, Alignment
);
146 StaticLDSSize
+= DL
.getTypeAllocSize(GV
.getValueType());
148 // Align LDS size to trailing, e.g. for aligning dynamic shared memory
149 LDSSize
= alignTo(StaticLDSSize
, Trailing
);
151 assert(GV
.getAddressSpace() == AMDGPUAS::REGION_ADDRESS
&&
152 "expected region address space");
154 Offset
= StaticGDSSize
= alignTo(StaticGDSSize
, Alignment
);
155 StaticGDSSize
+= DL
.getTypeAllocSize(GV
.getValueType());
157 // FIXME: Apply alignment of dynamic GDS
158 GDSSize
= StaticGDSSize
;
161 Entry
.first
->second
= Offset
;
165 std::optional
<uint32_t>
166 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function
&F
) {
167 // TODO: Would be more consistent with the abs symbols to use a range
168 MDNode
*MD
= F
.getMetadata("llvm.amdgcn.lds.kernel.id");
169 if (MD
&& MD
->getNumOperands() == 1) {
170 if (ConstantInt
*KnownSize
=
171 mdconst::extract
<ConstantInt
>(MD
->getOperand(0))) {
172 uint64_t ZExt
= KnownSize
->getZExtValue();
173 if (ZExt
<= UINT32_MAX
) {
181 std::optional
<uint32_t>
182 AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue
&GV
) {
183 if (GV
.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS
)
186 std::optional
<ConstantRange
> AbsSymRange
= GV
.getAbsoluteSymbolRange();
190 if (const APInt
*V
= AbsSymRange
->getSingleElement()) {
191 std::optional
<uint64_t> ZExt
= V
->tryZExtValue();
192 if (ZExt
&& (*ZExt
<= UINT32_MAX
)) {
200 void AMDGPUMachineFunction::setDynLDSAlign(const Function
&F
,
201 const GlobalVariable
&GV
) {
202 const Module
*M
= F
.getParent();
203 const DataLayout
&DL
= M
->getDataLayout();
204 assert(DL
.getTypeAllocSize(GV
.getValueType()).isZero());
207 DL
.getValueOrABITypeAlignment(GV
.getAlign(), GV
.getValueType());
208 if (Alignment
<= DynLDSAlign
)
211 LDSSize
= alignTo(StaticLDSSize
, Alignment
);
212 DynLDSAlign
= Alignment
;
214 // If there is a dynamic LDS variable associated with this function F, every
215 // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
216 // map to the same address. This holds because no LDS is allocated after the
217 // lowering pass if there are dynamic LDS variables present.
218 const GlobalVariable
*Dyn
= getKernelDynLDSGlobalFromFunction(F
);
220 unsigned Offset
= LDSSize
; // return this?
221 std::optional
<uint32_t> Expect
= getLDSAbsoluteAddress(*Dyn
);
222 if (!Expect
|| (Offset
!= *Expect
)) {
223 report_fatal_error("Inconsistent metadata on dynamic LDS variable");
228 void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS
) {
229 UsesDynamicLDS
= DynLDS
;
232 bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS
; }