1 //===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
12 #include "Utils/AMDGPUBaseInfo.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/CodeGen/MachineFunction.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/GlobalVariable.h"
// Forward declaration. AMDGPUSubtarget is named below only as a reference
// parameter of the AMDGPUMachineFunction constructor, for which an incomplete
// type is sufficient.
22 class AMDGPUSubtarget
;
24 class AMDGPUMachineFunction
: public MachineFunctionInfo
{
25 /// A map to keep track of local memory objects and their offsets within the
26 /// local memory space.
/// Keys are `const GlobalValue *`, mapped values are `unsigned`. The trailing
/// template argument (4) is SmallDenseMap's inline bucket count.
27 SmallDenseMap
<const GlobalValue
*, unsigned, 4> LocalMemoryObjects
;
/// Cached value handed out by getExplicitKernArgSize(); starts at 0.
30 uint64_t ExplicitKernArgSize
= 0; // Cache for this.
/// Cached value handed out by getMaxKernArgAlign().
31 Align MaxKernArgAlign
; // Cache for this.
33 /// Number of bytes in the LDS that are being used.
37 /// Number of bytes in the LDS allocated statically. This field is only used
38 /// in the instruction selector and not part of the machine function info.
39 uint32_t StaticLDSSize
= 0;
/// NOTE(review): presumably the GDS counterpart of StaticLDSSize (bytes of
/// GDS allocated statically) -- confirm against the code that writes it.
40 uint32_t StaticGDSSize
= 0;
42 /// Align for dynamic shared memory if any. Dynamic shared memory is
43 /// allocated directly after the static one, i.e., LDSSize. Need to pad
44 /// LDSSize to ensure that dynamic one is aligned accordingly.
45 /// The maximal alignment is updated during IR translation or lowering stages.
49 // Flag recording dynamic LDS usage by the kernel. Defaults to false.
50 bool UsesDynamicLDS
= false;
52 // Kernels + shaders, i.e. functions called by the hardware and not called
53 // by other functions. Reported by isEntryFunction().
54 bool IsEntryFunction
= false;
56 // Entry points called by other functions instead of directly by the hardware.
// Reported by isModuleEntryFunction().
57 bool IsModuleEntryFunction
= false;
59 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC.
// Reported by isChainFunction().
60 bool IsChainFunction
= false;
// Value reported by hasNoSignedZerosFPMath(). Defaults to false.
62 bool NoSignedZerosFPMath
= false;
64 // Function may be memory bound.
65 bool MemoryBound
= false;
67 // Kernel may need limited waves per EU for better performance.
68 bool WaveLimiter
= false;
/// Constructs this MachineFunctionInfo subclass from the IR function \p F and
/// the subtarget \p ST. Only declared here.
/// NOTE(review): presumably this is where the cached sizes and the boolean
/// flags get their real values -- confirm in the out-of-line definition.
71 AMDGPUMachineFunction(const Function
&F
, const AMDGPUSubtarget
&ST
);
/// Returns the cached ExplicitKernArgSize value without recomputing it.
73 uint64_t getExplicitKernArgSize() const {
74 return ExplicitKernArgSize
;
/// Returns the cached MaxKernArgAlign value.
77 Align
getMaxKernArgAlign() const { return MaxKernArgAlign
; }
79 uint32_t getLDSSize() const {
83 uint32_t getGDSSize() const {
/// Returns the IsEntryFunction flag: true for kernels and shaders, i.e.
/// functions called by the hardware and not by other functions.
87 bool isEntryFunction() const {
88 return IsEntryFunction
;
/// Returns the IsModuleEntryFunction flag: true for entry points that are
/// called by other functions instead of directly by the hardware.
91 bool isModuleEntryFunction() const { return IsModuleEntryFunction
; }
/// Returns the IsChainFunction flag: true for functions with the
/// amdgpu_cs_chain or amdgpu_cs_chain_preserve calling convention.
93 bool isChainFunction() const { return IsChainFunction
; }
95 // The stack is empty upon entry to this function.
// Reported as true when the function is an entry function or a chain
// function; false otherwise.
96 bool isBottomOfStack() const {
97 return isEntryFunction() || isChainFunction();
/// Returns the NoSignedZerosFPMath flag (false unless something sets it).
100 bool hasNoSignedZerosFPMath() const {
101 return NoSignedZerosFPMath
;
104 bool isMemoryBound() const {
108 bool needsWaveLimiter() const {
/// Convenience overload: forwards \p DL and \p GV to the three-argument
/// allocateLDSGlobal, supplying DynLDSAlign as the final argument, and returns
/// that call's result unchanged.
/// NOTE(review): the result is presumably the offset assigned to \p GV within
/// LDS -- confirm against the three-argument overload's definition.
112 unsigned allocateLDSGlobal(const DataLayout
&DL
, const GlobalVariable
&GV
) {
113 return allocateLDSGlobal(DL
, GV
, DynLDSAlign
);
116 unsigned allocateLDSGlobal(const DataLayout
&DL
, const GlobalVariable
&GV
,
/// Static helper yielding an optional 32-bit value for the function \p F.
/// NOTE(review): presumably the LDS kernel id read from metadata attached to
/// \p F, with std::nullopt when none is present -- confirm against the
/// out-of-line definition.
119 static std::optional
<uint32_t> getLDSKernelIdMetadata(const Function
&F
);
/// Static helper yielding an optional 32-bit value for the global \p GV.
/// NOTE(review): presumably an absolute LDS address already assigned to
/// \p GV, with std::nullopt when it has none -- confirm against the
/// out-of-line definition.
120 static std::optional
<uint32_t> getLDSAbsoluteAddress(const GlobalValue
&GV
);
/// Returns the current DynLDSAlign value, the same value the two-argument
/// allocateLDSGlobal overload passes on as its final argument.
122 Align
getDynLDSAlign() const { return DynLDSAlign
; }
/// Updates the dynamic-LDS alignment for function \p F based on the global
/// \p GV. Only declared here.
/// NOTE(review): presumably raises DynLDSAlign to \p GV's alignment when that
/// is larger -- confirm against the out-of-line definition.
124 void setDynLDSAlign(const Function
&F
, const GlobalVariable
&GV
);
/// Setter taking the new dynamic-LDS usage state in \p DynLDS. Only declared
/// here.
/// NOTE(review): presumably stores \p DynLDS into UsesDynamicLDS -- confirm.
126 void setUsesDynamicLDS(bool DynLDS
);
/// Query for dynamic LDS usage. Only declared here.
/// NOTE(review): presumably returns UsesDynamicLDS -- confirm.
128 bool isDynamicLDSUsed() const;