1 //===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This is AMDGPU specific replacement of the standard inliner.
11 /// The main purpose is to account for the fact that calls are not only
12 /// on the AMDGPU, but much more expensive if a private memory pointer is
13 /// passed to a function as an argument. In this situation, we are unable to
14 /// eliminate private memory in the caller unless inlined and end up with slow
15 /// and expensive scratch access. Thus, we boost the inline threshold for such
16 /// functions.
18 //===----------------------------------------------------------------------===//
22 #include "llvm/Transforms/IPO.h"
23 #include "llvm/Analysis/AssumptionCache.h"
24 #include "llvm/Analysis/CallGraph.h"
25 #include "llvm/Analysis/InlineCost.h"
26 #include "llvm/Analysis/ValueTracking.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Transforms/IPO/Inliner.h"
39 #define DEBUG_TYPE "inline"
42 ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden
, cl::init(2200),
43 cl::desc("Cost of alloca argument"));
45 // If the amount of scratch memory to eliminate exceeds our ability to allocate
46 // it into registers we gain nothing by aggressively inlining functions for that
48 static cl::opt
<unsigned>
49 ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden
, cl::init(256),
50 cl::desc("Maximum alloca size to use for inline cost"));
54 class AMDGPUInliner
: public LegacyInlinerBase
{
57 AMDGPUInliner() : LegacyInlinerBase(ID
) {
58 initializeAMDGPUInlinerPass(*PassRegistry::getPassRegistry());
59 Params
= getInlineParams();
62 static char ID
; // Pass identification, replacement for typeid
64 unsigned getInlineThreshold(CallSite CS
) const;
66 InlineCost
getInlineCost(CallSite CS
) override
;
68 bool runOnSCC(CallGraphSCC
&SCC
) override
;
70 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
73 TargetTransformInfoWrapperPass
*TTIWP
;
78 } // end anonymous namespace
80 char AMDGPUInliner::ID
= 0;
81 INITIALIZE_PASS_BEGIN(AMDGPUInliner
, "amdgpu-inline",
82 "AMDGPU Function Integration/Inlining", false, false)
83 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
84 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass
)
85 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass
)
86 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
87 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
88 INITIALIZE_PASS_END(AMDGPUInliner
, "amdgpu-inline",
89 "AMDGPU Function Integration/Inlining", false, false)
91 Pass
*llvm::createAMDGPUFunctionInliningPass() { return new AMDGPUInliner(); }
93 bool AMDGPUInliner::runOnSCC(CallGraphSCC
&SCC
) {
94 TTIWP
= &getAnalysis
<TargetTransformInfoWrapperPass
>();
95 return LegacyInlinerBase::runOnSCC(SCC
);
98 void AMDGPUInliner::getAnalysisUsage(AnalysisUsage
&AU
) const {
99 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
100 LegacyInlinerBase::getAnalysisUsage(AU
);
103 unsigned AMDGPUInliner::getInlineThreshold(CallSite CS
) const {
104 int Thres
= Params
.DefaultThreshold
;
106 Function
*Caller
= CS
.getCaller();
107 // Listen to the inlinehint attribute when it would increase the threshold
108 // and the caller does not need to minimize its size.
109 Function
*Callee
= CS
.getCalledFunction();
110 bool InlineHint
= Callee
&& !Callee
->isDeclaration() &&
111 Callee
->hasFnAttribute(Attribute::InlineHint
);
112 if (InlineHint
&& Params
.HintThreshold
&& Params
.HintThreshold
> Thres
113 && !Caller
->hasFnAttribute(Attribute::MinSize
))
114 Thres
= Params
.HintThreshold
.getValue();
116 const DataLayout
&DL
= Caller
->getParent()->getDataLayout();
118 return (unsigned)Thres
;
120 // If we have a pointer to private array passed into a function
121 // it will not be optimized out, leaving scratch usage.
122 // Increase the inline threshold to allow inliniting in this case.
123 uint64_t AllocaSize
= 0;
124 SmallPtrSet
<const AllocaInst
*, 8> AIVisited
;
125 for (Value
*PtrArg
: CS
.args()) {
126 Type
*Ty
= PtrArg
->getType();
127 if (!Ty
->isPointerTy() ||
128 Ty
->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS
)
130 PtrArg
= GetUnderlyingObject(PtrArg
, DL
);
131 if (const AllocaInst
*AI
= dyn_cast
<AllocaInst
>(PtrArg
)) {
132 if (!AI
->isStaticAlloca() || !AIVisited
.insert(AI
).second
)
134 AllocaSize
+= DL
.getTypeAllocSize(AI
->getAllocatedType());
135 // If the amount of stack memory is excessive we will not be able
136 // to get rid of the scratch anyway, bail out.
137 if (AllocaSize
> ArgAllocaCutoff
) {
144 Thres
+= ArgAllocaCost
;
146 return (unsigned)Thres
;
149 // Check if call is just a wrapper around another call.
150 // In this case we only have call and ret instructions.
151 static bool isWrapperOnlyCall(CallSite CS
) {
152 Function
*Callee
= CS
.getCalledFunction();
153 if (!Callee
|| Callee
->size() != 1)
155 const BasicBlock
&BB
= Callee
->getEntryBlock();
156 if (const Instruction
*I
= BB
.getFirstNonPHI()) {
157 if (!isa
<CallInst
>(I
)) {
160 if (isa
<ReturnInst
>(*std::next(I
->getIterator()))) {
161 LLVM_DEBUG(dbgs() << " Wrapper only call detected: "
162 << Callee
->getName() << '\n');
169 InlineCost
AMDGPUInliner::getInlineCost(CallSite CS
) {
170 Function
*Callee
= CS
.getCalledFunction();
171 Function
*Caller
= CS
.getCaller();
172 TargetTransformInfo
&TTI
= TTIWP
->getTTI(*Callee
);
174 if (!Callee
|| Callee
->isDeclaration())
175 return llvm::InlineCost::getNever("undefined callee");
178 return llvm::InlineCost::getNever("noinline");
180 if (!TTI
.areInlineCompatible(Caller
, Callee
))
181 return llvm::InlineCost::getNever("incompatible");
183 if (CS
.hasFnAttr(Attribute::AlwaysInline
)) {
184 auto IsViable
= isInlineViable(*Callee
);
186 return llvm::InlineCost::getAlways("alwaysinline viable");
187 return llvm::InlineCost::getNever(IsViable
.message
);
190 if (isWrapperOnlyCall(CS
))
191 return llvm::InlineCost::getAlways("wrapper-only call");
193 InlineParams LocalParams
= Params
;
194 LocalParams
.DefaultThreshold
= (int)getInlineThreshold(CS
);
195 bool RemarksEnabled
= false;
196 const auto &BBs
= Caller
->getBasicBlockList();
198 auto DI
= OptimizationRemark(DEBUG_TYPE
, "", DebugLoc(), &BBs
.front());
200 RemarksEnabled
= true;
203 OptimizationRemarkEmitter
ORE(Caller
);
204 std::function
<AssumptionCache
&(Function
&)> GetAssumptionCache
=
205 [this](Function
&F
) -> AssumptionCache
& {
206 return ACT
->getAssumptionCache(F
);
209 return llvm::getInlineCost(CS
, Callee
, LocalParams
, TTI
, GetAssumptionCache
,
210 None
, PSI
, RemarksEnabled
? &ORE
: nullptr);