//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This is an AMDGPU specific replacement of the standard inliner.
/// The main purpose is to account for the fact that calls are not only
/// expensive on the AMDGPU, but much more expensive if a private memory
/// pointer is passed to a function as an argument. In this situation, we are
/// unable to eliminate private memory in the caller unless inlined and end up
/// with slow and expensive scratch access. Thus, we boost the inline threshold
/// for such a case.
//
//===----------------------------------------------------------------------===//
23 #include "llvm/Transforms/IPO.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/CallGraph.h"
26 #include "llvm/Analysis/InlineCost.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Transforms/IPO/Inliner.h"
40 #define DEBUG_TYPE "inline"
43 ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden
, cl::init(2200),
44 cl::desc("Cost of alloca argument"));
46 // If the amount of scratch memory to eliminate exceeds our ability to allocate
47 // it into registers we gain nothing by agressively inlining functions for that
49 static cl::opt
<unsigned>
50 ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden
, cl::init(256),
51 cl::desc("Maximum alloca size to use for inline cost"));
55 class AMDGPUInliner
: public LegacyInlinerBase
{
58 AMDGPUInliner() : LegacyInlinerBase(ID
) {
59 initializeAMDGPUInlinerPass(*PassRegistry::getPassRegistry());
60 Params
= getInlineParams();
63 static char ID
; // Pass identification, replacement for typeid
65 unsigned getInlineThreshold(CallSite CS
) const;
67 InlineCost
getInlineCost(CallSite CS
) override
;
69 bool runOnSCC(CallGraphSCC
&SCC
) override
;
71 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
74 TargetTransformInfoWrapperPass
*TTIWP
;
79 } // end anonymous namespace
81 char AMDGPUInliner::ID
= 0;
82 INITIALIZE_PASS_BEGIN(AMDGPUInliner
, "amdgpu-inline",
83 "AMDGPU Function Integration/Inlining", false, false)
84 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
85 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass
)
86 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass
)
87 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
88 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
89 INITIALIZE_PASS_END(AMDGPUInliner
, "amdgpu-inline",
90 "AMDGPU Function Integration/Inlining", false, false)
92 Pass
*llvm::createAMDGPUFunctionInliningPass() { return new AMDGPUInliner(); }
94 bool AMDGPUInliner::runOnSCC(CallGraphSCC
&SCC
) {
95 TTIWP
= &getAnalysis
<TargetTransformInfoWrapperPass
>();
96 return LegacyInlinerBase::runOnSCC(SCC
);
99 void AMDGPUInliner::getAnalysisUsage(AnalysisUsage
&AU
) const {
100 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
101 LegacyInlinerBase::getAnalysisUsage(AU
);
104 unsigned AMDGPUInliner::getInlineThreshold(CallSite CS
) const {
105 int Thres
= Params
.DefaultThreshold
;
107 Function
*Caller
= CS
.getCaller();
108 // Listen to the inlinehint attribute when it would increase the threshold
109 // and the caller does not need to minimize its size.
110 Function
*Callee
= CS
.getCalledFunction();
111 bool InlineHint
= Callee
&& !Callee
->isDeclaration() &&
112 Callee
->hasFnAttribute(Attribute::InlineHint
);
113 if (InlineHint
&& Params
.HintThreshold
&& Params
.HintThreshold
> Thres
114 && !Caller
->hasFnAttribute(Attribute::MinSize
))
115 Thres
= Params
.HintThreshold
.getValue();
117 const DataLayout
&DL
= Caller
->getParent()->getDataLayout();
119 return (unsigned)Thres
;
121 // If we have a pointer to private array passed into a function
122 // it will not be optimized out, leaving scratch usage.
123 // Increase the inline threshold to allow inliniting in this case.
124 uint64_t AllocaSize
= 0;
125 SmallPtrSet
<const AllocaInst
*, 8> AIVisited
;
126 for (Value
*PtrArg
: CS
.args()) {
127 Type
*Ty
= PtrArg
->getType();
128 if (!Ty
->isPointerTy() ||
129 Ty
->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS
)
131 PtrArg
= GetUnderlyingObject(PtrArg
, DL
);
132 if (const AllocaInst
*AI
= dyn_cast
<AllocaInst
>(PtrArg
)) {
133 if (!AI
->isStaticAlloca() || !AIVisited
.insert(AI
).second
)
135 AllocaSize
+= DL
.getTypeAllocSize(AI
->getAllocatedType());
136 // If the amount of stack memory is excessive we will not be able
137 // to get rid of the scratch anyway, bail out.
138 if (AllocaSize
> ArgAllocaCutoff
) {
145 Thres
+= ArgAllocaCost
;
147 return (unsigned)Thres
;
150 // Check if call is just a wrapper around another call.
151 // In this case we only have call and ret instructions.
152 static bool isWrapperOnlyCall(CallSite CS
) {
153 Function
*Callee
= CS
.getCalledFunction();
154 if (!Callee
|| Callee
->size() != 1)
156 const BasicBlock
&BB
= Callee
->getEntryBlock();
157 if (const Instruction
*I
= BB
.getFirstNonPHI()) {
158 if (!isa
<CallInst
>(I
)) {
161 if (isa
<ReturnInst
>(*std::next(I
->getIterator()))) {
162 LLVM_DEBUG(dbgs() << " Wrapper only call detected: "
163 << Callee
->getName() << '\n');
170 InlineCost
AMDGPUInliner::getInlineCost(CallSite CS
) {
171 Function
*Callee
= CS
.getCalledFunction();
172 Function
*Caller
= CS
.getCaller();
173 TargetTransformInfo
&TTI
= TTIWP
->getTTI(*Callee
);
175 if (!Callee
|| Callee
->isDeclaration())
176 return llvm::InlineCost::getNever("undefined callee");
179 return llvm::InlineCost::getNever("noinline");
181 if (!TTI
.areInlineCompatible(Caller
, Callee
))
182 return llvm::InlineCost::getNever("incompatible");
184 if (CS
.hasFnAttr(Attribute::AlwaysInline
)) {
185 if (isInlineViable(*Callee
))
186 return llvm::InlineCost::getAlways("alwaysinline viable");
187 return llvm::InlineCost::getNever("alwaysinline unviable");
190 if (isWrapperOnlyCall(CS
))
191 return llvm::InlineCost::getAlways("wrapper-only call");
193 InlineParams LocalParams
= Params
;
194 LocalParams
.DefaultThreshold
= (int)getInlineThreshold(CS
);
195 bool RemarksEnabled
= false;
196 const auto &BBs
= Caller
->getBasicBlockList();
198 auto DI
= OptimizationRemark(DEBUG_TYPE
, "", DebugLoc(), &BBs
.front());
200 RemarksEnabled
= true;
203 OptimizationRemarkEmitter
ORE(Caller
);
204 std::function
<AssumptionCache
&(Function
&)> GetAssumptionCache
=
205 [this](Function
&F
) -> AssumptionCache
& {
206 return ACT
->getAssumptionCache(F
);
209 return llvm::getInlineCost(CS
, Callee
, LocalParams
, TTI
, GetAssumptionCache
,
210 None
, PSI
, RemarksEnabled
? &ORE
: nullptr);