1 //===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the KernelInfoPrinter class used to emit remarks about
10 // function properties from a GPU kernel.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/KernelInfo.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
18 #include "llvm/Analysis/TargetTransformInfo.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/Metadata.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/PassManager.h"
28 #define DEBUG_TYPE "kernel-info"
// NOTE(review): the opening of the enclosing scope is not visible in this
// excerpt. The out-of-line definitions further down (KernelInfo::updateForBB,
// KernelInfo::emitKernelInfo) show that these declarations belong to
// KernelInfo.
32 /// Data structure holding function info for kernels.
// Folds the instructions of one basic block into the counters below and
// emits the per-instruction remarks through ORE.
34 void updateForBB(const BasicBlock
&BB
, OptimizationRemarkEmitter
&ORE
);
// Entry point used by KernelInfoPrinter::run. The rest of this parameter list
// is not visible here; the call site passes a third argument.
37 static void emitKernelInfo(Function
&F
, FunctionAnalysisManager
&FAM
,
40 /// Whether the function has external linkage and is not a kernel function.
41 bool ExternalNotKernel
= false;
// (name, value) pairs. emitKernelInfo fills this from the
// "omp_target_num_teams" / "omp_target_thread_limit" function attributes and
// from TargetTransformInfo::collectKernelLaunchBounds, then reports each pair
// as its own remark.
44 SmallVector
<std::pair
<StringRef
, int64_t>> LaunchBounds
;
46 /// The number of alloca instructions inside the function, the number of those
47 /// with allocation sizes that cannot be determined at compile time, and the
48 /// sum of the sizes that can be.
50 /// With the current implementation for at least some GPU archs,
51 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
52 /// case the implementation changes.
// NOTE(review): the comment above describes three counters, but only two
// declarations are visible here; emitKernelInfo also reports `Allocas`.
54 int64_t AllocasDyn
= 0;
55 int64_t AllocasStaticSizeSum
= 0;
57 /// Number of direct/indirect calls (anything derived from CallBase).
58 int64_t DirectCalls
= 0;
59 int64_t IndirectCalls
= 0;
61 /// Number of direct calls made from this function to other functions
62 /// defined in this module.
63 int64_t DirectCallsToDefinedFunctions
= 0;
65 /// Number of direct calls to inline assembly.
66 int64_t InlineAssemblyCalls
= 0;
68 /// Number of calls of type InvokeInst.
// NOTE(review): the declaration this comment documents is not visible here;
// emitKernelInfo reports a property named `Invokes`.
71 /// Target-specific flat address space.
// No in-class initializer: emitKernelInfo assigns this from
// TargetTransformInfo::getFlatAddressSpace() before any block is visited.
72 unsigned FlatAddrspace
;
74 /// Number of flat address space memory accesses (via load, store, etc.).
75 int64_t FlatAddrspaceAccesses
= 0;
78 } // end anonymous namespace
/// Append a quoted identification of the value \p V to the remark \p R.
///
/// \param R    Remark being built; text is streamed into it.
/// \param M    Module passed to printAsOperand when printing \p V.
/// \param V    The callee (or function) to identify.
/// \param Kind Optional label; defaults to the empty string.
///
/// NOTE(review): several lines of this function are not visible in this
/// excerpt (the statement guarded by the isArtificial() check, the guard
/// around the printAsOperand fallback, and the use of \p Kind) -- confirm
/// against the full file before relying on the details below.
80 static void identifyCallee(OptimizationRemark
&R
, const Module
*M
,
81 const Value
*V
, StringRef Kind
= "") {
82 SmallString
<100> Name
; // might be function name or asm expression
// When V is a Function that carries a debug-info subprogram, use the
// subprogram's name.
83 if (const Function
*F
= dyn_cast
<Function
>(V
)) {
84 if (auto *SubProgram
= F
->getSubprogram()) {
85 if (SubProgram
->isArtificial())
87 Name
= SubProgram
->getName();
// Print V as an operand (without its type) into Name.
91 raw_svector_ostream
OS(Name
);
92 V
->printAsOperand(OS
, /*PrintType=*/false, M
);
// The name is always emitted wrapped in single quotes.
96 R
<< "'" << Name
<< "'";
/// Append an identification of the function \p F to the remark \p R.
///
/// Thin wrapper over identifyCallee that supplies F's parent module, F itself
/// as the value to identify, and the fixed kind label "function".
99 static void identifyFunction(OptimizationRemark
&R
, const Function
&F
) {
100 identifyCallee(R
, F
.getParent(), &F
, "function");
/// Build the "Alloca" remark describing one alloca instruction.
///
/// \param ORE        Remark emitter (its use is on lines not visible here).
/// \param Caller     Function that contains \p Alloca.
/// \param Alloca     The alloca being reported.
/// \param StaticSize Size in bytes reported in the remark text.
///
/// NOTE(review): the declarations of DbgName and Loc, the guard around the
/// debug-record lookup, the branches selecting between the message fragments,
/// and the remainder of the OptimizationRemark constructor call are not
/// visible in this excerpt.
103 static void remarkAlloca(OptimizationRemarkEmitter
&ORE
, const Function
&Caller
,
104 const AllocaInst
&Alloca
,
105 TypeSize::ScalarTy StaticSize
) {
109 bool Artificial
= false;
// Look up the debug declare records attached to the alloca. The const_cast is
// presumably needed because findDVRDeclares takes a non-const pointer --
// TODO confirm.
110 auto DVRs
= findDVRDeclares(&const_cast<AllocaInst
&>(Alloca
));
// Take the source-level variable name, debug location and artificial flag
// from the first record.
112 const DbgVariableRecord
&DVR
= **DVRs
.begin();
113 DbgName
= DVR
.getVariable()->getName();
114 Loc
= DVR
.getDebugLoc();
115 Artificial
= DVR
.Variable
->isArtificial();
// The remark is located at Loc rather than at the alloca instruction.
117 OptimizationRemark
R(DEBUG_TYPE
, "Alloca", DiagnosticLocation(Loc
),
120 identifyFunction(R
, Caller
);
// Render the alloca's IR value name (without its type) for the message.
124 SmallString
<20> ValName
;
125 raw_svector_ostream
OS(ValName
);
126 Alloca
.printAsOperand(OS
, /*PrintType=*/false, Caller
.getParent());
127 R
<< "alloca ('" << ValName
<< "') ";
// Mention the source variable name when one was found.
128 if (!DbgName
.empty())
129 R
<< "for '" << DbgName
<< "' ";
131 R
<< "without debug info ";
134 R
<< "static size of " << itostr(StaticSize
) << " bytes";
/// Build the remark describing one call site.
///
/// \param ORE        Remark emitter (its use is on lines not visible here).
/// \param Caller     Function that contains \p Call.
/// \param Call       The call site being reported; also the remark location.
/// \param CallKind   Human-readable description placed in the message text.
/// \param RemarkKind Remark name passed to the OptimizationRemark.
///
/// Visible message shape: <caller identification>, <CallKind>, callee is
/// <callee identification>.
141 static void remarkCall(OptimizationRemarkEmitter
&ORE
, const Function
&Caller
,
142 const CallBase
&Call
, StringRef CallKind
,
143 StringRef RemarkKind
) {
145 OptimizationRemark
R(DEBUG_TYPE
, RemarkKind
, &Call
);
147 identifyFunction(R
, Caller
);
148 R
<< ", " << CallKind
<< ", callee is ";
// No kind label is passed for the callee, so identifyCallee's default empty
// Kind applies.
149 identifyCallee(R
, Caller
.getParent(), Call
.getCalledOperand());
/// Build the "FlatAddrspaceAccess" remark for one instruction.
///
/// \param ORE    Remark emitter (its use is on lines not visible here).
/// \param Caller Function that contains \p Inst.
/// \param Inst   The accessing instruction; also the remark location.
///
/// NOTE(review): the `else` that presumably separates the intrinsic and
/// plain-instruction descriptions is on a line not visible in this excerpt.
154 static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter
&ORE
,
155 const Function
&Caller
,
156 const Instruction
&Inst
) {
158 OptimizationRemark
R(DEBUG_TYPE
, "FlatAddrspaceAccess", &Inst
);
160 identifyFunction(R
, Caller
);
// Intrinsic calls are described by the name of the called function; the
// other visible description uses the instruction's opcode name.
161 if (const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(&Inst
)) {
162 R
<< ", '" << II
->getCalledFunction()->getName() << "' call";
164 R
<< ", '" << Inst
.getOpcodeName() << "' instruction";
// Instructions that produce a value additionally get their IR value name.
166 if (!Inst
.getType()->isVoidTy()) {
167 SmallString
<20> Name
;
168 raw_svector_ostream
OS(Name
);
169 Inst
.printAsOperand(OS
, /*PrintType=*/false, Caller
.getParent());
170 R
<< " ('" << Name
<< "')";
172 R
<< " accesses memory in flat address space";
/// Fold the instructions of \p BB into this KernelInfo's counters and emit a
/// remark for each alloca, call, and flat-address-space access found.
///
/// \param BB  Basic block to scan; debug instructions are skipped via
///            instructionsWithoutDebug().
/// \param ORE Emitter handed to the remark* helpers.
///
/// NOTE(review): several lines are not visible in this excerpt (some counter
/// increments, `else` lines, closing braces, and the start of the statement
/// that ends on the line tagged 189). Comments below describe only what the
/// visible lines show.
177 void KernelInfo::updateForBB(const BasicBlock
&BB
,
178 OptimizationRemarkEmitter
&ORE
) {
179 const Function
&F
= *BB
.getParent();
180 const Module
&M
= *F
.getParent();
181 const DataLayout
&DL
= M
.getDataLayout();
182 for (const Instruction
&I
: BB
.instructionsWithoutDebug()) {
// --- Allocas ---------------------------------------------------------------
183 if (const AllocaInst
*Alloca
= dyn_cast
<AllocaInst
>(&I
)) {
// StaticSize stays 0 unless getAllocationSize() yields a value.
185 TypeSize::ScalarTy StaticSize
= 0;
186 if (std::optional
<TypeSize
> Size
= Alloca
->getAllocationSize(DL
)) {
187 StaticSize
= Size
->getFixedValue();
// Tail of a statement whose beginning is not visible; it involves the
// maximum int64_t value cast to TypeSize::ScalarTy -- presumably a range
// check before the value is added to an int64_t sum; TODO confirm.
189 (TypeSize::ScalarTy
)std::numeric_limits
<int64_t>::max());
190 AllocasStaticSizeSum
+= StaticSize
;
194 remarkAlloca(ORE
, F
, *Alloca
, StaticSize
);
// --- Calls (anything derived from CallBase) --------------------------------
195 } else if (const CallBase
*Call
= dyn_cast
<CallBase
>(&I
)) {
// CallKind is the human-readable description and RemarkKind the remark name;
// both are built up piece by piece as the call is classified.
196 SmallString
<40> CallKind
;
197 SmallString
<40> RemarkKind
;
198 if (Call
->isIndirectCall()) {
200 CallKind
+= "indirect";
201 RemarkKind
+= "Indirect";
204 CallKind
+= "direct";
205 RemarkKind
+= "Direct";
207 if (isa
<InvokeInst
>(Call
)) {
209 CallKind
+= " invoke";
210 RemarkKind
+= "Invoke";
213 RemarkKind
+= "Call";
// Direct calls are refined further: callees that are neither intrinsics nor
// mere declarations count as calls to defined functions; inline assembly is
// counted separately.
215 if (!Call
->isIndirectCall()) {
216 if (const Function
*Callee
= Call
->getCalledFunction()) {
217 if (!Callee
->isIntrinsic() && !Callee
->isDeclaration()) {
218 ++DirectCallsToDefinedFunctions
;
219 CallKind
+= " to defined function";
220 RemarkKind
+= "ToDefinedFunction";
222 } else if (Call
->isInlineAsm()) {
223 ++InlineAssemblyCalls
;
224 CallKind
+= " to inline assembly";
225 RemarkKind
+= "ToInlineAssembly";
228 remarkCall(ORE
, F
, *Call
, CallKind
, RemarkKind
);
// Memory intrinsics are also checked for flat-address-space access. The
// destination is checked first; the source of a transfer is only checked in
// the else-branch, so one intrinsic is counted at most once.
229 if (const AnyMemIntrinsic
*MI
= dyn_cast
<AnyMemIntrinsic
>(Call
)) {
230 if (MI
->getDestAddressSpace() == FlatAddrspace
) {
231 ++FlatAddrspaceAccesses
;
232 remarkFlatAddrspaceAccess(ORE
, F
, I
);
233 } else if (const AnyMemTransferInst
*MT
=
234 dyn_cast
<AnyMemTransferInst
>(MI
)) {
235 if (MT
->getSourceAddressSpace() == FlatAddrspace
) {
236 ++FlatAddrspaceAccesses
;
237 remarkFlatAddrspaceAccess(ORE
, F
, I
);
// --- Plain memory instructions ---------------------------------------------
// Load, store, atomicrmw and cmpxchg are handled identically: compare the
// pointer operand's address space against FlatAddrspace, then count and
// remark on a match.
241 } else if (const LoadInst
*Load
= dyn_cast
<LoadInst
>(&I
)) {
242 if (Load
->getPointerAddressSpace() == FlatAddrspace
) {
243 ++FlatAddrspaceAccesses
;
244 remarkFlatAddrspaceAccess(ORE
, F
, I
);
246 } else if (const StoreInst
*Store
= dyn_cast
<StoreInst
>(&I
)) {
247 if (Store
->getPointerAddressSpace() == FlatAddrspace
) {
248 ++FlatAddrspaceAccesses
;
249 remarkFlatAddrspaceAccess(ORE
, F
, I
);
251 } else if (const AtomicRMWInst
*At
= dyn_cast
<AtomicRMWInst
>(&I
)) {
252 if (At
->getPointerAddressSpace() == FlatAddrspace
) {
253 ++FlatAddrspaceAccesses
;
254 remarkFlatAddrspaceAccess(ORE
, F
, I
);
256 } else if (const AtomicCmpXchgInst
*At
= dyn_cast
<AtomicCmpXchgInst
>(&I
)) {
257 if (At
->getPointerAddressSpace() == FlatAddrspace
) {
258 ++FlatAddrspaceAccesses
;
259 remarkFlatAddrspaceAccess(ORE
, F
, I
);
/// Build a remark reporting one named integer property of the function \p F.
///
/// \param ORE   Remark emitter (its use is on lines not visible here).
/// \param F     Function the property belongs to; also the remark location.
/// \param Name  Property name; used both as the remark name and in the text.
/// \param Value Property value, rendered in decimal via itostr.
///
/// Visible message shape: <function identification>, <Name> = <Value>.
265 static void remarkProperty(OptimizationRemarkEmitter
&ORE
, const Function
&F
,
266 StringRef Name
, int64_t Value
) {
268 OptimizationRemark
R(DEBUG_TYPE
, Name
, &F
);
270 identifyFunction(R
, F
);
271 R
<< ", " << Name
<< " = " << itostr(Value
);
/// Read the function attribute \p Name of \p F as an integer.
///
/// Returns the result of F.getFnAttributeAsParsedInteger(Name) when the
/// attribute is present.
///
/// NOTE(review): the declaration of the Name parameter and the statement
/// executed when the attribute is absent are on lines not visible in this
/// excerpt; given the std::optional return type the latter is presumably an
/// empty optional -- TODO confirm.
276 static std::optional
<int64_t> parseFnAttrAsInteger(Function
&F
,
278 if (!F
.hasFnAttribute(Name
))
280 return F
.getFnAttributeAsParsedInteger(Name
);
/// Collect the kernel-info properties of \p F and emit them as remarks.
///
/// \param F   Function to analyze.
/// \param FAM Analysis manager supplying TargetIRAnalysis and
///            OptimizationRemarkEmitterAnalysis results for \p F.
///
/// NOTE(review): the remaining parameter(s) and the declaration of the local
/// `KI` object are on lines not visible in this excerpt.
283 void KernelInfo::emitKernelInfo(Function
&F
, FunctionAnalysisManager
&FAM
,
// The flat address space must be known before the blocks are scanned, since
// updateForBB compares pointer address spaces against it.
286 TargetTransformInfo
&TheTTI
= FAM
.getResult
<TargetIRAnalysis
>(F
);
287 KI
.FlatAddrspace
= TheTTI
.getFlatAddressSpace();
289 // Record function properties.
290 KI
.ExternalNotKernel
= F
.hasExternalLinkage() && !F
.hasKernelCallingConv();
// Launch bounds come from two sources: the two OpenMP function attributes
// (recorded only when parseFnAttrAsInteger yields a value), followed by
// whatever the target appends via collectKernelLaunchBounds.
291 for (StringRef Name
: {"omp_target_num_teams", "omp_target_thread_limit"}) {
292 if (auto Val
= parseFnAttrAsInteger(F
, Name
))
293 KI
.LaunchBounds
.push_back({Name
, *Val
});
295 TheTTI
.collectKernelLaunchBounds(F
, KI
.LaunchBounds
);
// Scan every basic block; updateForBB updates the counters and emits the
// per-instruction remarks.
297 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
298 for (const auto &BB
: F
)
299 KI
.updateForBB(BB
, ORE
);
// Emit one remark per property. The macro stringizes the member name so the
// reported property name always matches the KernelInfo field it reads.
301 #define REMARK_PROPERTY(PROP_NAME) \
302 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
303 REMARK_PROPERTY(ExternalNotKernel
);
// Launch bounds carry their own names, so they bypass the macro.
304 for (auto LB
: KI
.LaunchBounds
)
305 remarkProperty(ORE
, F
, LB
.first
, LB
.second
);
306 REMARK_PROPERTY(Allocas
);
307 REMARK_PROPERTY(AllocasStaticSizeSum
);
308 REMARK_PROPERTY(AllocasDyn
);
309 REMARK_PROPERTY(DirectCalls
);
310 REMARK_PROPERTY(IndirectCalls
);
311 REMARK_PROPERTY(DirectCallsToDefinedFunctions
);
312 REMARK_PROPERTY(InlineAssemblyCalls
);
313 REMARK_PROPERTY(Invokes
);
314 REMARK_PROPERTY(FlatAddrspaceAccesses
);
315 #undef REMARK_PROPERTY
/// Pass entry point: emit kernel-info remarks for \p F when remarks for this
/// pass are enabled.
///
/// \param F  Function the pass is run on.
/// \param AM Function analysis manager forwarded to emitKernelInfo.
/// \returns PreservedAnalyses::all().
///
/// NOTE(review): `TM` is not declared in the visible lines; presumably a
/// member of KernelInfoPrinter -- TODO confirm.
320 PreservedAnalyses
KernelInfoPrinter::run(Function
&F
,
321 FunctionAnalysisManager
&AM
) {
322 // Skip it if remarks are not enabled as it will do nothing useful.
323 if (F
.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE
))
324 KernelInfo::emitKernelInfo(F
, AM
, TM
);
325 return PreservedAnalyses::all();