[clang][bytecode][NFC] Only get expr when checking for UB (#125397)
[llvm-project.git] / llvm / lib / Analysis / KernelInfo.cpp
blob9c8f8699401ce397141e12ca4cc19135f823eb7d
1 //===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the KernelInfoPrinter class used to emit remarks about
10 // function properties from a GPU kernel.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/KernelInfo.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
18 #include "llvm/Analysis/TargetTransformInfo.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/Metadata.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/PassManager.h"
26 using namespace llvm;
28 #define DEBUG_TYPE "kernel-info"
30 namespace {
32 /// Data structure holding function info for kernels.
33 class KernelInfo {
34 void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
36 public:
37 static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
38 TargetMachine *TM);
40 /// Whether the function has external linkage and is not a kernel function.
41 bool ExternalNotKernel = false;
43 /// Launch bounds.
44 SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;
46 /// The number of alloca instructions inside the function, the number of those
47 /// with allocation sizes that cannot be determined at compile time, and the
48 /// sum of the sizes that can be.
49 ///
50 /// With the current implementation for at least some GPU archs,
51 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
52 /// case the implementation changes.
53 int64_t Allocas = 0;
54 int64_t AllocasDyn = 0;
55 int64_t AllocasStaticSizeSum = 0;
57 /// Number of direct/indirect calls (anything derived from CallBase).
58 int64_t DirectCalls = 0;
59 int64_t IndirectCalls = 0;
61 /// Number of direct calls made from this function to other functions
62 /// defined in this module.
63 int64_t DirectCallsToDefinedFunctions = 0;
65 /// Number of direct calls to inline assembly.
66 int64_t InlineAssemblyCalls = 0;
68 /// Number of calls of type InvokeInst.
69 int64_t Invokes = 0;
71 /// Target-specific flat address space.
72 unsigned FlatAddrspace;
74 /// Number of flat address space memory accesses (via load, store, etc.).
75 int64_t FlatAddrspaceAccesses = 0;
78 } // end anonymous namespace
80 static void identifyCallee(OptimizationRemark &R, const Module *M,
81 const Value *V, StringRef Kind = "") {
82 SmallString<100> Name; // might be function name or asm expression
83 if (const Function *F = dyn_cast<Function>(V)) {
84 if (auto *SubProgram = F->getSubprogram()) {
85 if (SubProgram->isArtificial())
86 R << "artificial ";
87 Name = SubProgram->getName();
90 if (Name.empty()) {
91 raw_svector_ostream OS(Name);
92 V->printAsOperand(OS, /*PrintType=*/false, M);
94 if (!Kind.empty())
95 R << Kind << " ";
96 R << "'" << Name << "'";
99 static void identifyFunction(OptimizationRemark &R, const Function &F) {
100 identifyCallee(R, F.getParent(), &F, "function");
103 static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
104 const AllocaInst &Alloca,
105 TypeSize::ScalarTy StaticSize) {
106 ORE.emit([&] {
107 StringRef DbgName;
108 DebugLoc Loc;
109 bool Artificial = false;
110 auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
111 if (!DVRs.empty()) {
112 const DbgVariableRecord &DVR = **DVRs.begin();
113 DbgName = DVR.getVariable()->getName();
114 Loc = DVR.getDebugLoc();
115 Artificial = DVR.Variable->isArtificial();
117 OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
118 Alloca.getParent());
119 R << "in ";
120 identifyFunction(R, Caller);
121 R << ", ";
122 if (Artificial)
123 R << "artificial ";
124 SmallString<20> ValName;
125 raw_svector_ostream OS(ValName);
126 Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
127 R << "alloca ('" << ValName << "') ";
128 if (!DbgName.empty())
129 R << "for '" << DbgName << "' ";
130 else
131 R << "without debug info ";
132 R << "with ";
133 if (StaticSize)
134 R << "static size of " << itostr(StaticSize) << " bytes";
135 else
136 R << "dynamic size";
137 return R;
141 static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
142 const CallBase &Call, StringRef CallKind,
143 StringRef RemarkKind) {
144 ORE.emit([&] {
145 OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
146 R << "in ";
147 identifyFunction(R, Caller);
148 R << ", " << CallKind << ", callee is ";
149 identifyCallee(R, Caller.getParent(), Call.getCalledOperand());
150 return R;
154 static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
155 const Function &Caller,
156 const Instruction &Inst) {
157 ORE.emit([&] {
158 OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
159 R << "in ";
160 identifyFunction(R, Caller);
161 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
162 R << ", '" << II->getCalledFunction()->getName() << "' call";
163 } else {
164 R << ", '" << Inst.getOpcodeName() << "' instruction";
166 if (!Inst.getType()->isVoidTy()) {
167 SmallString<20> Name;
168 raw_svector_ostream OS(Name);
169 Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
170 R << " ('" << Name << "')";
172 R << " accesses memory in flat address space";
173 return R;
177 void KernelInfo::updateForBB(const BasicBlock &BB,
178 OptimizationRemarkEmitter &ORE) {
179 const Function &F = *BB.getParent();
180 const Module &M = *F.getParent();
181 const DataLayout &DL = M.getDataLayout();
182 for (const Instruction &I : BB.instructionsWithoutDebug()) {
183 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
184 ++Allocas;
185 TypeSize::ScalarTy StaticSize = 0;
186 if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
187 StaticSize = Size->getFixedValue();
188 assert(StaticSize <=
189 (TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
190 AllocasStaticSizeSum += StaticSize;
191 } else {
192 ++AllocasDyn;
194 remarkAlloca(ORE, F, *Alloca, StaticSize);
195 } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
196 SmallString<40> CallKind;
197 SmallString<40> RemarkKind;
198 if (Call->isIndirectCall()) {
199 ++IndirectCalls;
200 CallKind += "indirect";
201 RemarkKind += "Indirect";
202 } else {
203 ++DirectCalls;
204 CallKind += "direct";
205 RemarkKind += "Direct";
207 if (isa<InvokeInst>(Call)) {
208 ++Invokes;
209 CallKind += " invoke";
210 RemarkKind += "Invoke";
211 } else {
212 CallKind += " call";
213 RemarkKind += "Call";
215 if (!Call->isIndirectCall()) {
216 if (const Function *Callee = Call->getCalledFunction()) {
217 if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
218 ++DirectCallsToDefinedFunctions;
219 CallKind += " to defined function";
220 RemarkKind += "ToDefinedFunction";
222 } else if (Call->isInlineAsm()) {
223 ++InlineAssemblyCalls;
224 CallKind += " to inline assembly";
225 RemarkKind += "ToInlineAssembly";
228 remarkCall(ORE, F, *Call, CallKind, RemarkKind);
229 if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
230 if (MI->getDestAddressSpace() == FlatAddrspace) {
231 ++FlatAddrspaceAccesses;
232 remarkFlatAddrspaceAccess(ORE, F, I);
233 } else if (const AnyMemTransferInst *MT =
234 dyn_cast<AnyMemTransferInst>(MI)) {
235 if (MT->getSourceAddressSpace() == FlatAddrspace) {
236 ++FlatAddrspaceAccesses;
237 remarkFlatAddrspaceAccess(ORE, F, I);
241 } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
242 if (Load->getPointerAddressSpace() == FlatAddrspace) {
243 ++FlatAddrspaceAccesses;
244 remarkFlatAddrspaceAccess(ORE, F, I);
246 } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
247 if (Store->getPointerAddressSpace() == FlatAddrspace) {
248 ++FlatAddrspaceAccesses;
249 remarkFlatAddrspaceAccess(ORE, F, I);
251 } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
252 if (At->getPointerAddressSpace() == FlatAddrspace) {
253 ++FlatAddrspaceAccesses;
254 remarkFlatAddrspaceAccess(ORE, F, I);
256 } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
257 if (At->getPointerAddressSpace() == FlatAddrspace) {
258 ++FlatAddrspaceAccesses;
259 remarkFlatAddrspaceAccess(ORE, F, I);
265 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
266 StringRef Name, int64_t Value) {
267 ORE.emit([&] {
268 OptimizationRemark R(DEBUG_TYPE, Name, &F);
269 R << "in ";
270 identifyFunction(R, F);
271 R << ", " << Name << " = " << itostr(Value);
272 return R;
276 static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
277 StringRef Name) {
278 if (!F.hasFnAttribute(Name))
279 return std::nullopt;
280 return F.getFnAttributeAsParsedInteger(Name);
283 void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
284 TargetMachine *TM) {
285 KernelInfo KI;
286 TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F);
287 KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
289 // Record function properties.
290 KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
291 for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
292 if (auto Val = parseFnAttrAsInteger(F, Name))
293 KI.LaunchBounds.push_back({Name, *Val});
295 TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds);
297 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
298 for (const auto &BB : F)
299 KI.updateForBB(BB, ORE);
301 #define REMARK_PROPERTY(PROP_NAME) \
302 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
303 REMARK_PROPERTY(ExternalNotKernel);
304 for (auto LB : KI.LaunchBounds)
305 remarkProperty(ORE, F, LB.first, LB.second);
306 REMARK_PROPERTY(Allocas);
307 REMARK_PROPERTY(AllocasStaticSizeSum);
308 REMARK_PROPERTY(AllocasDyn);
309 REMARK_PROPERTY(DirectCalls);
310 REMARK_PROPERTY(IndirectCalls);
311 REMARK_PROPERTY(DirectCallsToDefinedFunctions);
312 REMARK_PROPERTY(InlineAssemblyCalls);
313 REMARK_PROPERTY(Invokes);
314 REMARK_PROPERTY(FlatAddrspaceAccesses);
315 #undef REMARK_PROPERTY
317 return;
320 PreservedAnalyses KernelInfoPrinter::run(Function &F,
321 FunctionAnalysisManager &AM) {
322 // Skip it if remarks are not enabled as it will do nothing useful.
323 if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE))
324 KernelInfo::emitKernelInfo(F, AM, TM);
325 return PreservedAnalyses::all();