1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 #include "polly/CodeGen/PerfMonitor.h"
12 #include "polly/CodeGen/RuntimeDebugBuilder.h"
13 #include "polly/ScopInfo.h"
14 #include "llvm/ADT/Twine.h"
15 #include "llvm/IR/IntrinsicsX86.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/TargetParser/Triple.h"
20 using namespace polly
;
22 Function
*PerfMonitor::getAtExit() {
23 const char *Name
= "atexit";
24 Function
*F
= M
->getFunction(Name
);
27 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
29 FunctionType::get(Builder
.getInt32Ty(), {Builder
.getPtrTy()}, false);
30 F
= Function::Create(Ty
, Linkage
, Name
, M
);
36 void PerfMonitor::addToGlobalConstructors(Function
*Fn
) {
37 const char *Name
= "llvm.global_ctors";
38 GlobalVariable
*GV
= M
->getGlobalVariable(Name
);
39 std::vector
<Constant
*> V
;
42 Constant
*Array
= GV
->getInitializer();
43 for (Value
*X
: Array
->operand_values())
44 V
.push_back(cast
<Constant
>(X
));
45 GV
->eraseFromParent();
49 StructType::get(Builder
.getInt32Ty(), Fn
->getType(), Builder
.getPtrTy());
52 ConstantStruct::get(ST
, Builder
.getInt32(10), Fn
,
53 ConstantPointerNull::get(Builder
.getPtrTy())));
54 ArrayType
*Ty
= ArrayType::get(ST
, V
.size());
56 GV
= new GlobalVariable(*M
, Ty
, true, GlobalValue::AppendingLinkage
,
57 ConstantArray::get(Ty
, V
), Name
, nullptr,
58 GlobalVariable::NotThreadLocal
);
61 Function
*PerfMonitor::getRDTSCP() {
62 return Intrinsic::getOrInsertDeclaration(M
, Intrinsic::x86_rdtscp
);
// Set up a performance monitor for scop S inside module M: the members M and
// S are initialized from the arguments and Builder is constructed from M's
// LLVMContext. The body then tests whether the architecture in M's target
// triple is x86_64.
//
// NOTE(review): the leading integers are listing line numbers fused into the
// text. They jump from 67 to 73 here, so the statement controlled by the `if`
// and the closing brace are not present in this listing -- recover them from
// the upstream file before relying on this definition.
65 PerfMonitor::PerfMonitor(const Scop
&S
, Module
*M
)
66 : M(M
), Builder(M
->getContext()), S(S
) {
67 if (Triple(M
->getTargetTriple()).getArch() == llvm::Triple::x86_64
)
73 static void TryRegisterGlobal(Module
*M
, const char *Name
,
74 Constant
*InitialValue
, Value
**Location
) {
75 *Location
= M
->getGlobalVariable(Name
);
78 *Location
= new GlobalVariable(
79 *M
, InitialValue
->getType(), true, GlobalValue::WeakAnyLinkage
,
80 InitialValue
, Name
, nullptr, GlobalVariable::InitialExecTLSModel
);
83 // Generate a unique name that is usable as a LLVM name for a scop to name its
84 // performance counter.
85 static std::string
GetScopUniqueVarname(const Scop
&S
) {
86 std::string EntryString
, ExitString
;
87 std::tie(EntryString
, ExitString
) = S
.getEntryExitStr();
89 return (Twine("__polly_perf_in_") + S
.getFunction().getName() + "_from__" +
90 EntryString
+ "__to__" + ExitString
)
94 void PerfMonitor::addScopCounter() {
95 const std::string varname
= GetScopUniqueVarname(S
);
96 TryRegisterGlobal(M
, (varname
+ "_cycles").c_str(), Builder
.getInt64(0),
97 &CyclesInCurrentScopPtr
);
99 TryRegisterGlobal(M
, (varname
+ "_trip_count").c_str(), Builder
.getInt64(0),
100 &TripCountForCurrentScopPtr
);
103 void PerfMonitor::addGlobalVariables() {
104 TryRegisterGlobal(M
, "__polly_perf_cycles_total_start", Builder
.getInt64(0),
105 &CyclesTotalStartPtr
);
107 TryRegisterGlobal(M
, "__polly_perf_initialized", Builder
.getInt1(false),
108 &AlreadyInitializedPtr
);
110 TryRegisterGlobal(M
, "__polly_perf_cycles_in_scops", Builder
.getInt64(0),
113 TryRegisterGlobal(M
, "__polly_perf_cycles_in_scop_start", Builder
.getInt64(0),
114 &CyclesInScopStartPtr
);
117 static const char *InitFunctionName
= "__polly_perf_init";
118 static const char *FinalReportingFunctionName
= "__polly_perf_final";
120 static BasicBlock
*FinalStartBB
= nullptr;
121 static ReturnInst
*ReturnFromFinal
= nullptr;
// Create the final reporting function and emit the code that prints the
// module-wide counters.
//
// Visible behaviour: a weak_odr function of type void() named
// FinalReportingFunctionName is created in M, a block "start" is created and
// remembered in FinalStartBB, and the builder is positioned at that block.
// The emitted code takes element 0 of the rdtscp result, subtracts the value
// loaded (volatile) from CyclesTotalStartPtr, loads (volatile)
// CyclesInScopsPtr, and prints both numbers followed by the header of the
// per-scop table. The final `ret void` is remembered in ReturnFromFinal.
//
// NOTE(review): this listing is lossy. The leading integers are listing line
// numbers fused into the text; they skip 127, 131-132, 136-138, 154-155 and
// everything after 164. The declaration of ExitFn, the condition guarding the
// "not supported" path, the tail of the "Scops: " print, the return statement
// and the closing brace are therefore not shown here.
123 Function
*PerfMonitor::insertFinalReporting() {
124 // Create new function.
125 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
126 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
// NOTE(review): the left-hand side of this statement (listing line 127) is
// missing; ExitFn is used below without a visible declaration.
128 Function::Create(Ty
, Linkage
, FinalReportingFunctionName
, M
);
129 FinalStartBB
= BasicBlock::Create(M
->getContext(), "start", ExitFn
);
130 Builder
.SetInsertPoint(FinalStartBB
);
// Path that only prints a "not supported" message and returns.
// NOTE(review): the condition selecting this path is missing from the listing.
133 RuntimeDebugBuilder::createCPUPrinter(
134 Builder
, "Polly runtime information generation not supported\n");
135 Builder
.CreateRetVoid();
139 // Measure current cycles and compute final timings.
140 Function
*RDTSCPFn
= getRDTSCP();
142 Type
*Int64Ty
= Builder
.getInt64Ty();
143 Value
*CurrentCycles
=
144 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
145 Value
*CyclesStart
= Builder
.CreateLoad(Int64Ty
, CyclesTotalStartPtr
, true);
146 Value
*CyclesTotal
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
147 Value
*CyclesInScops
= Builder
.CreateLoad(Int64Ty
, CyclesInScopsPtr
, true);
149 // Print the runtime information.
150 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Polly runtime information\n");
151 RuntimeDebugBuilder::createCPUPrinter(Builder
, "-------------------------\n");
152 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Total: ", CyclesTotal
, "\n");
153 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Scops: ", CyclesInScops
,
156 // Print the preamble for per-scop information.
157 RuntimeDebugBuilder::createCPUPrinter(Builder
, "\n");
158 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Per SCoP information\n");
159 RuntimeDebugBuilder::createCPUPrinter(Builder
, "--------------------\n");
161 RuntimeDebugBuilder::createCPUPrinter(
162 Builder
, "scop function, "
163 "entry block name, exit block name, total time, trip count\n");
// Remember the terminator so AppendScopReporting can replace it later.
164 ReturnFromFinal
= Builder
.CreateRetVoid();
// Extend the final reporting function by one output line for the current
// scop.
//
// Visible behaviour: after asserting that FinalStartBB and ReturnFromFinal
// are set, the builder is positioned at FinalStartBB and the remembered
// return instruction is erased. Code is then emitted that loads (volatile)
// the scop's cycle and trip counters and prints them, comma separated, after
// the function name and the entry/exit strings of the scop. A new `ret void`
// is created and remembered in ReturnFromFinal.
//
// NOTE(review): the leading integers are listing line numbers fused into the
// text. They jump from 168 to 172 and stop at 195, so listing lines 169-171
// and the closing brace are not shown here.
168 void PerfMonitor::AppendScopReporting() {
172 assert(FinalStartBB
&& "Expected FinalStartBB to be initialized by "
173 "PerfMonitor::insertFinalReporting.");
174 assert(ReturnFromFinal
&& "Expected ReturnFromFinal to be initialized by "
175 "PerfMonitor::insertFinalReporting.");
177 Builder
.SetInsertPoint(FinalStartBB
);
// Drop the current terminator; a new one is created at the end of this
// function.
178 ReturnFromFinal
->eraseFromParent();
180 Type
*Int64Ty
= Builder
.getInt64Ty();
181 Value
*CyclesInCurrentScop
=
182 Builder
.CreateLoad(Int64Ty
, this->CyclesInCurrentScopPtr
, true);
184 Value
*TripCountForCurrentScop
=
185 Builder
.CreateLoad(Int64Ty
, this->TripCountForCurrentScopPtr
, true);
187 std::string EntryName
, ExitName
;
188 std::tie(EntryName
, ExitName
) = S
.getEntryExitStr();
190 // print in CSV for easy parsing with other tools.
191 RuntimeDebugBuilder::createCPUPrinter(
192 Builder
, S
.getFunction().getName(), ", ", EntryName
, ", ", ExitName
, ", ",
193 CyclesInCurrentScop
, ", ", TripCountForCurrentScop
, "\n");
195 ReturnFromFinal
= Builder
.CreateRetVoid();
198 static Function
*FinalReporting
= nullptr;
// Set up the monitoring infrastructure for the current scop.
//
// Visible behaviour: the shared globals are registered via
// addGlobalVariables(). While the file-scope FinalReporting pointer is still
// null, the final reporting function is created, the init function is created
// from it and added to the global constructors. AppendScopReporting() is
// called as well.
//
// NOTE(review): the leading integers are listing line numbers fused into the
// text. They skip 202-203, 208, 211-212 and everything after 213, so at least
// one statement after addGlobalVariables() and the closing braces are not
// shown; in particular it cannot be told from here where the `if` body ends.
200 void PerfMonitor::initialize() {
201 addGlobalVariables();
204 // Ensure that we only add the final reporting function once.
205 // On later invocations, append to the reporting function.
206 if (!FinalReporting
) {
207 FinalReporting
= insertFinalReporting();
209 Function
*InitFn
= insertInitFunction(FinalReporting
);
210 addToGlobalConstructors(InitFn
);
213 AppendScopReporting();
// Create the initializer function of the performance monitor.
//
// FinalReporting: the function that the generated code registers with
//                 atexit().
//
// Visible behaviour: a weak_odr function of type void() named
// InitFunctionName is created in M with the blocks "start", "earlyreturn" and
// "initbb". "start" loads the i1 flag behind AlreadyInitializedPtr and
// branches to "earlyreturn" (which only returns) when it is set, otherwise to
// "initbb". "initbb" stores true to the flag, calls atexit() with
// FinalReporting cast to a pointer, stores (volatile) element 0 of the rdtscp
// result to CyclesTotalStartPtr and returns.
//
// NOTE(review): the leading integers are listing line numbers fused into the
// text. They skip 253-254, 260 and everything after 261, so any statement on
// those lines, the return statement of this function and its closing brace
// are not shown here.
216 Function
*PerfMonitor::insertInitFunction(Function
*FinalReporting
) {
217 // Insert function definition and BBs.
218 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
219 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
220 Function
*InitFn
= Function::Create(Ty
, Linkage
, InitFunctionName
, M
);
221 BasicBlock
*Start
= BasicBlock::Create(M
->getContext(), "start", InitFn
);
222 BasicBlock
*EarlyReturn
=
223 BasicBlock::Create(M
->getContext(), "earlyreturn", InitFn
);
224 BasicBlock
*InitBB
= BasicBlock::Create(M
->getContext(), "initbb", InitFn
);
226 Builder
.SetInsertPoint(Start
);
228 // Check if this function was already run. If yes, return.
230 // In case profiling has been enabled in multiple translation units, the
231 // initializer function will be added to the global constructors list of
232 // each translation unit. When merging translation units, the global
233 // constructor lists are just appended, such that the initializer will appear
234 // multiple times. To avoid initializations being run multiple times (and
235 // especially to avoid that atExitFn is called more than once), we bail
236 // out if the initializer is run more than once.
237 Value
*HasRunBefore
=
238 Builder
.CreateLoad(Builder
.getInt1Ty(), AlreadyInitializedPtr
);
239 Builder
.CreateCondBr(HasRunBefore
, EarlyReturn
, InitBB
);
240 Builder
.SetInsertPoint(EarlyReturn
);
241 Builder
.CreateRetVoid();
243 // Keep track that this function has been run once.
244 Builder
.SetInsertPoint(InitBB
);
245 Value
*True
= Builder
.getInt1(true);
246 Builder
.CreateStore(True
, AlreadyInitializedPtr
);
248 // Register the final reporting function with atexit().
249 Value
*FinalReportingPtr
=
250 Builder
.CreatePointerCast(FinalReporting
, Builder
.getPtrTy());
251 Function
*AtExitFn
= getAtExit();
252 Builder
.CreateCall(AtExitFn
, {FinalReportingPtr
});
// NOTE(review): listing lines 253-254 are missing at this point.
255 // Read the current cycle counter and store the result for later.
256 Function
*RDTSCPFn
= getRDTSCP();
257 Value
*CurrentCycles
=
258 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
259 Builder
.CreateStore(CurrentCycles
, CyclesTotalStartPtr
, true);
261 Builder
.CreateRetVoid();
// Emit the code that records the cycle counter at the start of a region.
//
// InsertBefore: instruction in front of which the code is emitted.
//
// Visible behaviour: the builder is positioned at InsertBefore, rdtscp is
// called, and element 0 of its result is stored (volatile) to
// CyclesInScopStartPtr.
//
// NOTE(review): the leading integers are listing line numbers fused into the
// text. They jump from 266 to 270 and stop at 274, so listing lines 267-269
// and the closing brace are not shown here.
266 void PerfMonitor::insertRegionStart(Instruction
*InsertBefore
) {
270 Builder
.SetInsertPoint(InsertBefore
);
271 Function
*RDTSCPFn
= getRDTSCP();
272 Value
*CurrentCycles
=
273 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
274 Builder
.CreateStore(CurrentCycles
, CyclesInScopStartPtr
, true);
277 void PerfMonitor::insertRegionEnd(Instruction
*InsertBefore
) {
281 Builder
.SetInsertPoint(InsertBefore
);
282 Function
*RDTSCPFn
= getRDTSCP();
283 Type
*Int64Ty
= Builder
.getInt64Ty();
284 LoadInst
*CyclesStart
=
285 Builder
.CreateLoad(Int64Ty
, CyclesInScopStartPtr
, true);
286 Value
*CurrentCycles
=
287 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
288 Value
*CyclesInScop
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
289 Value
*CyclesInScops
= Builder
.CreateLoad(Int64Ty
, CyclesInScopsPtr
, true);
290 CyclesInScops
= Builder
.CreateAdd(CyclesInScops
, CyclesInScop
);
291 Builder
.CreateStore(CyclesInScops
, CyclesInScopsPtr
, true);
293 Value
*CyclesInCurrentScop
=
294 Builder
.CreateLoad(Int64Ty
, CyclesInCurrentScopPtr
, true);
295 CyclesInCurrentScop
= Builder
.CreateAdd(CyclesInCurrentScop
, CyclesInScop
);
296 Builder
.CreateStore(CyclesInCurrentScop
, CyclesInCurrentScopPtr
, true);
298 Value
*TripCountForCurrentScop
=
299 Builder
.CreateLoad(Int64Ty
, TripCountForCurrentScopPtr
, true);
300 TripCountForCurrentScop
=
301 Builder
.CreateAdd(TripCountForCurrentScop
, Builder
.getInt64(1));
302 Builder
.CreateStore(TripCountForCurrentScop
, TripCountForCurrentScopPtr
,