//===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/PerfMonitor.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/ScopInfo.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <string>
#include <tuple>
#include <vector>

using namespace llvm;
using namespace polly;
21 Function
*PerfMonitor::getAtExit() {
22 const char *Name
= "atexit";
23 Function
*F
= M
->getFunction(Name
);
26 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
27 FunctionType
*Ty
= FunctionType::get(Builder
.getInt32Ty(),
28 {Builder
.getInt8PtrTy()}, false);
29 F
= Function::Create(Ty
, Linkage
, Name
, M
);
35 void PerfMonitor::addToGlobalConstructors(Function
*Fn
) {
36 const char *Name
= "llvm.global_ctors";
37 GlobalVariable
*GV
= M
->getGlobalVariable(Name
);
38 std::vector
<Constant
*> V
;
41 Constant
*Array
= GV
->getInitializer();
42 for (Value
*X
: Array
->operand_values())
43 V
.push_back(cast
<Constant
>(X
));
44 GV
->eraseFromParent();
47 StructType
*ST
= StructType::get(Builder
.getInt32Ty(), Fn
->getType(),
48 Builder
.getInt8PtrTy());
51 ConstantStruct::get(ST
, Builder
.getInt32(10), Fn
,
52 ConstantPointerNull::get(Builder
.getInt8PtrTy())));
53 ArrayType
*Ty
= ArrayType::get(ST
, V
.size());
55 GV
= new GlobalVariable(*M
, Ty
, true, GlobalValue::AppendingLinkage
,
56 ConstantArray::get(Ty
, V
), Name
, nullptr,
57 GlobalVariable::NotThreadLocal
);
60 Function
*PerfMonitor::getRDTSCP() {
61 return Intrinsic::getDeclaration(M
, Intrinsic::x86_rdtscp
);
64 PerfMonitor::PerfMonitor(const Scop
&S
, Module
*M
)
65 : M(M
), Builder(M
->getContext()), S(S
) {
66 if (Triple(M
->getTargetTriple()).getArch() == llvm::Triple::x86_64
)
72 static void TryRegisterGlobal(Module
*M
, const char *Name
,
73 Constant
*InitialValue
, Value
**Location
) {
74 *Location
= M
->getGlobalVariable(Name
);
77 *Location
= new GlobalVariable(
78 *M
, InitialValue
->getType(), true, GlobalValue::WeakAnyLinkage
,
79 InitialValue
, Name
, nullptr, GlobalVariable::InitialExecTLSModel
);
82 // Generate a unique name that is usable as a LLVM name for a scop to name its
83 // performance counter.
84 static std::string
GetScopUniqueVarname(const Scop
&S
) {
85 std::string EntryString
, ExitString
;
86 std::tie(EntryString
, ExitString
) = S
.getEntryExitStr();
88 return (Twine("__polly_perf_in_") + S
.getFunction().getName() + "_from__" +
89 EntryString
+ "__to__" + ExitString
)
93 void PerfMonitor::addScopCounter() {
94 const std::string varname
= GetScopUniqueVarname(S
);
95 TryRegisterGlobal(M
, (varname
+ "_cycles").c_str(), Builder
.getInt64(0),
96 &CyclesInCurrentScopPtr
);
98 TryRegisterGlobal(M
, (varname
+ "_trip_count").c_str(), Builder
.getInt64(0),
99 &TripCountForCurrentScopPtr
);
102 void PerfMonitor::addGlobalVariables() {
103 TryRegisterGlobal(M
, "__polly_perf_cycles_total_start", Builder
.getInt64(0),
104 &CyclesTotalStartPtr
);
106 TryRegisterGlobal(M
, "__polly_perf_initialized", Builder
.getInt1(false),
107 &AlreadyInitializedPtr
);
109 TryRegisterGlobal(M
, "__polly_perf_cycles_in_scops", Builder
.getInt64(0),
112 TryRegisterGlobal(M
, "__polly_perf_cycles_in_scop_start", Builder
.getInt64(0),
113 &CyclesInScopStartPtr
);
116 static const char *InitFunctionName
= "__polly_perf_init";
117 static const char *FinalReportingFunctionName
= "__polly_perf_final";
119 static BasicBlock
*FinalStartBB
= nullptr;
120 static ReturnInst
*ReturnFromFinal
= nullptr;
122 Function
*PerfMonitor::insertFinalReporting() {
123 // Create new function.
124 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
125 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
127 Function::Create(Ty
, Linkage
, FinalReportingFunctionName
, M
);
128 FinalStartBB
= BasicBlock::Create(M
->getContext(), "start", ExitFn
);
129 Builder
.SetInsertPoint(FinalStartBB
);
132 RuntimeDebugBuilder::createCPUPrinter(
133 Builder
, "Polly runtime information generation not supported\n");
134 Builder
.CreateRetVoid();
138 // Measure current cycles and compute final timings.
139 Function
*RDTSCPFn
= getRDTSCP();
141 Type
*Int64Ty
= Builder
.getInt64Ty();
142 Value
*CurrentCycles
=
143 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
144 Value
*CyclesStart
= Builder
.CreateLoad(Int64Ty
, CyclesTotalStartPtr
, true);
145 Value
*CyclesTotal
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
146 Value
*CyclesInScops
= Builder
.CreateLoad(Int64Ty
, CyclesInScopsPtr
, true);
148 // Print the runtime information.
149 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Polly runtime information\n");
150 RuntimeDebugBuilder::createCPUPrinter(Builder
, "-------------------------\n");
151 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Total: ", CyclesTotal
, "\n");
152 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Scops: ", CyclesInScops
,
155 // Print the preamble for per-scop information.
156 RuntimeDebugBuilder::createCPUPrinter(Builder
, "\n");
157 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Per SCoP information\n");
158 RuntimeDebugBuilder::createCPUPrinter(Builder
, "--------------------\n");
160 RuntimeDebugBuilder::createCPUPrinter(
161 Builder
, "scop function, "
162 "entry block name, exit block name, total time, trip count\n");
163 ReturnFromFinal
= Builder
.CreateRetVoid();
167 void PerfMonitor::AppendScopReporting() {
171 assert(FinalStartBB
&& "Expected FinalStartBB to be initialized by "
172 "PerfMonitor::insertFinalReporting.");
173 assert(ReturnFromFinal
&& "Expected ReturnFromFinal to be initialized by "
174 "PerfMonitor::insertFinalReporting.");
176 Builder
.SetInsertPoint(FinalStartBB
);
177 ReturnFromFinal
->eraseFromParent();
179 Type
*Int64Ty
= Builder
.getInt64Ty();
180 Value
*CyclesInCurrentScop
=
181 Builder
.CreateLoad(Int64Ty
, this->CyclesInCurrentScopPtr
, true);
183 Value
*TripCountForCurrentScop
=
184 Builder
.CreateLoad(Int64Ty
, this->TripCountForCurrentScopPtr
, true);
186 std::string EntryName
, ExitName
;
187 std::tie(EntryName
, ExitName
) = S
.getEntryExitStr();
189 // print in CSV for easy parsing with other tools.
190 RuntimeDebugBuilder::createCPUPrinter(
191 Builder
, S
.getFunction().getName(), ", ", EntryName
, ", ", ExitName
, ", ",
192 CyclesInCurrentScop
, ", ", TripCountForCurrentScop
, "\n");
194 ReturnFromFinal
= Builder
.CreateRetVoid();
197 static Function
*FinalReporting
= nullptr;
199 void PerfMonitor::initialize() {
200 addGlobalVariables();
203 // Ensure that we only add the final reporting function once.
204 // On later invocations, append to the reporting function.
205 if (!FinalReporting
) {
206 FinalReporting
= insertFinalReporting();
208 Function
*InitFn
= insertInitFunction(FinalReporting
);
209 addToGlobalConstructors(InitFn
);
212 AppendScopReporting();
215 Function
*PerfMonitor::insertInitFunction(Function
*FinalReporting
) {
216 // Insert function definition and BBs.
217 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
218 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
219 Function
*InitFn
= Function::Create(Ty
, Linkage
, InitFunctionName
, M
);
220 BasicBlock
*Start
= BasicBlock::Create(M
->getContext(), "start", InitFn
);
221 BasicBlock
*EarlyReturn
=
222 BasicBlock::Create(M
->getContext(), "earlyreturn", InitFn
);
223 BasicBlock
*InitBB
= BasicBlock::Create(M
->getContext(), "initbb", InitFn
);
225 Builder
.SetInsertPoint(Start
);
227 // Check if this function was already run. If yes, return.
229 // In case profiling has been enabled in multiple translation units, the
230 // initializer function will be added to the global constructors list of
231 // each translation unit. When merging translation units, the global
232 // constructor lists are just appended, such that the initializer will appear
233 // multiple times. To avoid initializations being run multiple times (and
234 // especially to avoid that atExitFn is called more than once), we bail
235 // out if the initializer is run more than once.
236 Value
*HasRunBefore
=
237 Builder
.CreateLoad(Builder
.getInt1Ty(), AlreadyInitializedPtr
);
238 Builder
.CreateCondBr(HasRunBefore
, EarlyReturn
, InitBB
);
239 Builder
.SetInsertPoint(EarlyReturn
);
240 Builder
.CreateRetVoid();
242 // Keep track that this function has been run once.
243 Builder
.SetInsertPoint(InitBB
);
244 Value
*True
= Builder
.getInt1(true);
245 Builder
.CreateStore(True
, AlreadyInitializedPtr
);
247 // Register the final reporting function with atexit().
248 Value
*FinalReportingPtr
=
249 Builder
.CreatePointerCast(FinalReporting
, Builder
.getInt8PtrTy());
250 Function
*AtExitFn
= getAtExit();
251 Builder
.CreateCall(AtExitFn
, {FinalReportingPtr
});
254 // Read the currently cycle counter and store the result for later.
255 Function
*RDTSCPFn
= getRDTSCP();
256 Value
*CurrentCycles
=
257 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
258 Builder
.CreateStore(CurrentCycles
, CyclesTotalStartPtr
, true);
260 Builder
.CreateRetVoid();
265 void PerfMonitor::insertRegionStart(Instruction
*InsertBefore
) {
269 Builder
.SetInsertPoint(InsertBefore
);
270 Function
*RDTSCPFn
= getRDTSCP();
271 Value
*CurrentCycles
=
272 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
273 Builder
.CreateStore(CurrentCycles
, CyclesInScopStartPtr
, true);
276 void PerfMonitor::insertRegionEnd(Instruction
*InsertBefore
) {
280 Builder
.SetInsertPoint(InsertBefore
);
281 Function
*RDTSCPFn
= getRDTSCP();
282 Type
*Int64Ty
= Builder
.getInt64Ty();
283 LoadInst
*CyclesStart
=
284 Builder
.CreateLoad(Int64Ty
, CyclesInScopStartPtr
, true);
285 Value
*CurrentCycles
=
286 Builder
.CreateExtractValue(Builder
.CreateCall(RDTSCPFn
), {0});
287 Value
*CyclesInScop
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
288 Value
*CyclesInScops
= Builder
.CreateLoad(Int64Ty
, CyclesInScopsPtr
, true);
289 CyclesInScops
= Builder
.CreateAdd(CyclesInScops
, CyclesInScop
);
290 Builder
.CreateStore(CyclesInScops
, CyclesInScopsPtr
, true);
292 Value
*CyclesInCurrentScop
=
293 Builder
.CreateLoad(Int64Ty
, CyclesInCurrentScopPtr
, true);
294 CyclesInCurrentScop
= Builder
.CreateAdd(CyclesInCurrentScop
, CyclesInScop
);
295 Builder
.CreateStore(CyclesInCurrentScop
, CyclesInCurrentScopPtr
, true);
297 Value
*TripCountForCurrentScop
=
298 Builder
.CreateLoad(Int64Ty
, TripCountForCurrentScopPtr
, true);
299 TripCountForCurrentScop
=
300 Builder
.CreateAdd(TripCountForCurrentScop
, Builder
.getInt64(1));
301 Builder
.CreateStore(TripCountForCurrentScop
, TripCountForCurrentScopPtr
,