1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements hierarchical time profiler.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLFunctionalExtras.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Support/Process.h"
19 #include "llvm/Support/Threading.h"
31 using std::chrono::duration
;
32 using std::chrono::duration_cast
;
33 using std::chrono::microseconds
;
34 using std::chrono::steady_clock
;
35 using std::chrono::system_clock
;
36 using std::chrono::time_point
;
37 using std::chrono::time_point_cast
;
39 struct TimeTraceProfilerInstances
{
41 std::vector
<TimeTraceProfiler
*> List
;
44 TimeTraceProfilerInstances
&getTimeTraceProfilerInstances() {
45 static TimeTraceProfilerInstances Instances
;
49 } // anonymous namespace
51 // Per Thread instance
52 static LLVM_THREAD_LOCAL TimeTraceProfiler
*TimeTraceProfilerInstance
= nullptr;
54 TimeTraceProfiler
*llvm::getTimeTraceProfilerInstance() {
55 return TimeTraceProfilerInstance
;
60 using ClockType
= steady_clock
;
61 using TimePointType
= time_point
<ClockType
>;
62 using DurationType
= duration
<ClockType::rep
, ClockType::period
>;
63 using CountAndDurationType
= std::pair
<size_t, DurationType
>;
64 using NameAndCountAndDurationType
=
65 std::pair
<std::string
, CountAndDurationType
>;
67 /// Represents an open or completed time section entry to be captured.
68 struct TimeTraceProfilerEntry
{
69 const TimePointType Start
;
71 const std::string Name
;
72 const std::string Detail
;
74 TimeTraceProfilerEntry(TimePointType
&&S
, TimePointType
&&E
, std::string
&&N
,
76 : Start(std::move(S
)), End(std::move(E
)), Name(std::move(N
)),
77 Detail(std::move(Dt
)) {}
79 // Calculate timings for FlameGraph. Cast time points to microsecond precision
80 // rather than casting duration. This avoids truncation issues causing inner
81 // scopes overruning outer scopes.
82 ClockType::rep
getFlameGraphStartUs(TimePointType StartTime
) const {
83 return (time_point_cast
<microseconds
>(Start
) -
84 time_point_cast
<microseconds
>(StartTime
))
88 ClockType::rep
getFlameGraphDurUs() const {
89 return (time_point_cast
<microseconds
>(End
) -
90 time_point_cast
<microseconds
>(Start
))
95 } // anonymous namespace
97 struct llvm::TimeTraceProfiler
{
98 TimeTraceProfiler(unsigned TimeTraceGranularity
= 0, StringRef ProcName
= "")
99 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
100 ProcName(ProcName
), Pid(sys::Process::getProcessId()),
101 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity
) {
102 llvm::get_thread_name(ThreadName
);
105 void begin(std::string Name
, llvm::function_ref
<std::string()> Detail
) {
106 Stack
.emplace_back(ClockType::now(), TimePointType(), std::move(Name
),
111 assert(!Stack
.empty() && "Must call begin() first");
112 TimeTraceProfilerEntry
&E
= Stack
.back();
113 E
.End
= ClockType::now();
115 // Check that end times monotonically increase.
116 assert((Entries
.empty() ||
117 (E
.getFlameGraphStartUs(StartTime
) + E
.getFlameGraphDurUs() >=
118 Entries
.back().getFlameGraphStartUs(StartTime
) +
119 Entries
.back().getFlameGraphDurUs())) &&
120 "TimeProfiler scope ended earlier than previous scope");
122 // Calculate duration at full precision for overall counts.
123 DurationType Duration
= E
.End
- E
.Start
;
125 // Only include sections longer or equal to TimeTraceGranularity msec.
126 if (duration_cast
<microseconds
>(Duration
).count() >= TimeTraceGranularity
)
127 Entries
.emplace_back(E
);
129 // Track total time taken by each "name", but only the topmost levels of
130 // them; e.g. if there's a template instantiation that instantiates other
131 // templates from within, we only want to add the topmost one. "topmost"
132 // happens to be the ones that don't have any currently open entries above
134 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack
)),
135 [&](const TimeTraceProfilerEntry
&Val
) {
136 return Val
.Name
== E
.Name
;
138 auto &CountAndTotal
= CountAndTotalPerName
[E
.Name
];
139 CountAndTotal
.first
++;
140 CountAndTotal
.second
+= Duration
;
146 // Write events from this TimeTraceProfilerInstance and
147 // ThreadTimeTraceProfilerInstances.
148 void write(raw_pwrite_stream
&OS
) {
149 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
150 auto &Instances
= getTimeTraceProfilerInstances();
151 std::lock_guard
<std::mutex
> Lock(Instances
.Lock
);
152 assert(Stack
.empty() &&
153 "All profiler sections should be ended when calling write");
154 assert(llvm::all_of(Instances
.List
,
155 [](const auto &TTP
) { return TTP
->Stack
.empty(); }) &&
156 "All profiler sections should be ended when calling write");
160 J
.attributeBegin("traceEvents");
163 // Emit all events for the main flame graph.
164 auto writeEvent
= [&](const auto &E
, uint64_t Tid
) {
165 auto StartUs
= E
.getFlameGraphStartUs(StartTime
);
166 auto DurUs
= E
.getFlameGraphDurUs();
169 J
.attribute("pid", Pid
);
170 J
.attribute("tid", int64_t(Tid
));
171 J
.attribute("ph", "X");
172 J
.attribute("ts", StartUs
);
173 J
.attribute("dur", DurUs
);
174 J
.attribute("name", E
.Name
);
175 if (!E
.Detail
.empty()) {
176 J
.attributeObject("args", [&] { J
.attribute("detail", E
.Detail
); });
180 for (const TimeTraceProfilerEntry
&E
: Entries
)
181 writeEvent(E
, this->Tid
);
182 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
183 for (const TimeTraceProfilerEntry
&E
: TTP
->Entries
)
184 writeEvent(E
, TTP
->Tid
);
186 // Emit totals by section name as additional "thread" events, sorted from
188 // Find highest used thread id.
189 uint64_t MaxTid
= this->Tid
;
190 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
191 MaxTid
= std::max(MaxTid
, TTP
->Tid
);
193 // Combine all CountAndTotalPerName from threads into one.
194 StringMap
<CountAndDurationType
> AllCountAndTotalPerName
;
195 auto combineStat
= [&](const auto &Stat
) {
196 StringRef Key
= Stat
.getKey();
197 auto Value
= Stat
.getValue();
198 auto &CountAndTotal
= AllCountAndTotalPerName
[Key
];
199 CountAndTotal
.first
+= Value
.first
;
200 CountAndTotal
.second
+= Value
.second
;
202 for (const auto &Stat
: CountAndTotalPerName
)
204 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
205 for (const auto &Stat
: TTP
->CountAndTotalPerName
)
208 std::vector
<NameAndCountAndDurationType
> SortedTotals
;
209 SortedTotals
.reserve(AllCountAndTotalPerName
.size());
210 for (const auto &Total
: AllCountAndTotalPerName
)
211 SortedTotals
.emplace_back(std::string(Total
.getKey()), Total
.getValue());
213 llvm::sort(SortedTotals
, [](const NameAndCountAndDurationType
&A
,
214 const NameAndCountAndDurationType
&B
) {
215 return A
.second
.second
> B
.second
.second
;
218 // Report totals on separate threads of tracing file.
219 uint64_t TotalTid
= MaxTid
+ 1;
220 for (const NameAndCountAndDurationType
&Total
: SortedTotals
) {
221 auto DurUs
= duration_cast
<microseconds
>(Total
.second
.second
).count();
222 auto Count
= AllCountAndTotalPerName
[Total
.first
].first
;
225 J
.attribute("pid", Pid
);
226 J
.attribute("tid", int64_t(TotalTid
));
227 J
.attribute("ph", "X");
228 J
.attribute("ts", 0);
229 J
.attribute("dur", DurUs
);
230 J
.attribute("name", "Total " + Total
.first
);
231 J
.attributeObject("args", [&] {
232 J
.attribute("count", int64_t(Count
));
233 J
.attribute("avg ms", int64_t(DurUs
/ Count
/ 1000));
240 auto writeMetadataEvent
= [&](const char *Name
, uint64_t Tid
,
243 J
.attribute("cat", "");
244 J
.attribute("pid", Pid
);
245 J
.attribute("tid", int64_t(Tid
));
246 J
.attribute("ts", 0);
247 J
.attribute("ph", "M");
248 J
.attribute("name", Name
);
249 J
.attributeObject("args", [&] { J
.attribute("name", arg
); });
253 writeMetadataEvent("process_name", Tid
, ProcName
);
254 writeMetadataEvent("thread_name", Tid
, ThreadName
);
255 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
256 writeMetadataEvent("thread_name", TTP
->Tid
, TTP
->ThreadName
);
261 // Emit the absolute time when this TimeProfiler started.
262 // This can be used to combine the profiling data from
263 // multiple processes and preserve actual time intervals.
264 J
.attribute("beginningOfTime",
265 time_point_cast
<microseconds
>(BeginningOfTime
)
272 SmallVector
<TimeTraceProfilerEntry
, 16> Stack
;
273 SmallVector
<TimeTraceProfilerEntry
, 128> Entries
;
274 StringMap
<CountAndDurationType
> CountAndTotalPerName
;
275 // System clock time when the session was begun.
276 const time_point
<system_clock
> BeginningOfTime
;
277 // Profiling clock time when the session was begun.
278 const TimePointType StartTime
;
279 const std::string ProcName
;
280 const sys::Process::Pid Pid
;
281 SmallString
<0> ThreadName
;
284 // Minimum time granularity (in microseconds)
285 const unsigned TimeTraceGranularity
;
288 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity
,
289 StringRef ProcName
) {
290 assert(TimeTraceProfilerInstance
== nullptr &&
291 "Profiler should not be initialized");
292 TimeTraceProfilerInstance
= new TimeTraceProfiler(
293 TimeTraceGranularity
, llvm::sys::path::filename(ProcName
));
296 // Removes all TimeTraceProfilerInstances.
297 // Called from main thread.
298 void llvm::timeTraceProfilerCleanup() {
299 delete TimeTraceProfilerInstance
;
300 TimeTraceProfilerInstance
= nullptr;
302 auto &Instances
= getTimeTraceProfilerInstances();
303 std::lock_guard
<std::mutex
> Lock(Instances
.Lock
);
304 for (auto *TTP
: Instances
.List
)
306 Instances
.List
.clear();
309 // Finish TimeTraceProfilerInstance on a worker thread.
310 // This doesn't remove the instance, just moves the pointer to global vector.
311 void llvm::timeTraceProfilerFinishThread() {
312 auto &Instances
= getTimeTraceProfilerInstances();
313 std::lock_guard
<std::mutex
> Lock(Instances
.Lock
);
314 Instances
.List
.push_back(TimeTraceProfilerInstance
);
315 TimeTraceProfilerInstance
= nullptr;
318 void llvm::timeTraceProfilerWrite(raw_pwrite_stream
&OS
) {
319 assert(TimeTraceProfilerInstance
!= nullptr &&
320 "Profiler object can't be null");
321 TimeTraceProfilerInstance
->write(OS
);
324 Error
llvm::timeTraceProfilerWrite(StringRef PreferredFileName
,
325 StringRef FallbackFileName
) {
326 assert(TimeTraceProfilerInstance
!= nullptr &&
327 "Profiler object can't be null");
329 std::string Path
= PreferredFileName
.str();
331 Path
= FallbackFileName
== "-" ? "out" : FallbackFileName
.str();
332 Path
+= ".time-trace";
336 raw_fd_ostream
OS(Path
, EC
, sys::fs::OF_TextWithCRLF
);
338 return createStringError(EC
, "Could not open " + Path
);
340 timeTraceProfilerWrite(OS
);
341 return Error::success();
344 void llvm::timeTraceProfilerBegin(StringRef Name
, StringRef Detail
) {
345 if (TimeTraceProfilerInstance
!= nullptr)
346 TimeTraceProfilerInstance
->begin(std::string(Name
),
347 [&]() { return std::string(Detail
); });
350 void llvm::timeTraceProfilerBegin(StringRef Name
,
351 llvm::function_ref
<std::string()> Detail
) {
352 if (TimeTraceProfilerInstance
!= nullptr)
353 TimeTraceProfilerInstance
->begin(std::string(Name
), Detail
);
356 void llvm::timeTraceProfilerEnd() {
357 if (TimeTraceProfilerInstance
!= nullptr)
358 TimeTraceProfilerInstance
->end();