1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements hierarchical time profiler.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/STLFunctionalExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/Support/JSON.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
34 using std::chrono::duration
;
35 using std::chrono::duration_cast
;
36 using std::chrono::microseconds
;
37 using std::chrono::steady_clock
;
38 using std::chrono::system_clock
;
39 using std::chrono::time_point
;
40 using std::chrono::time_point_cast
;
42 struct TimeTraceProfilerInstances
{
44 std::vector
<TimeTraceProfiler
*> List
;
47 TimeTraceProfilerInstances
&getTimeTraceProfilerInstances() {
48 static TimeTraceProfilerInstances Instances
;
52 } // anonymous namespace
54 // Per Thread instance
55 static LLVM_THREAD_LOCAL TimeTraceProfiler
*TimeTraceProfilerInstance
= nullptr;
57 TimeTraceProfiler
*llvm::getTimeTraceProfilerInstance() {
58 return TimeTraceProfilerInstance
;
63 using ClockType
= steady_clock
;
64 using TimePointType
= time_point
<ClockType
>;
65 using DurationType
= duration
<ClockType::rep
, ClockType::period
>;
66 using CountAndDurationType
= std::pair
<size_t, DurationType
>;
67 using NameAndCountAndDurationType
=
68 std::pair
<std::string
, CountAndDurationType
>;
70 } // anonymous namespace
72 /// Represents an open or completed time section entry to be captured.
73 struct llvm::TimeTraceProfilerEntry
{
74 const TimePointType Start
;
76 const std::string Name
;
77 TimeTraceMetadata Metadata
;
79 const TimeTraceEventType EventType
= TimeTraceEventType::CompleteEvent
;
80 TimeTraceProfilerEntry(TimePointType
&&S
, TimePointType
&&E
, std::string
&&N
,
81 std::string
&&Dt
, TimeTraceEventType Et
)
82 : Start(std::move(S
)), End(std::move(E
)), Name(std::move(N
)), Metadata(),
84 Metadata
.Detail
= std::move(Dt
);
87 TimeTraceProfilerEntry(TimePointType
&&S
, TimePointType
&&E
, std::string
&&N
,
88 TimeTraceMetadata
&&Mt
, TimeTraceEventType Et
)
89 : Start(std::move(S
)), End(std::move(E
)), Name(std::move(N
)),
90 Metadata(std::move(Mt
)), EventType(Et
) {}
92 // Calculate timings for FlameGraph. Cast time points to microsecond precision
93 // rather than casting duration. This avoids truncation issues causing inner
94 // scopes overruning outer scopes.
95 ClockType::rep
getFlameGraphStartUs(TimePointType StartTime
) const {
96 return (time_point_cast
<microseconds
>(Start
) -
97 time_point_cast
<microseconds
>(StartTime
))
101 ClockType::rep
getFlameGraphDurUs() const {
102 return (time_point_cast
<microseconds
>(End
) -
103 time_point_cast
<microseconds
>(Start
))
108 // Represents a currently open (in-progress) time trace entry. InstantEvents
109 // that happen during an open event are associated with the duration of this
110 // parent event and they are dropped if parent duration is shorter than
112 struct InProgressEntry
{
113 TimeTraceProfilerEntry Event
;
114 std::vector
<TimeTraceProfilerEntry
> InstantEvents
;
116 InProgressEntry(TimePointType S
, TimePointType E
, std::string N
,
117 std::string Dt
, TimeTraceEventType Et
)
118 : Event(std::move(S
), std::move(E
), std::move(N
), std::move(Dt
), Et
),
121 InProgressEntry(TimePointType S
, TimePointType E
, std::string N
,
122 TimeTraceMetadata Mt
, TimeTraceEventType Et
)
123 : Event(std::move(S
), std::move(E
), std::move(N
), std::move(Mt
), Et
),
127 struct llvm::TimeTraceProfiler
{
128 TimeTraceProfiler(unsigned TimeTraceGranularity
= 0, StringRef ProcName
= "",
129 bool TimeTraceVerbose
= false)
130 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
131 ProcName(ProcName
), Pid(sys::Process::getProcessId()),
132 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity
),
133 TimeTraceVerbose(TimeTraceVerbose
) {
134 llvm::get_thread_name(ThreadName
);
137 TimeTraceProfilerEntry
*
138 begin(std::string Name
, llvm::function_ref
<std::string()> Detail
,
139 TimeTraceEventType EventType
= TimeTraceEventType::CompleteEvent
) {
140 assert(EventType
!= TimeTraceEventType::InstantEvent
&&
141 "Instant Events don't have begin and end.");
142 Stack
.emplace_back(std::make_unique
<InProgressEntry
>(
143 ClockType::now(), TimePointType(), std::move(Name
), Detail(),
145 return &Stack
.back()->Event
;
148 TimeTraceProfilerEntry
*
149 begin(std::string Name
, llvm::function_ref
<TimeTraceMetadata()> Metadata
,
150 TimeTraceEventType EventType
= TimeTraceEventType::CompleteEvent
) {
151 assert(EventType
!= TimeTraceEventType::InstantEvent
&&
152 "Instant Events don't have begin and end.");
153 Stack
.emplace_back(std::make_unique
<InProgressEntry
>(
154 ClockType::now(), TimePointType(), std::move(Name
), Metadata(),
156 return &Stack
.back()->Event
;
159 void insert(std::string Name
, llvm::function_ref
<std::string()> Detail
) {
163 Stack
.back()->InstantEvents
.emplace_back(TimeTraceProfilerEntry(
164 ClockType::now(), TimePointType(), std::move(Name
), Detail(),
165 TimeTraceEventType::InstantEvent
));
169 assert(!Stack
.empty() && "Must call begin() first");
170 end(Stack
.back()->Event
);
173 void end(TimeTraceProfilerEntry
&E
) {
174 assert(!Stack
.empty() && "Must call begin() first");
175 E
.End
= ClockType::now();
177 // Calculate duration at full precision for overall counts.
178 DurationType Duration
= E
.End
- E
.Start
;
181 llvm::find_if(Stack
, [&](const std::unique_ptr
<InProgressEntry
> &Val
) {
182 return &Val
->Event
== &E
;
184 assert(Iter
!= Stack
.end() && "Event not in the Stack");
186 // Only include sections longer or equal to TimeTraceGranularity msec.
187 if (duration_cast
<microseconds
>(Duration
).count() >= TimeTraceGranularity
) {
188 Entries
.emplace_back(E
);
189 for (auto &IE
: Iter
->get()->InstantEvents
) {
190 Entries
.emplace_back(IE
);
194 // Track total time taken by each "name", but only the topmost levels of
195 // them; e.g. if there's a template instantiation that instantiates other
196 // templates from within, we only want to add the topmost one. "topmost"
197 // happens to be the ones that don't have any currently open entries above
199 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack
)),
200 [&](const std::unique_ptr
<InProgressEntry
> &Val
) {
201 return Val
->Event
.Name
== E
.Name
;
203 auto &CountAndTotal
= CountAndTotalPerName
[E
.Name
];
204 CountAndTotal
.first
++;
205 CountAndTotal
.second
+= Duration
;
211 // Write events from this TimeTraceProfilerInstance and
212 // ThreadTimeTraceProfilerInstances.
213 void write(raw_pwrite_stream
&OS
) {
214 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
215 auto &Instances
= getTimeTraceProfilerInstances();
216 std::lock_guard
<std::mutex
> Lock(Instances
.Lock
);
217 assert(Stack
.empty() &&
218 "All profiler sections should be ended when calling write");
219 assert(llvm::all_of(Instances
.List
,
220 [](const auto &TTP
) { return TTP
->Stack
.empty(); }) &&
221 "All profiler sections should be ended when calling write");
225 J
.attributeBegin("traceEvents");
228 // Emit all events for the main flame graph.
229 auto writeEvent
= [&](const auto &E
, uint64_t Tid
) {
230 auto StartUs
= E
.getFlameGraphStartUs(StartTime
);
231 auto DurUs
= E
.getFlameGraphDurUs();
234 J
.attribute("pid", Pid
);
235 J
.attribute("tid", int64_t(Tid
));
236 J
.attribute("ts", StartUs
);
237 if (E
.EventType
== TimeTraceEventType::AsyncEvent
) {
238 J
.attribute("cat", E
.Name
);
239 J
.attribute("ph", "b");
240 J
.attribute("id", 0);
241 } else if (E
.EventType
== TimeTraceEventType::CompleteEvent
) {
242 J
.attribute("ph", "X");
243 J
.attribute("dur", DurUs
);
244 } else { // instant event
245 assert(E
.EventType
== TimeTraceEventType::InstantEvent
&&
246 "InstantEvent expected");
247 J
.attribute("ph", "i");
249 J
.attribute("name", E
.Name
);
250 if (!E
.Metadata
.isEmpty()) {
251 J
.attributeObject("args", [&] {
252 if (!E
.Metadata
.Detail
.empty())
253 J
.attribute("detail", E
.Metadata
.Detail
);
254 if (!E
.Metadata
.File
.empty())
255 J
.attribute("file", E
.Metadata
.File
);
256 if (E
.Metadata
.Line
> 0)
257 J
.attribute("line", E
.Metadata
.Line
);
262 if (E
.EventType
== TimeTraceEventType::AsyncEvent
) {
264 J
.attribute("pid", Pid
);
265 J
.attribute("tid", int64_t(Tid
));
266 J
.attribute("ts", StartUs
+ DurUs
);
267 J
.attribute("cat", E
.Name
);
268 J
.attribute("ph", "e");
269 J
.attribute("id", 0);
270 J
.attribute("name", E
.Name
);
274 for (const TimeTraceProfilerEntry
&E
: Entries
)
275 writeEvent(E
, this->Tid
);
276 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
277 for (const TimeTraceProfilerEntry
&E
: TTP
->Entries
)
278 writeEvent(E
, TTP
->Tid
);
280 // Emit totals by section name as additional "thread" events, sorted from
282 // Find highest used thread id.
283 uint64_t MaxTid
= this->Tid
;
284 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
285 MaxTid
= std::max(MaxTid
, TTP
->Tid
);
287 // Combine all CountAndTotalPerName from threads into one.
288 StringMap
<CountAndDurationType
> AllCountAndTotalPerName
;
289 auto combineStat
= [&](const auto &Stat
) {
290 StringRef Key
= Stat
.getKey();
291 auto Value
= Stat
.getValue();
292 auto &CountAndTotal
= AllCountAndTotalPerName
[Key
];
293 CountAndTotal
.first
+= Value
.first
;
294 CountAndTotal
.second
+= Value
.second
;
296 for (const auto &Stat
: CountAndTotalPerName
)
298 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
299 for (const auto &Stat
: TTP
->CountAndTotalPerName
)
302 std::vector
<NameAndCountAndDurationType
> SortedTotals
;
303 SortedTotals
.reserve(AllCountAndTotalPerName
.size());
304 for (const auto &Total
: AllCountAndTotalPerName
)
305 SortedTotals
.emplace_back(std::string(Total
.getKey()), Total
.getValue());
307 llvm::sort(SortedTotals
, [](const NameAndCountAndDurationType
&A
,
308 const NameAndCountAndDurationType
&B
) {
309 return A
.second
.second
> B
.second
.second
;
312 // Report totals on separate threads of tracing file.
313 uint64_t TotalTid
= MaxTid
+ 1;
314 for (const NameAndCountAndDurationType
&Total
: SortedTotals
) {
315 auto DurUs
= duration_cast
<microseconds
>(Total
.second
.second
).count();
316 auto Count
= AllCountAndTotalPerName
[Total
.first
].first
;
319 J
.attribute("pid", Pid
);
320 J
.attribute("tid", int64_t(TotalTid
));
321 J
.attribute("ph", "X");
322 J
.attribute("ts", 0);
323 J
.attribute("dur", DurUs
);
324 J
.attribute("name", "Total " + Total
.first
);
325 J
.attributeObject("args", [&] {
326 J
.attribute("count", int64_t(Count
));
327 J
.attribute("avg ms", int64_t(DurUs
/ Count
/ 1000));
334 auto writeMetadataEvent
= [&](const char *Name
, uint64_t Tid
,
337 J
.attribute("cat", "");
338 J
.attribute("pid", Pid
);
339 J
.attribute("tid", int64_t(Tid
));
340 J
.attribute("ts", 0);
341 J
.attribute("ph", "M");
342 J
.attribute("name", Name
);
343 J
.attributeObject("args", [&] { J
.attribute("name", arg
); });
347 writeMetadataEvent("process_name", Tid
, ProcName
);
348 writeMetadataEvent("thread_name", Tid
, ThreadName
);
349 for (const TimeTraceProfiler
*TTP
: Instances
.List
)
350 writeMetadataEvent("thread_name", TTP
->Tid
, TTP
->ThreadName
);
355 // Emit the absolute time when this TimeProfiler started.
356 // This can be used to combine the profiling data from
357 // multiple processes and preserve actual time intervals.
358 J
.attribute("beginningOfTime",
359 time_point_cast
<microseconds
>(BeginningOfTime
)
366 SmallVector
<std::unique_ptr
<InProgressEntry
>, 16> Stack
;
367 SmallVector
<TimeTraceProfilerEntry
, 128> Entries
;
368 StringMap
<CountAndDurationType
> CountAndTotalPerName
;
369 // System clock time when the session was begun.
370 const time_point
<system_clock
> BeginningOfTime
;
371 // Profiling clock time when the session was begun.
372 const TimePointType StartTime
;
373 const std::string ProcName
;
374 const sys::Process::Pid Pid
;
375 SmallString
<0> ThreadName
;
378 // Minimum time granularity (in microseconds)
379 const unsigned TimeTraceGranularity
;
381 // Make time trace capture verbose event details (e.g. source filenames). This
382 // can increase the size of the output by 2-3 times.
383 const bool TimeTraceVerbose
;
386 bool llvm::isTimeTraceVerbose() {
387 return getTimeTraceProfilerInstance() &&
388 getTimeTraceProfilerInstance()->TimeTraceVerbose
;
391 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity
,
393 bool TimeTraceVerbose
) {
394 assert(TimeTraceProfilerInstance
== nullptr &&
395 "Profiler should not be initialized");
396 TimeTraceProfilerInstance
= new TimeTraceProfiler(
397 TimeTraceGranularity
, llvm::sys::path::filename(ProcName
),
401 // Removes all TimeTraceProfilerInstances.
402 // Called from main thread.
403 void llvm::timeTraceProfilerCleanup() {
404 delete TimeTraceProfilerInstance
;
405 TimeTraceProfilerInstance
= nullptr;
407 auto &Instances
= getTimeTraceProfilerInstances();
408 std::lock_guard
<std::mutex
> Lock(Instances
.Lock
);
409 for (auto *TTP
: Instances
.List
)
411 Instances
.List
.clear();
414 // Finish TimeTraceProfilerInstance on a worker thread.
415 // This doesn't remove the instance, just moves the pointer to global vector.
416 void llvm::timeTraceProfilerFinishThread() {
417 auto &Instances
= getTimeTraceProfilerInstances();
418 std::lock_guard
<std::mutex
> Lock(Instances
.Lock
);
419 Instances
.List
.push_back(TimeTraceProfilerInstance
);
420 TimeTraceProfilerInstance
= nullptr;
423 void llvm::timeTraceProfilerWrite(raw_pwrite_stream
&OS
) {
424 assert(TimeTraceProfilerInstance
!= nullptr &&
425 "Profiler object can't be null");
426 TimeTraceProfilerInstance
->write(OS
);
429 Error
llvm::timeTraceProfilerWrite(StringRef PreferredFileName
,
430 StringRef FallbackFileName
) {
431 assert(TimeTraceProfilerInstance
!= nullptr &&
432 "Profiler object can't be null");
434 std::string Path
= PreferredFileName
.str();
436 Path
= FallbackFileName
== "-" ? "out" : FallbackFileName
.str();
437 Path
+= ".time-trace";
441 raw_fd_ostream
OS(Path
, EC
, sys::fs::OF_TextWithCRLF
);
443 return createStringError(EC
, "Could not open " + Path
);
445 timeTraceProfilerWrite(OS
);
446 return Error::success();
449 TimeTraceProfilerEntry
*llvm::timeTraceProfilerBegin(StringRef Name
,
451 if (TimeTraceProfilerInstance
!= nullptr)
452 return TimeTraceProfilerInstance
->begin(
453 std::string(Name
), [&]() { return std::string(Detail
); },
454 TimeTraceEventType::CompleteEvent
);
458 TimeTraceProfilerEntry
*
459 llvm::timeTraceProfilerBegin(StringRef Name
,
460 llvm::function_ref
<std::string()> Detail
) {
461 if (TimeTraceProfilerInstance
!= nullptr)
462 return TimeTraceProfilerInstance
->begin(std::string(Name
), Detail
,
463 TimeTraceEventType::CompleteEvent
);
467 TimeTraceProfilerEntry
*
468 llvm::timeTraceProfilerBegin(StringRef Name
,
469 llvm::function_ref
<TimeTraceMetadata()> Metadata
) {
470 if (TimeTraceProfilerInstance
!= nullptr)
471 return TimeTraceProfilerInstance
->begin(std::string(Name
), Metadata
,
472 TimeTraceEventType::CompleteEvent
);
476 TimeTraceProfilerEntry
*llvm::timeTraceAsyncProfilerBegin(StringRef Name
,
478 if (TimeTraceProfilerInstance
!= nullptr)
479 return TimeTraceProfilerInstance
->begin(
480 std::string(Name
), [&]() { return std::string(Detail
); },
481 TimeTraceEventType::AsyncEvent
);
485 void llvm::timeTraceAddInstantEvent(StringRef Name
,
486 llvm::function_ref
<std::string()> Detail
) {
487 if (TimeTraceProfilerInstance
!= nullptr)
488 TimeTraceProfilerInstance
->insert(std::string(Name
), Detail
);
491 void llvm::timeTraceProfilerEnd() {
492 if (TimeTraceProfilerInstance
!= nullptr)
493 TimeTraceProfilerInstance
->end();
496 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry
*E
) {
497 if (TimeTraceProfilerInstance
!= nullptr)
498 TimeTraceProfilerInstance
->end(*E
);