1 //===- Timing.cpp - Execution time measurement facilities -----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Facilities to measure and provide statistics on execution time.
11 //===----------------------------------------------------------------------===//
13 #include "mlir/Support/Timing.h"
14 #include "mlir/Support/ThreadLocalCache.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/RWMutex.h"
25 #include "llvm/Support/Threading.h"
26 #include "llvm/Support/raw_ostream.h"
33 using namespace detail
;
34 using DisplayMode
= DefaultTimingManager::DisplayMode
;
35 using OutputFormat
= DefaultTimingManager::OutputFormat
;
37 constexpr llvm::StringLiteral kTimingDescription
=
38 "... Execution time report ...";
40 //===----------------------------------------------------------------------===//
42 //===----------------------------------------------------------------------===//
46 /// Private implementation details of the `TimingManager`.
47 class TimingManagerImpl
{
49 // Identifier allocator, map, and mutex for thread safety.
50 llvm::BumpPtrAllocator identifierAllocator
;
51 llvm::StringSet
<llvm::BumpPtrAllocator
&> identifiers
;
52 llvm::sys::SmartRWMutex
<true> identifierMutex
;
54 /// A thread local cache of identifiers to reduce lock contention.
55 ThreadLocalCache
<llvm::StringMap
<llvm::StringMapEntry
<std::nullopt_t
> *>>
58 TimingManagerImpl() : identifiers(identifierAllocator
) {}
63 TimingManager::TimingManager() : impl(std::make_unique
<TimingManagerImpl
>()) {}
65 TimingManager::~TimingManager() = default;
67 /// Get the root timer of this timing manager.
68 Timer
TimingManager::getRootTimer() {
69 auto rt
= rootTimer();
70 return rt
? Timer(*this, *rt
) : Timer();
73 /// Get the root timer of this timing manager wrapped in a `TimingScope`.
74 TimingScope
TimingManager::getRootScope() {
75 return TimingScope(getRootTimer());
78 //===----------------------------------------------------------------------===//
79 // Identifier uniquing
80 //===----------------------------------------------------------------------===//
82 /// Return an identifier for the specified string.
83 TimingIdentifier
TimingIdentifier::get(StringRef str
, TimingManager
&tm
) {
84 // Check for an existing instance in the local cache.
85 auto &impl
= *tm
.impl
;
86 auto *&localEntry
= (*impl
.localIdentifierCache
)[str
];
88 return TimingIdentifier(localEntry
);
90 // Check for an existing identifier in read-only mode.
92 llvm::sys::SmartScopedReader
<true> contextLock(impl
.identifierMutex
);
93 auto it
= impl
.identifiers
.find(str
);
94 if (it
!= impl
.identifiers
.end()) {
96 return TimingIdentifier(localEntry
);
100 // Acquire a writer-lock so that we can safely create the new instance.
101 llvm::sys::SmartScopedWriter
<true> contextLock(impl
.identifierMutex
);
102 auto it
= impl
.identifiers
.insert(str
).first
;
104 return TimingIdentifier(localEntry
);
107 //===----------------------------------------------------------------------===//
108 // Helpers for time record printing
109 //===----------------------------------------------------------------------===//
113 class OutputTextStrategy
: public OutputStrategy
{
115 OutputTextStrategy(raw_ostream
&os
) : OutputStrategy(os
) {}
117 void printHeader(const TimeRecord
&total
) override
{
118 // Figure out how many spaces to description name.
119 unsigned padding
= (80 - kTimingDescription
.size()) / 2;
120 os
<< "===" << std::string(73, '-') << "===\n";
121 os
.indent(padding
) << kTimingDescription
<< '\n';
122 os
<< "===" << std::string(73, '-') << "===\n";
124 // Print the total time followed by the section headers.
125 os
<< llvm::format(" Total Execution Time: %.4f seconds\n\n", total
.wall
);
126 if (total
.user
!= total
.wall
)
127 os
<< " ----User Time----";
128 os
<< " ----Wall Time---- ----Name----\n";
131 void printFooter() override
{ os
.flush(); }
133 void printTime(const TimeRecord
&time
, const TimeRecord
&total
) override
{
134 if (total
.user
!= total
.wall
) {
135 os
<< llvm::format(" %8.4f (%5.1f%%)", time
.user
,
136 100.0 * time
.user
/ total
.user
);
138 os
<< llvm::format(" %8.4f (%5.1f%%) ", time
.wall
,
139 100.0 * time
.wall
/ total
.wall
);
142 void printListEntry(StringRef name
, const TimeRecord
&time
,
143 const TimeRecord
&total
, bool lastEntry
) override
{
144 printTime(time
, total
);
148 void printTreeEntry(unsigned indent
, StringRef name
, const TimeRecord
&time
,
149 const TimeRecord
&total
) override
{
150 printTime(time
, total
);
151 os
.indent(indent
) << name
<< "\n";
154 void printTreeEntryEnd(unsigned indent
, bool lastEntry
) override
{}
157 class OutputJsonStrategy
: public OutputStrategy
{
159 OutputJsonStrategy(raw_ostream
&os
) : OutputStrategy(os
) {}
161 void printHeader(const TimeRecord
&total
) override
{ os
<< "[" << "\n"; }
163 void printFooter() override
{
168 void printTime(const TimeRecord
&time
, const TimeRecord
&total
) override
{
169 if (total
.user
!= total
.wall
) {
171 os
<< "\"duration\": " << llvm::format("%8.4f", time
.user
) << ", ";
172 os
<< "\"percentage\": "
173 << llvm::format("%5.1f", 100.0 * time
.user
/ total
.user
);
177 os
<< "\"duration\": " << llvm::format("%8.4f", time
.wall
) << ", ";
178 os
<< "\"percentage\": "
179 << llvm::format("%5.1f", 100.0 * time
.wall
/ total
.wall
);
183 void printListEntry(StringRef name
, const TimeRecord
&time
,
184 const TimeRecord
&total
, bool lastEntry
) override
{
186 printTime(time
, total
);
187 os
<< ", \"name\": " << "\"" << name
<< "\"";
194 void printTreeEntry(unsigned indent
, StringRef name
, const TimeRecord
&time
,
195 const TimeRecord
&total
) override
{
196 os
.indent(indent
) << "{";
197 printTime(time
, total
);
198 os
<< ", \"name\": " << "\"" << name
<< "\"";
199 os
<< ", \"passes\": [" << "\n";
202 void printTreeEntryEnd(unsigned indent
, bool lastEntry
) override
{
203 os
.indent(indent
) << "{}]";
213 //===----------------------------------------------------------------------===//
214 // Timer Implementation for DefaultTimingManager
215 //===----------------------------------------------------------------------===//
219 /// A timer used to sample execution time.
221 /// Separately tracks wall time and user time to account for parallel threads of
222 /// execution. Timers are intended to be started and stopped multiple times.
223 /// Each start and stop will add to the timer's wall and user time.
226 using ChildrenMap
= llvm::MapVector
<const void *, std::unique_ptr
<TimerImpl
>>;
227 using AsyncChildrenMap
= llvm::DenseMap
<uint64_t, ChildrenMap
>;
229 TimerImpl(std::string
&&name
, std::unique_ptr
<OutputStrategy
> &output
)
230 : threadId(llvm::get_threadid()), name(name
), output(output
) {}
233 void start() { startTime
= std::chrono::steady_clock::now(); }
237 auto newTime
= std::chrono::steady_clock::now() - startTime
;
242 /// Create a child timer nested within this one. Multiple calls to this
243 /// function with the same unique identifier `id` will return the same child
246 /// This function can be called from other threads, as long as this timer
247 /// outlives any uses of the child timer on the other thread.
248 TimerImpl
*nest(const void *id
, function_ref
<std::string()> nameBuilder
) {
249 auto tid
= llvm::get_threadid();
251 return nestTail(children
[id
], nameBuilder
);
252 std::unique_lock
<std::mutex
> lock(asyncMutex
);
253 return nestTail(asyncChildren
[tid
][id
], nameBuilder
);
256 /// Tail-called from `nest()`.
257 TimerImpl
*nestTail(std::unique_ptr
<TimerImpl
> &child
,
258 function_ref
<std::string()> nameBuilder
) {
260 child
= std::make_unique
<TimerImpl
>(nameBuilder(), output
);
264 /// Finalize this timer and all its children.
266 /// If this timer has async children, which happens if `nest()` was called
267 /// from another thread, this function merges the async childr timers into the
268 /// main list of child timers.
270 /// Caution: Call this function only after all nested timers running on other
271 /// threads no longer need their timers!
274 mergeAsyncChildren();
277 /// Add the user time of all async children to this timer's user time. This is
278 /// necessary since the user time already contains all regular child timers,
279 /// but not the asynchronous ones (by the nesting nature of the timers).
280 std::chrono::nanoseconds
addAsyncUserTime() {
281 auto added
= std::chrono::nanoseconds(0);
282 for (auto &child
: children
)
283 added
+= child
.second
->addAsyncUserTime();
284 for (auto &thread
: asyncChildren
) {
285 for (auto &child
: thread
.second
) {
286 child
.second
->addAsyncUserTime();
287 added
+= child
.second
->userTime
;
294 /// Ensure that this timer and recursively all its children have their async
295 /// children folded into the main map of children.
296 void mergeAsyncChildren() {
297 for (auto &child
: children
)
298 child
.second
->mergeAsyncChildren();
299 mergeChildren(std::move(asyncChildren
));
300 assert(asyncChildren
.empty());
303 /// Merge multiple child timers into this timer.
305 /// Children in `other` are added as children to this timer, or, if this timer
306 /// already contains a child with the corresponding unique identifier, are
307 /// merged into the existing child.
308 void mergeChildren(ChildrenMap
&&other
) {
309 if (children
.empty()) {
310 children
= std::move(other
);
311 for (auto &child
: children
)
312 child
.second
->mergeAsyncChildren();
314 for (auto &child
: other
)
315 mergeChild(child
.first
, std::move(child
.second
));
321 void mergeChildren(AsyncChildrenMap
&&other
) {
322 for (auto &thread
: other
) {
323 mergeChildren(std::move(thread
.second
));
324 assert(thread
.second
.empty());
329 /// Merge a child timer into this timer for a given unique identifier.
331 /// Moves all child and async child timers of `other` into this timer's child
332 /// for the given unique identifier.
333 void mergeChild(const void *id
, std::unique_ptr
<TimerImpl
> &&other
) {
334 auto &into
= children
[id
];
336 into
= std::move(other
);
337 into
->mergeAsyncChildren();
339 into
->wallTime
= std::max(into
->wallTime
, other
->wallTime
);
340 into
->userTime
+= other
->userTime
;
341 into
->mergeChildren(std::move(other
->children
));
342 into
->mergeChildren(std::move(other
->asyncChildren
));
347 /// Dump a human-readable tree representation of the timer and its children.
348 /// This is useful for debugging the timing mechanisms and structure of the
350 void dump(raw_ostream
&os
, unsigned indent
= 0, unsigned markThreadId
= 0) {
351 auto time
= getTimeRecord();
352 os
<< std::string(indent
* 2, ' ') << name
<< " [" << threadId
<< "]"
353 << llvm::format(" %7.4f / %7.4f", time
.user
, time
.wall
);
354 if (threadId
!= markThreadId
&& markThreadId
!= 0)
357 for (auto &child
: children
)
358 child
.second
->dump(os
, indent
+ 1, threadId
);
359 for (auto &thread
: asyncChildren
)
360 for (auto &child
: thread
.second
)
361 child
.second
->dump(os
, indent
+ 1, threadId
);
364 /// Returns the time for this timer in seconds.
365 TimeRecord
getTimeRecord() {
367 std::chrono::duration_cast
<std::chrono::duration
<double>>(wallTime
)
369 std::chrono::duration_cast
<std::chrono::duration
<double>>(userTime
)
373 /// Print the timing result in list mode.
374 void printAsList(TimeRecord total
) {
375 // Flatten the leaf timers in the tree and merge them by name.
376 llvm::StringMap
<TimeRecord
> mergedTimers
;
377 std::function
<void(TimerImpl
*)> addTimer
= [&](TimerImpl
*timer
) {
378 mergedTimers
[timer
->name
] += timer
->getTimeRecord();
379 for (auto &children
: timer
->children
)
380 addTimer(children
.second
.get());
384 // Sort the timing information by wall time.
385 std::vector
<std::pair
<StringRef
, TimeRecord
>> timerNameAndTime
;
386 for (auto &it
: mergedTimers
)
387 timerNameAndTime
.emplace_back(it
.first(), it
.second
);
388 llvm::array_pod_sort(timerNameAndTime
.begin(), timerNameAndTime
.end(),
389 [](const std::pair
<StringRef
, TimeRecord
> *lhs
,
390 const std::pair
<StringRef
, TimeRecord
> *rhs
) {
391 return llvm::array_pod_sort_comparator
<double>(
392 &rhs
->second
.wall
, &lhs
->second
.wall
);
395 // Print the timing information sequentially.
396 for (auto &timeData
: timerNameAndTime
)
397 output
->printListEntry(timeData
.first
, timeData
.second
, total
);
400 /// Print the timing result in tree mode.
401 void printAsTree(TimeRecord total
, unsigned indent
= 0) {
402 unsigned childIndent
= indent
;
404 output
->printTreeEntry(indent
, name
, getTimeRecord(), total
);
407 for (auto &child
: children
) {
408 child
.second
->printAsTree(total
, childIndent
);
411 output
->printTreeEntryEnd(indent
);
415 /// Print the current timing information.
416 void print(DisplayMode displayMode
) {
418 auto total
= getTimeRecord();
419 output
->printHeader(total
);
421 // Defer to a specialized printer for each display mode.
422 switch (displayMode
) {
423 case DisplayMode::List
:
426 case DisplayMode::Tree
:
431 // Print the top-level time not accounted for by child timers, and the
434 for (auto &child
: children
)
435 rest
-= child
.second
->getTimeRecord();
436 output
->printListEntry("Rest", rest
, total
);
437 output
->printListEntry("Total", total
, total
, /*lastEntry=*/true);
438 output
->printFooter();
441 /// The last time instant at which the timer was started.
442 std::chrono::time_point
<std::chrono::steady_clock
> startTime
;
444 /// Accumulated wall time. If multiple threads of execution are merged into
445 /// this timer, the wall time will hold the maximum wall time of each thread
447 std::chrono::nanoseconds wallTime
= std::chrono::nanoseconds(0);
449 /// Accumulated user time. If multiple threads of execution are merged into
450 /// this timer, each thread's user time is added here.
451 std::chrono::nanoseconds userTime
= std::chrono::nanoseconds(0);
453 /// The thread on which this timer is running.
456 /// A descriptive name for this timer.
459 /// Whether to omit this timer from reports and directly show its children.
462 /// Child timers on the same thread the timer itself. We keep at most one
463 /// timer per unique identifier.
464 ChildrenMap children
;
466 /// Child timers on other threads. We keep at most one timer per unique
468 AsyncChildrenMap asyncChildren
;
470 /// Mutex for the async children.
471 std::mutex asyncMutex
;
473 std::unique_ptr
<OutputStrategy
> &output
;
478 //===----------------------------------------------------------------------===//
479 // DefaultTimingManager
480 //===----------------------------------------------------------------------===//
485 /// Implementation details of the `DefaultTimingManager`.
486 class DefaultTimingManagerImpl
{
488 /// Whether we should do our work or not.
489 bool enabled
= false;
491 /// The configured display mode.
492 DisplayMode displayMode
= DisplayMode::Tree
;
495 std::unique_ptr
<TimerImpl
> rootTimer
;
498 } // namespace detail
501 DefaultTimingManager::DefaultTimingManager()
502 : impl(std::make_unique
<DefaultTimingManagerImpl
>()),
503 out(std::make_unique
<OutputTextStrategy
>(llvm::errs())) {
504 clear(); // initializes the root timer
507 DefaultTimingManager::~DefaultTimingManager() { print(); }
509 /// Enable or disable execution time sampling.
510 void DefaultTimingManager::setEnabled(bool enabled
) { impl
->enabled
= enabled
; }
512 /// Return whether execution time sampling is enabled.
513 bool DefaultTimingManager::isEnabled() const { return impl
->enabled
; }
515 /// Change the display mode.
516 void DefaultTimingManager::setDisplayMode(DisplayMode displayMode
) {
517 impl
->displayMode
= displayMode
;
520 /// Return the current display mode;
521 DefaultTimingManager::DisplayMode
DefaultTimingManager::getDisplayMode() const {
522 return impl
->displayMode
;
525 /// Change the stream where the output will be printed to.
526 void DefaultTimingManager::setOutput(std::unique_ptr
<OutputStrategy
> output
) {
527 out
= std::move(output
);
530 /// Print and clear the timing results.
531 void DefaultTimingManager::print() {
533 impl
->rootTimer
->finalize();
534 impl
->rootTimer
->print(impl
->displayMode
);
539 /// Clear the timing results.
540 void DefaultTimingManager::clear() {
541 impl
->rootTimer
= std::make_unique
<TimerImpl
>("root", out
);
542 impl
->rootTimer
->hidden
= true;
545 /// Debug print the timer data structures to an output stream.
546 void DefaultTimingManager::dumpTimers(raw_ostream
&os
) {
547 impl
->rootTimer
->dump(os
);
550 /// Debug print the timers as a list.
551 void DefaultTimingManager::dumpAsList(raw_ostream
&os
) {
552 impl
->rootTimer
->finalize();
553 impl
->rootTimer
->print(DisplayMode::List
);
556 /// Debug print the timers as a tree.
557 void DefaultTimingManager::dumpAsTree(raw_ostream
&os
) {
558 impl
->rootTimer
->finalize();
559 impl
->rootTimer
->print(DisplayMode::Tree
);
562 std::optional
<void *> DefaultTimingManager::rootTimer() {
564 return impl
->rootTimer
.get();
568 void DefaultTimingManager::startTimer(void *handle
) {
569 static_cast<TimerImpl
*>(handle
)->start();
572 void DefaultTimingManager::stopTimer(void *handle
) {
573 static_cast<TimerImpl
*>(handle
)->stop();
576 void *DefaultTimingManager::nestTimer(void *handle
, const void *id
,
577 function_ref
<std::string()> nameBuilder
) {
578 return static_cast<TimerImpl
*>(handle
)->nest(id
, nameBuilder
);
581 void DefaultTimingManager::hideTimer(void *handle
) {
582 static_cast<TimerImpl
*>(handle
)->hidden
= true;
585 //===----------------------------------------------------------------------===//
586 // DefaultTimingManager Command Line Options
587 //===----------------------------------------------------------------------===//
590 struct DefaultTimingManagerOptions
{
591 llvm::cl::opt
<bool> timing
{"mlir-timing",
592 llvm::cl::desc("Display execution times"),
593 llvm::cl::init(false)};
594 llvm::cl::opt
<DisplayMode
> displayMode
{
595 "mlir-timing-display", llvm::cl::desc("Display method for timing data"),
596 llvm::cl::init(DisplayMode::Tree
),
598 clEnumValN(DisplayMode::List
, "list",
599 "display the results in a list sorted by total time"),
600 clEnumValN(DisplayMode::Tree
, "tree",
601 "display the results ina with a nested tree view"))};
602 llvm::cl::opt
<OutputFormat
> outputFormat
{
603 "mlir-output-format", llvm::cl::desc("Output format for timing data"),
604 llvm::cl::init(OutputFormat::Text
),
605 llvm::cl::values(clEnumValN(OutputFormat::Text
, "text",
606 "display the results in text format"),
607 clEnumValN(OutputFormat::Json
, "json",
608 "display the results in JSON format"))};
612 static llvm::ManagedStatic
<DefaultTimingManagerOptions
> options
;
614 void mlir::registerDefaultTimingManagerCLOptions() {
615 // Make sure that the options struct has been constructed.
619 void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager
&tm
) {
620 if (!options
.isConstructed())
622 tm
.setEnabled(options
->timing
);
623 tm
.setDisplayMode(options
->displayMode
);
625 std::unique_ptr
<OutputStrategy
> printer
;
626 if (options
->outputFormat
== OutputFormat::Text
)
627 printer
= std::make_unique
<OutputTextStrategy
>(llvm::errs());
628 else if (options
->outputFormat
== OutputFormat::Json
)
629 printer
= std::make_unique
<OutputJsonStrategy
>(llvm::errs());
630 tm
.setOutput(std::move(printer
));