1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/base_switches.h"
7 #include "base/command_line.h"
8 #include "base/location.h"
9 #include "base/memory/scoped_vector.h"
10 #include "base/single_thread_task_runner.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/synchronization/condition_variable.h"
13 #include "base/synchronization/lock.h"
14 #include "base/synchronization/waitable_event.h"
15 #include "base/threading/thread.h"
16 #include "base/time/time.h"
17 #include "build/build_config.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 #include "testing/perf/perf_test.h"
// Divisor used by the ping-pong measurement code to convert the total
// wall-clock time and the summed per-thread CPU time into a per-hop figure.
// NOTE(review): presumably also the hop count handed to PingPong(); that call
// site is not visible in this excerpt -- confirm.
29 const int kNumRuns
= 100000;
31 // Base class for a threading perf-test. This sets up some threads for the
32 // test and measures the clock-time in addition to time spent on each thread.
33 class ThreadPerfTest
: public testing::Test
{
36 : done_(false, false) {
37 // Disable the task profiler as it adds significant cost!
38 CommandLine::Init(0, NULL
);
39 CommandLine::ForCurrentProcess()->AppendSwitchASCII(
40 switches::kProfilerTiming
,
41 switches::kProfilerTimingDisabledValue
);
44 // To be implemented by each test. Subclasses must use threads_ such that
45 // their cpu-time can be measured. Test must return from PingPong() _and_
46 // call FinishMeasurement from any thread to complete the test.
47 virtual void Init() {}
48 virtual void PingPong(int hops
) = 0;
49 virtual void Reset() {}
51 void TimeOnThread(base::ThreadTicks
* ticks
, base::WaitableEvent
* done
) {
52 *ticks
= base::ThreadTicks::Now();
56 base::ThreadTicks
ThreadNow(base::Thread
* thread
) {
57 base::WaitableEvent
done(false, false);
58 base::ThreadTicks ticks
;
59 thread
->task_runner()->PostTask(
60 FROM_HERE
, base::Bind(&ThreadPerfTest::TimeOnThread
,
61 base::Unretained(this), &ticks
, &done
));
66 void RunPingPongTest(const std::string
& name
, unsigned num_threads
) {
67 // Create threads and collect starting cpu-time for each thread.
68 std::vector
<base::ThreadTicks
> thread_starts
;
69 while (threads_
.size() < num_threads
) {
70 threads_
.push_back(new base::Thread("PingPonger"));
71 threads_
.back()->Start();
72 if (base::ThreadTicks::IsSupported())
73 thread_starts
.push_back(ThreadNow(threads_
.back()));
78 base::TimeTicks start
= base::TimeTicks::Now();
81 base::TimeTicks end
= base::TimeTicks::Now();
83 // Gather the cpu-time spent on each thread. This does one extra task,
84 // but that should be in the noise given enough runs.
85 base::TimeDelta thread_time
;
86 while (threads_
.size()) {
87 if (base::ThreadTicks::IsSupported()) {
88 thread_time
+= ThreadNow(threads_
.back()) - thread_starts
.back();
89 thread_starts
.pop_back();
96 double num_runs
= static_cast<double>(kNumRuns
);
97 double us_per_task_clock
= (end
- start
).InMicroseconds() / num_runs
;
98 double us_per_task_cpu
= thread_time
.InMicroseconds() / num_runs
;
100 // Clock time per task.
101 perf_test::PrintResult(
102 "task", "", name
+ "_time ", us_per_task_clock
, "us/hop", true);
104 // Total utilization across threads if available (likely higher).
105 if (base::ThreadTicks::IsSupported()) {
106 perf_test::PrintResult(
107 "task", "", name
+ "_cpu ", us_per_task_cpu
, "us/hop", true);
112 void FinishMeasurement() { done_
.Signal(); }
113 ScopedVector
<base::Thread
> threads_
;
116 base::WaitableEvent done_
;
119 // Class to test task performance by posting empty tasks back and forth.
120 class TaskPerfTest
: public ThreadPerfTest
{
121 base::Thread
* NextThread(int count
) {
122 return threads_
[count
% threads_
.size()];
125 void PingPong(int hops
) override
{
130 NextThread(hops
)->task_runner()->PostTask(
131 FROM_HERE
, base::Bind(&ThreadPerfTest::PingPong
, base::Unretained(this),
136 // This tries to test the 'best-case' as well as the 'worst-case' task posting
137 // performance. The best-case keeps one thread alive such that it never yields,
138 // while the worst-case forces a context switch for every task. Four threads are
139 // used to ensure the threads do yield (with just two it might be possible for
140 // both threads to stay awake if they can signal each other fast enough).
141 TEST_F(TaskPerfTest
, TaskPingPong
) {
142 RunPingPongTest("1_Task_Threads", 1);
143 RunPingPongTest("4_Task_Threads", 4);
147 // Same as above, but add observers to test their perf impact.
148 class MessageLoopObserver
: public base::MessageLoop::TaskObserver
{
150 void WillProcessTask(const base::PendingTask
& pending_task
) override
{}
151 void DidProcessTask(const base::PendingTask
& pending_task
) override
{}
153 MessageLoopObserver message_loop_observer
;
155 class TaskObserverPerfTest
: public TaskPerfTest
{
157 void Init() override
{
158 TaskPerfTest::Init();
159 for (size_t i
= 0; i
< threads_
.size(); i
++) {
160 threads_
[i
]->message_loop()->AddTaskObserver(&message_loop_observer
);
165 TEST_F(TaskObserverPerfTest
, TaskPingPong
) {
166 RunPingPongTest("1_Task_Threads_With_Observer", 1);
167 RunPingPongTest("4_Task_Threads_With_Observer", 4);
170 // Class to test our WaitableEvent performance by signaling back and forth.
171 // WaitableEvent is templated so we can also compare with other versions.
172 template <typename WaitableEventType
>
173 class EventPerfTest
: public ThreadPerfTest
{
175 void Init() override
{
176 for (size_t i
= 0; i
< threads_
.size(); i
++)
177 events_
.push_back(new WaitableEventType(false, false));
180 void Reset() override
{ events_
.clear(); }
182 void WaitAndSignalOnThread(size_t event
) {
183 size_t next_event
= (event
+ 1) % events_
.size();
186 events_
[event
]->Wait();
187 my_hops
= --remaining_hops_
; // We own 'hops' between Wait and Signal.
188 events_
[next_event
]->Signal();
189 } while (my_hops
> 0);
190 // Once we are done, all threads will signal as hops passes zero.
191 // We only signal completion once, on the thread that reaches zero.
196 void PingPong(int hops
) override
{
197 remaining_hops_
= hops
;
198 for (size_t i
= 0; i
< threads_
.size(); i
++) {
199 threads_
[i
]->task_runner()->PostTask(
200 FROM_HERE
, base::Bind(&EventPerfTest::WaitAndSignalOnThread
,
201 base::Unretained(this), i
));
204 // Kick off the Signal ping-ponging.
205 events_
.front()->Signal();
209 ScopedVector
<WaitableEventType
> events_
;
212 // Similar to the task posting test, this just tests similar functionality
213 // using WaitableEvents. We only test four threads (worst-case), but we
214 // might want to craft a way to test the best-case (where the thread doesn't
215 // end up blocking because the event is already signalled).
216 typedef EventPerfTest
<base::WaitableEvent
> WaitableEventPerfTest
;
217 TEST_F(WaitableEventPerfTest
, EventPingPong
) {
218 RunPingPongTest("4_WaitableEvent_Threads", 4);
221 // Build a minimal event using ConditionVariable.
222 class ConditionVariableEvent
{
224 ConditionVariableEvent(bool manual_reset
, bool initially_signaled
)
225 : cond_(&lock_
), signaled_(false) {
226 DCHECK(!manual_reset
);
227 DCHECK(!initially_signaled
);
232 base::AutoLock
scoped_lock(lock_
);
239 base::AutoLock
scoped_lock(lock_
);
247 base::ConditionVariable cond_
;
251 // This is meant to test the absolute minimal context switching time
252 // using our own base synchronization code.
253 typedef EventPerfTest
<ConditionVariableEvent
> ConditionVariablePerfTest
;
254 TEST_F(ConditionVariablePerfTest
, EventPingPong
) {
255 RunPingPongTest("4_ConditionVariable_Threads", 4);
257 #if defined(OS_POSIX)
259 // Absolutely 100% minimal posix waitable event. If there is a better/faster
260 // way to force a context switch, we should use that instead.
263 PthreadEvent(bool manual_reset
, bool initially_signaled
) {
264 DCHECK(!manual_reset
);
265 DCHECK(!initially_signaled
);
266 pthread_mutex_init(&mutex_
, 0);
267 pthread_cond_init(&cond_
, 0);
272 pthread_cond_destroy(&cond_
);
273 pthread_mutex_destroy(&mutex_
);
277 pthread_mutex_lock(&mutex_
);
279 pthread_mutex_unlock(&mutex_
);
280 pthread_cond_signal(&cond_
);
284 pthread_mutex_lock(&mutex_
);
286 pthread_cond_wait(&cond_
, &mutex_
);
288 pthread_mutex_unlock(&mutex_
);
293 pthread_mutex_t mutex_
;
294 pthread_cond_t cond_
;
297 // This is meant to test the absolute minimal context switching time.
298 // If there is any faster way to do this we should substitute it in.
299 typedef EventPerfTest
<PthreadEvent
> PthreadEventPerfTest
;
300 TEST_F(PthreadEventPerfTest
, EventPingPong
) {
301 RunPingPongTest("4_PthreadCondVar_Threads", 4);