QUIC - cleanup changes to sync chromium tree with internal source.
[chromium-blink-merge.git] / base / threading / thread_perftest.cc
blobe865ffa90856490a5e3a837459e2c8fb6c24f3a8
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/base_switches.h"
6 #include "base/bind.h"
7 #include "base/command_line.h"
8 #include "base/location.h"
9 #include "base/memory/scoped_vector.h"
10 #include "base/single_thread_task_runner.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/synchronization/condition_variable.h"
13 #include "base/synchronization/lock.h"
14 #include "base/synchronization/waitable_event.h"
15 #include "base/threading/thread.h"
16 #include "base/time/time.h"
17 #include "build/build_config.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 #include "testing/perf/perf_test.h"
21 #if defined(OS_POSIX)
22 #include <pthread.h>
23 #endif
25 namespace base {
27 namespace {
// Number of hops handed to PingPong() for every measurement; the reported
// per-hop figures are the measured totals divided by this value.
const int kNumRuns = 100 * 1000;
31 // Base class for a threading perf-test. This sets up some threads for the
32 // test and measures the clock-time in addition to time spent on each thread.
33 class ThreadPerfTest : public testing::Test {
34 public:
35 ThreadPerfTest()
36 : done_(false, false) {
37 // Disable the task profiler as it adds significant cost!
38 CommandLine::Init(0, NULL);
39 CommandLine::ForCurrentProcess()->AppendSwitchASCII(
40 switches::kProfilerTiming,
41 switches::kProfilerTimingDisabledValue);
44 // To be implemented by each test. Subclass must uses threads_ such that
45 // their cpu-time can be measured. Test must return from PingPong() _and_
46 // call FinishMeasurement from any thread to complete the test.
47 virtual void Init() {}
48 virtual void PingPong(int hops) = 0;
49 virtual void Reset() {}
51 void TimeOnThread(base::ThreadTicks* ticks, base::WaitableEvent* done) {
52 *ticks = base::ThreadTicks::Now();
53 done->Signal();
56 base::ThreadTicks ThreadNow(base::Thread* thread) {
57 base::WaitableEvent done(false, false);
58 base::ThreadTicks ticks;
59 thread->task_runner()->PostTask(
60 FROM_HERE, base::Bind(&ThreadPerfTest::TimeOnThread,
61 base::Unretained(this), &ticks, &done));
62 done.Wait();
63 return ticks;
66 void RunPingPongTest(const std::string& name, unsigned num_threads) {
67 // Create threads and collect starting cpu-time for each thread.
68 std::vector<base::ThreadTicks> thread_starts;
69 while (threads_.size() < num_threads) {
70 threads_.push_back(new base::Thread("PingPonger"));
71 threads_.back()->Start();
72 if (base::ThreadTicks::IsSupported())
73 thread_starts.push_back(ThreadNow(threads_.back()));
76 Init();
78 base::TimeTicks start = base::TimeTicks::Now();
79 PingPong(kNumRuns);
80 done_.Wait();
81 base::TimeTicks end = base::TimeTicks::Now();
83 // Gather the cpu-time spent on each thread. This does one extra tasks,
84 // but that should be in the noise given enough runs.
85 base::TimeDelta thread_time;
86 while (threads_.size()) {
87 if (base::ThreadTicks::IsSupported()) {
88 thread_time += ThreadNow(threads_.back()) - thread_starts.back();
89 thread_starts.pop_back();
91 threads_.pop_back();
94 Reset();
96 double num_runs = static_cast<double>(kNumRuns);
97 double us_per_task_clock = (end - start).InMicroseconds() / num_runs;
98 double us_per_task_cpu = thread_time.InMicroseconds() / num_runs;
100 // Clock time per task.
101 perf_test::PrintResult(
102 "task", "", name + "_time ", us_per_task_clock, "us/hop", true);
104 // Total utilization across threads if available (likely higher).
105 if (base::ThreadTicks::IsSupported()) {
106 perf_test::PrintResult(
107 "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true);
111 protected:
112 void FinishMeasurement() { done_.Signal(); }
113 ScopedVector<base::Thread> threads_;
115 private:
116 base::WaitableEvent done_;
119 // Class to test task performance by posting empty tasks back and forth.
120 class TaskPerfTest : public ThreadPerfTest {
121 base::Thread* NextThread(int count) {
122 return threads_[count % threads_.size()];
125 void PingPong(int hops) override {
126 if (!hops) {
127 FinishMeasurement();
128 return;
130 NextThread(hops)->task_runner()->PostTask(
131 FROM_HERE, base::Bind(&ThreadPerfTest::PingPong, base::Unretained(this),
132 hops - 1));
136 // This tries to test the 'best-case' as well as the 'worst-case' task posting
137 // performance. The best-case keeps one thread alive such that it never yeilds,
138 // while the worse-case forces a context switch for every task. Four threads are
139 // used to ensure the threads do yeild (with just two it might be possible for
140 // both threads to stay awake if they can signal each other fast enough).
141 TEST_F(TaskPerfTest, TaskPingPong) {
142 RunPingPongTest("1_Task_Threads", 1);
143 RunPingPongTest("4_Task_Threads", 4);
147 // Same as above, but add observers to test their perf impact.
148 class MessageLoopObserver : public base::MessageLoop::TaskObserver {
149 public:
150 void WillProcessTask(const base::PendingTask& pending_task) override {}
151 void DidProcessTask(const base::PendingTask& pending_task) override {}
153 MessageLoopObserver message_loop_observer;
155 class TaskObserverPerfTest : public TaskPerfTest {
156 public:
157 void Init() override {
158 TaskPerfTest::Init();
159 for (size_t i = 0; i < threads_.size(); i++) {
160 threads_[i]->message_loop()->AddTaskObserver(&message_loop_observer);
165 TEST_F(TaskObserverPerfTest, TaskPingPong) {
166 RunPingPongTest("1_Task_Threads_With_Observer", 1);
167 RunPingPongTest("4_Task_Threads_With_Observer", 4);
170 // Class to test our WaitableEvent performance by signaling back and fort.
171 // WaitableEvent is templated so we can also compare with other versions.
172 template <typename WaitableEventType>
173 class EventPerfTest : public ThreadPerfTest {
174 public:
175 void Init() override {
176 for (size_t i = 0; i < threads_.size(); i++)
177 events_.push_back(new WaitableEventType(false, false));
180 void Reset() override { events_.clear(); }
182 void WaitAndSignalOnThread(size_t event) {
183 size_t next_event = (event + 1) % events_.size();
184 int my_hops = 0;
185 do {
186 events_[event]->Wait();
187 my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal.
188 events_[next_event]->Signal();
189 } while (my_hops > 0);
190 // Once we are done, all threads will signal as hops passes zero.
191 // We only signal completion once, on the thread that reaches zero.
192 if (!my_hops)
193 FinishMeasurement();
196 void PingPong(int hops) override {
197 remaining_hops_ = hops;
198 for (size_t i = 0; i < threads_.size(); i++) {
199 threads_[i]->task_runner()->PostTask(
200 FROM_HERE, base::Bind(&EventPerfTest::WaitAndSignalOnThread,
201 base::Unretained(this), i));
204 // Kick off the Signal ping-ponging.
205 events_.front()->Signal();
208 int remaining_hops_;
209 ScopedVector<WaitableEventType> events_;
212 // Similar to the task posting test, this just tests similar functionality
213 // using WaitableEvents. We only test four threads (worst-case), but we
214 // might want to craft a way to test the best-case (where the thread doesn't
215 // end up blocking because the event is already signalled).
216 typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest;
217 TEST_F(WaitableEventPerfTest, EventPingPong) {
218 RunPingPongTest("4_WaitableEvent_Threads", 4);
221 // Build a minimal event using ConditionVariable.
222 class ConditionVariableEvent {
223 public:
224 ConditionVariableEvent(bool manual_reset, bool initially_signaled)
225 : cond_(&lock_), signaled_(false) {
226 DCHECK(!manual_reset);
227 DCHECK(!initially_signaled);
230 void Signal() {
232 base::AutoLock scoped_lock(lock_);
233 signaled_ = true;
235 cond_.Signal();
238 void Wait() {
239 base::AutoLock scoped_lock(lock_);
240 while (!signaled_)
241 cond_.Wait();
242 signaled_ = false;
245 private:
246 base::Lock lock_;
247 base::ConditionVariable cond_;
248 bool signaled_;
251 // This is meant to test the absolute minimal context switching time
252 // using our own base synchronization code.
253 typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
254 TEST_F(ConditionVariablePerfTest, EventPingPong) {
255 RunPingPongTest("4_ConditionVariable_Threads", 4);
257 #if defined(OS_POSIX)
259 // Absolutely 100% minimal posix waitable event. If there is a better/faster
260 // way to force a context switch, we should use that instead.
261 class PthreadEvent {
262 public:
263 PthreadEvent(bool manual_reset, bool initially_signaled) {
264 DCHECK(!manual_reset);
265 DCHECK(!initially_signaled);
266 pthread_mutex_init(&mutex_, 0);
267 pthread_cond_init(&cond_, 0);
268 signaled_ = false;
271 ~PthreadEvent() {
272 pthread_cond_destroy(&cond_);
273 pthread_mutex_destroy(&mutex_);
276 void Signal() {
277 pthread_mutex_lock(&mutex_);
278 signaled_ = true;
279 pthread_mutex_unlock(&mutex_);
280 pthread_cond_signal(&cond_);
283 void Wait() {
284 pthread_mutex_lock(&mutex_);
285 while (!signaled_)
286 pthread_cond_wait(&cond_, &mutex_);
287 signaled_ = false;
288 pthread_mutex_unlock(&mutex_);
291 private:
292 bool signaled_;
293 pthread_mutex_t mutex_;
294 pthread_cond_t cond_;
297 // This is meant to test the absolute minimal context switching time.
298 // If there is any faster way to do this we should substitute it in.
299 typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
300 TEST_F(PthreadEventPerfTest, EventPingPong) {
301 RunPingPongTest("4_PthreadCondVar_Threads", 4);
304 #endif
306 } // namespace
308 } // namespace base