1 #include "benchmark/benchmark.h"
// Two alternative definitions of BENCHMARK_NOINLINE: the first maps it to the
// GCC-style `noinline` attribute, the second makes it expand to nothing.
// NOTE(review): the preprocessor conditional that selects between them is not
// visible in this excerpt.
22 #define BENCHMARK_NOINLINE __attribute__((noinline))
24 #define BENCHMARK_NOINLINE
// Computes n! recursively.
//
// The function is tagged BENCHMARK_NOINLINE so that each recursive step stays
// a real call. The product is formed in unsigned 32-bit arithmetic and then
// converted to the `int` return type, so results are only meaningful for
// n <= 12 (13! no longer fits in 32 bits).
int BENCHMARK_NOINLINE Factorial(uint32_t n) {
  // Use `n <= 1` rather than `n == 1`: with an unsigned argument, n == 0 would
  // otherwise wrap to UINT32_MAX on `n - 1` and recurse without terminating.
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}
// Approximates pi by summing `depth` terms of an alternating odd-denominator
// series.
//
// Term i contributes ((i % 2) * 2 - 1) / (2 * i - 1): i == 0 yields +1, and
// i >= 1 yields the Leibniz terms +1, -1/3, +1/5, ... The extra leading 1 is
// removed again in the return expression before scaling by 4.
//
// depth: number of loop iterations; values <= 0 skip the loop, in which case
//        the function returns -4.0.
double CalculatePi(int depth) {
  // The accumulator starts at zero; this is the only starting value for which
  // `(pi - 1.0) * 4` converges to pi.
  double pi = 0.0;
  for (int i = 0; i < depth; ++i) {
    const double numerator = static_cast<double>(((i % 2) * 2) - 1);
    const double denominator = static_cast<double>((2 * i) - 1);
    pi += numerator / denominator;
  }
  return (pi - 1.0) * 4;
}
// Builds a set containing the integers 0, 1, ..., size - 1.
//
// Despite the name, the contents are not random: values are inserted in
// ascending order, each with an end() hint so that every insertion lands at
// the hinted position.
//
// size: number of elements to create; a value <= 0 yields an empty set.
std::set<int64_t> ConstructRandomSet(int64_t size) {
  std::set<int64_t> s;
  // The index has the same 64-bit type as the bound so the comparison cannot
  // overflow a narrower counter for large sizes.
  for (int64_t i = 0; i < size; ++i) s.insert(s.end(), i);
  return s;
}
// Mutex that BM_SetupTeardown holds (via std::lock_guard) while it touches
// test_vector.
49 std::mutex test_vector_mu
;
// Vector shared by the multi-threaded benchmarks below. It starts out null and
// is allocated with `new` by the thread whose thread_index is 0.
50 std::vector
<int>* test_vector
= nullptr;
// Benchmark body: every iteration assigns Factorial(8) to fac_42, and after
// the loop the text held in the stream `ss` is published as the benchmark
// label. The benchmark is registered twice: once with default settings and
// once with UseRealTime().
// NOTE(review): the declarations of fac_42 and ss, and the statement that
// writes into ss, are not visible in this excerpt.
54 static void BM_Factorial(benchmark::State
& state
) {
56 for (auto _
: state
) fac_42
= Factorial(8);
57 // Prevent compiler optimizations
60 state
.SetLabel(ss
.str());
62 BENCHMARK(BM_Factorial
);
63 BENCHMARK(BM_Factorial
)->UseRealTime();
// Benchmark body: every iteration stores CalculatePi(state.range(0)) (narrowed
// to int) in `pi`, and after the loop the text held in the stream `ss` is
// published as the benchmark label. Registered through BENCHMARK_RANGE with
// the bounds 1 and 1024 * 1024.
// NOTE(review): the declarations of pi and ss, and the statement that writes
// into ss, are not visible in this excerpt.
65 static void BM_CalculatePiRange(benchmark::State
& state
) {
67 for (auto _
: state
) pi
= CalculatePi(static_cast<int>(state
.range(0)));
70 state
.SetLabel(ss
.str());
72 BENCHMARK_RANGE(BM_CalculatePiRange
, 1, 1024 * 1024);
74 static void BM_CalculatePi(benchmark::State
& state
) {
75 static const int depth
= 1024;
76 for (auto _
: state
) {
77 benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth
)));
80 BENCHMARK(BM_CalculatePi
)->Threads(8);
81 BENCHMARK(BM_CalculatePi
)->ThreadRange(1, 32);
82 BENCHMARK(BM_CalculatePi
)->ThreadPerCpu();
// Benchmark body: every iteration replaces `data` with
// ConstructRandomSet(state.range(0)) and then inserts state.range(1) values
// obtained from rand(). After the loop it reports iterations * range(1) as the
// items processed, and the same count times sizeof(int) as the bytes processed.
// NOTE(review): `data` stores int64_t elements while the byte count is scaled
// by sizeof(int) - confirm this is intended.
// NOTE(review): some statements between the visible lines are missing from
// this excerpt, so the exact per-iteration sequence cannot be confirmed here.
84 static void BM_SetInsert(benchmark::State
& state
) {
85 std::set
<int64_t> data
;
86 for (auto _
: state
) {
88 data
= ConstructRandomSet(state
.range(0));
90 for (int j
= 0; j
< state
.range(1); ++j
) data
.insert(rand());
92 state
.SetItemsProcessed(state
.iterations() * state
.range(1));
93 state
.SetBytesProcessed(state
.iterations() * state
.range(1) * sizeof(int));
96 // Test many inserts at once to reduce the total iterations needed. Otherwise, the slower,
97 // non-timed part of each iteration will make the benchmark take forever.
98 BENCHMARK(BM_SetInsert
)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
// Benchmark template parameterised on a container type and, defaulting to the
// container's value_type, an element type. Every iteration calls
// c.push_back(v) in a loop whose counter starts at state.range(0) and is
// pre-decremented before each test (`--i`), so the loop body runs
// range(0) - 1 times for range(0) >= 1. After the loop it reports
// iterations * range(0) as items processed and that count times sizeof(v) as
// bytes processed.
// NOTE(review): the reported item count uses range(0) although the loop
// performs one push_back fewer - confirm this is intended.
// NOTE(review): the declarations of `c` and `v` are not visible in this
// excerpt, and neither is the #endif matching the #ifdef below.
100 template <typename Container
,
101 typename ValueType
= typename
Container::value_type
>
102 static void BM_Sequential(benchmark::State
& state
) {
104 for (auto _
: state
) {
106 for (int64_t i
= state
.range(0); --i
;) c
.push_back(v
);
108 const int64_t items_processed
= state
.iterations() * state
.range(0);
109 state
.SetItemsProcessed(items_processed
);
110 state
.SetBytesProcessed(items_processed
* sizeof(v
));
112 BENCHMARK_TEMPLATE2(BM_Sequential
, std::vector
<int>, int)
113 ->Range(1 << 0, 1 << 10);
114 BENCHMARK_TEMPLATE(BM_Sequential
, std::list
<int>)->Range(1 << 0, 1 << 10);
115 // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
116 #ifdef BENCHMARK_HAS_CXX11
117 BENCHMARK_TEMPLATE(BM_Sequential
, std::vector
<int>, int)->Arg(512);
120 static void BM_StringCompare(benchmark::State
& state
) {
121 size_t len
= static_cast<size_t>(state
.range(0));
122 std::string
s1(len
, '-');
123 std::string
s2(len
, '-');
124 for (auto _
: state
) benchmark::DoNotOptimize(s1
.compare(s2
));
126 BENCHMARK(BM_StringCompare
)->Range(1, 1 << 20);
// Multi-threaded benchmark with per-run setup performed by thread 0: the
// thread whose thread_index is 0 allocates test_vector before the timing loop.
// Inside the loop each iteration takes test_vector_mu through a
// std::lock_guard and calls push_back(i) / pop_back() on the shared vector.
// Registered with ThreadPerCpu().
// NOTE(review): the declaration of `i`, the conditions that choose between
// push_back and pop_back, and the body of the trailing `thread_index == 0`
// block (presumably the teardown that frees test_vector - confirm) are not
// visible in this excerpt.
128 static void BM_SetupTeardown(benchmark::State
& state
) {
129 if (state
.thread_index
== 0) {
130 // No need to lock test_vector_mu here as this is running single-threaded.
131 test_vector
= new std::vector
<int>();
134 for (auto _
: state
) {
135 std::lock_guard
<std::mutex
> l(test_vector_mu
);
137 test_vector
->push_back(i
);
139 test_vector
->pop_back();
142 if (state
.thread_index
== 0) {
146 BENCHMARK(BM_SetupTeardown
)->ThreadPerCpu();
148 static void BM_LongTest(benchmark::State
& state
) {
149 double tracker
= 0.0;
150 for (auto _
: state
) {
151 for (int i
= 0; i
< state
.range(0); ++i
)
152 benchmark::DoNotOptimize(tracker
+= i
);
155 BENCHMARK(BM_LongTest
)->Range(1 << 16, 1 << 28);
// Multi-threaded fill benchmark. state.range(0) is divided by sizeof(int) to
// get an element count, which is split evenly across state.threads; each
// thread owns the half-open index range [from, to) derived from its
// thread_index. The thread with thread_index 0 allocates test_vector with that
// element count before the loop, and every iteration writes 1 to each owned
// element through the bounds-checked at().
// Because thread_size comes from an integer division, any remainder elements
// at the end of the vector are not written by any thread.
// Registered with Arg(10 << 20) and ThreadRange(1, 4).
// NOTE(review): the body of the trailing `thread_index == 0` block
// (presumably the teardown that frees test_vector - confirm) is not visible in
// this excerpt.
157 static void BM_ParallelMemset(benchmark::State
& state
) {
158 int64_t size
= state
.range(0) / static_cast<int64_t>(sizeof(int));
159 int thread_size
= static_cast<int>(size
) / state
.threads
;
160 int from
= thread_size
* state
.thread_index
;
161 int to
= from
+ thread_size
;
163 if (state
.thread_index
== 0) {
164 test_vector
= new std::vector
<int>(static_cast<size_t>(size
));
167 for (auto _
: state
) {
168 for (int i
= from
; i
< to
; i
++) {
169 // No need to lock test_vector_mu as ranges
170 // do not overlap between threads.
171 benchmark::DoNotOptimize(test_vector
->at(i
) = 1);
175 if (state
.thread_index
== 0) {
179 BENCHMARK(BM_ParallelMemset
)->Arg(10 << 20)->ThreadRange(1, 4);
181 static void BM_ManualTiming(benchmark::State
& state
) {
182 int64_t slept_for
= 0;
183 int64_t microseconds
= state
.range(0);
184 std::chrono::duration
<double, std::micro
> sleep_duration
{
185 static_cast<double>(microseconds
)};
187 for (auto _
: state
) {
188 auto start
= std::chrono::high_resolution_clock::now();
189 // Simulate some useful workload with a sleep
190 std::this_thread::sleep_for(
191 std::chrono::duration_cast
<std::chrono::nanoseconds
>(sleep_duration
));
192 auto end
= std::chrono::high_resolution_clock::now();
195 std::chrono::duration_cast
<std::chrono::duration
<double>>(end
- start
);
197 state
.SetIterationTime(elapsed
.count());
198 slept_for
+= microseconds
;
200 state
.SetItemsProcessed(slept_for
);
202 BENCHMARK(BM_ManualTiming
)->Range(1, 1 << 14)->UseRealTime();
203 BENCHMARK(BM_ManualTiming
)->Range(1, 1 << 14)->UseManualTime();
#ifdef BENCHMARK_HAS_CXX11

// Benchmark that accepts (and ignores) an arbitrary list of extra arguments;
// the timing loop is intentionally empty. It is registered through
// BENCHMARK_CAPTURE with an all-int argument list and with a
// std::string / std::pair argument list.
template <class... Args>
void BM_with_args(benchmark::State& state, Args&&...) {
  for (auto _ : state) {
  }
}
BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"),
                  std::pair<int, double>(42, 3.8));

// Non-template counterpart with a fixed (int, double) argument list, also
// registered through BENCHMARK_CAPTURE. It drives the empty timing loop with
// KeepRunning() rather than a range-based for.
void BM_non_template_args(benchmark::State& state, int, double) {
  while (state.KeepRunning()) {
  }
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);

#endif  // BENCHMARK_HAS_CXX11
// Checks the thread counts produced by DenseThreadRange. The function switches
// on st.range(0) and asserts that st.threads is one of the values expected for
// that test case: {1, 2, 3}, {1, 3, 4}, or a list starting 5, 8, 11. Any other
// selector hits assert(false). It then runs a KeepRunning() timing loop.
// The registrations pair Arg(1) with DenseThreadRange(1, 3), Arg(2) with
// DenseThreadRange(1, 4, 2) and Arg(3) with DenseThreadRange(5, 14, 3).
// NOTE(review): the case labels, the remainder of the third assert, and the
// loop body are not visible in this excerpt; the mapping of each assert to a
// selector value is presumed from their order - confirm.
223 static void BM_DenseThreadRanges(benchmark::State
& st
) {
224 switch (st
.range(0)) {
226 assert(st
.threads
== 1 || st
.threads
== 2 || st
.threads
== 3);
229 assert(st
.threads
== 1 || st
.threads
== 3 || st
.threads
== 4);
232 assert(st
.threads
== 5 || st
.threads
== 8 || st
.threads
== 11 ||
236 assert(false && "Invalid test case number");
238 while (st
.KeepRunning()) {
241 BENCHMARK(BM_DenseThreadRanges
)->Arg(1)->DenseThreadRange(1, 3);
242 BENCHMARK(BM_DenseThreadRanges
)->Arg(2)->DenseThreadRange(1, 4, 2);
243 BENCHMARK(BM_DenseThreadRanges
)->Arg(3)->DenseThreadRange(5, 14, 3);