1 #include "benchmark/benchmark.h"
// BENCHMARK_NOINLINE has two alternative definitions: the first expands to
// __attribute__((noinline)), the second expands to nothing.
// NOTE(review): the preprocessor conditionals that select between the two
// definitions are not visible in this view - confirm against the full file.
22 #define BENCHMARK_NOINLINE __attribute__((noinline))
24 #define BENCHMARK_NOINLINE
// Recursively computes n! and returns it as an int.
//
// Marked BENCHMARK_NOINLINE so that the call stays an actual call when it is
// used as a benchmark workload.
//
// n: value whose factorial is computed; 0 and 1 both yield 1.
// Returns n * (n - 1) * ... * 1. The product is formed in unsigned arithmetic
// and then converted to int, so results are only meaningful for small n.
int BENCHMARK_NOINLINE Factorial(uint32_t n) {
  // Stop at n <= 1 rather than n == 1: with an unsigned argument, n == 0 would
  // otherwise wrap around on `n - 1` and recurse without reaching the base
  // case.
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}
// Accumulates `depth` terms of an alternating series into `pi` and returns
// (pi - 1.0) * 4.
//
// Term i is numerator / denominator, where
//   numerator   = ((i % 2) * 2) - 1   (-1 for even i, +1 for odd i)
//   denominator = (2 * i) - 1
//
// depth: number of terms to add; the loop body does not run for depth <= 0.
// NOTE(review): the declaration and initial value of `pi` are not visible in
// this view - confirm against the full file.
33 double CalculatePi(int depth
) {
35 for (int i
= 0; i
< depth
; ++i
) {
36 double numerator
= static_cast<double>(((i
% 2) * 2) - 1);
37 double denominator
= static_cast<double>((2 * i
) - 1);
38 pi
+= numerator
/ denominator
;
40 return (pi
- 1.0) * 4;
// Fills the set `s` with the integers 0 .. size - 1, passing s.end() as the
// insertion hint for every element.
//
// NOTE(review): despite the name, the visible loop inserts sequential values
// rather than random ones. The declaration of `s` and the return statement are
// not visible in this view - confirm against the full file.
43 std::set
<int64_t> ConstructRandomSet(int64_t size
) {
45 for (int i
= 0; i
< size
; ++i
) s
.insert(s
.end(), i
);
// File-level state shared by the benchmarks below: BM_SetupTeardown locks
// test_vector_mu around its accesses to test_vector, and both BM_SetupTeardown
// and BM_ParallelMemset allocate test_vector from thread 0.
49 std::mutex test_vector_mu
;
// Starts out null; assigned with `new` inside the benchmarks that use it.
50 std::vector
<int>* test_vector
= nullptr;
// Benchmark: every iteration calls Factorial(8) and stores the result in
// fac_42; afterwards the benchmark label is set to ss.str().
// Registered twice: once with default settings and once with UseRealTime().
// NOTE(review): the declarations of `fac_42` and `ss`, and whatever is written
// into `ss`, are not visible in this view.
54 static void BM_Factorial(benchmark::State
& state
) {
56 for (auto _
: state
) fac_42
= Factorial(8);
57 // Prevent compiler optimizations
60 state
.SetLabel(ss
.str());
62 BENCHMARK(BM_Factorial
);
63 BENCHMARK(BM_Factorial
)->UseRealTime();
// Benchmark: every iteration calls CalculatePi with state.range(0) (narrowed
// to int) as the depth and stores the result in `pi`; afterwards the label is
// set to ss.str().
// Registered with BENCHMARK_RANGE over 1 .. 1024 * 1024.
// NOTE(review): the declarations of `pi` and `ss` are not visible in this view.
65 static void BM_CalculatePiRange(benchmark::State
& state
) {
67 for (auto _
: state
) pi
= CalculatePi(static_cast<int>(state
.range(0)));
70 state
.SetLabel(ss
.str());
72 BENCHMARK_RANGE(BM_CalculatePiRange
, 1, 1024 * 1024);
// Benchmark: every iteration calls CalculatePi with a fixed depth of 1024 and
// passes the result to benchmark::DoNotOptimize so the call is not discarded.
// Registered three times: Threads(8), ThreadRange(1, 32) and ThreadPerCpu().
74 static void BM_CalculatePi(benchmark::State
& state
) {
75 static const int depth
= 1024;
76 for (auto _
: state
) {
77 benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth
)));
80 BENCHMARK(BM_CalculatePi
)->Threads(8);
81 BENCHMARK(BM_CalculatePi
)->ThreadRange(1, 32);
82 BENCHMARK(BM_CalculatePi
)->ThreadPerCpu();
// Benchmark: every iteration rebuilds `data` with
// ConstructRandomSet(state.range(0)) and then inserts state.range(1) values
// obtained from rand().
// Reports iterations * range(1) as items processed and that count times
// sizeof(int) as bytes processed.
// Registered with Ranges({{1 << 10, 8 << 10}, {128, 512}}).
// NOTE(review): the set holds int64_t but the byte count uses sizeof(int) -
// confirm this is intended.
// NOTE(review): the comment below refers to a non-timed part of each
// iteration; the statements that would pause/resume timing are not visible in
// this view.
84 static void BM_SetInsert(benchmark::State
& state
) {
85 std::set
<int64_t> data
;
86 for (auto _
: state
) {
88 data
= ConstructRandomSet(state
.range(0));
90 for (int j
= 0; j
< state
.range(1); ++j
) data
.insert(rand());
92 state
.SetItemsProcessed(state
.iterations() * state
.range(1));
93 state
.SetBytesProcessed(state
.iterations() * state
.range(1) * sizeof(int));
96 // Test many inserts at once to reduce the total iterations needed. Otherwise,
97 // the slower, non-timed part of each iteration will make the benchmark take
99 BENCHMARK(BM_SetInsert
)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
// Benchmark template: appends the value `v` to the container `c` with
// push_back in an inner loop driven by state.range(0).
//
// Container: container type under test.
// ValueType: element type; defaults to Container::value_type.
//
// Reports iterations * range(0) as items processed and that count times
// sizeof(v) as bytes processed.
// NOTE(review): the inner loop condition is `--i` starting from range(0), so
// it performs range(0) - 1 push_backs per iteration (none when range(0) == 1),
// while the reported item count uses range(0) - confirm this is intended.
// NOTE(review): the declarations of `c` and `v` are not visible in this view.
101 template <typename Container
,
102 typename ValueType
= typename
Container::value_type
>
103 static void BM_Sequential(benchmark::State
& state
) {
105 for (auto _
: state
) {
107 for (int64_t i
= state
.range(0); --i
;) c
.push_back(v
);
109 const int64_t items_processed
= state
.iterations() * state
.range(0);
110 state
.SetItemsProcessed(items_processed
);
111 state
.SetBytesProcessed(items_processed
* sizeof(v
));
// Registrations: std::vector<int> with an explicit int value type, and
// std::list<int> with the defaulted value type, both over Range(1, 1 << 10).
113 BENCHMARK_TEMPLATE2(BM_Sequential
, std::vector
<int>, int)
114 ->Range(1 << 0, 1 << 10);
115 BENCHMARK_TEMPLATE(BM_Sequential
, std::list
<int>)->Range(1 << 0, 1 << 10);
116 // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
// NOTE(review): the #endif matching this #ifdef is not visible in this view.
117 #ifdef BENCHMARK_HAS_CXX11
118 BENCHMARK_TEMPLATE(BM_Sequential
, std::vector
<int>, int)->Arg(512);
// Benchmark: builds two strings of state.range(0) '-' characters each and, on
// every iteration, compares them with std::string::compare. The result is
// passed to benchmark::DoNotOptimize so the comparison is not discarded.
// Registered over Range(1, 1 << 20).
121 static void BM_StringCompare(benchmark::State
& state
) {
122 size_t len
= static_cast<size_t>(state
.range(0));
123 std::string
s1(len
, '-');
124 std::string
s2(len
, '-');
125 for (auto _
: state
) benchmark::DoNotOptimize(s1
.compare(s2
));
127 BENCHMARK(BM_StringCompare
)->Range(1, 1 << 20);
// Benchmark with per-run setup and teardown performed by thread 0.
// Setup: thread 0 allocates the shared test_vector.
// Loop: every iteration takes test_vector_mu and performs push_back / pop_back
// on test_vector.
// Registered with ThreadPerCpu().
// NOTE(review): the declaration of `i`, the condition choosing between
// push_back and pop_back, and the body of the final thread-0 branch are not
// visible in this view.
129 static void BM_SetupTeardown(benchmark::State
& state
) {
130 if (state
.thread_index() == 0) {
131 // No need to lock test_vector_mu here as this is running single-threaded.
132 test_vector
= new std::vector
<int>();
135 for (auto _
: state
) {
136 std::lock_guard
<std::mutex
> l(test_vector_mu
);
138 test_vector
->push_back(i
);
140 test_vector
->pop_back();
143 if (state
.thread_index() == 0) {
147 BENCHMARK(BM_SetupTeardown
)->ThreadPerCpu();
// Benchmark: every iteration adds the integers 0 .. state.range(0) - 1 to the
// running total `tracker`. Each addition is wrapped in
// benchmark::DoNotOptimize so the loop is not discarded.
// Registered over Range(1 << 16, 1 << 28).
149 static void BM_LongTest(benchmark::State
& state
) {
150 double tracker
= 0.0;
151 for (auto _
: state
) {
152 for (int i
= 0; i
< state
.range(0); ++i
)
153 benchmark::DoNotOptimize(tracker
+= i
);
156 BENCHMARK(BM_LongTest
)->Range(1 << 16, 1 << 28);
// Benchmark: several threads each write the value 1 into their own slice of
// the shared test_vector.
//
// size        = state.range(0) / sizeof(int)   (element count)
// thread_size = size / state.threads()         (elements per thread)
// [from, to)  = slice for this thread, based on state.thread_index()
//
// Thread 0 allocates test_vector with `size` elements before the loop.
// Registered with Arg(10 << 20) and ThreadRange(1, 4).
// NOTE(review): thread_size uses integer division, so when size is not a
// multiple of the thread count the trailing elements belong to no slice.
// NOTE(review): the body of the final thread-0 branch is not visible in this
// view.
158 static void BM_ParallelMemset(benchmark::State
& state
) {
159 int64_t size
= state
.range(0) / static_cast<int64_t>(sizeof(int));
160 int thread_size
= static_cast<int>(size
) / state
.threads();
161 int from
= thread_size
* state
.thread_index();
162 int to
= from
+ thread_size
;
164 if (state
.thread_index() == 0) {
165 test_vector
= new std::vector
<int>(static_cast<size_t>(size
));
168 for (auto _
: state
) {
169 for (int i
= from
; i
< to
; i
++) {
170 // No need to lock test_vector_mu as ranges
171 // do not overlap between threads.
172 benchmark::DoNotOptimize(test_vector
->at(i
) = 1);
176 if (state
.thread_index() == 0) {
180 BENCHMARK(BM_ParallelMemset
)->Arg(10 << 20)->ThreadRange(1, 4);
// Benchmark that reports its own per-iteration time.
// Every iteration sleeps for state.range(0) microseconds, measures the sleep
// with std::chrono::high_resolution_clock, and passes the elapsed time (as a
// duration<double>, i.e. in seconds) to state.SetIterationTime().
// `slept_for` accumulates the requested microseconds over all iterations and
// is reported through SetItemsProcessed().
// Registered twice over Range(1, 1 << 14): once with UseRealTime() and once
// with UseManualTime().
// NOTE(review): the declaration of `elapsed` that receives the duration_cast
// result is not visible in this view.
182 static void BM_ManualTiming(benchmark::State
& state
) {
183 int64_t slept_for
= 0;
184 int64_t microseconds
= state
.range(0);
185 std::chrono::duration
<double, std::micro
> sleep_duration
{
186 static_cast<double>(microseconds
)};
188 for (auto _
: state
) {
189 auto start
= std::chrono::high_resolution_clock::now();
190 // Simulate some useful workload with a sleep
191 std::this_thread::sleep_for(
192 std::chrono::duration_cast
<std::chrono::nanoseconds
>(sleep_duration
));
193 auto end
= std::chrono::high_resolution_clock::now();
196 std::chrono::duration_cast
<std::chrono::duration
<double>>(end
- start
);
198 state
.SetIterationTime(elapsed
.count());
199 slept_for
+= microseconds
;
201 state
.SetItemsProcessed(slept_for
);
203 BENCHMARK(BM_ManualTiming
)->Range(1, 1 << 14)->UseRealTime();
204 BENCHMARK(BM_ManualTiming
)->Range(1, 1 << 14)->UseManualTime();
// Only compiled when BENCHMARK_HAS_CXX11 is defined.
// Benchmark template that accepts any number of extra arguments after the
// State; the extra parameters are unnamed and therefore unused.
// Registered through BENCHMARK_CAPTURE twice: "int_test" with three ints, and
// "string_and_pair_test" with a std::string and a std::pair<int, double>.
// NOTE(review): the body of the iteration loop is not visible in this view.
206 #ifdef BENCHMARK_HAS_CXX11
208 template <class... Args
>
209 void BM_with_args(benchmark::State
& state
, Args
&&...) {
210 for (auto _
: state
) {
213 BENCHMARK_CAPTURE(BM_with_args
, int_test
, 42, 43, 44);
214 BENCHMARK_CAPTURE(BM_with_args
, string_and_pair_test
, std::string("abc"),
215 std::pair
<int, double>(42, 3.8));
// Non-template benchmark taking an extra int and double after the State; both
// extra parameters are unnamed and therefore unused. Iterates with the
// state.KeepRunning() loop form.
// Registered through BENCHMARK_CAPTURE as "basic_test" with arguments 0, 0.
// NOTE(review): the body of the KeepRunning loop is not visible in this view.
217 void BM_non_template_args(benchmark::State
& state
, int, double) {
218 while (state
.KeepRunning()) {
221 BENCHMARK_CAPTURE(BM_non_template_args
, basic_test
, 0, 0);
223 #endif // BENCHMARK_HAS_CXX11
// Benchmark that checks the thread counts produced by DenseThreadRange().
// It switches on st.range(0) (the test case number supplied through Arg) and
// asserts that st.threads() is one of the values expected for that case; an
// unrecognised case number trips assert(false). It then runs an
// st.KeepRunning() loop.
// Registrations pair each case number with its DenseThreadRange call:
//   Arg(1) -> DenseThreadRange(1, 3)
//   Arg(2) -> DenseThreadRange(1, 4, 2)
//   Arg(3) -> DenseThreadRange(5, 14, 3)
// NOTE(review): the case labels, the remainder of the third assert, and the
// KeepRunning loop body are not visible in this view.
225 static void BM_DenseThreadRanges(benchmark::State
& st
) {
226 switch (st
.range(0)) {
228 assert(st
.threads() == 1 || st
.threads() == 2 || st
.threads() == 3);
231 assert(st
.threads() == 1 || st
.threads() == 3 || st
.threads() == 4);
234 assert(st
.threads() == 5 || st
.threads() == 8 || st
.threads() == 11 ||
238 assert(false && "Invalid test case number");
240 while (st
.KeepRunning()) {
243 BENCHMARK(BM_DenseThreadRanges
)->Arg(1)->DenseThreadRange(1, 3);
244 BENCHMARK(BM_DenseThreadRanges
)->Arg(2)->DenseThreadRange(1, 4, 2);
245 BENCHMARK(BM_DenseThreadRanges
)->Arg(3)->DenseThreadRange(5, 14, 3);