// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15 #include "benchmark_runner.h"
17 #include "benchmark/benchmark.h"
18 #include "benchmark_api_internal.h"
19 #include "internal_macros.h"
21 #ifndef BENCHMARK_OS_WINDOWS
22 #ifndef BENCHMARK_OS_FUCHSIA
23 #include <sys/resource.h>
31 #include <condition_variable>
42 #include "colorprint.h"
43 #include "commandlineflags.h"
44 #include "complexity.h"
46 #include "internal_macros.h"
49 #include "perf_counters.h"
51 #include "statistics.h"
52 #include "string_util.h"
53 #include "thread_manager.h"
54 #include "thread_timer.h"

namespace benchmark {
namespace internal {

MemoryManager* memory_manager = nullptr;

namespace {

static constexpr IterationCount kMaxIterations = 1000000000;
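
// kMaxIterations caps the automatic iteration search below: even if a run's
// measured time stays near zero (e.g. the benchmarked code is optimized
// away), PredictNumItersNeeded() never schedules more than 1e9 iterations,
// and ShouldReportIterationResults() accepts any run that hits this cap.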

BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::BenchmarkInstance& b,
    const internal::ThreadManager::Result& results,
    IterationCount memory_iterations,
    const MemoryManager::Result* memory_result, double seconds,
    int64_t repetition_index, int64_t repeats) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.run_name = b.name();
  report.family_index = b.family_index();
  report.per_family_instance_index = b.per_family_instance_index();
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
  // This is the total iterations across all threads.
  report.iterations = results.iterations;
  report.time_unit = b.time_unit();
  report.threads = b.threads();
  report.repetition_index = repetition_index;
  report.repetitions = repeats;

  if (!report.error_occurred) {
    if (b.use_manual_time()) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.cpu_accumulated_time = results.cpu_time_used;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity();
    report.complexity_lambda = b.complexity_lambda();
    report.statistics = &b.statistics();
    report.counters = results.counters;

    if (memory_iterations > 0) {
      assert(memory_result != nullptr);
      report.memory_result = memory_result;
      report.allocs_per_iter =
          memory_iterations ? static_cast<double>(memory_result->num_allocs) /
                                  memory_iterations
                            : 0;
    }

    internal::Finish(&report.counters, results.iterations, seconds,
                     b.threads());
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
                 int thread_id, ThreadManager* manager,
                 PerfCountersMeasurement* perf_counters_measurement) {
  internal::ThreadTimer timer(
      b->measure_process_cpu_time()
          ? internal::ThreadTimer::CreateProcessCpuTime()
          : internal::ThreadTimer::Create());
  State st =
      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
  BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.complexity_n += st.complexity_length_n();
    internal::Increment(&results.counters, st.counters);
  }
  manager->NotifyThreadComplete();
}
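
// Illustrative: with b->threads() == 4 and iters == 100, RunInThread() runs
// once per thread, so manager->results.iterations accumulates to 400 and each
// *_time_used field holds the sum over all four threads. DoNIterations()
// divides the real/manual time sums by the thread count after the threads
// join.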

}  // end namespace

BenchmarkRunner::BenchmarkRunner(
    const benchmark::internal::BenchmarkInstance& b_,
    BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
    : b(b_),
      reports_for_family(reports_for_family_),
      min_time(!IsZero(b.min_time()) ? b.min_time()
                                     : FLAGS_benchmark_min_time),
      repeats(b.repetitions() != 0 ? b.repetitions()
                                   : FLAGS_benchmark_repetitions),
      has_explicit_iteration_count(b.iterations() != 0),
      pool(b.threads() - 1),
      iters(has_explicit_iteration_count ? b.iterations() : 1),
      perf_counters_measurement(
          PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
      perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
                                        ? &perf_counters_measurement
                                        : nullptr) {
  run_results.display_report_aggregates_only =
      (FLAGS_benchmark_report_aggregates_only ||
       FLAGS_benchmark_display_aggregates_only);
  run_results.file_report_aggregates_only =
      FLAGS_benchmark_report_aggregates_only;
  if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
    run_results.display_report_aggregates_only =
        (b.aggregation_report_mode() &
         internal::ARM_DisplayReportAggregatesOnly);
    run_results.file_report_aggregates_only =
        (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
    BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
             perf_counters_measurement.IsValid())
        << "Perf counters were requested but could not be set up.";
  }
}

BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
  BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";

  std::unique_ptr<internal::ThreadManager> manager;
  manager.reset(new internal::ThreadManager(b.threads()));

  // Run all but one thread in separate threads
  for (std::size_t ti = 0; ti < pool.size(); ++ti) {
    pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
                           manager.get(), perf_counters_measurement_ptr);
  }
  // And run one thread here directly.
  // (If we were asked to run just one thread, we don't create new threads.)
  // Yes, we need to do this here *after* we start the separate threads.
  RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);

  // The main thread has finished. Now let's wait for the other threads.
  manager->WaitForAllThreads();
  for (std::thread& thread : pool) thread.join();

  IterationResults i;
  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
  {
    MutexLock l(manager->GetBenchmarkMutex());
    i.results = manager->results;
  }

  // And get rid of the manager.
  manager.reset();

  // Adjust real/manual time stats since they were reported per thread.
  i.results.real_time_used /= b.threads();
  i.results.manual_time_used /= b.threads();
  // If we were measuring whole-process CPU usage, adjust the CPU time too.
  if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
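
  // Illustrative arithmetic: with b.threads() == 4 and each thread observing
  // ~1s of wall time, real_time_used arrives here as ~4s; the division above
  // restores the ~1s that a single repetition actually took.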

  BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
             << i.results.real_time_used << "\n";

  // By using KeepRunningBatch a benchmark can iterate more times than
  // requested, so take the iteration count from i.results.
  i.iters = i.results.iterations / b.threads();

  // Base decisions off of real time if requested by this benchmark.
  i.seconds = i.results.cpu_time_used;
  if (b.use_manual_time()) {
    i.seconds = i.results.manual_time_used;
  } else if (b.use_real_time()) {
    i.seconds = i.results.real_time_used;
  }

  return i;
}

IterationCount BenchmarkRunner::PredictNumItersNeeded(
    const IterationResults& i) const {
  // See by how much the iteration count should be increased.
  // Note: Avoid division by zero with max(seconds, 1ns).
  double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
  // If our last run was at least 10% of FLAGS_benchmark_min_time then we
  // use the multiplier directly.
  // Otherwise we use at most 10 times expansion.
  // NOTE: When the last run was at least 10% of the min time the max
  // expansion should be 14x.
  bool is_significant = (i.seconds / min_time) > 0.1;
  multiplier = is_significant ? multiplier : 10.0;
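
  // Worked example: with min_time = 1s, a last run of 0.5s is significant
  // (0.5 / 1 > 0.1) and yields multiplier = 1 * 1.4 / 0.5 = 2.8; a last run
  // of exactly 0.1s would yield the largest "significant" expansion,
  // 1 * 1.4 / 0.1 = 14x. Anything shorter is treated as noise and expanded
  // by only 10x.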

  // So what seems to be the sufficiently-large iteration count? Round up.
  const IterationCount max_next_iters = static_cast<IterationCount>(
      std::lround(std::max(multiplier * static_cast<double>(i.iters),
                           static_cast<double>(i.iters) + 1.0)));
  // But we do have *some* sanity limits, though.
  const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);

  BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
  return next_iters;  // round up before conversion to integer.
}

bool BenchmarkRunner::ShouldReportIterationResults(
    const IterationResults& i) const {
  // Determine if this run should be reported:
  // either it has run for a sufficient amount of time,
  // or an error was reported.
  return i.results.has_error_ ||
         i.iters >= kMaxIterations ||  // Too many iterations already.
         i.seconds >= min_time ||      // The elapsed time is large enough.
         // CPU time is specified but the elapsed real time greatly exceeds
         // the minimum time.
         // Note that user-provided timers are exempt from this sanity check.
         ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
}
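
// Illustrative: a benchmark that mostly sleeps accrues almost no CPU time,
// so i.seconds (CPU-based unless real/manual time was requested) might never
// reach min_time; the final clause stops iteration growth once wall-clock
// time exceeds 5x min_time instead.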

void BenchmarkRunner::DoOneRepetition() {
  assert(HasRepeatsRemaining() && "Already done all repetitions?");

  const bool is_the_first_repetition = num_repetitions_done == 0;
  IterationResults i;

  // We *may* be gradually increasing the length (iteration count)
  // of the benchmark until we decide the results are significant.
  // And once we do, we report those last results and exit.
  // Please do note that if there are repetitions, the iteration count
  // is *only* calculated for the *first* repetition, and other repetitions
  // simply use that precomputed iteration count.
  for (;;) {
    i = DoNIterations();

    // Do we consider the results to be significant?
    // If we are doing repetitions, and the first repetition was already done,
    // it has calculated the correct iteration time, so we have run that very
    // iteration count just now. No need to calculate anything. Just report.
    // Else, the normal rules apply.
    const bool results_are_significant = !is_the_first_repetition ||
                                         has_explicit_iteration_count ||
                                         ShouldReportIterationResults(i);

    if (results_are_significant) break;  // Good, let's report them!

    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
    // iteration count, and run the benchmark again...

    iters = PredictNumItersNeeded(i);
    assert(iters > i.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }

  // Oh, one last thing, we need to also produce the 'memory measurements'..
  MemoryManager::Result* memory_result = nullptr;
  IterationCount memory_iterations = 0;
  if (memory_manager != nullptr) {
    // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
    // optional so we don't have to own the Result here.
    // Can't do it now due to cxx03.
    memory_results.push_back(MemoryManager::Result());
    memory_result = &memory_results.back();
    // Only run a few iterations to reduce the impact of one-time
    // allocations in benchmarks that are not properly managed.
    memory_iterations = std::min<IterationCount>(16, iters);
    memory_manager->Start();
    std::unique_ptr<internal::ThreadManager> manager;
    manager.reset(new internal::ThreadManager(1));
    RunInThread(&b, memory_iterations, 0, manager.get(),
                perf_counters_measurement_ptr);
    manager->WaitForAllThreads();
    manager.reset();

    BENCHMARK_DISABLE_DEPRECATED_WARNING
    memory_manager->Stop(memory_result);
    BENCHMARK_RESTORE_DEPRECATED_WARNING
  }
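
  // Note: memory_manager is registered by the user via
  // benchmark::RegisterMemoryManager(); Start()/Stop() bracket a fresh
  // single-threaded re-run of at most 16 iterations, separate from the timed
  // runs above.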

  // Ok, now actually report.
  BenchmarkReporter::Run report =
      CreateRunReport(b, i.results, memory_iterations, memory_result,
                      i.seconds, num_repetitions_done, repeats);

  if (reports_for_family) {
    ++reports_for_family->num_runs_done;
    if (!report.error_occurred) reports_for_family->Runs.push_back(report);
  }

  run_results.non_aggregates.push_back(report);

  ++num_repetitions_done;
}

RunResults&& BenchmarkRunner::GetResults() {
  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");

  // Calculate additional statistics over the repetitions of this instance.
  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

  return std::move(run_results);
}
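
// Note: ComputeStats() produces the library's default aggregates (mean,
// median, stddev) over the per-repetition runs, plus any user statistics
// registered on the benchmark via ComputeStatistics().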

}  // end namespace internal
}  // end namespace benchmark