// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark_runner.h"

#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif  // !BENCHMARK_OS_FUCHSIA && !BENCHMARK_OS_QURT
#endif  // !BENCHMARK_OS_WINDOWS

#include <algorithm>
#include <cmath>
#include <condition_variable>
#include <cstdlib>
#include <memory>
#include <string>
#include <thread>
#include <utility>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"

namespace benchmark {

namespace internal {

MemoryManager* memory_manager = nullptr;

static constexpr IterationCount kMaxIterations = 1000000000000;
const double kDefaultMinTime =
    std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);

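// Builds the BenchmarkReporter::Run record for one finished repetition from
// the per-thread totals accumulated in `results` (and, if a MemoryManager is
// installed, from the memory measurements gathered for this run).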
BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::BenchmarkInstance& b,
    const internal::ThreadManager::Result& results,
    IterationCount memory_iterations,
    const MemoryManager::Result* memory_result, double seconds,
    int64_t repetition_index, int64_t repeats) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.run_name = b.name();
  report.family_index = b.family_index();
  report.per_family_instance_index = b.per_family_instance_index();
  report.skipped = results.skipped_;
  report.skip_message = results.skip_message_;
  report.report_label = results.report_label_;
  // This is the total number of iterations across all threads.
  report.iterations = results.iterations;
  report.time_unit = b.time_unit();
  report.threads = b.threads();
  report.repetition_index = repetition_index;
  report.repetitions = repeats;

  if (!report.skipped) {
    if (b.use_manual_time()) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.use_real_time_for_initial_big_o = b.use_manual_time();
    report.cpu_accumulated_time = results.cpu_time_used;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity();
    report.complexity_lambda = b.complexity_lambda();
    report.statistics = &b.statistics();
    report.counters = results.counters;

    if (memory_iterations > 0) {
      assert(memory_result != nullptr);
      report.memory_result = memory_result;
      report.allocs_per_iter =
          memory_iterations ? static_cast<double>(memory_result->num_allocs) /
                                  static_cast<double>(memory_iterations)
                            : 0;
    }

    internal::Finish(&report.counters, results.iterations, seconds,
                     b.threads());
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
                 int thread_id, ThreadManager* manager,
                 PerfCountersMeasurement* perf_counters_measurement) {
  internal::ThreadTimer timer(
      b->measure_process_cpu_time()
          ? internal::ThreadTimer::CreateProcessCpuTime()
          : internal::ThreadTimer::Create());

  State st =
      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
  BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.complexity_n += st.complexity_length_n();
    internal::Increment(&results.counters, st.counters);
  }
  manager->NotifyThreadComplete();
}

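// Returns the effective minimum running time for benchmark `b`: an explicit
// per-benchmark MinTime() takes precedence; otherwise use the value from
// --benchmark_min_time, or kDefaultMinTime if that flag specified iterations.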
double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
                      const BenchTimeType& iters_or_time) {
  if (!IsZero(b.min_time())) return b.min_time();
  // If the flag was used to specify the number of iterations, then return
  // the default min_time.
  if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;

  return iters_or_time.time;
}

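// Returns the explicit iteration count to use: a per-benchmark Iterations()
// setting takes precedence over a count passed via --benchmark_min_time=<N>x.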
IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
                            const BenchTimeType& iters_or_time) {
  if (b.iterations() != 0) return b.iterations();

  // We've already concluded that this flag is currently used to pass an
  // iteration count, but do the check here again anyway.
  BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
  return iters_or_time.iters;
}

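// Parses the --benchmark_min_time flag, which accepts either "<integer>x"
// (an explicit iteration count) or "<float>s" (a minimum time in seconds;
// the 's' suffix may be omitted, with a warning).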
BenchTimeType ParseBenchMinTime(const std::string& value) {
  BenchTimeType ret;

  if (value.empty()) {
    ret.tag = BenchTimeType::TIME;
    ret.time = 0.0;
    return ret;
  }

  if (value.back() == 'x') {
    char* p_end;
    // Reset errno before it's changed by strtol.
    errno = 0;
    IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);

    // After a valid parse, p_end should have been set to
    // point to the 'x' suffix.
    BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
        << "Malformed iters value passed to --benchmark_min_time: `" << value
        << "`. Expected --benchmark_min_time=<integer>x.";

    ret.tag = BenchTimeType::ITERS;
    ret.iters = num_iters;
    return ret;
  }

  bool has_suffix = value.back() == 's';
  if (!has_suffix) {
    BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
                  "E.g., `30s` for 30 seconds.";
  }

  char* p_end;
  // Reset errno before it's changed by strtod.
  errno = 0;
  double min_time = std::strtod(value.c_str(), &p_end);

  // After a successful parse, p_end should point to the suffix 's',
  // or the end of the string if the suffix was omitted.
  BM_CHECK(errno == 0 && p_end != nullptr &&
           ((has_suffix && *p_end == 's') || *p_end == '\0'))
      << "Malformed seconds value passed to --benchmark_min_time: `" << value
      << "`. Expected --benchmark_min_time=<float>s.";

  ret.tag = BenchTimeType::TIME;
  ret.time = min_time;

  return ret;
}

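// The BenchmarkRunner resolves all per-run configuration up front: the
// effective min time and min warmup time, the repetition count, whether an
// explicit iteration count was requested, and the pool of b.threads() - 1
// helper threads.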
BenchmarkRunner::BenchmarkRunner(
    const benchmark::internal::BenchmarkInstance& b_,
    PerfCountersMeasurement* pcm_,
    BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
    : b(b_),
      reports_for_family(reports_for_family_),
      parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
      min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
      min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
                          ? b.min_warmup_time()
                          : FLAGS_benchmark_min_warmup_time),
      warmup_done(!(min_warmup_time > 0.0)),
      repeats(b.repetitions() != 0 ? b.repetitions()
                                   : FLAGS_benchmark_repetitions),
      has_explicit_iteration_count(b.iterations() != 0 ||
                                   parsed_benchtime_flag.tag ==
                                       BenchTimeType::ITERS),
      pool(b.threads() - 1),
      iters(has_explicit_iteration_count
                ? ComputeIters(b_, parsed_benchtime_flag)
                : 1),
      perf_counters_measurement_ptr(pcm_) {
  run_results.display_report_aggregates_only =
      (FLAGS_benchmark_report_aggregates_only ||
       FLAGS_benchmark_display_aggregates_only);
  run_results.file_report_aggregates_only =
      FLAGS_benchmark_report_aggregates_only;
  if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
    run_results.display_report_aggregates_only =
        (b.aggregation_report_mode() &
         internal::ARM_DisplayReportAggregatesOnly);
    run_results.file_report_aggregates_only =
        (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
    BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
             (perf_counters_measurement_ptr->num_counters() == 0))
        << "Perf counters were requested but could not be set up.";
  }
}

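// Runs a single batch of `iters` iterations on b.threads() threads and
// returns the merged timing results for that batch.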
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
  BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";

  std::unique_ptr<internal::ThreadManager> manager;
  manager.reset(new internal::ThreadManager(b.threads()));

  // Run all but one thread in separate threads.
  for (std::size_t ti = 0; ti < pool.size(); ++ti) {
    pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
                           manager.get(), perf_counters_measurement_ptr);
  }
  // And run one thread here directly.
  // (If we were asked to run just one thread, we don't create new threads.)
  // Yes, we need to do this here *after* we start the separate threads.
  RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);

  // The main thread has finished. Now let's wait for the other threads.
  manager->WaitForAllThreads();
  for (std::thread& thread : pool) thread.join();

  IterationResults i;
  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
  {
    MutexLock l(manager->GetBenchmarkMutex());
    i.results = manager->results;
  }

  // And get rid of the manager.
  manager.reset();

  // Adjust real/manual time stats since they were reported per thread.
  i.results.real_time_used /= b.threads();
  i.results.manual_time_used /= b.threads();
  // If we were measuring whole-process CPU usage, adjust the CPU time too.
  if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();

  BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
             << i.results.real_time_used << "\n";

  // By using KeepRunningBatch a benchmark can iterate more times than
  // requested, so take the iteration count from i.results.
  i.iters = i.results.iterations / b.threads();

  // Base decisions off of real time if requested by this benchmark.
  i.seconds = i.results.cpu_time_used;
  if (b.use_manual_time()) {
    i.seconds = i.results.manual_time_used;
  } else if (b.use_real_time()) {
    i.seconds = i.results.real_time_used;
  }

  return i;
}

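// Estimates how many iterations the next batch should run so that it lasts
// roughly GetMinTimeToApply() seconds, based on the timing of the last batch.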
IterationCount BenchmarkRunner::PredictNumItersNeeded(
    const IterationResults& i) const {
  // See by how much the iteration count should be increased.
  // Note: avoid division by zero with max(seconds, 1ns).
  double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
  // If our last run was at least 10% of FLAGS_benchmark_min_time then we
  // use the multiplier directly.
  // Otherwise we use at most 10 times expansion.
  // NOTE: When the last run was at least 10% of the min time the max
  // expansion should be 14x.
  const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
  multiplier = is_significant ? multiplier : 10.0;

  // So what seems to be the sufficiently-large iteration count? Round up.
  const IterationCount max_next_iters = static_cast<IterationCount>(
      std::llround(std::max(multiplier * static_cast<double>(i.iters),
                            static_cast<double>(i.iters) + 1.0)));
  // But we do have *some* limits though..
  const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);

  BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
  return next_iters;  // round up before conversion to integer.
}

bool BenchmarkRunner::ShouldReportIterationResults(
    const IterationResults& i) const {
  // Determine if this run should be reported:
  // either it has run for a sufficient amount of time,
  // or an error was reported.
  return i.results.skipped_ ||
         i.iters >= kMaxIterations ||  // Too many iterations already.
         i.seconds >=
             GetMinTimeToApply() ||  // The elapsed time is large enough.
         // CPU time is specified but the elapsed real time greatly exceeds
         // the minimum time.
         // Note that user-provided timers are exempt from this test.
         ((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
          !b.use_manual_time());
}

double BenchmarkRunner::GetMinTimeToApply() const {
  // The warmup phase reuses the same machinery that runs and measures the
  // actual benchmark, so we need a way of telling whether min_time or
  // min_warmup_time applies: return min_warmup_time while we are still in
  // the warmup phase, and min_time once the warmup is done.
  return warmup_done ? min_time : min_warmup_time;
}

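// Ends the warmup phase and restores the iteration count to use for the
// first measured batch.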
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
  warmup_done = true;
  iters = i;
}

void BenchmarkRunner::RunWarmUp() {
  // Use the same mechanisms for warming up the benchmark as used for actually
  // running and measuring the benchmark.
  IterationResults i_warmup;
  // Don't use the iteration count determined in the warmup phase for the
  // actual measured benchmark phase. While it may be a good starting point
  // (and would avoid having to figure out how many iterations are needed when
  // min_time is set), it may also be a completely wrong guess, since the
  // warmup loops might be considerably slower (e.g. because of caching
  // effects).
  const IterationCount i_backup = iters;

  for (;;) {
    b.Setup();
    i_warmup = DoNIterations();
    b.Teardown();

    const bool finish = ShouldReportIterationResults(i_warmup);

    if (finish) {
      FinishWarmUp(i_backup);
      break;
    }

    // Although this is "only" a warmup phase, where running enough iterations
    // at once without measuring time isn't as important as it is for the
    // benchmarking phase, we still grow the iteration count the same way;
    // otherwise it would be very confusing for the user to choose a proper
    // value for min_warmup_time if a different approach were used here.
    iters = PredictNumItersNeeded(i_warmup);
    assert(iters > i_warmup.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }
}

void BenchmarkRunner::DoOneRepetition() {
  assert(HasRepeatsRemaining() && "Already done all repetitions?");

  const bool is_the_first_repetition = num_repetitions_done == 0;

  // In case a warmup phase is requested by the benchmark, run it now.
  // After running the warmup phase the BenchmarkRunner should be in the same
  // state as if the warmup never happened, except that warmup_done is set.
  // Any other manipulation of the BenchmarkRunner instance would be a bug;
  // please fix it.
  if (!warmup_done) RunWarmUp();

  IterationResults i;
  // We *may* be gradually increasing the length (iteration count)
  // of the benchmark until we decide the results are significant.
  // And once we do, we report those last results and exit.
  // Please do note that if there are repetitions, the iteration count
  // is calculated *only* for the *first* repetition, and other repetitions
  // simply reuse that precomputed iteration count.
  for (;;) {
    b.Setup();
    i = DoNIterations();
    b.Teardown();

    // Do we consider the results to be significant?
    // If we are doing repetitions, and the first repetition was already done,
    // it has calculated the correct iteration time, so we have run that very
    // iteration count just now. No need to calculate anything. Just report.
    // Else, the normal rules apply.
    const bool results_are_significant = !is_the_first_repetition ||
                                         has_explicit_iteration_count ||
                                         ShouldReportIterationResults(i);

    if (results_are_significant) break;  // Good, let's report them!

    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
    // iteration count, and run the benchmark again...
    iters = PredictNumItersNeeded(i);
    assert(iters > i.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }

  // Oh, one last thing, we need to also produce the 'memory measurements'..
  MemoryManager::Result* memory_result = nullptr;
  IterationCount memory_iterations = 0;
  if (memory_manager != nullptr) {
    // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
    // optional so we don't have to own the Result here.
    // Can't do it now due to cxx03.
    memory_results.push_back(MemoryManager::Result());
    memory_result = &memory_results.back();
    // Only run a few iterations to reduce the impact of one-time
    // allocations in benchmarks that are not properly managed.
    memory_iterations = std::min<IterationCount>(16, iters);
    memory_manager->Start();
    std::unique_ptr<internal::ThreadManager> manager;
    manager.reset(new internal::ThreadManager(1));
    b.Setup();
    RunInThread(&b, memory_iterations, 0, manager.get(),
                perf_counters_measurement_ptr);
    manager->WaitForAllThreads();
    manager.reset();
    b.Teardown();
    memory_manager->Stop(*memory_result);
  }

  // Ok, now actually report.
  BenchmarkReporter::Run report =
      CreateRunReport(b, i.results, memory_iterations, memory_result,
                      i.seconds, num_repetitions_done, repeats);

  if (reports_for_family) {
    ++reports_for_family->num_runs_done;
    if (!report.skipped) reports_for_family->Runs.push_back(report);
  }

  run_results.non_aggregates.push_back(report);

  ++num_repetitions_done;
}

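// Computes aggregate statistics over all finished repetitions and transfers
// ownership of the accumulated results to the caller.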
RunResults&& BenchmarkRunner::GetResults() {
  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");

  // Calculate additional statistics over the repetitions of this instance.
  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

  return std::move(run_results);
}

}  // end namespace internal

}  // end namespace benchmark