[ARM] Fix for MVE VPT block pass
[llvm-complete.git] / utils / benchmark / src / benchmark.cc
blob82b15ac7090cfbdff902e138bf47e910043f479e
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "benchmark/benchmark.h"
16 #include "benchmark_api_internal.h"
17 #include "internal_macros.h"
19 #ifndef BENCHMARK_OS_WINDOWS
20 #ifndef BENCHMARK_OS_FUCHSIA
21 #include <sys/resource.h>
22 #endif
23 #include <sys/time.h>
24 #include <unistd.h>
25 #endif
27 #include <algorithm>
28 #include <atomic>
29 #include <condition_variable>
30 #include <cstdio>
31 #include <cstdlib>
32 #include <fstream>
33 #include <iostream>
34 #include <memory>
35 #include <string>
36 #include <thread>
38 #include "check.h"
39 #include "colorprint.h"
40 #include "commandlineflags.h"
41 #include "complexity.h"
42 #include "counter.h"
43 #include "internal_macros.h"
44 #include "log.h"
45 #include "mutex.h"
46 #include "re.h"
47 #include "statistics.h"
48 #include "string_util.h"
49 #include "thread_manager.h"
50 #include "thread_timer.h"
52 DEFINE_bool(benchmark_list_tests, false,
53 "Print a list of benchmarks. This option overrides all other "
54 "options.");
56 DEFINE_string(benchmark_filter, ".",
57 "A regular expression that specifies the set of benchmarks "
58 "to execute. If this flag is empty, no benchmarks are run. "
59 "If this flag is the string \"all\", all benchmarks linked "
60 "into the process are run.");
62 DEFINE_double(benchmark_min_time, 0.5,
63 "Minimum number of seconds we should run benchmark before "
64 "results are considered significant. For cpu-time based "
65 "tests, this is the lower bound on the total cpu time "
66 "used by all threads that make up the test. For real-time "
67 "based tests, this is the lower bound on the elapsed time "
68 "of the benchmark execution, regardless of number of "
69 "threads.");
71 DEFINE_int32(benchmark_repetitions, 1,
72 "The number of runs of each benchmark. If greater than 1, the "
73 "mean and standard deviation of the runs will be reported.");
75 DEFINE_bool(benchmark_report_aggregates_only, false,
76 "Report the result of each benchmark repetitions. When 'true' is "
77 "specified only the mean, standard deviation, and other statistics "
78 "are reported for repeated benchmarks.");
80 DEFINE_string(benchmark_format, "console",
81 "The format to use for console output. Valid values are "
82 "'console', 'json', or 'csv'.");
84 DEFINE_string(benchmark_out_format, "json",
85 "The format to use for file output. Valid values are "
86 "'console', 'json', or 'csv'.");
88 DEFINE_string(benchmark_out, "", "The file to write additional output to");
90 DEFINE_string(benchmark_color, "auto",
91 "Whether to use colors in the output. Valid values: "
92 "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use "
93 "colors if the output is being sent to a terminal and the TERM "
94 "environment variable is set to a terminal type that supports "
95 "colors.");
97 DEFINE_bool(benchmark_counters_tabular, false,
98 "Whether to use tabular format when printing user counters to "
99 "the console. Valid values: 'true'/'yes'/1, 'false'/'no'/0."
100 "Defaults to false.");
102 DEFINE_int32(v, 0, "The level of verbose logging to output");
104 namespace benchmark {
106 namespace {
107 static const size_t kMaxIterations = 1000000000;
108 } // end namespace
110 namespace internal {
112 void UseCharPointer(char const volatile*) {}
114 namespace {
116 BenchmarkReporter::Run CreateRunReport(
117 const benchmark::internal::Benchmark::Instance& b,
118 const internal::ThreadManager::Result& results,
119 double seconds) {
120 // Create report about this benchmark run.
121 BenchmarkReporter::Run report;
123 report.benchmark_name = b.name;
124 report.error_occurred = results.has_error_;
125 report.error_message = results.error_message_;
126 report.report_label = results.report_label_;
127 // This is the total iterations across all threads.
128 report.iterations = results.iterations;
129 report.time_unit = b.time_unit;
131 if (!report.error_occurred) {
132 double bytes_per_second = 0;
133 if (results.bytes_processed > 0 && seconds > 0.0) {
134 bytes_per_second = (results.bytes_processed / seconds);
136 double items_per_second = 0;
137 if (results.items_processed > 0 && seconds > 0.0) {
138 items_per_second = (results.items_processed / seconds);
141 if (b.use_manual_time) {
142 report.real_accumulated_time = results.manual_time_used;
143 } else {
144 report.real_accumulated_time = results.real_time_used;
146 report.cpu_accumulated_time = results.cpu_time_used;
147 report.bytes_per_second = bytes_per_second;
148 report.items_per_second = items_per_second;
149 report.complexity_n = results.complexity_n;
150 report.complexity = b.complexity;
151 report.complexity_lambda = b.complexity_lambda;
152 report.statistics = b.statistics;
153 report.counters = results.counters;
154 internal::Finish(&report.counters, seconds, b.threads);
156 return report;
159 // Execute one thread of benchmark b for the specified number of iterations.
160 // Adds the stats collected for the thread into *total.
161 void RunInThread(const benchmark::internal::Benchmark::Instance* b,
162 size_t iters, int thread_id,
163 internal::ThreadManager* manager) {
164 internal::ThreadTimer timer;
165 State st(iters, b->arg, thread_id, b->threads, &timer, manager);
166 b->benchmark->Run(st);
167 CHECK(st.iterations() >= st.max_iterations)
168 << "Benchmark returned before State::KeepRunning() returned false!";
170 MutexLock l(manager->GetBenchmarkMutex());
171 internal::ThreadManager::Result& results = manager->results;
172 results.iterations += st.iterations();
173 results.cpu_time_used += timer.cpu_time_used();
174 results.real_time_used += timer.real_time_used();
175 results.manual_time_used += timer.manual_time_used();
176 results.bytes_processed += st.bytes_processed();
177 results.items_processed += st.items_processed();
178 results.complexity_n += st.complexity_length_n();
179 internal::Increment(&results.counters, st.counters);
181 manager->NotifyThreadComplete();
184 std::vector<BenchmarkReporter::Run> RunBenchmark(
185 const benchmark::internal::Benchmark::Instance& b,
186 std::vector<BenchmarkReporter::Run>* complexity_reports) {
187 std::vector<BenchmarkReporter::Run> reports; // return value
189 const bool has_explicit_iteration_count = b.iterations != 0;
190 size_t iters = has_explicit_iteration_count ? b.iterations : 1;
191 std::unique_ptr<internal::ThreadManager> manager;
192 std::vector<std::thread> pool(b.threads - 1);
193 const int repeats =
194 b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions;
195 const bool report_aggregates_only =
196 repeats != 1 &&
197 (b.report_mode == internal::RM_Unspecified
198 ? FLAGS_benchmark_report_aggregates_only
199 : b.report_mode == internal::RM_ReportAggregatesOnly);
200 for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
201 for (;;) {
202 // Try benchmark
203 VLOG(2) << "Running " << b.name << " for " << iters << "\n";
205 manager.reset(new internal::ThreadManager(b.threads));
206 for (std::size_t ti = 0; ti < pool.size(); ++ti) {
207 pool[ti] = std::thread(&RunInThread, &b, iters,
208 static_cast<int>(ti + 1), manager.get());
210 RunInThread(&b, iters, 0, manager.get());
211 manager->WaitForAllThreads();
212 for (std::thread& thread : pool) thread.join();
213 internal::ThreadManager::Result results;
215 MutexLock l(manager->GetBenchmarkMutex());
216 results = manager->results;
218 manager.reset();
219 // Adjust real/manual time stats since they were reported per thread.
220 results.real_time_used /= b.threads;
221 results.manual_time_used /= b.threads;
223 VLOG(2) << "Ran in " << results.cpu_time_used << "/"
224 << results.real_time_used << "\n";
226 // Base decisions off of real time if requested by this benchmark.
227 double seconds = results.cpu_time_used;
228 if (b.use_manual_time) {
229 seconds = results.manual_time_used;
230 } else if (b.use_real_time) {
231 seconds = results.real_time_used;
234 const double min_time =
235 !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
237 // Determine if this run should be reported; Either it has
238 // run for a sufficient amount of time or because an error was reported.
239 const bool should_report = repetition_num > 0
240 || has_explicit_iteration_count // An exact iteration count was requested
241 || results.has_error_
242 || iters >= kMaxIterations // No chance to try again, we hit the limit.
243 || seconds >= min_time // the elapsed time is large enough
244 // CPU time is specified but the elapsed real time greatly exceeds the
245 // minimum time. Note that user provided timers are except from this
246 // sanity check.
247 || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);
249 if (should_report) {
250 BenchmarkReporter::Run report = CreateRunReport(b, results, seconds);
251 if (!report.error_occurred && b.complexity != oNone)
252 complexity_reports->push_back(report);
253 reports.push_back(report);
254 break;
257 // See how much iterations should be increased by
258 // Note: Avoid division by zero with max(seconds, 1ns).
259 double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
260 // If our last run was at least 10% of FLAGS_benchmark_min_time then we
261 // use the multiplier directly. Otherwise we use at most 10 times
262 // expansion.
263 // NOTE: When the last run was at least 10% of the min time the max
264 // expansion should be 14x.
265 bool is_significant = (seconds / min_time) > 0.1;
266 multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
267 if (multiplier <= 1.0) multiplier = 2.0;
268 double next_iters = std::max(multiplier * iters, iters + 1.0);
269 if (next_iters > kMaxIterations) {
270 next_iters = kMaxIterations;
272 VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
273 iters = static_cast<int>(next_iters + 0.5);
276 // Calculate additional statistics
277 auto stat_reports = ComputeStats(reports);
278 if ((b.complexity != oNone) && b.last_benchmark_instance) {
279 auto additional_run_stats = ComputeBigO(*complexity_reports);
280 stat_reports.insert(stat_reports.end(), additional_run_stats.begin(),
281 additional_run_stats.end());
282 complexity_reports->clear();
285 if (report_aggregates_only) reports.clear();
286 reports.insert(reports.end(), stat_reports.begin(), stat_reports.end());
287 return reports;
290 } // namespace
291 } // namespace internal
293 State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
294 int n_threads, internal::ThreadTimer* timer,
295 internal::ThreadManager* manager)
296 : total_iterations_(0),
297 batch_leftover_(0),
298 max_iterations(max_iters),
299 started_(false),
300 finished_(false),
301 error_occurred_(false),
302 range_(ranges),
303 bytes_processed_(0),
304 items_processed_(0),
305 complexity_n_(0),
306 counters(),
307 thread_index(thread_i),
308 threads(n_threads),
309 timer_(timer),
310 manager_(manager) {
311 CHECK(max_iterations != 0) << "At least one iteration must be run";
312 CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
314 // Note: The use of offsetof below is technically undefined until C++17
315 // because State is not a standard layout type. However, all compilers
316 // currently provide well-defined behavior as an extension (which is
317 // demonstrated since constexpr evaluation must diagnose all undefined
318 // behavior). However, GCC and Clang also warn about this use of offsetof,
319 // which must be suppressed.
320 #ifdef __GNUC__
321 #pragma GCC diagnostic push
322 #pragma GCC diagnostic ignored "-Winvalid-offsetof"
323 #endif
324 // Offset tests to ensure commonly accessed data is on the first cache line.
325 const int cache_line_size = 64;
326 static_assert(offsetof(State, error_occurred_) <=
327 (cache_line_size - sizeof(error_occurred_)), "");
328 #ifdef __GNUC__
329 #pragma GCC diagnostic pop
330 #endif
333 void State::PauseTiming() {
334 // Add in time accumulated so far
335 CHECK(started_ && !finished_ && !error_occurred_);
336 timer_->StopTimer();
339 void State::ResumeTiming() {
340 CHECK(started_ && !finished_ && !error_occurred_);
341 timer_->StartTimer();
344 void State::SkipWithError(const char* msg) {
345 CHECK(msg);
346 error_occurred_ = true;
348 MutexLock l(manager_->GetBenchmarkMutex());
349 if (manager_->results.has_error_ == false) {
350 manager_->results.error_message_ = msg;
351 manager_->results.has_error_ = true;
354 total_iterations_ = 0;
355 if (timer_->running()) timer_->StopTimer();
358 void State::SetIterationTime(double seconds) {
359 timer_->SetIterationTime(seconds);
362 void State::SetLabel(const char* label) {
363 MutexLock l(manager_->GetBenchmarkMutex());
364 manager_->results.report_label_ = label;
367 void State::StartKeepRunning() {
368 CHECK(!started_ && !finished_);
369 started_ = true;
370 total_iterations_ = error_occurred_ ? 0 : max_iterations;
371 manager_->StartStopBarrier();
372 if (!error_occurred_) ResumeTiming();
375 void State::FinishKeepRunning() {
376 CHECK(started_ && (!finished_ || error_occurred_));
377 if (!error_occurred_) {
378 PauseTiming();
380 // Total iterations has now wrapped around past 0. Fix this.
381 total_iterations_ = 0;
382 finished_ = true;
383 manager_->StartStopBarrier();
386 namespace internal {
387 namespace {
389 void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
390 BenchmarkReporter* console_reporter,
391 BenchmarkReporter* file_reporter) {
392 // Note the file_reporter can be null.
393 CHECK(console_reporter != nullptr);
395 // Determine the width of the name field using a minimum width of 10.
396 bool has_repetitions = FLAGS_benchmark_repetitions > 1;
397 size_t name_field_width = 10;
398 size_t stat_field_width = 0;
399 for (const Benchmark::Instance& benchmark : benchmarks) {
400 name_field_width =
401 std::max<size_t>(name_field_width, benchmark.name.size());
402 has_repetitions |= benchmark.repetitions > 1;
404 for(const auto& Stat : *benchmark.statistics)
405 stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
407 if (has_repetitions) name_field_width += 1 + stat_field_width;
409 // Print header here
410 BenchmarkReporter::Context context;
411 context.name_field_width = name_field_width;
413 // Keep track of running times of all instances of current benchmark
414 std::vector<BenchmarkReporter::Run> complexity_reports;
416 // We flush streams after invoking reporter methods that write to them. This
417 // ensures users get timely updates even when streams are not line-buffered.
418 auto flushStreams = [](BenchmarkReporter* reporter) {
419 if (!reporter) return;
420 std::flush(reporter->GetOutputStream());
421 std::flush(reporter->GetErrorStream());
424 if (console_reporter->ReportContext(context) &&
425 (!file_reporter || file_reporter->ReportContext(context))) {
426 flushStreams(console_reporter);
427 flushStreams(file_reporter);
428 for (const auto& benchmark : benchmarks) {
429 std::vector<BenchmarkReporter::Run> reports =
430 RunBenchmark(benchmark, &complexity_reports);
431 console_reporter->ReportRuns(reports);
432 if (file_reporter) file_reporter->ReportRuns(reports);
433 flushStreams(console_reporter);
434 flushStreams(file_reporter);
437 console_reporter->Finalize();
438 if (file_reporter) file_reporter->Finalize();
439 flushStreams(console_reporter);
440 flushStreams(file_reporter);
443 std::unique_ptr<BenchmarkReporter> CreateReporter(
444 std::string const& name, ConsoleReporter::OutputOptions output_opts) {
445 typedef std::unique_ptr<BenchmarkReporter> PtrType;
446 if (name == "console") {
447 return PtrType(new ConsoleReporter(output_opts));
448 } else if (name == "json") {
449 return PtrType(new JSONReporter);
450 } else if (name == "csv") {
451 return PtrType(new CSVReporter);
452 } else {
453 std::cerr << "Unexpected format: '" << name << "'\n";
454 std::exit(1);
458 } // end namespace
460 bool IsZero(double n) {
461 return std::abs(n) < std::numeric_limits<double>::epsilon();
464 ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
465 int output_opts = ConsoleReporter::OO_Defaults;
466 if ((FLAGS_benchmark_color == "auto" && IsColorTerminal()) ||
467 IsTruthyFlagValue(FLAGS_benchmark_color)) {
468 output_opts |= ConsoleReporter::OO_Color;
469 } else {
470 output_opts &= ~ConsoleReporter::OO_Color;
472 if(force_no_color) {
473 output_opts &= ~ConsoleReporter::OO_Color;
475 if(FLAGS_benchmark_counters_tabular) {
476 output_opts |= ConsoleReporter::OO_Tabular;
477 } else {
478 output_opts &= ~ConsoleReporter::OO_Tabular;
480 return static_cast< ConsoleReporter::OutputOptions >(output_opts);
483 } // end namespace internal
485 size_t RunSpecifiedBenchmarks() {
486 return RunSpecifiedBenchmarks(nullptr, nullptr);
489 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
490 return RunSpecifiedBenchmarks(console_reporter, nullptr);
493 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
494 BenchmarkReporter* file_reporter) {
495 std::string spec = FLAGS_benchmark_filter;
496 if (spec.empty() || spec == "all")
497 spec = "."; // Regexp that matches all benchmarks
499 // Setup the reporters
500 std::ofstream output_file;
501 std::unique_ptr<BenchmarkReporter> default_console_reporter;
502 std::unique_ptr<BenchmarkReporter> default_file_reporter;
503 if (!console_reporter) {
504 default_console_reporter = internal::CreateReporter(
505 FLAGS_benchmark_format, internal::GetOutputOptions());
506 console_reporter = default_console_reporter.get();
508 auto& Out = console_reporter->GetOutputStream();
509 auto& Err = console_reporter->GetErrorStream();
511 std::string const& fname = FLAGS_benchmark_out;
512 if (fname.empty() && file_reporter) {
513 Err << "A custom file reporter was provided but "
514 "--benchmark_out=<file> was not specified."
515 << std::endl;
516 std::exit(1);
518 if (!fname.empty()) {
519 output_file.open(fname);
520 if (!output_file.is_open()) {
521 Err << "invalid file name: '" << fname << std::endl;
522 std::exit(1);
524 if (!file_reporter) {
525 default_file_reporter = internal::CreateReporter(
526 FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
527 file_reporter = default_file_reporter.get();
529 file_reporter->SetOutputStream(&output_file);
530 file_reporter->SetErrorStream(&output_file);
533 std::vector<internal::Benchmark::Instance> benchmarks;
534 if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0;
536 if (benchmarks.empty()) {
537 Err << "Failed to match any benchmarks against regex: " << spec << "\n";
538 return 0;
541 if (FLAGS_benchmark_list_tests) {
542 for (auto const& benchmark : benchmarks) Out << benchmark.name << "\n";
543 } else {
544 internal::RunBenchmarks(benchmarks, console_reporter, file_reporter);
547 return benchmarks.size();
550 namespace internal {
552 void PrintUsageAndExit() {
553 fprintf(stdout,
554 "benchmark"
555 " [--benchmark_list_tests={true|false}]\n"
556 " [--benchmark_filter=<regex>]\n"
557 " [--benchmark_min_time=<min_time>]\n"
558 " [--benchmark_repetitions=<num_repetitions>]\n"
559 " [--benchmark_report_aggregates_only={true|false}\n"
560 " [--benchmark_format=<console|json|csv>]\n"
561 " [--benchmark_out=<filename>]\n"
562 " [--benchmark_out_format=<json|console|csv>]\n"
563 " [--benchmark_color={auto|true|false}]\n"
564 " [--benchmark_counters_tabular={true|false}]\n"
565 " [--v=<verbosity>]\n");
566 exit(0);
569 void ParseCommandLineFlags(int* argc, char** argv) {
570 using namespace benchmark;
571 BenchmarkReporter::Context::executable_name = argv[0];
572 for (int i = 1; i < *argc; ++i) {
573 if (ParseBoolFlag(argv[i], "benchmark_list_tests",
574 &FLAGS_benchmark_list_tests) ||
575 ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
576 ParseDoubleFlag(argv[i], "benchmark_min_time",
577 &FLAGS_benchmark_min_time) ||
578 ParseInt32Flag(argv[i], "benchmark_repetitions",
579 &FLAGS_benchmark_repetitions) ||
580 ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
581 &FLAGS_benchmark_report_aggregates_only) ||
582 ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
583 ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
584 ParseStringFlag(argv[i], "benchmark_out_format",
585 &FLAGS_benchmark_out_format) ||
586 ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
587 // "color_print" is the deprecated name for "benchmark_color".
588 // TODO: Remove this.
589 ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
590 ParseBoolFlag(argv[i], "benchmark_counters_tabular",
591 &FLAGS_benchmark_counters_tabular) ||
592 ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
593 for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
595 --(*argc);
596 --i;
597 } else if (IsFlag(argv[i], "help")) {
598 PrintUsageAndExit();
601 for (auto const* flag :
602 {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
603 if (*flag != "console" && *flag != "json" && *flag != "csv") {
604 PrintUsageAndExit();
606 if (FLAGS_benchmark_color.empty()) {
607 PrintUsageAndExit();
611 int InitializeStreams() {
612 static std::ios_base::Init init;
613 return 0;
616 } // end namespace internal
618 void Initialize(int* argc, char** argv) {
619 internal::ParseCommandLineFlags(argc, argv);
620 internal::LogLevel() = FLAGS_v;
623 bool ReportUnrecognizedArguments(int argc, char** argv) {
624 for (int i = 1; i < argc; ++i) {
625 fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], argv[i]);
627 return argc > 1;
630 } // end namespace benchmark