1 #ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
2 #define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
4 #include "benchmarks/gpu/BenchmarkLogger.h"
5 #include "benchmarks/gpu/timing/timing.h"
6 #include "src/__support/CPP/functional.h"
7 #include "src/__support/CPP/limits.h"
8 #include "src/__support/CPP/string_view.h"
9 #include "src/__support/macros/config.h"
10 #include "src/time/clock.h"
14 namespace LIBC_NAMESPACE_DECL
{
16 namespace benchmarks
{
// Tunable limits passed to benchmark() as `const BenchmarkOptions &`.
// A default-constructed instance carries the values shown below.
// NOTE(review): how each limit is applied is decided by benchmark()'s
// definition, which is not part of this header - confirm there.
struct BenchmarkOptions {
  // Bounds on the iteration count.
  uint32_t initial_iterations = 1;
  uint32_t max_iterations = 10000000;

  // Bounds on the number of samples.
  uint32_t min_samples = 4;
  uint32_t max_samples = 1000;

  // Bounds on the duration, both in nanoseconds (ns).
  int64_t min_duration = 0;
  int64_t max_duration = 1000 * 1000 * 1000; // 1e9 nanoseconds = 1 second

  // NOTE(review): presumably the convergence threshold and the growth factor
  // for the iteration count - confirm against benchmark().
  double epsilon = 0.01;
  double scaling_factor = 1.4;
};
// One timing sample: an iteration count and the cycle count recorded for it.
struct Measurement {
  uint32_t iterations = 0;
  uint64_t elapsed_cycles = 0;
};

// Keeps running totals over every Measurement passed to update() and reports
// the average number of cycles per iteration seen so far.
class RefinableRuntimeEstimation {
  uint64_t total_cycles = 0;
  // 64-bit so that summing many 32-bit iteration counts cannot wrap around.
  uint64_t total_iterations = 0;

public:
  // Adds M to the running totals and returns total_cycles / total_iterations
  // using integer division (the fractional part is discarded).
  // Returns 0 while no iterations have been recorded, instead of dividing by
  // zero.
  uint64_t update(const Measurement &M) {
    total_cycles += M.elapsed_cycles;
    total_iterations += M.iterations;
    if (total_iterations == 0)
      return 0;
    return total_cycles / total_iterations;
  }
};
// NOTE(review): this class is an incomplete extract. The embedded listing
// numbers jump 48 -> 51 -> 53, 54 -> 56 and 56 -> 62, and nothing after 62 is
// present, so an access specifier, the statement that receives the ratio
// computed below, the return statement and the closing braces are not visible.
46 // Tracks the progression of the runtime estimation
47 class RuntimeEstimationProgression
{
// Accumulator whose update() yields the cycles-per-iteration estimate.
48 RefinableRuntimeEstimation rre
;
// Estimate stored by the latest compute_improvement() call; 0 before the
// first call.
51 uint64_t current_estimation
= 0;
// Feeds M into rre, evaluates (previous estimate / new estimate) - 1.0, then
// stores the new estimate in current_estimation.
// NOTE(review): what is returned is not visible here; presumably it is derived
// from that ratio - confirm.
53 double compute_improvement(const Measurement
&M
) {
54 const uint64_t new_estimation
= rre
.update(M
);
// NOTE(review): new_estimation is the result of an integer division and can be
// 0, in which case the floating-point quotient below divides by zero - confirm
// that the caller tolerates the resulting value.
56 (static_cast<double>(current_estimation
) / new_estimation
) - 1.0;
// Remember the new estimate so the next call compares against it.
62 current_estimation
= new_estimation
;
// Result record returned by benchmark() and by Benchmark::run().
// NOTE(review): incomplete extract - the embedded listing numbers skip 68, 71
// and 72 and the closing brace is absent, so further members may exist that
// are not shown here.
67 struct BenchmarkResult
{
69 double standard_deviation
= 0;
// Starts at UINT64_MAX, the largest uint64_t value.
// NOTE(review): presumably so that the first observed value replaces it -
// confirm in benchmark().
70 uint64_t min
= UINT64_MAX
;
73 uint32_t total_iterations
= 0;
// NOTE(review): stored as clock_t; the clock source and unit are not visible
// in this header - confirm in benchmark().
74 clock_t total_time
= 0;
77 BenchmarkResult
benchmark(const BenchmarkOptions
&options
,
78 cpp::function
<uint64_t(void)> wrapper_func
);
81 const cpp::function
<uint64_t(void)> func
;
82 const cpp::string_view suite_name
;
83 const cpp::string_view test_name
;
84 const uint32_t num_threads
;
// Copies the callable, the suite and test names and the thread count into the
// const members of the same names.
// NOTE(review): the body opened by the trailing `{` is not visible in this
// extract (the embedded listing numbers jump from 90 to 94) - confirm what it
// does. A static add_benchmark(Benchmark *) is declared in this class and may
// be what it calls.
87 Benchmark(cpp::function
<uint64_t(void)> func
, char const *suite_name
,
88 char const *test_name
, uint32_t num_threads
)
89 : func(func
), suite_name(suite_name
), test_name(test_name
),
90 num_threads(num_threads
) {
// Static entry point; declaration only, the definition is not in this header.
// NOTE(review): presumably runs every benchmark handed to add_benchmark() -
// confirm against the definition.
static void run_benchmarks();
95 const cpp::string_view
get_suite_name() const { return suite_name
; }
96 const cpp::string_view
get_test_name() const { return test_name
; }
99 static void add_benchmark(Benchmark
*benchmark
);
102 BenchmarkResult
run() {
103 BenchmarkOptions options
;
104 return benchmark(options
, func
);
107 } // namespace benchmarks
108 } // namespace LIBC_NAMESPACE_DECL
// Defines a Benchmark object named <SuiteName>_<TestName>_Instance that wraps
// Func, with the suite and test names taken from the stringized macro
// arguments.
// Passing -1 indicates the benchmark should be run with as many threads as
// allocated by the user in the benchmark's CMake.
#define BENCHMARK(SuiteName, TestName, Func)                                   \
  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
      Func, #SuiteName, #TestName, -1)
// Same as BENCHMARK, but with an explicit thread count: NumThreads is passed
// as the fourth constructor argument of the Benchmark object named
// <SuiteName>_<TestName>_Instance.
#define BENCHMARK_N_THREADS(SuiteName, TestName, Func, NumThreads)             \
  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
      Func, #SuiteName, #TestName, NumThreads)
// Shorthand for BENCHMARK_N_THREADS with a thread count of 1.
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func)                   \
  BENCHMARK_N_THREADS(SuiteName, TestName, Func, 1)
// Shorthand for BENCHMARK_N_THREADS whose thread count is the value returned
// by LIBC_NAMESPACE::gpu::get_lane_size().
// NOTE(review): none of the includes visible at the top of this header names a
// GPU utility header directly; get_lane_size() is presumably reached through
// one of them - confirm.
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func)                       \
  BENCHMARK_N_THREADS(SuiteName, TestName, Func,                               \
                      LIBC_NAMESPACE::gpu::get_lane_size())