[SLP]Fix PR107036: Check if the type of the user is sizable before requesting its...
[llvm-project.git] / libc / benchmarks / gpu / LibcGpuBenchmark.h
blobf5cf4822f6fd38cb14713758292ac34c30f52488
1 #ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
2 #define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
4 #include "benchmarks/gpu/BenchmarkLogger.h"
5 #include "benchmarks/gpu/timing/timing.h"
6 #include "src/__support/CPP/functional.h"
7 #include "src/__support/CPP/limits.h"
8 #include "src/__support/CPP/string_view.h"
9 #include "src/__support/macros/config.h"
10 #include "src/time/clock.h"
12 #include <stdint.h>
14 namespace LIBC_NAMESPACE_DECL {
16 namespace benchmarks {
/// Knobs handed to `benchmark()` to bound a single benchmark run.
///
/// Only the defaults are defined here; how each field is interpreted is up to
/// the `benchmark()` implementation, which lives outside this header.
struct BenchmarkOptions {
  // Iteration-count bounds.
  uint32_t initial_iterations{1};
  uint32_t max_iterations{10'000'000};

  // Sample-count bounds.
  uint32_t min_samples{4};
  uint32_t max_samples{1000};

  // Duration bounds, both expressed in nanoseconds (ns).
  int64_t min_duration{0};
  int64_t max_duration{1'000'000'000}; // 1e9 nanoseconds = 1 second

  double epsilon{0.01};
  double scaling_factor{1.4};
};
/// One timed sample: the number of iterations that were run and the number of
/// cycles they took in total.
struct Measurement {
  uint32_t iterations = 0;
  uint64_t elapsed_cycles = 0;
};

/// Running cycles-per-iteration average over every Measurement passed to
/// update().
class RefinableRuntimeEstimation {
  uint64_t total_cycles = 0;
  // Kept 64-bit: each Measurement contributes up to a full uint32_t, so a
  // 32-bit accumulator could wrap after a handful of large samples and
  // silently corrupt the average.
  uint64_t total_iterations = 0;

public:
  /// Adds M to the running totals and returns total_cycles / total_iterations
  /// (integer division). Returns 0 while no iterations have been recorded
  /// rather than dividing by zero.
  uint64_t update(const Measurement &M) {
    total_cycles += M.elapsed_cycles;
    total_iterations += M.iterations;
    if (total_iterations == 0)
      return 0;
    return total_cycles / total_iterations;
  }
};

// Tracks the progression of the runtime estimation
class RuntimeEstimationProgression {
  RefinableRuntimeEstimation rre;

public:
  /// Estimate produced by the most recent compute_improvement() call
  /// (0 before the first call).
  uint64_t current_estimation = 0;

  /// Feeds M into the running estimate and returns the absolute relative
  /// change |current_estimation / new_estimation - 1| between the previous
  /// estimate and the new one, then stores the new estimate.
  ///
  /// If both the previous and the new estimate are 0 the estimate did not
  /// move, so 0.0 is returned instead of evaluating 0.0 / 0 (NaN). A new
  /// estimate of 0 after a non-zero one still goes through the floating-point
  /// division, which is +infinity on IEEE-754 targets.
  double compute_improvement(const Measurement &M) {
    const uint64_t new_estimation = rre.update(M);
    const uint64_t previous_estimation = current_estimation;
    current_estimation = new_estimation;

    if (new_estimation == 0 && previous_estimation == 0)
      return 0.0;

    double ratio = (static_cast<double>(previous_estimation) /
                    static_cast<double>(new_estimation)) -
                   1.0;

    // Get absolute value
    if (ratio < 0)
      ratio = -ratio;

    return ratio;
  }
};
/// Statistics reported for one benchmark run, as returned by `benchmark()`.
///
/// `min` starts at UINT64_MAX and `max` at 0 -- presumably so the first
/// observed sample replaces both; confirm against the `benchmark()`
/// definition, which is not part of this header.
struct BenchmarkResult {
  uint64_t cycles{0};
  double standard_deviation{0.0};
  uint64_t min{UINT64_MAX};
  uint64_t max{0};
  uint32_t samples{0};
  uint32_t total_iterations{0};
  clock_t total_time{0};
};
77 BenchmarkResult benchmark(const BenchmarkOptions &options,
78 cpp::function<uint64_t(void)> wrapper_func);
80 class Benchmark {
81 const cpp::function<uint64_t(void)> func;
82 const cpp::string_view suite_name;
83 const cpp::string_view test_name;
84 const uint32_t num_threads;
86 public:
87 Benchmark(cpp::function<uint64_t(void)> func, char const *suite_name,
88 char const *test_name, uint32_t num_threads)
89 : func(func), suite_name(suite_name), test_name(test_name),
90 num_threads(num_threads) {
91 add_benchmark(this);
94 static void run_benchmarks();
95 const cpp::string_view get_suite_name() const { return suite_name; }
96 const cpp::string_view get_test_name() const { return test_name; }
98 protected:
99 static void add_benchmark(Benchmark *benchmark);
101 private:
102 BenchmarkResult run() {
103 BenchmarkOptions options;
104 return benchmark(options, func);
107 } // namespace benchmarks
108 } // namespace LIBC_NAMESPACE_DECL
// Defines a Benchmark object named <SuiteName>_<TestName>_Instance wrapping
// Func, with the stringized SuiteName and TestName as its names.
//
// Passing -1 indicates the benchmark should be run with as many threads as
// allocated by the user in the benchmark's CMake. The Benchmark constructor
// takes num_threads as uint32_t, so the -1 is converted to that type.
#define BENCHMARK(SuiteName, TestName, Func)                                   \
  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
      Func, #SuiteName, #TestName, -1)
// Same as BENCHMARK, but forwards an explicit NumThreads value as the
// Benchmark constructor's num_threads argument.
#define BENCHMARK_N_THREADS(SuiteName, TestName, Func, NumThreads)             \
  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
      Func, #SuiteName, #TestName, NumThreads)
// Shorthand for BENCHMARK_N_THREADS with NumThreads fixed to 1.
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func)                   \
  BENCHMARK_N_THREADS(SuiteName, TestName, Func, 1)
// Shorthand for BENCHMARK_N_THREADS with NumThreads set to the value returned
// by LIBC_NAMESPACE::gpu::get_lane_size().
// NOTE(review): none of the includes visible in this header obviously declare
// gpu::get_lane_size(); confirm it is reachable wherever this macro is used.
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func)                       \
  BENCHMARK_N_THREADS(SuiteName, TestName, Func,                               \
                      LIBC_NAMESPACE::gpu::get_lane_size())
127 #endif