libc/benchmarks/gpu/LibcGpuBenchmark.h

   1 #ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
   2 #define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
   3
   4 #include "benchmarks/gpu/BenchmarkLogger.h"
   5 #include "benchmarks/gpu/timing/timing.h"
   6 #include "src/__support/CPP/functional.h"
   7 #include "src/__support/CPP/limits.h"
   8 #include "src/__support/CPP/string_view.h"
   9 #include "src/__support/macros/config.h"
  10 #include "src/time/clock.h"
  11
  12 #include <stdint.h>
  13
  14 namespace LIBC_NAMESPACE_DECL {
  15
  16 namespace benchmarks {
  17
  18 struct BenchmarkOptions {
  19   uint32_t initial_iterations = 1;
  20   uint32_t max_iterations = 10000000;
  21   uint32_t min_samples = 4;
  22   uint32_t max_samples = 1000;
  23   int64_t min_duration = 0;                  // in nanoseconds (ns)
  24   int64_t max_duration = 1000 * 1000 * 1000; // 1e9 nanoseconds = 1 second
  25   double epsilon = 0.01;
  26   double scaling_factor = 1.4;
  27 };
  28
  29 struct Measurement {
  30   uint32_t iterations = 0;
  31   uint64_t elapsed_cycles = 0;
  32 };
  33
  34 class RefinableRuntimeEstimation {
  35   uint64_t total_cycles = 0;
  36   uint32_t total_iterations = 0;
  37
  38 public:
  39   uint64_t update(const Measurement &M) {
  40     total_cycles += M.elapsed_cycles;
  41     total_iterations += M.iterations;
  42     return total_cycles / total_iterations;
  43   }
  44 };
  45
  46 // Tracks the progression of the runtime estimation
  47 class RuntimeEstimationProgression {
  48   RefinableRuntimeEstimation rre;
  49
  50 public:
  51   uint64_t current_estimation = 0;
  52
  53   double compute_improvement(const Measurement &M) {
  54     const uint64_t new_estimation = rre.update(M);
  55     double ratio =
  56         (static_cast<double>(current_estimation) / new_estimation) - 1.0;
  57
  58     // Get absolute value
  59     if (ratio < 0)
  60       ratio *= -1;
  61
  62     current_estimation = new_estimation;
  63     return ratio;
  64   }
  65 };
  66
  67 struct BenchmarkResult {
  68   uint64_t cycles = 0;
  69   double standard_deviation = 0;
  70   uint64_t min = UINT64_MAX;
  71   uint64_t max = 0;
  72   uint32_t samples = 0;
  73   uint32_t total_iterations = 0;
  74   clock_t total_time = 0;
  75 };
  76
  77 BenchmarkResult benchmark(const BenchmarkOptions &options,
  78                           cpp::function<uint64_t(void)> wrapper_func);
  79
  80 class Benchmark {
  81   const cpp::function<uint64_t(void)> func;
  82   const cpp::string_view suite_name;
  83   const cpp::string_view test_name;
  84   const uint32_t num_threads;
  85
  86 public:
  87   Benchmark(cpp::function<uint64_t(void)> func, char const *suite_name,
  88             char const *test_name, uint32_t num_threads)
  89       : func(func), suite_name(suite_name), test_name(test_name),
  90         num_threads(num_threads) {
  91     add_benchmark(this);
  92   }
  93
  94   static void run_benchmarks();
  95   const cpp::string_view get_suite_name() const { return suite_name; }
  96   const cpp::string_view get_test_name() const { return test_name; }
  97
  98 protected:
  99   static void add_benchmark(Benchmark *benchmark);
 100
 101 private:
 102   BenchmarkResult run() {
 103     BenchmarkOptions options;
 104     return benchmark(options, func);
 105   }
 106 };
 107 } // namespace benchmarks
 108 } // namespace LIBC_NAMESPACE_DECL
 109
 110 // Passing -1 indicates the benchmark should be run with as many threads as
 111 // allocated by the user in the benchmark's CMake.
 112 #define BENCHMARK(SuiteName, TestName, Func)                                   \
 113   LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
 114       Func, #SuiteName, #TestName, -1)
 115
 116 #define BENCHMARK_N_THREADS(SuiteName, TestName, Func, NumThreads)             \
 117   LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
 118       Func, #SuiteName, #TestName, NumThreads)
 119
 120 #define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func)                   \
 121   BENCHMARK_N_THREADS(SuiteName, TestName, Func, 1)
 122
 123 #define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func)                       \
 124   BENCHMARK_N_THREADS(SuiteName, TestName, Func,                               \
 125                       LIBC_NAMESPACE::gpu::get_lane_size())
 126
 127 #endif