1 //===-- Benchmark function --------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file mainly defines a `Benchmark` function.
11 // The benchmarking process is as follows:
12 // - We start by measuring the time it takes to run the function
13 // `InitialIterations` times. This is called a Sample. From this we can derive
14 // the time it took to run a single iteration.
16 // - We repeat the previous step with a greater number of iterations to lower
17 // the impact of the measurement. We can derive a more precise estimation of the
18 // runtime for a single iteration.
20 // - Each sample gives a more accurate estimation of the runtime for a single
21 // iteration but also takes more time to run. We stop the process when:
22 // * The measure stabilize under a certain precision (Epsilon),
23 // * The overall benchmarking time is greater than MaxDuration,
24 // * The overall sample count is greater than MaxSamples,
25 // * The last sample used more than MaxIterations iterations.
27 // - We also makes sure that the benchmark doesn't run for a too short period of
28 // time by defining MinDuration and MinSamples.
30 #ifndef LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
31 #define LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H
33 #include "benchmark/benchmark.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/Optional.h"
36 #include "llvm/ADT/SmallVector.h"
43 namespace libc_benchmarks
{
45 using Duration
= std::chrono::duration
<double>;
47 enum class BenchmarkLog
{
48 None
, // Don't keep the internal state of the benchmark.
49 Last
, // Keep only the last batch.
50 Full
// Keep all iterations states, useful for testing or debugging.
53 // An object to configure the benchmark stopping conditions.
54 // See documentation at the beginning of the file for the overall algorithm and
55 // meaning of each field.
56 struct BenchmarkOptions
{
57 // The minimum time for which the benchmark is running.
58 Duration MinDuration
= std::chrono::seconds(0);
59 // The maximum time for which the benchmark is running.
60 Duration MaxDuration
= std::chrono::seconds(10);
61 // The number of iterations in the first sample.
62 uint32_t InitialIterations
= 1;
63 // The maximum number of iterations for any given sample.
64 uint32_t MaxIterations
= 10000000;
65 // The minimum number of samples.
66 uint32_t MinSamples
= 4;
67 // The maximum number of samples.
68 uint32_t MaxSamples
= 1000;
69 // The benchmark will stop if the relative difference between the current and
70 // the last estimation is less than epsilon. This is 1% by default.
71 double Epsilon
= 0.01;
72 // The number of iterations grows exponentially between each sample.
73 // Must be greater or equal to 1.
74 double ScalingFactor
= 1.4;
75 BenchmarkLog Log
= BenchmarkLog::None
;
78 // The state of a benchmark.
79 enum class BenchmarkStatus
{
87 // The internal state of the benchmark, useful to debug, test or report
89 struct BenchmarkState
{
90 size_t LastSampleIterations
;
91 Duration LastBatchElapsed
;
92 BenchmarkStatus CurrentStatus
;
93 Duration CurrentBestGuess
; // The time estimation for a single run of `foo`.
94 double ChangeRatio
; // The change in time estimation between previous and
98 // A lightweight result for a benchmark.
99 struct BenchmarkResult
{
100 BenchmarkStatus TerminationStatus
= BenchmarkStatus::Running
;
101 Duration BestGuess
= {};
102 llvm::Optional
<llvm::SmallVector
<BenchmarkState
, 16>> MaybeBenchmarkLog
;
105 // Stores information about a cache in the host memory system.
107 std::string Type
; // e.g. "Instruction", "Data", "Unified".
108 int Level
; // 0 is closest to processing unit.
109 int Size
; // In bytes.
110 int NumSharing
; // The number of processing units (Hyper-Threading Thread)
111 // with which this cache is shared.
114 // Stores information about the host.
116 std::string CpuName
; // returns a string compatible with the -march option.
117 double CpuFrequency
; // in Hertz.
118 std::vector
<CacheInfo
> Caches
;
120 static HostState
get();
126 size_t Iterations
= 0;
127 Duration Elapsed
= {};
130 // Updates the estimation of the elapsed time for a single iteration.
131 class RefinableRuntimeEstimation
{
132 Duration TotalTime
= {};
133 size_t TotalIterations
= 0;
136 Duration
update(const Measurement
&M
) {
137 assert(M
.Iterations
> 0);
138 // Duration is encoded as a double (see definition).
139 // `TotalTime` and `M.Elapsed` are of the same magnitude so we don't expect
140 // loss of precision due to radically different scales.
141 TotalTime
+= M
.Elapsed
;
142 TotalIterations
+= M
.Iterations
;
143 return TotalTime
/ TotalIterations
;
147 // This class tracks the progression of the runtime estimation.
148 class RuntimeEstimationProgression
{
149 RefinableRuntimeEstimation RRE
;
152 Duration CurrentEstimation
= {};
154 // Returns the change ratio between our best guess so far and the one from the
156 double computeImprovement(const Measurement
&M
) {
157 const Duration NewEstimation
= RRE
.update(M
);
158 const double Ratio
= fabs(((CurrentEstimation
/ NewEstimation
) - 1.0));
159 CurrentEstimation
= NewEstimation
;
164 } // namespace internal
166 // Measures the runtime of `foo` until conditions defined by `Options` are met.
168 // To avoid measurement's imprecisions we measure batches of `foo`.
169 // The batch size is growing by `ScalingFactor` to minimize the effect of
172 // Note: The benchmark is not responsible for serializing the executions of
173 // `foo`. It is not suitable for measuring, very small & side effect free
174 // functions, as the processor is free to execute several executions in
177 // - Options: A set of parameters controlling the stopping conditions for the
179 // - foo: The function under test. It takes one value and returns one value.
180 // The input value is used to randomize the execution of `foo` as part of a
181 // batch to mitigate the effect of the branch predictor. Signature:
182 // `ProductType foo(ParameterProvider::value_type value);`
183 // The output value is a product of the execution of `foo` and prevents the
184 // compiler from optimizing out foo's body.
185 // - ParameterProvider: An object responsible for providing a range of
186 // `Iterations` values to use as input for `foo`. The `value_type` of the
187 // returned container has to be compatible with `foo` argument.
188 // Must implement one of:
189 // `Container<ParameterType> generateBatch(size_t Iterations);`
190 // `const Container<ParameterType>& generateBatch(size_t Iterations);`
191 // - Clock: An object providing the current time. Must implement:
192 // `std::chrono::time_point now();`
193 template <typename Function
, typename ParameterProvider
,
194 typename BenchmarkClock
= const std::chrono::high_resolution_clock
>
195 BenchmarkResult
benchmark(const BenchmarkOptions
&Options
,
196 ParameterProvider
&PP
, Function foo
,
197 BenchmarkClock
&Clock
= BenchmarkClock()) {
198 BenchmarkResult Result
;
199 internal::RuntimeEstimationProgression REP
;
200 Duration TotalBenchmarkDuration
= {};
201 size_t Iterations
= std::max(Options
.InitialIterations
, uint32_t(1));
203 if (Options
.ScalingFactor
< 1.0)
204 report_fatal_error("ScalingFactor should be >= 1");
205 if (Options
.Log
!= BenchmarkLog::None
)
206 Result
.MaybeBenchmarkLog
.emplace();
208 // Request a new Batch of size `Iterations`.
209 const auto &Batch
= PP
.generateBatch(Iterations
);
211 // Measuring this Batch.
212 const auto StartTime
= Clock
.now();
213 for (const auto Parameter
: Batch
) {
214 const auto Production
= foo(Parameter
);
215 benchmark::DoNotOptimize(Production
);
217 const auto EndTime
= Clock
.now();
218 const Duration Elapsed
= EndTime
- StartTime
;
220 // Updating statistics.
222 TotalBenchmarkDuration
+= Elapsed
;
223 const double ChangeRatio
= REP
.computeImprovement({Iterations
, Elapsed
});
224 Result
.BestGuess
= REP
.CurrentEstimation
;
226 // Stopping condition.
227 if (TotalBenchmarkDuration
>= Options
.MinDuration
&&
228 Samples
>= Options
.MinSamples
&& ChangeRatio
< Options
.Epsilon
)
229 Result
.TerminationStatus
= BenchmarkStatus::PrecisionReached
;
230 else if (Samples
>= Options
.MaxSamples
)
231 Result
.TerminationStatus
= BenchmarkStatus::MaxSamplesReached
;
232 else if (TotalBenchmarkDuration
>= Options
.MaxDuration
)
233 Result
.TerminationStatus
= BenchmarkStatus::MaxDurationReached
;
234 else if (Iterations
>= Options
.MaxIterations
)
235 Result
.TerminationStatus
= BenchmarkStatus::MaxIterationsReached
;
237 if (Result
.MaybeBenchmarkLog
) {
238 auto &BenchmarkLog
= *Result
.MaybeBenchmarkLog
;
239 if (Options
.Log
== BenchmarkLog::Last
&& !BenchmarkLog
.empty())
240 BenchmarkLog
.pop_back();
242 BS
.LastSampleIterations
= Iterations
;
243 BS
.LastBatchElapsed
= Elapsed
;
244 BS
.CurrentStatus
= Result
.TerminationStatus
;
245 BS
.CurrentBestGuess
= Result
.BestGuess
;
246 BS
.ChangeRatio
= ChangeRatio
;
247 BenchmarkLog
.push_back(BS
);
250 if (Result
.TerminationStatus
!= BenchmarkStatus::Running
)
253 if (Options
.ScalingFactor
> 1 &&
254 Iterations
* Options
.ScalingFactor
== Iterations
)
256 "`Iterations *= ScalingFactor` is idempotent, increase ScalingFactor "
257 "or InitialIterations.");
259 Iterations
*= Options
.ScalingFactor
;
263 // Interprets `Array` as a circular buffer of `Size` elements.
264 template <typename T
> class CircularArrayRef
{
265 llvm::ArrayRef
<T
> Array
;
269 using value_type
= T
;
270 using reference
= T
&;
271 using const_reference
= const T
&;
272 using difference_type
= ssize_t
;
273 using size_type
= size_t;
276 : public std::iterator
<std::input_iterator_tag
, T
, ssize_t
> {
277 llvm::ArrayRef
<T
> Array
;
282 explicit const_iterator(llvm::ArrayRef
<T
> Array
, size_t Index
= 0)
283 : Array(Array
), Index(Index
), Offset(Index
% Array
.size()) {}
284 const_iterator
&operator++() {
287 if (Offset
== Array
.size())
291 bool operator==(const_iterator Other
) const { return Index
== Other
.Index
; }
292 bool operator!=(const_iterator Other
) const { return !(*this == Other
); }
293 const T
&operator*() const { return Array
[Offset
]; }
296 CircularArrayRef(llvm::ArrayRef
<T
> Array
, size_t Size
)
297 : Array(Array
), Size(Size
) {
298 assert(Array
.size() > 0);
301 const_iterator
begin() const { return const_iterator(Array
); }
302 const_iterator
end() const { return const_iterator(Array
, Size
); }
305 // A convenient helper to produce a CircularArrayRef from an ArrayRef.
306 template <typename T
>
307 CircularArrayRef
<T
> cycle(llvm::ArrayRef
<T
> Array
, size_t Size
) {
308 return {Array
, Size
};
311 // Creates an std::array which storage size is constrained under `Bytes`.
312 template <typename T
, size_t Bytes
>
313 using ByteConstrainedArray
= std::array
<T
, Bytes
/ sizeof(T
)>;
315 // A convenient helper to produce a CircularArrayRef from a
316 // ByteConstrainedArray.
317 template <typename T
, size_t N
>
318 CircularArrayRef
<T
> cycle(const std::array
<T
, N
> &Container
, size_t Size
) {
319 return {llvm::ArrayRef
<T
>(Container
.cbegin(), Container
.cend()), Size
};
322 // Makes sure the binary was compiled in release mode and that frequency
323 // governor is set on performance.
324 void checkRequirements();
326 } // namespace libc_benchmarks
329 #endif // LLVM_LIBC_UTILS_BENCHMARK_BENCHMARK_H