[flang][openacc] Support early return in acc.loop (#73841)
[llvm-project.git] / third-party / benchmark / src / complexity.cc
blob825c57394a8ca3142d1190f4ee7f58daad75ae91
1 // Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 // Source project : https://github.com/ismaelJimenez/cpp.leastsq
16 // Adapted to be used with google benchmark
18 #include "complexity.h"
20 #include <algorithm>
21 #include <cmath>
23 #include "benchmark/benchmark.h"
24 #include "check.h"
26 namespace benchmark {
28 // Internal function to calculate the different scalability forms
29 BigOFunc* FittingCurve(BigO complexity) {
30 static const double kLog2E = 1.44269504088896340736;
31 switch (complexity) {
32 case oN:
33 return [](IterationCount n) -> double { return static_cast<double>(n); };
34 case oNSquared:
35 return [](IterationCount n) -> double { return std::pow(n, 2); };
36 case oNCubed:
37 return [](IterationCount n) -> double { return std::pow(n, 3); };
38 case oLogN:
39 /* Note: can't use log2 because Android's GNU STL lacks it */
40 return
41 [](IterationCount n) { return kLog2E * log(static_cast<double>(n)); };
42 case oNLogN:
43 /* Note: can't use log2 because Android's GNU STL lacks it */
44 return [](IterationCount n) {
45 return kLog2E * n * log(static_cast<double>(n));
47 case o1:
48 default:
49 return [](IterationCount) { return 1.0; };
53 // Function to return an string for the calculated complexity
54 std::string GetBigOString(BigO complexity) {
55 switch (complexity) {
56 case oN:
57 return "N";
58 case oNSquared:
59 return "N^2";
60 case oNCubed:
61 return "N^3";
62 case oLogN:
63 return "lgN";
64 case oNLogN:
65 return "NlgN";
66 case o1:
67 return "(1)";
68 default:
69 return "f(N)";
73 // Find the coefficient for the high-order term in the running time, by
74 // minimizing the sum of squares of relative error, for the fitting curve
75 // given by the lambda expression.
76 // - n : Vector containing the size of the benchmark tests.
77 // - time : Vector containing the times for the benchmark tests.
78 // - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).
80 // For a deeper explanation on the algorithm logic, please refer to
81 // https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
83 LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
84 const std::vector<double>& time,
85 BigOFunc* fitting_curve) {
86 double sigma_gn_squared = 0.0;
87 double sigma_time = 0.0;
88 double sigma_time_gn = 0.0;
90 // Calculate least square fitting parameter
91 for (size_t i = 0; i < n.size(); ++i) {
92 double gn_i = fitting_curve(n[i]);
93 sigma_gn_squared += gn_i * gn_i;
94 sigma_time += time[i];
95 sigma_time_gn += time[i] * gn_i;
98 LeastSq result;
99 result.complexity = oLambda;
101 // Calculate complexity.
102 result.coef = sigma_time_gn / sigma_gn_squared;
104 // Calculate RMS
105 double rms = 0.0;
106 for (size_t i = 0; i < n.size(); ++i) {
107 double fit = result.coef * fitting_curve(n[i]);
108 rms += pow((time[i] - fit), 2);
111 // Normalized RMS by the mean of the observed values
112 double mean = sigma_time / n.size();
113 result.rms = sqrt(rms / n.size()) / mean;
115 return result;
118 // Find the coefficient for the high-order term in the running time, by
119 // minimizing the sum of squares of relative error.
120 // - n : Vector containing the size of the benchmark tests.
121 // - time : Vector containing the times for the benchmark tests.
122 // - complexity : If different than oAuto, the fitting curve will stick to
123 // this one. If it is oAuto, it will be calculated the best
124 // fitting curve.
125 LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
126 const std::vector<double>& time, const BigO complexity) {
127 BM_CHECK_EQ(n.size(), time.size());
128 BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
129 // benchmark runs are given
130 BM_CHECK_NE(complexity, oNone);
132 LeastSq best_fit;
134 if (complexity == oAuto) {
135 std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
137 // Take o1 as default best fitting curve
138 best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
139 best_fit.complexity = o1;
141 // Compute all possible fitting curves and stick to the best one
142 for (const auto& fit : fit_curves) {
143 LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
144 if (current_fit.rms < best_fit.rms) {
145 best_fit = current_fit;
146 best_fit.complexity = fit;
149 } else {
150 best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
151 best_fit.complexity = complexity;
154 return best_fit;
157 std::vector<BenchmarkReporter::Run> ComputeBigO(
158 const std::vector<BenchmarkReporter::Run>& reports) {
159 typedef BenchmarkReporter::Run Run;
160 std::vector<Run> results;
162 if (reports.size() < 2) return results;
164 // Accumulators.
165 std::vector<int64_t> n;
166 std::vector<double> real_time;
167 std::vector<double> cpu_time;
169 // Populate the accumulators.
170 for (const Run& run : reports) {
171 BM_CHECK_GT(run.complexity_n, 0)
172 << "Did you forget to call SetComplexityN?";
173 n.push_back(run.complexity_n);
174 real_time.push_back(run.real_accumulated_time / run.iterations);
175 cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
178 LeastSq result_cpu;
179 LeastSq result_real;
181 if (reports[0].complexity == oLambda) {
182 result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
183 result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
184 } else {
185 result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
186 result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
189 // Drop the 'args' when reporting complexity.
190 auto run_name = reports[0].run_name;
191 run_name.args.clear();
193 // Get the data from the accumulator to BenchmarkReporter::Run's.
194 Run big_o;
195 big_o.run_name = run_name;
196 big_o.family_index = reports[0].family_index;
197 big_o.per_family_instance_index = reports[0].per_family_instance_index;
198 big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
199 big_o.repetitions = reports[0].repetitions;
200 big_o.repetition_index = Run::no_repetition_index;
201 big_o.threads = reports[0].threads;
202 big_o.aggregate_name = "BigO";
203 big_o.aggregate_unit = StatisticUnit::kTime;
204 big_o.report_label = reports[0].report_label;
205 big_o.iterations = 0;
206 big_o.real_accumulated_time = result_real.coef;
207 big_o.cpu_accumulated_time = result_cpu.coef;
208 big_o.report_big_o = true;
209 big_o.complexity = result_cpu.complexity;
211 // All the time results are reported after being multiplied by the
212 // time unit multiplier. But since RMS is a relative quantity it
213 // should not be multiplied at all. So, here, we _divide_ it by the
214 // multiplier so that when it is multiplied later the result is the
215 // correct one.
216 double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
218 // Only add label to mean/stddev if it is same for all runs
219 Run rms;
220 rms.run_name = run_name;
221 rms.family_index = reports[0].family_index;
222 rms.per_family_instance_index = reports[0].per_family_instance_index;
223 rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
224 rms.aggregate_name = "RMS";
225 rms.aggregate_unit = StatisticUnit::kPercentage;
226 rms.report_label = big_o.report_label;
227 rms.iterations = 0;
228 rms.repetition_index = Run::no_repetition_index;
229 rms.repetitions = reports[0].repetitions;
230 rms.threads = reports[0].threads;
231 rms.real_accumulated_time = result_real.rms / multiplier;
232 rms.cpu_accumulated_time = result_cpu.rms / multiplier;
233 rms.report_rms = true;
234 rms.complexity = result_cpu.complexity;
235 // don't forget to keep the time unit, or we won't be able to
236 // recover the correct value.
237 rms.time_unit = reports[0].time_unit;
239 results.push_back(big_o);
240 results.push_back(rms);
241 return results;
244 } // end namespace benchmark