//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks
//   the median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores each function from
//   worst (0) to best (1).
// - Each distribution categorizes each function into one of the following
//   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
//   BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
//   the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example both functions grade EXCELLENT but we may prefer the second one.
//
// |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 |         7 |         1 |    2 |          | ...
// | Function_2 |         6 |         4 |      |          | ...
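//
// In the table above, assuming ten votes per function, both rows elect
// EXCELLENT (the counted votes first exceed half of the total within that
// bucket: 7 > 5 and 6 > 5), yet Function_2 never grades below VERY_GOOD.
//
// A typical pipeline built from the functions below (a sketch; `Samples`
// stands for the parsed benchmark samples, and the actual driver lives
// outside this file):
//
//   std::vector<FunctionData> Functions = getThroughputs(Samples);
//   fillScores(Functions); // normalizes median throughput to a [0, 1] score
//   castVotes(Functions);  // grades each distribution, elects a final grade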

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <unordered_map>
#include <vector>

namespace llvm {

namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}
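
// Maps a normalized score in [0, 1] to one of the seven grades by cutting the
// range into equal buckets of width 1/7. For example, a score of 0.75 is at
// least 5/7 (~0.714) but less than 6/7 (~0.857), so it grades VERY_GOOD.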
Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}
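
// Returns the unbiased sample variance of `Samples` around `SampleMean`, i.e.
// the sum of squared deviations divided by (n - 1), or 0 for a single sample.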
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}
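
// Summarizes the throughput samples of one (function, distribution) bucket:
// sample mean, unbiased sample variance, and median. std::nth_element only
// partially sorts the samples; for an even number of samples the element at
// index size/2 is the upper of the two middle values, which is used as the
// median here.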
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}
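
// Groups the raw iteration samples by function and then by distribution, and
// computes the mean / variance / median throughput of each bucket.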
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    // Only individual iteration samples are binned; the loop stops at the
    // first sample of any other type.
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }
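
  // With all samples binned, flatten the map into a vector and summarize each
  // (function, distribution) bucket (mean, variance, median).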
  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}
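
// For each function, normalizes the median throughput of every distribution to
// a score in [0, 1], where 0 maps to the worst and 1 to the best median
// throughput observed among functions of the same type for that distribution.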
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };
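
  // First pass: record the minimum and maximum median throughput seen for
  // each (function type, distribution) bucket.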
  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }
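
  // Second pass: rescale each function's median throughput to a [0, 1] score
  // within its (function type, distribution) bucket.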
  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}
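
// Grades every (function, distribution) score and elects each function's final
// grade as the median grade of its histogram, in the spirit of Majority
// Judgment. ScoresGeoMean accumulates the raw product of the per-distribution
// scores (despite its name, no Nth root is taken in this function).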
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    Function.ScoresGeoMean = 1.0;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }
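
  // Walk each grade histogram from best to worst and pick the grade at which
  // the counted votes exceed half of the total, i.e. the median grade.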
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm