1 //===-- Analyze benchmark JSON files --------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This code analyzes the json file produced by the `automemcpy` binary.
// As a reminder, `automemcpy` will benchmark each autogenerated memory
11 // functions against one of the predefined distributions available in the
12 // `libc/benchmarks/distributions` folder.
14 // It works as follows:
15 // - Reads one or more json files.
16 // - If there are several runs for the same function and distribution, picks the
17 // median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distributions and scores them from worst (0)
//   to best (1).
20 // - Each distribution categorizes each function into one of the following
//   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
//   BAD.
23 // - A process similar to the Majority Judgment voting system is used to `elect`
24 // the best function. The histogram of grades is returned so we can
25 // distinguish between functions with the same final grade. In the following
26 // example both functions grade EXCELLENT but we may prefer the second one.
28 // | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
29 // |------------|-----------|-----------|------|----------| ...
30 // | Function_1 | 7 | 1 | 2 | | ...
31 // | Function_2 | 6 | 4 | | | ...
#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"

#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <unordered_map>
#include <vector>
40 namespace automemcpy
{
42 StringRef
Grade::getString(const GradeEnum
&GE
) {
59 report_fatal_error("logic error");
63 Grade::GradeEnum
Grade::judge(double Score
) {
// Computes the unbiased sample variance (Bessel's correction: divide by
// n - 1) of Samples around the caller-provided SampleMean.
// Samples must not be empty. A single sample has no spread, so its variance
// is defined as 0; the early return also avoids the division by zero that
// the (n - 1) denominator would otherwise produce.
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}
92 static void processPerDistributionData(PerDistributionData
&Data
) {
93 auto &Samples
= Data
.BytesPerSecondSamples
;
94 assert(!Samples
.empty());
96 const double Sum
= std::accumulate(Samples
.begin(), Samples
.end(), 0.0);
97 Data
.BytesPerSecondMean
= Sum
/ Samples
.size();
98 // Unbiased Sample Variance
99 Data
.BytesPerSecondVariance
=
100 computeUnbiasedSampleVariance(Samples
, Data
.BytesPerSecondMean
);
102 const size_t HalfSize
= Samples
.size() / 2;
103 std::nth_element(Samples
.begin(), Samples
.begin() + HalfSize
, Samples
.end());
104 Data
.BytesPerSecondMedian
= Samples
[HalfSize
];
107 std::vector
<FunctionData
> getThroughputs(ArrayRef
<Sample
> Samples
) {
108 std::unordered_map
<FunctionId
, FunctionData
, FunctionId::Hasher
> Functions
;
109 for (const auto &S
: Samples
) {
110 if (S
.Type
!= SampleType::ITERATION
)
112 auto &Function
= Functions
[S
.Id
.Function
];
113 auto &Data
= Function
.PerDistributionData
[S
.Id
.Distribution
.Name
];
114 Data
.BytesPerSecondSamples
.push_back(S
.BytesPerSecond
);
117 std::vector
<FunctionData
> Output
;
118 for (auto &[FunctionId
, Function
] : Functions
) {
119 Function
.Id
= FunctionId
;
120 for (auto &Pair
: Function
.PerDistributionData
)
121 processPerDistributionData(Pair
.second
);
122 Output
.push_back(std::move(Function
));
127 void fillScores(MutableArrayRef
<FunctionData
> Functions
) {
128 // A key to bucket throughput per function type and distribution.
131 StringRef Distribution
;
133 COMPARABLE_AND_HASHABLE(Key
, Type
, Distribution
)
136 // Tracks minimum and maximum values.
138 double Min
= std::numeric_limits
<double>::max();
139 double Max
= std::numeric_limits
<double>::min();
140 void update(double Value
) {
146 double normalize(double Value
) const { return (Value
- Min
) / (Max
- Min
); }
149 std::unordered_map
<Key
, MinMax
, Key::Hasher
> ThroughputMinMax
;
150 for (const auto &Function
: Functions
) {
151 const FunctionType Type
= Function
.Id
.Type
;
152 for (const auto &Pair
: Function
.PerDistributionData
) {
153 const auto &Distribution
= Pair
.getKey();
154 const double Throughput
= Pair
.getValue().BytesPerSecondMedian
;
155 const Key K
{Type
, Distribution
};
156 ThroughputMinMax
[K
].update(Throughput
);
160 for (auto &Function
: Functions
) {
161 const FunctionType Type
= Function
.Id
.Type
;
162 for (const auto &Pair
: Function
.PerDistributionData
) {
163 const auto &Distribution
= Pair
.getKey();
164 const double Throughput
= Pair
.getValue().BytesPerSecondMedian
;
165 const Key K
{Type
, Distribution
};
166 Function
.PerDistributionData
[Distribution
].Score
=
167 ThroughputMinMax
[K
].normalize(Throughput
);
172 void castVotes(MutableArrayRef
<FunctionData
> Functions
) {
173 for (FunctionData
&Function
: Functions
) {
174 Function
.ScoresGeoMean
= 1.0;
175 for (const auto &Pair
: Function
.PerDistributionData
) {
176 const StringRef Distribution
= Pair
.getKey();
177 const double Score
= Pair
.getValue().Score
;
178 Function
.ScoresGeoMean
*= Score
;
179 const auto G
= Grade::judge(Score
);
180 ++(Function
.GradeHisto
[G
]);
181 Function
.PerDistributionData
[Distribution
].Grade
= G
;
185 for (FunctionData
&Function
: Functions
) {
186 const auto &GradeHisto
= Function
.GradeHisto
;
188 std::accumulate(GradeHisto
.begin(), GradeHisto
.end(), 0U);
189 const size_t MedianVote
= Votes
/ 2;
190 size_t CountedVotes
= 0;
191 Grade::GradeEnum MedianGrade
= Grade::BAD
;
192 for (size_t I
= 0; I
< GradeHisto
.size(); ++I
) {
193 CountedVotes
+= GradeHisto
[I
];
194 if (CountedVotes
> MedianVote
) {
195 MedianGrade
= Grade::GradeEnum(I
);
199 Function
.FinalGrade
= MedianGrade
;
203 } // namespace automemcpy