//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks
//   the median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores functions from
//   worst (0) to best (1).
// - Each distribution categorizes each function into one of the following
//   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
//   BAD.
// - A process similar to the Majority Judgment voting system is used to
//   `elect` the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example both functions grade EXCELLENT but we may prefer the second one.
//
//   |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
//   |------------|-----------|-----------|------|----------| ...
//   | Function_1 |         7 |         1 |    2 |          | ...
//   | Function_2 |         6 |         4 |      |          | ...
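//
//   As an illustrative example of the vote counting (assuming the columns
//   elided by `...` are empty): Function_1 received 7 + 1 + 2 = 10 votes, so
//   the median vote index is 10 / 2 = 5; scanning the histogram from EXCELLENT
//   towards BAD, the cumulative count first exceeds 5 at EXCELLENT (7 > 5),
//   hence Function_1's final grade is EXCELLENT.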
33 #include "automemcpy/ResultAnalyzer.h"
34 #include "llvm/ADT/StringRef.h"
35 #include <numeric>
36 #include <unordered_map>
38 namespace llvm {
40 namespace automemcpy {
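
// Returns the human-readable name of a grade. ARRAY_SIZE is a sentinel
// enumerator, not an actual grade, hence the fatal error below.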
StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}
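
// Maps a normalized score in [0, 1] to one of the seven grades by splitting
// the range into equal-width buckets of size 1/7. For instance, a score of
// 0.8 falls in [5/7, 6/7) and is therefore judged VERY_GOOD.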
Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}
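
// Returns the unbiased sample variance: the sum of squared deviations from the
// sample mean divided by (N - 1), i.e. with Bessel's correction. A single
// sample yields a variance of zero.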
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}
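
// Fills in the mean, unbiased variance and median of the `BytesPerSecond`
// samples for one (function, distribution) bucket. Note that std::nth_element
// partially reorders the samples in place.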
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}
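
// Collects the `BytesPerSecond` of each ITERATION sample per function and
// distribution, then computes the per-distribution statistics for each
// function.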
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}
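
// Converts each function's median throughput into a score in [0, 1] using
// min-max normalization, i.e. Score = (Median - Min) / (Max - Min), where Min
// and Max are taken over all functions of the same type for a given
// distribution.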
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };
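
  // First pass: record the minimum and maximum median throughput observed for
  // each (function type, distribution) bucket.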
  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }
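
  // Second pass: normalize each function's median throughput within its
  // (function type, distribution) bucket.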
  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}
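
// Grades each function per distribution from its normalized score, then elects
// the final grade, Majority Judgment style, as the median of the function's
// grade histogram.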
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    Function.ScoresGeoMean = 1.0;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }
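
  // The final grade is the grade at which the cumulative vote count first
  // exceeds half of the total votes, scanning from EXCELLENT down to BAD.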
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}
} // namespace automemcpy
} // namespace llvm