libc/benchmarks/libc-benchmark-analysis.py3

   1 """Reads JSON files produced by the benchmarking framework and renders them.
   2
   3 Installation:
   4 > apt-get install python3-pip
   5 > pip3 install matplotlib pandas seaborn
   6
   7 Run:
   8 > python3 libc/benchmarks/libc-benchmark-analysis.py3 <files>
   9 """
  10
  11 import argparse
  12 import json
  13 import pandas as pd
  14 import seaborn as sns
  15 import matplotlib.pyplot as plt
  16 from matplotlib.ticker import EngFormatter
  17
  18 def formatUnit(value, unit):
  19     return EngFormatter(unit, sep="").format_data(value)
  20
  21 def formatCache(cache):
  22   letter = cache["Type"][0].lower()
  23   level = cache["Level"]
  24   size = formatUnit(cache["Size"], "B")
  25   ways = cache["NumSharing"]
  26   return F'{letter}L{level}:{size}/{ways}'
  27
  28 def getCpuFrequency(study):
  29     return study["Runtime"]["Host"]["CpuFrequency"]
  30
  31 def getId(study):
  32     CpuName = study["Runtime"]["Host"]["CpuName"]
  33     CpuFrequency = formatUnit(getCpuFrequency(study), "Hz")
  34     Mode = " (Sweep)" if study["Configuration"]["IsSweepMode"] else ""
  35     CpuCaches = ", ".join(formatCache(c) for c in study["Runtime"]["Host"]["Caches"])
  36     return F'{CpuName} {CpuFrequency}{Mode}\n{CpuCaches}'
  37
  38 def getFunction(study):
  39     return study["Configuration"]["Function"]
  40
  41 def getLabel(study):
  42     return F'{getFunction(study)} {study["StudyName"]}'
  43
  44 def displaySweepData(id, studies, mode):
  45     df = None
  46     for study in studies:
  47         Measurements = study["Measurements"]
  48         SweepModeMaxSize = study["Configuration"]["SweepModeMaxSize"]
  49         NumSizes = SweepModeMaxSize + 1
  50         NumTrials = study["Configuration"]["NumTrials"]
  51         assert NumTrials * NumSizes  == len(Measurements), 'not a multiple of NumSizes'
  52         Index = pd.MultiIndex.from_product([range(NumSizes), range(NumTrials)], names=['size', 'trial'])
  53         if df is None:
  54             df = pd.DataFrame(Measurements, index=Index, columns=[getLabel(study)])
  55         else:
  56             df[getLabel(study)] = pd.Series(Measurements, index=Index)
  57     df = df.reset_index(level='trial', drop=True)
  58     if mode == "cycles":
  59         df *= getCpuFrequency(study)
  60     if mode == "bytespercycle":
  61         df *= getCpuFrequency(study)
  62         for col in df.columns:
  63             df[col] = pd.Series(data=df.index, index=df.index).divide(df[col])
  64     FormatterUnit = {"time":"s","cycles":"","bytespercycle":"B/cycle"}[mode]
  65     Label = {"time":"Time","cycles":"Cycles","bytespercycle":"Byte/cycle"}[mode]
  66     graph = sns.lineplot(data=df, palette="muted", ci=95)
  67     graph.set_title(id)
  68     graph.yaxis.set_major_formatter(EngFormatter(unit=FormatterUnit))
  69     graph.yaxis.set_label_text(Label)
  70     graph.xaxis.set_major_formatter(EngFormatter(unit="B"))
  71     graph.xaxis.set_label_text("Copy Size")
  72     _ = plt.xticks(rotation=90)
  73     plt.show()
  74
  75 def displayDistributionData(id, studies, mode):
  76     distributions = set()
  77     df = None
  78     for study in studies:
  79         distribution = study["Configuration"]["SizeDistributionName"]
  80         distributions.add(distribution)
  81         local = pd.DataFrame(study["Measurements"], columns=["time"])
  82         local["distribution"] = distribution
  83         local["label"] = getLabel(study)
  84         local["cycles"] = local["time"] * getCpuFrequency(study)
  85         if df is None:
  86             df = local
  87         else:
  88             df = df.append(local)
  89     if mode == "bytespercycle":
  90         mode = "time"
  91         print("`--mode=bytespercycle` is ignored for distribution mode reports")
  92     FormatterUnit = {"time":"s","cycles":""}[mode]
  93     Label = {"time":"Time","cycles":"Cycles"}[mode]
  94     graph = sns.violinplot(data=df, x="distribution", y=mode, palette="muted", hue="label", order=sorted(distributions))
  95     graph.set_title(id)
  96     graph.yaxis.set_major_formatter(EngFormatter(unit=FormatterUnit))
  97     graph.yaxis.set_label_text(Label)
  98     _ = plt.xticks(rotation=90)
  99     plt.show()
 100
 101
 102 def main():
 103     parser = argparse.ArgumentParser(description="Process benchmark json files.")
 104     parser.add_argument("--mode", choices=["time", "cycles", "bytespercycle"], default="time", help="Use to display either 'time', 'cycles' or 'bytes/cycle'.")
 105     parser.add_argument("files", nargs="+", help="The json files to read from.")
 106
 107     args = parser.parse_args()
 108     study_groups = dict()
 109     for file in args.files:
 110         with open(file) as json_file:
 111             json_obj = json.load(json_file)
 112             Id = getId(json_obj)
 113             if Id in study_groups:
 114                 study_groups[Id].append(json_obj)
 115             else:
 116                 study_groups[Id] = [json_obj]
 117
 118     plt.tight_layout()
 119     sns.set_theme(style="ticks")
 120     for id, study_collection in study_groups.items():
 121         if "(Sweep)" in id:
 122             displaySweepData(id, study_collection, args.mode)
 123         else:
 124             displayDistributionData(id, study_collection, args.mode)
 125
 126
# Run the analysis only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()