# type: ignore

"""
report.py - Utilities for reporting statistics about benchmark results
"""

import copy
import os
import random
import re
import unittest

from numpy import array
from scipy.stats import gmean, mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor("NONE", "")
BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m")
BC_CYAN = BenchmarkColor("CYAN", "\033[96m")
BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m")
BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m")
BC_HEADER = BenchmarkColor("HEADER", "\033[92m")
BC_WARNING = BenchmarkColor("WARNING", "\033[93m")
BC_WHITE = BenchmarkColor("WHITE", "\033[97m")
BC_FAIL = BenchmarkColor("FAIL", "\033[91m")
BC_ENDC = BenchmarkColor("ENDC", "\033[0m")
BC_BOLD = BenchmarkColor("BOLD", "\033[1m")
BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m")

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"

_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
    "s": 1.0,
    "ms": 1e-3,
    "us": 1e-6,
    "ns": 1e-9,
}


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [
            arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for arg in args
        ]
        kwargs = {
            key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for key, arg in kwargs.items()
        }
    return fmt_str.format(*args, **kwargs)
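

# Illustrative usage sketch (an editor-added, hypothetical helper, not part
# of the original module): the same format string yields colored or plain
# text depending on 'use_color'.
def _example_color_format():
    colored = color_format(True, "{}ok{}", BC_OKGREEN, BC_ENDC)
    plain = color_format(False, "{}ok{}", BC_OKGREEN, BC_ENDC)
    assert colored == "\033[32mok\033[0m"  # ANSI codes are kept
    assert plain == "ok"  # BenchmarkColor args replaced with BC_NONE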


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc["name"]) > longest_name:
            longest_name = len(bc["name"])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
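

# Illustrative usage sketch (an editor-added, hypothetical helper):
# 'calculate_change' returns a decimal ratio, so +0.1 means the new value is
# 10% larger and -0.1 means it is 10% smaller.
def _example_calculate_change():
    assert calculate_change(100, 110) == 0.1  # 10% regression
    assert calculate_change(100, 90) == -0.1  # 10% improvement
    assert calculate_change(0, 0) == 0.0  # both zero: no change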


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered["benchmarks"] = []
    for be in json_orig["benchmarks"]:
        if not regex.search(be["name"]):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench["name"] = regex.sub(replacement, filteredbench["name"])
        filtered["benchmarks"].append(filteredbench)
    return filtered
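

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# keep only names matching the 'family' regex and rewrite the matched part.
def _example_filter_benchmark():
    data = {
        "benchmarks": [
            {"name": "BM_Foo/1", "real_time": 1},
            {"name": "BM_Bar/1", "real_time": 2},
        ]
    }
    out = filter_benchmark(data, "BM_Foo", "")
    assert [b["name"] for b in out["benchmarks"]] == ["/1"]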


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    # 'seen.add(...)' returns None, so the '(... or True)' clause records the
    # name as seen while still keeping this first occurrence in the output.
    uniqued = [
        x["name"]
        for x in json["benchmarks"]
        if x["name"] not in seen and (seen.add(x["name"]) or True)
    ]
    return uniqued


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return "time_unit" in x and "real_time" in x and "cpu_time" in x


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1["benchmarks"]:
            if x["name"] == name and is_potentially_comparable_benchmark(x):
                time_unit = x["time_unit"]
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [
            x
            for x in json1["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        rhs = [
            x
            for x in json2["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        partitions.append([lhs, rhs])
    return partitions
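

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# only names present in *both* runs are grouped; 'BM_Only' is dropped.
def _example_partition_benchmarks():
    run1 = {
        "benchmarks": [
            {"name": "BM_A", "time_unit": "ns", "real_time": 1, "cpu_time": 1},
            {"name": "BM_Only", "time_unit": "ns", "real_time": 3, "cpu_time": 3},
        ]
    }
    run2 = {
        "benchmarks": [
            {"name": "BM_A", "time_unit": "ns", "real_time": 2, "cpu_time": 2},
        ]
    }
    parts = partition_benchmarks(run1, run2)
    assert len(parts) == 1  # one common name
    lhs, rhs = parts[0]
    assert lhs[0]["name"] == rhs[0]["name"] == "BM_A"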


def get_timedelta_field_as_seconds(benchmark, field_name):
    """
    Get the value of benchmark's field_name field, which is a time with unit
    time_unit, converted to seconds.
    """
    timedelta = benchmark[field_name]
    time_unit = benchmark.get("time_unit", "s")
    return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
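

# Illustrative usage sketch (an editor-added, hypothetical helper): times are
# scaled by the unit multiplier table above, defaulting to seconds.
def _example_get_timedelta_field_as_seconds():
    bench = {"real_time": 250.0, "time_unit": "ms"}
    seconds = get_timedelta_field_as_seconds(bench, "real_time")
    assert abs(seconds - 0.25) < 1e-12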


def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.
    """
    times = []
    for benchmark in json["benchmarks"]:
        if "run_type" in benchmark and benchmark["run_type"] == "aggregate":
            continue
        times.append(
            [
                get_timedelta_field_as_seconds(benchmark, "real_time"),
                get_timedelta_field_as_seconds(benchmark, "cpu_time"),
            ]
        )
    return gmean(times) if times else array([])
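

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# gmean is taken column-wise, giving one geomean for real time and one for
# cpu time, both in seconds.
def _example_calculate_geomean():
    data = {
        "benchmarks": [
            {"name": "a", "real_time": 1, "cpu_time": 4, "time_unit": "s"},
            {"name": "b", "real_time": 4, "cpu_time": 1, "time_unit": "s"},
        ]
    }
    result = calculate_geomean(data)
    assert abs(result[0] - 2.0) < 1e-9  # geomean of [1, 4]
    assert abs(result[1] - 2.0) < 1e-9  # geomean of [4, 1]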


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(
        len(timings_time[0]),
        len(timings_time[1]),
        len(timings_cpu[0]),
        len(timings_cpu[1]),
    )

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative="two-sided"
    ).pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative="two-sided"
    ).pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
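

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# with three repetitions per side the test runs (>= UTEST_MIN_REPETITIONS),
# but the first return value is False because 3 < UTEST_OPTIMAL_REPETITIONS.
def _example_calc_utest():
    cpu = [[90, 95, 99], [100, 105, 110]]
    time = [[9.0, 9.5, 9.9], [10.0, 10.5, 11.0]]
    optimal, cpu_pvalue, time_pvalue = calc_utest(cpu, time)
    assert optimal is False
    assert cpu_pvalue is not None and time_pvalue is not None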


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (
        not utest["have_optimal_repetitions"]
        and utest["cpu_pvalue"] is None
        and utest["time_pvalue"] is None
    ):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest["nr_of_repetitions"], utest["nr_of_repetitions_other"]
    )
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest["have_optimal_repetitions"]:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS
        )

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [
        color_format(
            use_color,
            special_str,
            BC_HEADER,
            "{}{}".format(bc_name, UTEST_COL_NAME),
            first_col_width,
            get_utest_color(utest["time_pvalue"]),
            utest["time_pvalue"],
            get_utest_color(utest["cpu_pvalue"]),
            utest["cpu_pvalue"],
            dsc_color,
            dsc,
            endc=BC_ENDC,
        )
    ]


def get_difference_report(json1, json2, utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]["name"]
        label = partition[0][0]["label"] if "label" in partition[0][0] else ""
        time_unit = partition[0][0]["time_unit"]
        measurements = []
        utest_results = {}
        # Careful, we may have a different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append(
                {
                    "real_time": bn["real_time"],
                    "cpu_time": bn["cpu_time"],
                    "real_time_other": other_bench["real_time"],
                    "cpu_time_other": other_bench["cpu_time"],
                    "time": calculate_change(
                        bn["real_time"], other_bench["real_time"]
                    ),
                    "cpu": calculate_change(
                        bn["cpu_time"], other_bench["cpu_time"]
                    ),
                }
            )

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, "cpu_time")
            timings_time = extract_field(partition, "real_time")
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time
            )
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    "have_optimal_repetitions": have_optimal_repetitions,
                    "cpu_pvalue": cpu_pvalue,
                    "time_pvalue": time_pvalue,
                    "nr_of_repetitions": len(timings_cpu[0]),
                    "nr_of_repetitions_other": len(timings_cpu[1]),
                }

        # Store only if we had any measurements for the given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = (
                partition[0][0]["run_type"]
                if "run_type" in partition[0][0]
                else ""
            )
            aggregate_name = (
                partition[0][0]["aggregate_name"]
                if run_type == "aggregate"
                and "aggregate_name" in partition[0][0]
                else ""
            )
            diff_report.append(
                {
                    "name": benchmark_name,
                    "label": label,
                    "measurements": measurements,
                    "time_unit": time_unit,
                    "run_type": run_type,
                    "aggregate_name": aggregate_name,
                    "utest": utest_results,
                }
            )

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append(
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": lhs_gmean[0],
                        "cpu_time": lhs_gmean[1],
                        "real_time_other": rhs_gmean[0],
                        "cpu_time_other": rhs_gmean[1],
                        "time": calculate_change(lhs_gmean[0], rhs_gmean[0]),
                        "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]),
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            }
        )

    return diff_report
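

# Illustrative end-to-end sketch (an editor-added, hypothetical helper and
# data): diff two single-repetition runs and render the report without color.
def _example_get_difference_report():
    run1 = {
        "benchmarks": [
            {"name": "BM_X", "time_unit": "ns", "real_time": 100, "cpu_time": 100}
        ]
    }
    run2 = {
        "benchmarks": [
            {"name": "BM_X", "time_unit": "ns", "real_time": 110, "cpu_time": 90}
        ]
    }
    report = get_difference_report(run1, run2)
    assert report[0]["measurements"][0]["time"] == 0.1  # 10% slower
    assert report[0]["measurements"][0]["cpu"] == -0.1  # 10% less cpu
    lines = print_difference_report(report, use_color=False)
    assert lines[2].startswith("BM_X")  # rows follow the two header lines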


def print_difference_report(
    json_diff_report,
    include_aggregates_only=False,
    utest=False,
    utest_alpha=0.05,
    use_color=True,
):
    """
    Pretty-print the difference report computed by 'get_difference_report'
    and return the formatted lines.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(first_col_width, len("Benchmark"))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        "Benchmark", 12 + first_col_width
    )
    output_strs = [first_line, "-" * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (
            not include_aggregates_only
            or "run_type" not in benchmark
            or benchmark["run_type"] == "aggregate"
        ):
            for measurement in benchmark["measurements"]:
                output_strs += [
                    color_format(
                        use_color,
                        fmt_str,
                        BC_HEADER,
                        benchmark["name"],
                        first_col_width,
                        get_color(measurement["time"]),
                        measurement["time"],
                        get_color(measurement["cpu"]),
                        measurement["cpu"],
                        measurement["real_time"],
                        measurement["real_time_other"],
                        measurement["cpu_time"],
                        measurement["cpu_time_other"],
                        endc=BC_ENDC,
                    )
                ]

        # After processing the measurements, if requested and
        # if applicable (e.g. the u-test exists for the given benchmark),
        # print the U test.
        if utest and benchmark["utest"]:
            output_strs += print_utest(
                benchmark["name"],
                benchmark["utest"],
                utest_alpha=utest_alpha,
                first_col_width=first_col_width,
                use_color=use_color,
            )

    return output_strs


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json

        testInputs = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "Inputs"
        )
        testOutput = os.path.join(testInputs, "test3_run0.json")
        with open(testOutput, "r") as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            "BM_One",
            "BM_Two",
            "short",  # These two are not sorted
            "medium",  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test1_run1.json")
            testOutput2 = os.path.join(testInputs, "test1_run2.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"],
            ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"],
            ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"],
            [
                "BM_1PercentFaster",
                "-0.0100",
                "-0.0100",
                "100",
                "99",
                "100",
                "99",
            ],
            [
                "BM_1PercentSlower",
                "+0.0100",
                "+0.0100",
                "100",
                "101",
                "100",
                "101",
            ],
            [
                "BM_10PercentFaster",
                "-0.1000",
                "-0.1000",
                "100",
                "90",
                "100",
                "90",
            ],
            [
                "BM_10PercentSlower",
                "+0.1000",
                "+0.1000",
                "100",
                "110",
                "100",
                "110",
            ],
            [
                "BM_100xSlower",
                "+99.0000",
                "+99.0000",
                "100",
                "10000",
                "100",
                "10000",
            ],
            [
                "BM_100xFaster",
                "-0.9900",
                "-0.9900",
                "10000",
                "100",
                "10000",
                "100",
            ],
            [
                "BM_10PercentCPUToTime",
                "+0.1000",
                "-0.1000",
                "100",
                "110",
                "100",
                "90",
            ],
            ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"],
            ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"],
            ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"],
            ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                "name": "BM_SameTimes",
                "label": "",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 10,
                        "real_time_other": 10,
                        "cpu_time": 10,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.5000,
                        "cpu": -0.5000,
                        "real_time": 50,
                        "real_time_other": 25,
                        "cpu_time": 50,
                        "cpu_time_other": 25,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 1.0000,
                        "cpu": 1.0000,
                        "real_time": 50,
                        "real_time_other": 100,
                        "cpu_time": 50,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.0100,
                        "cpu": -0.0100,
                        "real_time": 100,
                        "real_time_other": 98.9999999,
                        "cpu_time": 100,
                        "cpu_time_other": 98.9999999,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 0.0100,
                        "cpu": 0.0100,
                        "real_time": 100,
                        "real_time_other": 101,
                        "cpu_time": 100,
                        "cpu_time_other": 101,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 90,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": 0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 99.0000,
                        "cpu": 99.0000,
                        "real_time": 100,
                        "real_time_other": 10000,
                        "cpu_time": 100,
                        "cpu_time_other": 10000,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.9900,
                        "cpu": -0.9900,
                        "real_time": 10000,
                        "real_time_other": 100,
                        "cpu_time": 10000,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentCPUToTime",
                "label": "",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_ThirdFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.3333,
                        "cpu": -0.3334,
                        "real_time": 100,
                        "real_time_other": 67,
                        "cpu_time": 100,
                        "cpu_time_other": 67,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_NotBadTimeUnit",
                "label": "",
                "measurements": [
                    {
                        "time": -0.9000,
                        "cpu": 0.2000,
                        "real_time": 0.4,
                        "real_time_other": 0.04,
                        "cpu_time": 0.5,
                        "cpu_time_other": 0.6,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "BM_hasLabel",
                "label": "a label",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 1,
                        "real_time_other": 1,
                        "cpu_time": 1,
                        "cpu_time_other": 1,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": 3.1622776601683826e-06,
                        "cpu_time": 3.2130844755623912e-06,
                        "real_time_other": 1.9768988699420897e-07,
                        "cpu_time_other": 2.397447755209533e-07,
                        "time": -0.8112976497120911,
                        "cpu": -0.7778551721181174,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["label"], expected["label"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test2_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            [".", "-0.5000", "-0.5000", "10", "5", "10", "5"],
            ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"],
            ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"],
            ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"],
            ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": ".",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 10,
                        "real_time_other": 5,
                        "cpu_time": 10,
                        "cpu_time_other": 5,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "./4",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 40,
                        "real_time_other": 20,
                        "cpu_time": 40,
                        "cpu_time_other": 20,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/.",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 20,
                        "real_time_other": 10,
                        "cpu_time": 20,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/./3",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 30,
                        "real_time_other": 15,
                        "cpu_time": 30,
                        "cpu_time_other": 15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 2.213363839400641e-08,
                        "cpu_time": 2.213363839400641e-08,
                        "real_time_other": 1.1066819197003185e-08,
                        "cpu_time_other": 1.1066819197003185e-08,
                        "time": -0.5000000000000009,
                        "cpu": -0.5000000000000009,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            include_aggregates_only=True,
            utest=True,
            utest_alpha=0.05,
            use_color=False,
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "time": -0.375,
                        "cpu": -0.3375,
                        "real_time": 8,
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "cpu_time_other": 53,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
    unittest.TestCase
):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "time": -0.375,
                        "real_time": 8,
                        "cpu_time_other": 53,
                        "cpu": -0.3375,
                    }
                ],
                "utest": {},
                "time_unit": "ns",
                "aggregate_name": "",
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test4_run0.json")
            testOutput2 = os.path.join(testInputs, "test4_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "whocares",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": 0.5,
                        "real_time": 0.01,
                        "real_time_other": 0.005,
                        "cpu_time": 0.10,
                        "cpu_time_other": 0.15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test4_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]

        for n in range(len(self.json["benchmarks"]) ** 2):
            random.shuffle(self.json["benchmarks"])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                "benchmarks"
            ]
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out["name"], expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2(
    unittest.TestCase
):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test5_run0.json")
            testOutput2 = os.path.join(testInputs, "test5_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
                json1["benchmarks"] = [
                    json1["benchmarks"][0] for i in range(1000)
                ]
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
                json2["benchmarks"] = [
                    json2["benchmarks"][0] for i in range(1000)
                ]
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_line = [
            "BM_ManyRepetitions_pvalue",
            "0.0000",
            "0.0000",
            "U",
            "Test,",
            "Repetitions:",
            "1000",
            "vs",
            "1000",
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        found = False
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            found = expect_line == parts
            if found:
                break
        self.assertTrue(found)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_ManyRepetitions",
                "label": "",
                "time_unit": "s",
                "run_type": "",
                "aggregate_name": "",
                "utest": {
                    "have_optimal_repetitions": True,
                    "cpu_pvalue": 0.0,
                    "time_pvalue": 0.0,
                    "nr_of_repetitions": 1000,
                    "nr_of_repetitions_other": 1000,
                },
            },
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": 1.0,
                        "cpu_time": 1000.000000000069,
                        "real_time_other": 1000.000000000069,
                        "cpu_time_other": 1.0,
                        "time": 999.000000000069,
                        "cpu": -0.9990000000000001,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs["utest"]:
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"]
        )
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"]
        )
        unittest_instance.assertEqual(
            lhs["utest"]["have_optimal_repetitions"],
            rhs["utest"]["have_optimal_repetitions"],
        )
    else:
        # lhs is empty; assert that rhs is empty too.
        unittest_instance.assertEqual(lhs["utest"], rhs["utest"])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs["measurements"], rhs["measurements"]):
        unittest_instance.assertEqual(m1["real_time"], m2["real_time"])
        unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4)
        unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4)


if __name__ == "__main__":
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;