1 """report.py - Utilities for reporting statistics about benchmark results
2 """
4 import unittest
5 import os
6 import re
7 import copy
8 import random
10 from scipy.stats import mannwhitneyu, gmean
11 from numpy import array
12 from pandas import Timedelta


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor("NONE", "")
BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m")
BC_CYAN = BenchmarkColor("CYAN", "\033[96m")
BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m")
BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m")
BC_HEADER = BenchmarkColor("HEADER", "\033[92m")
BC_WARNING = BenchmarkColor("WARNING", "\033[93m")
BC_WHITE = BenchmarkColor("WHITE", "\033[97m")
BC_FAIL = BenchmarkColor("FAIL", "\033[91m")
BC_ENDC = BenchmarkColor("ENDC", "\033[0m")
BC_BOLD = BenchmarkColor("BOLD", "\033[1m")
BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m")

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE for arg in args]
        kwargs = {
            key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for key, arg in kwargs.items()
        }
    return fmt_str.format(*args, **kwargs)
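
# For illustration, how the color stripping behaves (values follow directly
# from BenchmarkColor.__format__ above):
#   color_format(True, "{}{}{}", BC_FAIL, "slower", BC_ENDC)
#       -> "\033[91mslower\033[0m"
#   color_format(False, "{}{}{}", BC_FAIL, "slower", BC_ENDC)
#       -> "slower"    (BenchmarkColor arguments collapse to BC_NONE, i.e. "")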


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc["name"]) > longest_name:
            longest_name = len(bc["name"])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
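
# For illustration, a few worked values:
#   calculate_change(100, 110) ==  0.10   # 10% regression
#   calculate_change(100,  90) == -0.10   # 10% improvement
#   calculate_change(0,    50) ==  2.00   # old_val == 0: change relative to the
#                                         # midpoint (0 + 50) / 2 == 25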


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered["benchmarks"] = []
    for be in json_orig["benchmarks"]:
        if not regex.search(be["name"]):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench["name"] = regex.sub(replacement, filteredbench["name"])
        filtered["benchmarks"].append(filteredbench)
    return filtered
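
# For illustration, mirroring the family-comparison tests below: filtering with
# family="BM_O.e" and replacement="." keeps only matching names and rewrites
# the matched part, e.g. "BM_One/4" becomes "./4", so that two families can be
# diffed under a common name.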


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    uniqued = [
        x["name"]
        for x in json["benchmarks"]
        if x["name"] not in seen and (seen.add(x["name"]) or True)
    ]
    return uniqued
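
# For illustration: per-repetition duplicates collapse to their first
# occurrence, order preserved, e.g.
#   {"benchmarks": [{"name": "b"}, {"name": "a"}, {"name": "b"}]} -> ["b", "a"]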


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return "time_unit" in x and "real_time" in x and "cpu_time" in x


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1["benchmarks"]:
            if x["name"] == name and is_potentially_comparable_benchmark(x):
                time_unit = x["time_unit"]
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [
            x
            for x in json1["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        rhs = [
            x
            for x in json2["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        partitions.append([lhs, rhs])
    return partitions
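
# For illustration, the rough shape of the result: one [lhs, rhs] pair per
# common name, where lhs/rhs are the (possibly differently sized) lists of
# repetitions from json1/json2 that share that name and the lhs time unit.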


def get_timedelta_field_as_seconds(benchmark, field_name):
    """
    Get the value of benchmark's field_name field, a time expressed in the
    benchmark's time_unit, converted to seconds.
    """
    time_unit = benchmark["time_unit"] if "time_unit" in benchmark else "s"
    dt = Timedelta(benchmark[field_name], time_unit)
    return dt / Timedelta(1, "s")
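
# For illustration, pandas.Timedelta does the unit conversion, e.g.
#   get_timedelta_field_as_seconds({"real_time": 250, "time_unit": "ns"}, "real_time")
#       -> 2.5e-07
# A benchmark without a "time_unit" key is assumed to already be in seconds.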


def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.
    """
    times = []
    for benchmark in json["benchmarks"]:
        if "run_type" in benchmark and benchmark["run_type"] == "aggregate":
            continue
        times.append(
            [
                get_timedelta_field_as_seconds(benchmark, "real_time"),
                get_timedelta_field_as_seconds(benchmark, "cpu_time"),
            ]
        )
    return gmean(times) if times else array([])
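
# For illustration: the result is a two-element array [geomean of real times,
# geomean of cpu times], both in seconds; e.g. real times of 1s and 4s yield
# sqrt(1 * 4) == 2.0 in the first slot. Aggregate entries are skipped so that
# pre-computed means/medians do not skew the geomean.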


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(
        len(timings_time[0]),
        len(timings_time[1]),
        len(timings_cpu[0]),
        len(timings_cpu[1]),
    )

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative="two-sided"
    ).pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative="two-sided"
    ).pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
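
# For illustration, using the BM_Two fixture from the tests below: with only
# two repetitions per side we get
#   calc_utest([[90, 86], [89, 72]], [[9, 8], [10, 7]])
#       -> (False, 0.6666666666666666, 1.0)
# i.e. p-values are computed, but the "have optimal repetitions" flag stays
# False until both sides reach UTEST_OPTIMAL_REPETITIONS (9).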


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (
        not utest["have_optimal_repetitions"]
        and utest["cpu_pvalue"] is None
        and utest["time_pvalue"] is None
    ):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest["nr_of_repetitions"], utest["nr_of_repetitions_other"]
    )
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest["have_optimal_repetitions"]:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS
        )

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [
        color_format(
            use_color,
            special_str,
            BC_HEADER,
            "{}{}".format(bc_name, UTEST_COL_NAME),
            first_col_width,
            get_utest_color(utest["time_pvalue"]),
            utest["time_pvalue"],
            get_utest_color(utest["cpu_pvalue"]),
            utest["cpu_pvalue"],
            dsc_color,
            dsc,
            endc=BC_ENDC,
        )
    ]


def get_difference_report(json1, json2, utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]["name"]
        time_unit = partition[0][0]["time_unit"]
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append(
                {
                    "real_time": bn["real_time"],
                    "cpu_time": bn["cpu_time"],
                    "real_time_other": other_bench["real_time"],
                    "cpu_time_other": other_bench["cpu_time"],
                    "time": calculate_change(bn["real_time"], other_bench["real_time"]),
                    "cpu": calculate_change(bn["cpu_time"], other_bench["cpu_time"]),
                }
            )

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, "cpu_time")
            timings_time = extract_field(partition, "real_time")
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time
            )
            if cpu_pvalue and time_pvalue:
                utest_results = {
                    "have_optimal_repetitions": have_optimal_repetitions,
                    "cpu_pvalue": cpu_pvalue,
                    "time_pvalue": time_pvalue,
                    "nr_of_repetitions": len(timings_cpu[0]),
                    "nr_of_repetitions_other": len(timings_cpu[1]),
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = (
                partition[0][0]["run_type"] if "run_type" in partition[0][0] else ""
            )
            aggregate_name = (
                partition[0][0]["aggregate_name"]
                if run_type == "aggregate" and "aggregate_name" in partition[0][0]
                else ""
            )
            diff_report.append(
                {
                    "name": benchmark_name,
                    "measurements": measurements,
                    "time_unit": time_unit,
                    "run_type": run_type,
                    "aggregate_name": aggregate_name,
                    "utest": utest_results,
                }
            )

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append(
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": lhs_gmean[0],
                        "cpu_time": lhs_gmean[1],
                        "real_time_other": rhs_gmean[0],
                        "cpu_time_other": rhs_gmean[1],
                        "time": calculate_change(lhs_gmean[0], rhs_gmean[0]),
                        "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]),
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            }
        )

    return diff_report
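
# For illustration, a minimal sketch (these two runs are made up, single
# repetition each):
#   lhs = {"benchmarks": [{"name": "BM_X", "time_unit": "ns",
#                          "real_time": 100, "cpu_time": 100}]}
#   rhs = {"benchmarks": [{"name": "BM_X", "time_unit": "ns",
#                          "real_time": 110, "cpu_time": 90}]}
#   get_difference_report(lhs, rhs)[0]["measurements"][0]
#       -> {..., "time": 0.1, "cpu": -0.1, ...}
# followed by an appended "OVERALL_GEOMEAN" aggregate entry.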


def print_difference_report(
    json_diff_report,
    include_aggregates_only=False,
    utest=False,
    utest_alpha=0.05,
    use_color=True,
):
    """
    Pretty-print the diff report produced by get_difference_report(), one
    output line per measurement, optionally followed by U test lines.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(first_col_width, len("Benchmark"))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
        "Benchmark", 12 + first_col_width
    )
    output_strs = [first_line, "-" * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (
            not include_aggregates_only
            or "run_type" not in benchmark
            or benchmark["run_type"] == "aggregate"
        ):
            for measurement in benchmark["measurements"]:
                output_strs += [
                    color_format(
                        use_color,
                        fmt_str,
                        BC_HEADER,
                        benchmark["name"],
                        first_col_width,
                        get_color(measurement["time"]),
                        measurement["time"],
                        get_color(measurement["cpu"]),
                        measurement["cpu"],
                        measurement["real_time"],
                        measurement["real_time_other"],
                        measurement["cpu_time"],
                        measurement["cpu_time_other"],
                        endc=BC_ENDC,
                    )
                ]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for given benchmark),
        # print the U test.
        if utest and benchmark["utest"]:
            output_strs += print_utest(
                benchmark["name"],
                benchmark["utest"],
                utest_alpha=utest_alpha,
                first_col_width=first_col_width,
                use_color=use_color,
            )

    return output_strs
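
# For illustration, the intended call chain (as driven by the sibling
# compare.py tool and the tests below):
#   diff = get_difference_report(json1, json2, utest=True)
#   for line in print_difference_report(diff, utest=True, use_color=False):
#       print(line)
# Per get_color above, a row prints red past a +5% regression, cyan past a
# -7% improvement, and white in between.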


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json

        testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), "Inputs")
        testOutput = os.path.join(testInputs, "test3_run0.json")
        with open(testOutput, "r") as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            "BM_One",
            "BM_Two",
            "short",  # These two are not sorted
            "medium",  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test1_run1.json")
            testOutput2 = os.path.join(testInputs, "test1_run2.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"],
            ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"],
            ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"],
            ["BM_1PercentFaster", "-0.0100", "-0.0100", "100", "99", "100", "99"],
            ["BM_1PercentSlower", "+0.0100", "+0.0100", "100", "101", "100", "101"],
            ["BM_10PercentFaster", "-0.1000", "-0.1000", "100", "90", "100", "90"],
            ["BM_10PercentSlower", "+0.1000", "+0.1000", "100", "110", "100", "110"],
            ["BM_100xSlower", "+99.0000", "+99.0000", "100", "10000", "100", "10000"],
            ["BM_100xFaster", "-0.9900", "-0.9900", "10000", "100", "10000", "100"],
            ["BM_10PercentCPUToTime", "+0.1000", "-0.1000", "100", "110", "100", "90"],
            ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"],
            ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"],
            ["OVERALL_GEOMEAN", "-0.8344", "-0.8026", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)
    def test_json_diff_report_output(self):
        expected_output = [
            {
                "name": "BM_SameTimes",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 10,
                        "real_time_other": 10,
                        "cpu_time": 10,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xFaster",
                "measurements": [
                    {
                        "time": -0.5000,
                        "cpu": -0.5000,
                        "real_time": 50,
                        "real_time_other": 25,
                        "cpu_time": 50,
                        "cpu_time_other": 25,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xSlower",
                "measurements": [
                    {
                        "time": 1.0000,
                        "cpu": 1.0000,
                        "real_time": 50,
                        "real_time_other": 100,
                        "cpu_time": 50,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentFaster",
                "measurements": [
                    {
                        "time": -0.0100,
                        "cpu": -0.0100,
                        "real_time": 100,
                        "real_time_other": 98.9999999,
                        "cpu_time": 100,
                        "cpu_time_other": 98.9999999,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentSlower",
                "measurements": [
                    {
                        "time": 0.0100,
                        "cpu": 0.0100,
                        "real_time": 100,
                        "real_time_other": 101,
                        "cpu_time": 100,
                        "cpu_time_other": 101,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentFaster",
                "measurements": [
                    {
                        "time": -0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 90,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentSlower",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": 0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xSlower",
                "measurements": [
                    {
                        "time": 99.0000,
                        "cpu": 99.0000,
                        "real_time": 100,
                        "real_time_other": 10000,
                        "cpu_time": 100,
                        "cpu_time_other": 10000,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xFaster",
                "measurements": [
                    {
                        "time": -0.9900,
                        "cpu": -0.9900,
                        "real_time": 10000,
                        "real_time_other": 100,
                        "cpu_time": 10000,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentCPUToTime",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_ThirdFaster",
                "measurements": [
                    {
                        "time": -0.3333,
                        "cpu": -0.3334,
                        "real_time": 100,
                        "real_time_other": 67,
                        "cpu_time": 100,
                        "cpu_time_other": 67,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_NotBadTimeUnit",
                "measurements": [
                    {
                        "time": -0.9000,
                        "cpu": 0.2000,
                        "real_time": 0.4,
                        "real_time_other": 0.04,
                        "cpu_time": 0.5,
                        "cpu_time_other": 0.6,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 1.193776641714438e-06,
                        "cpu_time": 1.2144445585302297e-06,
                        "real_time_other": 1.9768988699420897e-07,
                        "cpu_time_other": 2.397447755209533e-07,
                        "time": -0.834399601997324,
                        "cpu": -0.8025889499549471,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test2_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            [".", "-0.5000", "-0.5000", "10", "5", "10", "5"],
            ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"],
            ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"],
            ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"],
            ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": ".",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 10,
                        "real_time_other": 5,
                        "cpu_time": 10,
                        "cpu_time_other": 5,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "./4",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 40,
                        "real_time_other": 20,
                        "cpu_time": 40,
                        "cpu_time_other": 20,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/.",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 20,
                        "real_time_other": 10,
                        "cpu_time": 20,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/./3",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 30,
                        "real_time_other": 15,
                        "cpu_time": 30,
                        "cpu_time_other": 15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 2.213363839400641e-08,
                        "cpu_time": 2.213363839400641e-08,
                        "real_time_other": 1.1066819197003185e-08,
                        "cpu_time_other": 1.1066819197003185e-08,
                        "time": -0.5000000000000009,
                        "cpu": -0.5000000000000009,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            include_aggregates_only=True,
            utest=True,
            utest_alpha=0.05,
            use_color=False,
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "time": -0.375,
                        "cpu": -0.3375,
                        "real_time": 8,
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "cpu_time_other": 53,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "time": -0.375,
                        "real_time": 8,
                        "cpu_time_other": 53,
                        "cpu": -0.3375,
                    }
                ],
                "utest": {},
                "time_unit": "ns",
                "aggregate_name": "",
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test4_run0.json")
            testOutput2 = os.path.join(testInputs, "test4_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "whocares",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": 0.5,
                        "real_time": 0.01,
                        "real_time_other": 0.005,
                        "cpu_time": 0.10,
                        "cpu_time_other": 0.15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test4_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]

        for n in range(len(self.json["benchmarks"]) ** 2):
            random.shuffle(self.json["benchmarks"])
            sorted_benchmarks = util.sort_benchmark_results(self.json)["benchmarks"]
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out["name"], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs["utest"]:
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"]
        )
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"]
        )
        unittest_instance.assertEqual(
            lhs["utest"]["have_optimal_repetitions"],
            rhs["utest"]["have_optimal_repetitions"],
        )
    else:
        # lhs is empty; fail if rhs is not.
        unittest_instance.assertEqual(lhs["utest"], rhs["utest"])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs["measurements"], rhs["measurements"]):
        unittest_instance.assertEqual(m1["real_time"], m2["real_time"])
        unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"])
        # m1['time'] and m1['cpu'] hold calculated values, so compare them
        # with the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4)
        unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4)


if __name__ == "__main__":
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;