# type: ignore

"""
report.py - Utilities for reporting statistics about benchmark results
"""

import copy
import os
import random
import re
import unittest

from numpy import array
from scipy.stats import gmean, mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor("NONE", "")
BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m")
BC_CYAN = BenchmarkColor("CYAN", "\033[96m")
BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m")
BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m")
BC_HEADER = BenchmarkColor("HEADER", "\033[92m")
BC_WARNING = BenchmarkColor("WARNING", "\033[93m")
BC_WHITE = BenchmarkColor("WHITE", "\033[97m")
BC_FAIL = BenchmarkColor("FAIL", "\033[91m")
BC_ENDC = BenchmarkColor("ENDC", "\033[0m")
BC_BOLD = BenchmarkColor("BOLD", "\033[1m")
BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m")

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"

_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
    "s": 1.0,
    "ms": 1e-3,
    "us": 1e-6,
    "ns": 1e-9,
}


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [
            arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for arg in args
        ]
        kwargs = {
            key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for key, arg in kwargs.items()
        }
    return fmt_str.format(*args, **kwargs)
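

# Illustrative usage sketch (an editor-added, hypothetical helper, not part
# of the original module): the same format string yields colored or plain
# text depending on 'use_color'.
def _example_color_format():
    colored = color_format(True, "{}ok{}", BC_OKGREEN, BC_ENDC)
    plain = color_format(False, "{}ok{}", BC_OKGREEN, BC_ENDC)
    assert colored == "\033[32mok\033[0m"  # ANSI codes are kept
    assert plain == "ok"  # BenchmarkColor args replaced with BC_NONE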


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc["name"]) > longest_name:
            longest_name = len(bc["name"])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
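

# Illustrative usage sketch (an editor-added, hypothetical helper):
# 'calculate_change' returns a decimal ratio, so +0.1 means the new value is
# 10% larger and -0.1 means it is 10% smaller.
def _example_calculate_change():
    assert calculate_change(100, 110) == 0.1  # 10% regression
    assert calculate_change(100, 90) == -0.1  # 10% improvement
    assert calculate_change(0, 0) == 0.0  # both zero: no change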


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered["benchmarks"] = []
    for be in json_orig["benchmarks"]:
        if not regex.search(be["name"]):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench["name"] = regex.sub(replacement, filteredbench["name"])
        filtered["benchmarks"].append(filteredbench)
    return filtered
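

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# keep only names matching the 'family' regex and rewrite the matched part.
def _example_filter_benchmark():
    data = {
        "benchmarks": [
            {"name": "BM_Foo/1", "real_time": 1},
            {"name": "BM_Bar/1", "real_time": 2},
        ]
    }
    out = filter_benchmark(data, "BM_Foo", "")
    assert [b["name"] for b in out["benchmarks"]] == ["/1"]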


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    # 'seen.add(...)' returns None, so the '(... or True)' clause records the
    # name as seen while still keeping this first occurrence in the output.
    uniqued = [
        x["name"]
        for x in json["benchmarks"]
        if x["name"] not in seen and (seen.add(x["name"]) or True)
    ]
    return uniqued


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return "time_unit" in x and "real_time" in x and "cpu_time" in x


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1["benchmarks"]:
            if x["name"] == name and is_potentially_comparable_benchmark(x):
                time_unit = x["time_unit"]
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [
            x
            for x in json1["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        rhs = [
            x
            for x in json2["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        partitions.append([lhs, rhs])
    return partitions
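

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# only names present in *both* runs are grouped; 'BM_Only' is dropped.
def _example_partition_benchmarks():
    run1 = {
        "benchmarks": [
            {"name": "BM_A", "time_unit": "ns", "real_time": 1, "cpu_time": 1},
            {"name": "BM_Only", "time_unit": "ns", "real_time": 3, "cpu_time": 3},
        ]
    }
    run2 = {
        "benchmarks": [
            {"name": "BM_A", "time_unit": "ns", "real_time": 2, "cpu_time": 2},
        ]
    }
    parts = partition_benchmarks(run1, run2)
    assert len(parts) == 1  # one common name
    lhs, rhs = parts[0]
    assert lhs[0]["name"] == rhs[0]["name"] == "BM_A"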


def get_timedelta_field_as_seconds(benchmark, field_name):
    """
    Get the value of benchmark's field_name field, which is a time with unit
    time_unit, converted to seconds.
    """
    timedelta = benchmark[field_name]
    time_unit = benchmark.get("time_unit", "s")
    return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
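

# Illustrative usage sketch (an editor-added, hypothetical helper): times are
# scaled by the unit multiplier table above, defaulting to seconds.
def _example_get_timedelta_field_as_seconds():
    bench = {"real_time": 250.0, "time_unit": "ms"}
    seconds = get_timedelta_field_as_seconds(bench, "real_time")
    assert abs(seconds - 0.25) < 1e-12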


def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.
    """
    times = []
    for benchmark in json["benchmarks"]:
        if "run_type" in benchmark and benchmark["run_type"] == "aggregate":
            continue
        times.append(
            [
                get_timedelta_field_as_seconds(benchmark, "real_time"),
                get_timedelta_field_as_seconds(benchmark, "cpu_time"),
            ]
        )
    return gmean(times) if times else array([])
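

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# gmean is taken column-wise, giving one geomean for real time and one for
# cpu time, both in seconds.
def _example_calculate_geomean():
    data = {
        "benchmarks": [
            {"name": "a", "real_time": 1, "cpu_time": 4, "time_unit": "s"},
            {"name": "b", "real_time": 4, "cpu_time": 1, "time_unit": "s"},
        ]
    }
    result = calculate_geomean(data)
    assert abs(result[0] - 2.0) < 1e-9  # geomean of [1, 4]
    assert abs(result[1] - 2.0) < 1e-9  # geomean of [4, 1]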


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(
        len(timings_time[0]),
        len(timings_time[1]),
        len(timings_cpu[0]),
        len(timings_cpu[1]),
    )

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative="two-sided"
    ).pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative="two-sided"
    ).pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
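

# Illustrative usage sketch (an editor-added, hypothetical helper and data):
# with three repetitions per side the test runs (>= UTEST_MIN_REPETITIONS),
# but the first return value is False because 3 < UTEST_OPTIMAL_REPETITIONS.
def _example_calc_utest():
    cpu = [[90, 95, 99], [100, 105, 110]]
    time = [[9.0, 9.5, 9.9], [10.0, 10.5, 11.0]]
    optimal, cpu_pvalue, time_pvalue = calc_utest(cpu, time)
    assert optimal is False
    assert cpu_pvalue is not None and time_pvalue is not None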


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (
        not utest["have_optimal_repetitions"]
        and utest["cpu_pvalue"] is None
        and utest["time_pvalue"] is None
    ):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest["nr_of_repetitions"], utest["nr_of_repetitions_other"]
    )
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest["have_optimal_repetitions"]:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS
        )

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [
        color_format(
            use_color,
            special_str,
            BC_HEADER,
            "{}{}".format(bc_name, UTEST_COL_NAME),
            first_col_width,
            get_utest_color(utest["time_pvalue"]),
            utest["time_pvalue"],
            get_utest_color(utest["cpu_pvalue"]),
            utest["cpu_pvalue"],
            dsc_color,
            dsc,
            endc=BC_ENDC,
        )
    ]


def get_difference_report(json1, json2, utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]["name"]
        label = partition[0][0]["label"] if "label" in partition[0][0] else ""
        time_unit = partition[0][0]["time_unit"]
        measurements = []
        utest_results = {}
        # Careful, we may have a different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append(
                {
                    "real_time": bn["real_time"],
                    "cpu_time": bn["cpu_time"],
                    "real_time_other": other_bench["real_time"],
                    "cpu_time_other": other_bench["cpu_time"],
                    "time": calculate_change(
                        bn["real_time"], other_bench["real_time"]
                    ),
                    "cpu": calculate_change(
                        bn["cpu_time"], other_bench["cpu_time"]
                    ),
                }
            )

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, "cpu_time")
            timings_time = extract_field(partition, "real_time")
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time
            )
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    "have_optimal_repetitions": have_optimal_repetitions,
                    "cpu_pvalue": cpu_pvalue,
                    "time_pvalue": time_pvalue,
                    "nr_of_repetitions": len(timings_cpu[0]),
                    "nr_of_repetitions_other": len(timings_cpu[1]),
                }

        # Store only if we had any measurements for the given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = (
                partition[0][0]["run_type"]
                if "run_type" in partition[0][0]
                else ""
            )
            aggregate_name = (
                partition[0][0]["aggregate_name"]
                if run_type == "aggregate"
                and "aggregate_name" in partition[0][0]
                else ""
            )
            diff_report.append(
                {
                    "name": benchmark_name,
                    "label": label,
                    "measurements": measurements,
                    "time_unit": time_unit,
                    "run_type": run_type,
                    "aggregate_name": aggregate_name,
                    "utest": utest_results,
                }
            )

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append(
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": lhs_gmean[0],
                        "cpu_time": lhs_gmean[1],
                        "real_time_other": rhs_gmean[0],
                        "cpu_time_other": rhs_gmean[1],
                        "time": calculate_change(lhs_gmean[0], rhs_gmean[0]),
                        "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]),
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            }
        )

    return diff_report
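

# Illustrative end-to-end sketch (an editor-added, hypothetical helper and
# data): diff two single-repetition runs and render the report without color.
def _example_get_difference_report():
    run1 = {
        "benchmarks": [
            {"name": "BM_X", "time_unit": "ns", "real_time": 100, "cpu_time": 100}
        ]
    }
    run2 = {
        "benchmarks": [
            {"name": "BM_X", "time_unit": "ns", "real_time": 110, "cpu_time": 90}
        ]
    }
    report = get_difference_report(run1, run2)
    assert report[0]["measurements"][0]["time"] == 0.1  # 10% slower
    assert report[0]["measurements"][0]["cpu"] == -0.1  # 10% less cpu
    lines = print_difference_report(report, use_color=False)
    assert lines[2].startswith("BM_X")  # rows follow the two header lines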


def print_difference_report(
    json_diff_report,
    include_aggregates_only=False,
    utest=False,
    utest_alpha=0.05,
    use_color=True,
):
    """
    Pretty-print the difference report computed by 'get_difference_report'
    and return the formatted lines.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(first_col_width, len("Benchmark"))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        "Benchmark", 12 + first_col_width
    )
    output_strs = [first_line, "-" * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (
            not include_aggregates_only
            or "run_type" not in benchmark
            or benchmark["run_type"] == "aggregate"
        ):
            for measurement in benchmark["measurements"]:
                output_strs += [
                    color_format(
                        use_color,
                        fmt_str,
                        BC_HEADER,
                        benchmark["name"],
                        first_col_width,
                        get_color(measurement["time"]),
                        measurement["time"],
                        get_color(measurement["cpu"]),
                        measurement["cpu"],
                        measurement["real_time"],
                        measurement["real_time_other"],
                        measurement["cpu_time"],
                        measurement["cpu_time_other"],
                        endc=BC_ENDC,
                    )
                ]

        # After processing the measurements, if requested and
        # if applicable (e.g. the u-test exists for the given benchmark),
        # print the U test.
        if utest and benchmark["utest"]:
            output_strs += print_utest(
                benchmark["name"],
                benchmark["utest"],
                utest_alpha=utest_alpha,
                first_col_width=first_col_width,
                use_color=use_color,
            )

    return output_strs


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json

        testInputs = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "Inputs"
        )
        testOutput = os.path.join(testInputs, "test3_run0.json")
        with open(testOutput, "r") as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            "BM_One",
            "BM_Two",
            "short",  # These two are not sorted
            "medium",  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test1_run1.json")
            testOutput2 = os.path.join(testInputs, "test1_run2.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"],
            ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"],
            ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"],
            [
                "BM_1PercentFaster",
                "-0.0100",
                "-0.0100",
                "100",
                "99",
                "100",
                "99",
            ],
            [
                "BM_1PercentSlower",
                "+0.0100",
                "+0.0100",
                "100",
                "101",
                "100",
                "101",
            ],
            [
                "BM_10PercentFaster",
                "-0.1000",
                "-0.1000",
                "100",
                "90",
                "100",
                "90",
            ],
            [
                "BM_10PercentSlower",
                "+0.1000",
                "+0.1000",
                "100",
                "110",
                "100",
                "110",
            ],
            [
                "BM_100xSlower",
                "+99.0000",
                "+99.0000",
                "100",
                "10000",
                "100",
                "10000",
            ],
            [
                "BM_100xFaster",
                "-0.9900",
                "-0.9900",
                "10000",
                "100",
                "10000",
                "100",
            ],
            [
                "BM_10PercentCPUToTime",
                "+0.1000",
                "-0.1000",
                "100",
                "110",
                "100",
                "90",
            ],
            ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"],
            ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"],
            ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"],
            ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                "name": "BM_SameTimes",
                "label": "",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 10,
                        "real_time_other": 10,
                        "cpu_time": 10,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.5000,
                        "cpu": -0.5000,
                        "real_time": 50,
                        "real_time_other": 25,
                        "cpu_time": 50,
                        "cpu_time_other": 25,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 1.0000,
                        "cpu": 1.0000,
                        "real_time": 50,
                        "real_time_other": 100,
                        "cpu_time": 50,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.0100,
                        "cpu": -0.0100,
                        "real_time": 100,
                        "real_time_other": 98.9999999,
                        "cpu_time": 100,
                        "cpu_time_other": 98.9999999,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 0.0100,
                        "cpu": 0.0100,
                        "real_time": 100,
                        "real_time_other": 101,
                        "cpu_time": 100,
                        "cpu_time_other": 101,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 90,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": 0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 99.0000,
                        "cpu": 99.0000,
                        "real_time": 100,
                        "real_time_other": 10000,
                        "cpu_time": 100,
                        "cpu_time_other": 10000,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.9900,
                        "cpu": -0.9900,
                        "real_time": 10000,
                        "real_time_other": 100,
                        "cpu_time": 10000,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentCPUToTime",
                "label": "",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_ThirdFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.3333,
                        "cpu": -0.3334,
                        "real_time": 100,
                        "real_time_other": 67,
                        "cpu_time": 100,
                        "cpu_time_other": 67,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_NotBadTimeUnit",
                "label": "",
                "measurements": [
                    {
                        "time": -0.9000,
                        "cpu": 0.2000,
                        "real_time": 0.4,
                        "real_time_other": 0.04,
                        "cpu_time": 0.5,
                        "cpu_time_other": 0.6,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "BM_hasLabel",
                "label": "a label",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 1,
                        "real_time_other": 1,
                        "cpu_time": 1,
                        "cpu_time_other": 1,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": 3.1622776601683826e-06,
                        "cpu_time": 3.2130844755623912e-06,
                        "real_time_other": 1.9768988699420897e-07,
                        "cpu_time_other": 2.397447755209533e-07,
                        "time": -0.8112976497120911,
                        "cpu": -0.7778551721181174,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["label"], expected["label"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test2_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            [".", "-0.5000", "-0.5000", "10", "5", "10", "5"],
            ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"],
            ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"],
            ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"],
            ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": ".",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 10,
                        "real_time_other": 5,
                        "cpu_time": 10,
                        "cpu_time_other": 5,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "./4",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 40,
                        "real_time_other": 20,
                        "cpu_time": 40,
                        "cpu_time_other": 20,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/.",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 20,
                        "real_time_other": 10,
                        "cpu_time": 20,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/./3",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 30,
                        "real_time_other": 15,
                        "cpu_time": 30,
                        "cpu_time_other": 15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 2.213363839400641e-08,
                        "cpu_time": 2.213363839400641e-08,
                        "real_time_other": 1.1066819197003185e-08,
                        "cpu_time_other": 1.1066819197003185e-08,
                        "time": -0.5000000000000009,
                        "cpu": -0.5000000000000009,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            include_aggregates_only=True,
            utest=True,
            utest_alpha=0.05,
            use_color=False,
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "time": -0.375,
                        "cpu": -0.3375,
                        "real_time": 8,
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "cpu_time_other": 53,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
    unittest.TestCase
):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "time": -0.375,
                        "real_time": 8,
                        "cpu_time_other": 53,
                        "cpu": -0.3375,
                    }
                ],
                "utest": {},
                "time_unit": "ns",
                "aggregate_name": "",
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test4_run0.json")
            testOutput2 = os.path.join(testInputs, "test4_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "whocares",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": 0.5,
                        "real_time": 0.01,
                        "real_time_other": 0.005,
                        "cpu_time": 0.10,
                        "cpu_time_other": 0.15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test4_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]

        for n in range(len(self.json["benchmarks"]) ** 2):
            random.shuffle(self.json["benchmarks"])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                "benchmarks"
            ]
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out["name"], expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2(
    unittest.TestCase
):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test5_run0.json")
            testOutput2 = os.path.join(testInputs, "test5_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
                json1["benchmarks"] = [
                    json1["benchmarks"][0] for i in range(1000)
                ]
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
                json2["benchmarks"] = [
                    json2["benchmarks"][0] for i in range(1000)
                ]
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_line = [
            "BM_ManyRepetitions_pvalue",
            "0.0000",
            "0.0000",
            "U",
            "Test,",
            "Repetitions:",
            "1000",
            "vs",
            "1000",
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        found = False
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            found = expect_line == parts
            if found:
                break
        self.assertTrue(found)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_ManyRepetitions",
                "label": "",
                "time_unit": "s",
                "run_type": "",
                "aggregate_name": "",
                "utest": {
                    "have_optimal_repetitions": True,
                    "cpu_pvalue": 0.0,
                    "time_pvalue": 0.0,
                    "nr_of_repetitions": 1000,
                    "nr_of_repetitions_other": 1000,
                },
            },
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": 1.0,
                        "cpu_time": 1000.000000000069,
                        "real_time_other": 1000.000000000069,
                        "cpu_time_other": 1.0,
                        "time": 999.000000000069,
                        "cpu": -0.9990000000000001,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs["utest"]:
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"]
        )
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"]
        )
        unittest_instance.assertEqual(
            lhs["utest"]["have_optimal_repetitions"],
            rhs["utest"]["have_optimal_repetitions"],
        )
    else:
        # lhs is empty; assert that rhs is empty too.
        unittest_instance.assertEqual(lhs["utest"], rhs["utest"])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs["measurements"], rhs["measurements"]):
        unittest_instance.assertEqual(m1["real_time"], m2["real_time"])
        unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4)
        unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4)


if __name__ == "__main__":
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;