1 """report.py - Utilities for reporting statistics about benchmark results
10 from scipy
.stats
import mannwhitneyu
, gmean
11 from numpy
import array
12 from pandas
import Timedelta


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor("NONE", "")
BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m")
BC_CYAN = BenchmarkColor("CYAN", "\033[96m")
BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m")
BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m")
BC_HEADER = BenchmarkColor("HEADER", "\033[92m")
BC_WARNING = BenchmarkColor("WARNING", "\033[93m")
BC_WHITE = BenchmarkColor("WHITE", "\033[97m")
BC_FAIL = BenchmarkColor("FAIL", "\033[91m")
BC_ENDC = BenchmarkColor("ENDC", "\033[0m")
BC_BOLD = BenchmarkColor("BOLD", "\033[1m")
BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m")

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the neutral BC_NONE color.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [
            arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for arg in args
        ]
        kwargs = {
            key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for key, arg in kwargs.items()
        }
    return fmt_str.format(*args, **kwargs)
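# Usage sketch (hypothetical strings): with use_color=False every BenchmarkColor
# argument collapses to BC_NONE, whose code is the empty string, so
#   color_format(False, "{}warn{endc}", BC_WARNING, endc=BC_ENDC) == "warn"
# while use_color=True keeps the ANSI escape codes in place.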


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc["name"]) > longest_name:
            longest_name = len(bc["name"])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
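# Worked examples (illustrative numbers, not from any checked-in benchmark run):
#   calculate_change(100, 110) ->  0.10  (new run is 10% slower)
#   calculate_change(100, 90)  -> -0.10  (new run is 10% faster)
#   calculate_change(0, 0)     ->  0.0   (avoids division by zero)
#   calculate_change(0, 5)     ->  2.0   (change relative to the 0/5 midpoint)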


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered["benchmarks"] = []
    for be in json_orig["benchmarks"]:
        if not regex.search(be["name"]):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench["name"] = regex.sub(replacement, filteredbench["name"])
        filtered["benchmarks"].append(filteredbench)
    return filtered
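# Usage sketch, mirroring TestReportDifferenceBetweenFamilies below: keep only
# one family per view and rename it to "." so the two views can be diffed:
#   json1 = filter_benchmark(json, "BM_Z.ro", ".")
#   json2 = filter_benchmark(json, "BM_O.e", ".")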


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    uniqued = [
        x["name"]
        for x in json["benchmarks"]
        if x["name"] not in seen and (seen.add(x["name"]) or True)
    ]
    return uniqued


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return "time_unit" in x and "real_time" in x and "cpu_time" in x


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1["benchmarks"]:
            if x["name"] == name and is_potentially_comparable_benchmark(x):
                time_unit = x["time_unit"]
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [
            x
            for x in json1["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        rhs = [
            x
            for x in json2["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        partitions.append([lhs, rhs])
    return partitions
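# Shape sketch: the result is a list of [lhs, rhs] pairs, where lhs holds every
# repetition of a given benchmark name from json1, and rhs holds the
# repetitions of the same name (with the same time_unit) from json2.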


def get_timedelta_field_as_seconds(benchmark, field_name):
    """
    Get the value of benchmark's field_name field, which is a time expressed
    in the benchmark's time_unit, converted to seconds.
    """
    time_unit = benchmark["time_unit"] if "time_unit" in benchmark else "s"
    dt = Timedelta(benchmark[field_name], time_unit)
    return dt / Timedelta(1, "s")
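# Conversion sketch: for an entry like {"real_time": 100, "time_unit": "ns"},
# get_timedelta_field_as_seconds(entry, "real_time") returns 1e-07, since
# dividing by Timedelta(1, "s") normalises the value to seconds.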


def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.
    """
    times = []
    for benchmark in json["benchmarks"]:
        if "run_type" in benchmark and benchmark["run_type"] == "aggregate":
            continue
        times.append(
            [
                get_timedelta_field_as_seconds(benchmark, "real_time"),
                get_timedelta_field_as_seconds(benchmark, "cpu_time"),
            ]
        )
    return gmean(times) if times else array([])
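# Note on the return shape: 'times' is a list of [real_seconds, cpu_seconds]
# pairs and scipy's gmean reduces along axis 0 by default, so the result is a
# two-element array [geomean_real_time, geomean_cpu_time], or an empty array
# when every benchmark was an aggregate.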


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(
        len(timings_time[0]),
        len(timings_time[1]),
        len(timings_cpu[0]),
        len(timings_cpu[1]),
    )

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative="two-sided"
    ).pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative="two-sided"
    ).pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
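# Interpretation sketch (hypothetical repetition counts): with 8 repetitions on
# each side the call returns (False, cpu_pvalue, time_pvalue), because 8 is
# below UTEST_OPTIMAL_REPETITIONS but both two-sided Mann-Whitney p-values can
# still be computed; anything below UTEST_MIN_REPETITIONS short-circuits to
# (False, None, None).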


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (
        not utest["have_optimal_repetitions"]
        and utest["cpu_pvalue"] is None
        and utest["time_pvalue"] is None
    ):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest["nr_of_repetitions"], utest["nr_of_repetitions_other"]
    )
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest["have_optimal_repetitions"]:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS
        )

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [
        color_format(
            use_color,
            special_str,
            BC_HEADER,
            "{}{}".format(bc_name, UTEST_COL_NAME),
            first_col_width,
            get_utest_color(utest["time_pvalue"]),
            utest["time_pvalue"],
            get_utest_color(utest["cpu_pvalue"]),
            utest["cpu_pvalue"],
            dsc_color,
            dsc,
            endc=BC_ENDC,
        )
    ]


def get_difference_report(json1, json2, utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]["name"]
        time_unit = partition[0][0]["time_unit"]
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append(
                {
                    "real_time": bn["real_time"],
                    "cpu_time": bn["cpu_time"],
                    "real_time_other": other_bench["real_time"],
                    "cpu_time_other": other_bench["cpu_time"],
                    "time": calculate_change(bn["real_time"], other_bench["real_time"]),
                    "cpu": calculate_change(bn["cpu_time"], other_bench["cpu_time"]),
                }
            )

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, "cpu_time")
            timings_time = extract_field(partition, "real_time")
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time
            )
            if cpu_pvalue and time_pvalue:
                utest_results = {
                    "have_optimal_repetitions": have_optimal_repetitions,
                    "cpu_pvalue": cpu_pvalue,
                    "time_pvalue": time_pvalue,
                    "nr_of_repetitions": len(timings_cpu[0]),
                    "nr_of_repetitions_other": len(timings_cpu[1]),
                }

        # Store only if we had any measurements for the given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = (
                partition[0][0]["run_type"] if "run_type" in partition[0][0] else ""
            )
            aggregate_name = (
                partition[0][0]["aggregate_name"]
                if run_type == "aggregate" and "aggregate_name" in partition[0][0]
                else ""
            )
            diff_report.append(
                {
                    "name": benchmark_name,
                    "measurements": measurements,
                    "time_unit": time_unit,
                    "run_type": run_type,
                    "aggregate_name": aggregate_name,
                    "utest": utest_results,
                }
            )

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append(
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": lhs_gmean[0],
                        "cpu_time": lhs_gmean[1],
                        "real_time_other": rhs_gmean[0],
                        "cpu_time_other": rhs_gmean[1],
                        "time": calculate_change(lhs_gmean[0], rhs_gmean[0]),
                        "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]),
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            }
        )

    return diff_report


def print_difference_report(
    json_diff_report,
    include_aggregates_only=False,
    utest=False,
    utest_alpha=0.05,
    use_color=True,
):
    """
    Print the report of differences between the benchmark runs, as produced
    by 'get_difference_report', one line per measurement.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(first_col_width, len("Benchmark"))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        "Benchmark", 12 + first_col_width
    )
    output_strs = [first_line, "-" * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (
            not include_aggregates_only
            or "run_type" not in benchmark
            or benchmark["run_type"] == "aggregate"
        ):
            for measurement in benchmark["measurements"]:
                output_strs += [
                    color_format(
                        use_color,
                        fmt_str,
                        BC_HEADER,
                        benchmark["name"],
                        first_col_width,
                        get_color(measurement["time"]),
                        measurement["time"],
                        get_color(measurement["cpu"]),
                        measurement["cpu"],
                        measurement["real_time"],
                        measurement["real_time_other"],
                        measurement["cpu_time"],
                        measurement["cpu_time_other"],
                        endc=BC_ENDC,
                    )
                ]

        # After processing the measurements, if requested and
        # if applicable (e.g. the u-test exists for the given benchmark),
        # print the u-test.
        if utest and benchmark["utest"]:
            output_strs += print_utest(
                benchmark["name"],
                benchmark["utest"],
                utest_alpha=utest_alpha,
                first_col_width=first_col_width,
                use_color=use_color,
            )

    return output_strs
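# Column sketch: the returned strings form a table with one row per measurement:
#   Benchmark    Time    CPU    Time Old    Time New    CPU Old    CPU New
# where Time/CPU are the relative changes from calculate_change and the Old/New
# columns are the raw timings of the two runs; u-test rows are appended per
# benchmark when requested.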


###############################################################################


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json

        testInputs = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "Inputs"
        )
        testOutput = os.path.join(testInputs, "test3_run0.json")
        with open(testOutput, "r") as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            "BM_One",
            "BM_Two",
            "short",  # These two are not sorted
            "medium",  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test1_run1.json")
            testOutput2 = os.path.join(testInputs, "test1_run2.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"],
            ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"],
            ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"],
            ["BM_1PercentFaster", "-0.0100", "-0.0100", "100", "99", "100", "99"],
            ["BM_1PercentSlower", "+0.0100", "+0.0100", "100", "101", "100", "101"],
            ["BM_10PercentFaster", "-0.1000", "-0.1000", "100", "90", "100", "90"],
            ["BM_10PercentSlower", "+0.1000", "+0.1000", "100", "110", "100", "110"],
            ["BM_100xSlower", "+99.0000", "+99.0000", "100", "10000", "100", "10000"],
            ["BM_100xFaster", "-0.9900", "-0.9900", "10000", "100", "10000", "100"],
            ["BM_10PercentCPUToTime", "+0.1000", "-0.1000", "100", "110", "100", "90"],
            ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"],
            ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"],
            ["OVERALL_GEOMEAN", "-0.8344", "-0.8026", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
            "name": "BM_SameTimes",
            "real_time_other": 10,
            "cpu_time_other": 10,
            "name": "BM_2xFaster",
            "real_time_other": 25,
            "cpu_time_other": 25,
            "name": "BM_2xSlower",
            "real_time_other": 100,
            "cpu_time_other": 100,
            "name": "BM_1PercentFaster",
            "real_time_other": 98.9999999,
            "cpu_time_other": 98.9999999,
            "name": "BM_1PercentSlower",
            "real_time_other": 101,
            "cpu_time_other": 101,
            "name": "BM_10PercentFaster",
            "real_time_other": 90,
            "cpu_time_other": 90,
            "name": "BM_10PercentSlower",
            "real_time_other": 110,
            "cpu_time_other": 110,
            "name": "BM_100xSlower",
            "real_time_other": 10000,
            "cpu_time_other": 10000,
            "name": "BM_100xFaster",
            "real_time_other": 100,
            "cpu_time_other": 100,
            "name": "BM_10PercentCPUToTime",
            "real_time_other": 110,
            "cpu_time_other": 90,
            "name": "BM_ThirdFaster",
            "real_time_other": 67,
            "cpu_time_other": 67,
            "name": "BM_NotBadTimeUnit",
            "real_time_other": 0.04,
            "cpu_time_other": 0.6,
            "name": "OVERALL_GEOMEAN",
            "real_time": 1.193776641714438e-06,
            "cpu_time": 1.2144445585302297e-06,
            "real_time_other": 1.9768988699420897e-07,
            "cpu_time_other": 2.397447755209533e-07,
            "time": -0.834399601997324,
            "cpu": -0.8025889499549471,
            "run_type": "aggregate",
            "aggregate_name": "geomean",
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test2_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            [".", "-0.5000", "-0.5000", "10", "5", "10", "5"],
            ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"],
            ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"],
            ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"],
            ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
            "real_time_other": 5,
            "real_time_other": 20,
            "cpu_time_other": 20,
            "real_time_other": 10,
            "cpu_time_other": 10,
            "name": "Prefix/./3",
            "real_time_other": 15,
            "cpu_time_other": 15,
            "name": "OVERALL_GEOMEAN",
            "real_time": 2.213363839400641e-08,
            "cpu_time": 2.213363839400641e-08,
            "real_time_other": 1.1066819197003185e-08,
            "cpu_time_other": 1.1066819197003185e-08,
            "time": -0.5000000000000009,
            "cpu": -0.5000000000000009,
            "run_type": "aggregate",
            "aggregate_name": "geomean",
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            include_aggregates_only=True,
            utest=True,
            utest_alpha=0.05,
            use_color=False,
        )
        output_lines = output_lines_with_header[2:]
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
            "real_time_other": 9,
            "cpu_time_other": 110,
            "time": 0.1111111111111111,
            "cpu": -0.011111111111111112,
            "real_time_other": 10,
            "cpu_time_other": 89,
            "cpu": -0.16279069767441862,
            "real_time_other": 7,
            "cpu_time_other": 72,
            "have_optimal_repetitions": False,
            "cpu_pvalue": 0.6666666666666666,
            "real_time_other": 7,
            "cpu_time_other": 75,
            "cpu": -0.13506493506493514,
            "real_time_other": 4.54,
            "cpu_time_other": 66.6,
            "have_optimal_repetitions": False,
            "time_pvalue": 0.7670968684102772,
            "real_time_other": 5,
            "cpu_time_other": 53,
            "name": "OVERALL_GEOMEAN",
            "real_time": 8.48528137423858e-09,
            "cpu_time": 8.441336246629233e-08,
            "real_time_other": 2.2405267593145244e-08,
            "cpu_time_other": 2.5453661413660466e-08,
            "time": 1.6404861082353634,
            "cpu": -0.6984640740519662,
            "run_type": "aggregate",
            "aggregate_name": "geomean",
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
            "real_time_other": 9,
            "cpu_time_other": 110,
            "time": 0.1111111111111111,
            "cpu": -0.011111111111111112,
            "real_time_other": 10,
            "cpu_time_other": 89,
            "cpu": -0.16279069767441862,
            "real_time_other": 7,
            "cpu_time_other": 72,
            "have_optimal_repetitions": False,
            "cpu_pvalue": 0.6666666666666666,
            "real_time_other": 7,
            "cpu_time_other": 75,
            "cpu": -0.13506493506493514,
            "real_time_other": 4.54,
            "cpu_time_other": 66.6,
            "have_optimal_repetitions": False,
            "time_pvalue": 0.7670968684102772,
            "real_time_other": 5,
            "cpu_time_other": 53,
            "aggregate_name": "",
            "name": "OVERALL_GEOMEAN",
            "real_time": 8.48528137423858e-09,
            "cpu_time": 8.441336246629233e-08,
            "real_time_other": 2.2405267593145244e-08,
            "cpu_time_other": 2.5453661413660466e-08,
            "time": 1.6404861082353634,
            "cpu": -0.6984640740519662,
            "run_type": "aggregate",
            "aggregate_name": "geomean",
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test4_run0.json")
            testOutput2 = os.path.join(testInputs, "test4_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
            "real_time_other": 0.005,
            "cpu_time_other": 0.15,
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test4_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]
        for n in range(len(self.json["benchmarks"]) ** 2):
            random.shuffle(self.json["benchmarks"])
            sorted_benchmarks = util.sort_benchmark_results(self.json)["benchmarks"]
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out["name"], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs["utest"]:
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"]
        )
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"]
        )
        unittest_instance.assertEqual(
            lhs["utest"]["have_optimal_repetitions"],
            rhs["utest"]["have_optimal_repetitions"],
        )
    else:
        # lhs is empty. assert if rhs is not.
        unittest_instance.assertEqual(lhs["utest"], rhs["utest"])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs["measurements"], rhs["measurements"]):
        unittest_instance.assertEqual(m1["real_time"], m2["real_time"])
        unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4)
        unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4)


if __name__ == "__main__":
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;