CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:

  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    resultsA = load_results_from_single_run(singleRunInfoA, delete_empty)
    resultsB = load_results_from_single_run(singleRunInfoB, delete_empty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compare_results(resultsA, resultsB)
35 from collections
import defaultdict
# Type aliases used throughout this module.
# A scalar statistic value as parsed from the analyzer's JSON stats blob.
Number = Union[int, float]
# Statistic name -> {metric name ("max", "mean", ...) -> value}.
Stats = Dict[str, Dict[str, Number]]
# One deserialized .plist analyzer report.
Plist = Dict[str, Any]

# Diff in a form: field -> (before, after)
JSONDiff = Dict[str, Tuple[str, str]]
# Matches the JSON statistics blob embedded in analyzer output, e.g.
# "Statistics: {...}".  DOTALL lets ".+" span newlines inside the braces;
# the single capture group is the braced JSON text.
STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL)
class Colors:
    """
    Color for terminal highlight.

    ANSI escape sequences used by compare_stats() to highlight improved
    (GREEN) and regressed (RED) statistics when writing to a TTY.
    """

    # NOTE(review): the RED/CLEAR definitions were elided in the source under
    # review; they are reconstructed from their uses in compare_stats —
    # confirm the exact escape codes against history.
    RED = "\x1b[2;30;41m"
    GREEN = "\x1b[6;30;42m"
    CLEAR = "\x1b[0m"  # reset all attributes
class HistogramType(str, Enum):
    """
    How path-length differences are binned when plotting a histogram in
    compare_results().  Inherits from str so values compare equal to their
    command-line spellings.
    """

    # NOTE(review): RELATIVE and ABSOLUTE were elided in the source under
    # review; they are reconstructed from their uses in compare_results
    # (HistogramType.RELATIVE / .ABSOLUTE) — confirm the string values.
    RELATIVE = "relative"
    LOG_RELATIVE = "log-relative"
    ABSOLUTE = "absolute"
class ResultsDirectory(NamedTuple):
    """
    A directory of analyzer output plus the source-root prefix to strip from
    file paths (see SingleRunInfo, which reads .path and .root).
    """

    # NOTE(review): the field lines were elided in the source under review;
    # reconstructed from SingleRunInfo.__init__, which reads results.path and
    # results.root — confirm the default for root.
    path: str
    root: str = ""
class SingleRunInfo:
    """
    Information about analysis run:
    path - the analysis output directory
    root - the name of the root directory, which will be disregarded when
    determining the source file name
    """

    # NOTE(review): the "class SingleRunInfo:" statement itself was elided in
    # the source under review; the name is grounded by its uses in
    # load_results and load_results_from_single_run.
    def __init__(self, results: "ResultsDirectory", verbose_log: Optional[str] = None):
        self.path = results.path
        # Normalize the root: drop any trailing path separators so prefix
        # matching in AnalysisDiagnostic.get_file_name works predictably.
        self.root = results.root.rstrip("/\\")
        self.verbose_log = verbose_log
class AnalysisDiagnostic:
    """
    A single diagnostic loaded from a plist report, together with the report
    it came from and (optionally) the path of its rendered HTML report.

    Identity (hashing/equality) is based on get_issue_identifier(), i.e. the
    file name plus issue context plus the content-based issue hash.
    """

    def __init__(
        self, data: "Plist", report: "AnalysisReport", html_report: Optional[str]
    ):
        self._data = data
        self._loc = self._data["location"]
        self._report = report
        self._html_report = html_report
        # Path length == number of path events; used as a complexity metric.
        self._report_size = len(self._data["path"])

    def get_file_name(self) -> str:
        """Return the diagnostic's file name relative to the run's root."""
        root = self._report.run.root
        file_name = self._report.files[self._loc["file"]]

        if file_name.startswith(root) and len(root) > 0:
            # +1 also drops the path separator following the root prefix.
            return file_name[len(root) + 1 :]

        return file_name

    def get_root_file_name(self) -> str:
        """
        Return the name of the file in which the path begins (which may
        differ from get_file_name() when the path crosses headers).
        """
        path = self._data["path"]

        if not path:
            return self.get_file_name()

        p = path[0]
        if "location" in p:
            file_index = p["location"]["file"]
        else:  # control edge
            file_index = path[0]["edges"][0]["start"][0]["file"]

        out = self._report.files[file_index]
        root = self._report.run.root

        if out.startswith(root):
            return out[len(root) :]

        return out

    def get_line(self) -> int:
        return self._loc["line"]

    def get_column(self) -> int:
        return self._loc["col"]

    def get_path_length(self) -> int:
        return self._report_size

    def get_category(self) -> str:
        return self._data["category"]

    def get_description(self) -> str:
        return self._data["description"]

    def get_location(self) -> str:
        """Return a 'file:line:col' string used for grouping diagnostics."""
        return f"{self.get_file_name()}:{self.get_line()}:{self.get_column()}"

    def get_issue_identifier(self) -> str:
        """Return a stable, content-based identifier for this issue."""
        # Renamed from "id" to avoid shadowing the builtin.
        identifier = self.get_file_name() + "+"

        if "issue_context" in self._data:
            identifier += self._data["issue_context"] + "+"

        if "issue_hash_content_of_line_in_context" in self._data:
            identifier += str(self._data["issue_hash_content_of_line_in_context"])

        return identifier

    def get_html_report(self) -> str:
        if self._html_report is None:
            # NOTE(review): this fallback string was elided in the source
            # under review; reconstructed — confirm against history.
            return "not available"

        return os.path.join(self._report.run.path, self._html_report)

    def get_readable_name(self) -> str:
        """Return a human-readable one-line summary of the diagnostic."""
        if "issue_context" in self._data:
            funcname_postfix = "#" + self._data["issue_context"]
        else:
            funcname_postfix = ""

        root_filename = self.get_root_file_name()
        file_name = self.get_file_name()

        # Show both files when the path starts in a different file than the
        # one the diagnostic is reported in (e.g. header vs. source).
        if root_filename != file_name:
            file_prefix = f"[{root_filename}] {file_name}"
        else:
            file_prefix = root_filename

        line = self.get_line()
        col = self.get_column()

        return (
            f"{file_prefix}{funcname_postfix}:{line}:{col}"
            f", {self.get_category()}: {self.get_description()}"
        )

    # Fields compared by get_diffs()/is_similar_to().
    KEY_FIELDS = ["check_name", "category", "description"]

    def is_similar_to(self, other: "AnalysisDiagnostic") -> bool:
        # We consider two diagnostics similar only if at least one
        # of the key fields is the same in both diagnostics.
        return len(self.get_diffs(other)) != len(self.KEY_FIELDS)

    def get_diffs(self, other: "AnalysisDiagnostic") -> "JSONDiff":
        """Return {field: (self value, other value)} for differing key fields."""
        return {
            field: (self._data[field], other._data[field])
            for field in self.KEY_FIELDS
            if self._data[field] != other._data[field]
        }

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def get_raw_data(self) -> "Plist":
        return self._data

    def __eq__(self, other: object) -> bool:
        return hash(self) == hash(other)

    def __ne__(self, other: object) -> bool:
        return hash(self) != hash(other)

    def __hash__(self) -> int:
        return hash(self.get_issue_identifier())
class AnalysisRun:
    """
    All reports (plist files) produced by one analyzer run, plus the clang
    version and raw statistics collected while reading them.

    NOTE(review): the "class AnalysisRun:" statement itself was elided in the
    source under review; the name is grounded by its uses in AnalysisReport
    and load_results_from_single_run.
    """

    def __init__(self, info: "SingleRunInfo"):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports: List[AnalysisReport] = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics: List[AnalysisDiagnostic] = []
        self.clang_version: Optional[str] = None
        self.raw_stats: List[JSON] = []

    def get_clang_version(self) -> Optional[str]:
        return self.clang_version

    def read_single_file(self, path: str, delete_empty: bool):
        """
        Parse one plist report at *path* and fold its diagnostics, HTML
        report paths, statistics and clang version into this run.  When
        *delete_empty* is true, plists with no files are removed from disk.
        """
        with open(path, "rb") as plist_file:
            data = plistlib.load(plist_file)

        if "statistics" in data:
            self.raw_stats.append(json.loads(data["statistics"]))
            data.pop("statistics")

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if "clang_version" in data:
            if self.clang_version is None:
                self.clang_version = data.pop("clang_version")
            else:
                data.pop("clang_version")

        # Ignore/delete empty reports.
        if not data["files"]:
            if delete_empty:
                os.remove(path)
            return

        # Extract the HTML reports, if they exist.
        htmlFiles = []
        for d in data["diagnostics"]:
            if "HTMLDiagnostics_files" in d:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d["HTMLDiagnostics_files"]) == 1
                htmlFiles.append(d.pop("HTMLDiagnostics_files")[0])
            else:
                htmlFiles.append(None)

        report = AnalysisReport(self, data.pop("files"))
        # Python 3.10 offers zip(..., strict=True). The following assertion
        # mimics it until then.
        assert len(data["diagnostics"]) == len(htmlFiles)
        diagnostics = [
            AnalysisDiagnostic(d, report, h)
            for d, h in zip(data.pop("diagnostics"), htmlFiles)
        ]

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
class AnalysisReport:
    """
    One plist report: the run it belongs to, the file table diagnostics index
    into, and the diagnostics parsed from it.
    """

    def __init__(self, run: "AnalysisRun", files: List[str]):
        # NOTE(review): the run/files assignments were elided in the source
        # under review; reconstructed from reads of report.run/report.files
        # in AnalysisDiagnostic.
        self.run = run
        self.files = files
        self.diagnostics: List[AnalysisDiagnostic] = []
def load_results(
    results: ResultsDirectory,
    delete_empty: bool = True,
    verbose_log: Optional[str] = None,
) -> AnalysisRun:
    """
    Backwards compatibility API.

    Wraps *results* and *verbose_log* in a SingleRunInfo and delegates to
    load_results_from_single_run.
    """
    return load_results_from_single_run(
        SingleRunInfo(results, verbose_log), delete_empty
    )
def load_results_from_single_run(
    info: SingleRunInfo, delete_empty: bool = True
) -> AnalysisRun:
    # Load results of the analyses from a given output folder.
    # - info is the SingleRunInfo object
    # - delete_empty specifies if the empty plist files should be deleted
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        # A single plist file was given directly.
        run.read_single_file(path, delete_empty)
    else:
        # Walk the output directory and read every plist file found.
        for dirpath, dirnames, filenames in os.walk(path):
            for f in filenames:
                if not f.endswith("plist"):
                    continue
                p = os.path.join(dirpath, f)
                run.read_single_file(p, delete_empty)

    return run
def cmp_analysis_diagnostic(d: AnalysisDiagnostic) -> str:
    # Sort/compare key for diagnostics: their stable issue identifier.
    return d.get_issue_identifier()
# An (old, new) pairing of matching diagnostics from the two compared runs.
AnalysisDiagnosticPair = Tuple[AnalysisDiagnostic, AnalysisDiagnostic]
class ComparisonResult:
    """
    Accumulates the outcome of comparing two runs: diagnostics present in
    both, only in the old run, only in the new run, and (old, new) pairs
    that changed between the runs.
    """

    def __init__(self):
        self.present_in_both: List[AnalysisDiagnostic] = []
        self.present_only_in_old: List[AnalysisDiagnostic] = []
        self.present_only_in_new: List[AnalysisDiagnostic] = []
        self.changed_between_new_and_old: List[AnalysisDiagnosticPair] = []

    def add_common(self, issue: "AnalysisDiagnostic"):
        self.present_in_both.append(issue)

    def add_removed(self, issue: "AnalysisDiagnostic"):
        self.present_only_in_old.append(issue)

    def add_added(self, issue: "AnalysisDiagnostic"):
        self.present_only_in_new.append(issue)

    def add_changed(
        self, old_issue: "AnalysisDiagnostic", new_issue: "AnalysisDiagnostic"
    ):
        self.changed_between_new_and_old.append((old_issue, new_issue))
# Diagnostics bucketed by their "file:line:col" location string
# (see AnalysisDiagnostic.get_location).
GroupedDiagnostics = DefaultDict[str, List[AnalysisDiagnostic]]
def get_grouped_diagnostics(
    diagnostics: "List[AnalysisDiagnostic]",
) -> "GroupedDiagnostics":
    """Group *diagnostics* by their 'file:line:col' location string."""
    result: GroupedDiagnostics = defaultdict(list)
    for diagnostic in diagnostics:
        result[diagnostic.get_location()].append(diagnostic)
    # The trailing return was elided in the source under review; it is
    # required by the annotated return type and by callers that use the map.
    return result
def compare_results(
    results_old: AnalysisRun,
    results_new: AnalysisRun,
    histogram: Optional[HistogramType] = None,
) -> ComparisonResult:
    """
    compare_results - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of pairs (a, b) where
    each element {a,b} is None or a matching element from the respective run
    """

    res = ComparisonResult()

    # Map size_before -> size_after
    path_difference_data: List[float] = []

    diags_old = get_grouped_diagnostics(results_old.diagnostics)
    diags_new = get_grouped_diagnostics(results_new.diagnostics)

    locations_old = set(diags_old.keys())
    locations_new = set(diags_new.keys())

    common_locations = locations_old & locations_new

    # NOTE(review): much of the loop structure below was elided in the source
    # under review and is reconstructed from the visible fragments — confirm
    # against history.
    for location in common_locations:
        old = diags_old[location]
        new = diags_new[location]

        # Quadratic algorithms in this part are fine because 'old' and 'new'
        # are most commonly of size 1.
        common: Set[AnalysisDiagnostic] = set()
        for a in old:
            for b in new:
                if a.get_issue_identifier() == b.get_issue_identifier():
                    a_path_len = a.get_path_length()
                    b_path_len = b.get_path_length()

                    if a_path_len != b_path_len:

                        if histogram == HistogramType.RELATIVE:
                            path_difference_data.append(float(a_path_len) / b_path_len)

                        elif histogram == HistogramType.LOG_RELATIVE:
                            path_difference_data.append(
                                log(float(a_path_len) / b_path_len)
                            )

                        elif histogram == HistogramType.ABSOLUTE:
                            path_difference_data.append(a_path_len - b_path_len)

                    res.add_common(b)
                    common.add(a)

        old = filter_issues(old, common)
        new = filter_issues(new, common)
        common = set()

        # Remaining diagnostics at the same location with matching key
        # fields are considered "changed" rather than added/removed.
        for a in old:
            for b in new:
                if a.is_similar_to(b):
                    res.add_changed(a, b)
                    common.add(a)
                    common.add(b)

        old = filter_issues(old, common)
        new = filter_issues(new, common)

        # Whatever is left in 'old' doesn't have a corresponding diagnostic
        # in 'new', so we need to mark it as 'removed'.
        for a in old:
            res.add_removed(a)

        # Whatever is left in 'new' doesn't have a corresponding diagnostic
        # in 'old', so we need to mark it as 'added'.
        for b in new:
            res.add_added(b)

    only_old_locations = locations_old - common_locations
    for location in only_old_locations:
        for a in diags_old[location]:
            # These locations have been found only in the old build, so we
            # need to mark all of them as 'removed'
            res.add_removed(a)

    only_new_locations = locations_new - common_locations
    for location in only_new_locations:
        for b in diags_new[location]:
            # These locations have been found only in the new build, so we
            # need to mark all of them as 'added'
            res.add_added(b)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    if histogram:
        from matplotlib import pyplot

        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res
def filter_issues(
    origin: "List[AnalysisDiagnostic]", to_remove: "Set[AnalysisDiagnostic]"
) -> "List[AnalysisDiagnostic]":
    """Return *origin* without the diagnostics contained in *to_remove*."""
    return [diag for diag in origin if diag not in to_remove]
def compute_percentile(values: Sequence[T], percentile: float) -> T:
    """
    Return computed percentile.

    Uses the nearest-rank method: the result is always an element of
    *values* (element round(percentile * n + 0.5) - 1 of the sorted
    sequence).  Assumes *values* is non-empty and 0 < percentile <= 1.
    """
    return sorted(values)[int(round(percentile * len(values) + 0.5)) - 1]
def derive_stats(results: "AnalysisRun") -> "Stats":
    """
    Aggregate the raw statistics of *results* (plus diagnostic path lengths)
    into summary metrics per statistic key.
    """
    # Assume all keys are the same in each statistics bucket.
    combined_data = defaultdict(list)

    # Collect data on paths length.
    for report in results.reports:
        for diagnostic in report.diagnostics:
            combined_data["PathsLength"].append(diagnostic.get_path_length())

    for stat in results.raw_stats:
        for key, value in stat.items():
            combined_data[str(key)].append(value)

    combined_stats: Stats = {}

    for key, values in combined_data.items():
        combined_stats[key] = {
            # NOTE(review): the "max"/"min" entries were elided in the source
            # under review and are reconstructed here — confirm.
            "max": max(values),
            "min": min(values),
            "mean": sum(values) / len(values),
            "90th %tile": compute_percentile(values, 0.9),
            "95th %tile": compute_percentile(values, 0.95),
            "median": sorted(values)[len(values) // 2],
            "total": sum(values),
        }

    return combined_stats
# TODO: compare_results decouples comparison from the output, we should
#       do the same here.
def compare_stats(
    results_old: "AnalysisRun", results_new: "AnalysisRun", out: TextIO = sys.stdout
):
    """
    Write a comparison of the derived statistics of two runs to *out*,
    highlighting significant changes when *out* is a TTY (non-Windows).
    """
    stats_old = derive_stats(results_old)
    stats_new = derive_stats(results_new)

    old_keys = set(stats_old.keys())
    new_keys = set(stats_new.keys())
    # Only compare statistics present in both runs.
    keys = sorted(old_keys & new_keys)

    for key in keys:
        out.write(f"{key}\n")

        nested_keys = sorted(set(stats_old[key]) & set(stats_new[key]))

        for nested_key in nested_keys:
            val_old = float(stats_old[key][nested_key])
            val_new = float(stats_new[key][nested_key])

            report = f"{val_old:.3f} -> {val_new:.3f}"

            # Only apply highlighting when writing to TTY and it's not Windows
            if out.isatty() and os.name != "nt":
                if val_new != 0:
                    # NOTE(review): the zero-guard and the +/-20% thresholds
                    # were elided in the source under review; reconstructed —
                    # confirm against history.
                    ratio = (val_new - val_old) / val_new
                    if ratio < -0.2:
                        report = Colors.GREEN + report + Colors.CLEAR
                    elif ratio > 0.2:
                        report = Colors.RED + report + Colors.CLEAR

            out.write(f"\t {nested_key} {report}\n")

    removed_keys = old_keys - new_keys
    if removed_keys:
        out.write(f"REMOVED statistics: {removed_keys}\n")

    added_keys = new_keys - old_keys
    if added_keys:
        out.write(f"ADDED statistics: {added_keys}\n")
def dump_scan_build_results_diff(
    dir_old: ResultsDirectory,
    dir_new: ResultsDirectory,
    delete_empty: bool = True,
    out: TextIO = sys.stdout,
    show_stats: bool = False,
    stats_only: bool = False,
    histogram: Optional[HistogramType] = None,
    verbose_log: Optional[str] = None,
):
    """
    Compare directories with analysis results and dump results.

    :param dir_old: directory with old analysis results.
    :param dir_new: directory with new analysis results.
    :param delete_empty: delete empty plist files
    :param out: buffer to dump comparison results to.
    :param show_stats: compare execution stats as well.
    :param stats_only: compare ONLY execution stats.
    :param histogram: optional histogram type to plot path differences.
    :param verbose_log: optional path to an additional log file.
    """
    results_old = load_results(dir_old, delete_empty, verbose_log)
    results_new = load_results(dir_new, delete_empty, verbose_log)

    if show_stats or stats_only:
        compare_stats(results_old, results_new)
    if stats_only:
        return None

    # Open the verbose log, if given.
    if verbose_log:
        aux_log: Optional[TextIO] = open(verbose_log, "w")
    else:
        aux_log = None

    diff = compare_results(results_old, results_new, histogram)
    found_diffs = 0
    total_added = 0
    total_removed = 0
    total_modified = 0

    # NOTE(review): parts of the bookkeeping below were elided in the source
    # under review and are reconstructed — confirm against history.
    for new in diff.present_only_in_new:
        out.write(f"ADDED: {new.get_readable_name()}\n\n")
        found_diffs += 1
        total_added += 1
        if aux_log:
            aux_log.write(
                f"('ADDED', {new.get_readable_name()}, " f"{new.get_html_report()})\n"
            )

    for old in diff.present_only_in_old:
        out.write(f"REMOVED: {old.get_readable_name()}\n\n")
        found_diffs += 1
        total_removed += 1
        if aux_log:
            aux_log.write(
                f"('REMOVED', {old.get_readable_name()}, " f"{old.get_html_report()})\n"
            )

    for old, new in diff.changed_between_new_and_old:
        out.write(f"MODIFIED: {old.get_readable_name()}\n")
        found_diffs += 1
        total_modified += 1
        diffs = old.get_diffs(new)
        str_diffs = [
            f" '{key}' changed: " f"'{old_value}' -> '{new_value}'"
            for key, (old_value, new_value) in diffs.items()
        ]
        out.write(",\n".join(str_diffs) + "\n\n")
        if aux_log:
            aux_log.write(
                f"('MODIFIED', {old.get_readable_name()}, "
                f"{old.get_html_report()})\n"
            )

    total_reports = len(results_new.diagnostics)
    out.write(f"TOTAL REPORTS: {total_reports}\n")
    out.write(f"TOTAL ADDED: {total_added}\n")
    out.write(f"TOTAL REMOVED: {total_removed}\n")
    out.write(f"TOTAL MODIFIED: {total_modified}\n")

    if aux_log:
        aux_log.write(f"('TOTAL NEW REPORTS', {total_reports})\n")
        aux_log.write(f"('TOTAL DIFFERENCES', {found_diffs})\n")
        # Close explicitly so the log is flushed even without a context
        # manager (the handle is conditional, so `with` is awkward here).
        aux_log.close()

    # TODO: change to NamedTuple
    return found_diffs, len(results_old.diagnostics), len(results_new.diagnostics)
# This module is a library consumed by SATest.py; guard against direct use.
if __name__ == "__main__":
    print("CmpRuns.py should not be used on its own.")
    print("Please use 'SATest.py compare' instead")