CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:

  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    resultsA = load_results_from_single_run(singleRunInfoA, delete_empty)
    resultsB = load_results_from_single_run(singleRunInfoB, delete_empty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compare_results(resultsA, resultsB)
35 from collections
import defaultdict
# Type aliases used throughout this module.
# A scalar statistic value as parsed from the analyzer's JSON stats blob.
Number = Union[int, float]
# Statistic name -> {metric name ("max", "mean", ...) -> value}.
Stats = Dict[str, Dict[str, Number]]
# One deserialized .plist analyzer report.
Plist = Dict[str, Any]

# Diff in a form: field -> (before, after)
JSONDiff = Dict[str, Tuple[str, str]]
# Matches the JSON statistics blob embedded in analyzer output, e.g.
# "Statistics: {...}".  DOTALL lets ".+" span newlines inside the braces;
# the single capture group is the braced JSON text.
STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL)
class Colors:
    """
    Color for terminal highlight.

    ANSI escape sequences used by compare_stats() to highlight improved
    (GREEN) and regressed (RED) statistics when writing to a TTY.
    """

    # NOTE(review): the RED/CLEAR definitions were elided in the source under
    # review; they are reconstructed from their uses in compare_stats —
    # confirm the exact escape codes against history.
    RED = "\x1b[2;30;41m"
    GREEN = "\x1b[6;30;42m"
    CLEAR = "\x1b[0m"  # reset all attributes
class HistogramType(str, Enum):
    """
    How path-length differences are binned when plotting a histogram in
    compare_results().  Inherits from str so values compare equal to their
    command-line spellings.
    """

    # NOTE(review): RELATIVE and ABSOLUTE were elided in the source under
    # review; they are reconstructed from their uses in compare_results
    # (HistogramType.RELATIVE / .ABSOLUTE) — confirm the string values.
    RELATIVE = "relative"
    LOG_RELATIVE = "log-relative"
    ABSOLUTE = "absolute"
class ResultsDirectory(NamedTuple):
    """
    A directory of analyzer output plus the source-root prefix to strip from
    file paths (see SingleRunInfo, which reads .path and .root).
    """

    # NOTE(review): the field lines were elided in the source under review;
    # reconstructed from SingleRunInfo.__init__, which reads results.path and
    # results.root — confirm the default for root.
    path: str
    root: str = ""
class SingleRunInfo:
    """
    Information about analysis run:
    path - the analysis output directory
    root - the name of the root directory, which will be disregarded when
    determining the source file name
    """

    # NOTE(review): the "class SingleRunInfo:" statement itself was elided in
    # the source under review; the name is grounded by its uses in
    # load_results and load_results_from_single_run.
    def __init__(self, results: "ResultsDirectory", verbose_log: Optional[str] = None):
        self.path = results.path
        # Normalize the root: drop any trailing path separators so prefix
        # matching in AnalysisDiagnostic.get_file_name works predictably.
        self.root = results.root.rstrip("/\\")
        self.verbose_log = verbose_log
class AnalysisDiagnostic:
    """
    A single diagnostic loaded from a plist report, together with the report
    it came from and (optionally) the path of its rendered HTML report.

    Identity (hashing/equality) is based on get_issue_identifier(), i.e. the
    file name plus issue context plus the content-based issue hash.
    """

    def __init__(
        self, data: "Plist", report: "AnalysisReport", html_report: Optional[str]
    ):
        self._data = data
        self._loc = self._data["location"]
        self._report = report
        self._html_report = html_report
        # Path length == number of path events; used as a complexity metric.
        self._report_size = len(self._data["path"])

    def get_file_name(self) -> str:
        """Return the diagnostic's file name relative to the run's root."""
        root = self._report.run.root
        file_name = self._report.files[self._loc["file"]]

        if file_name.startswith(root) and len(root) > 0:
            # +1 also drops the path separator following the root prefix.
            return file_name[len(root) + 1 :]

        return file_name

    def get_root_file_name(self) -> str:
        """
        Return the name of the file in which the path begins (which may
        differ from get_file_name() when the path crosses headers).
        """
        path = self._data["path"]

        if not path:
            return self.get_file_name()

        p = path[0]
        if "location" in p:
            file_index = p["location"]["file"]
        else:  # control edge
            file_index = path[0]["edges"][0]["start"][0]["file"]

        out = self._report.files[file_index]
        root = self._report.run.root

        if out.startswith(root):
            return out[len(root) :]

        return out

    def get_line(self) -> int:
        return self._loc["line"]

    def get_column(self) -> int:
        return self._loc["col"]

    def get_path_length(self) -> int:
        return self._report_size

    def get_category(self) -> str:
        return self._data["category"]

    def get_description(self) -> str:
        return self._data["description"]

    def get_location(self) -> str:
        """Return a 'file:line:col' string used for grouping diagnostics."""
        return f"{self.get_file_name()}:{self.get_line()}:{self.get_column()}"

    def get_issue_identifier(self) -> str:
        """Return a stable, content-based identifier for this issue."""
        # Renamed from "id" to avoid shadowing the builtin.
        identifier = self.get_file_name() + "+"

        if "issue_context" in self._data:
            identifier += self._data["issue_context"] + "+"

        if "issue_hash_content_of_line_in_context" in self._data:
            identifier += str(self._data["issue_hash_content_of_line_in_context"])

        return identifier

    def get_html_report(self) -> str:
        if self._html_report is None:
            # NOTE(review): this fallback string was elided in the source
            # under review; reconstructed — confirm against history.
            return "not available"

        return os.path.join(self._report.run.path, self._html_report)

    def get_readable_name(self) -> str:
        """Return a human-readable one-line summary of the diagnostic."""
        if "issue_context" in self._data:
            funcname_postfix = "#" + self._data["issue_context"]
        else:
            funcname_postfix = ""

        root_filename = self.get_root_file_name()
        file_name = self.get_file_name()

        # Show both files when the path starts in a different file than the
        # one the diagnostic is reported in (e.g. header vs. source).
        if root_filename != file_name:
            file_prefix = f"[{root_filename}] {file_name}"
        else:
            file_prefix = root_filename

        line = self.get_line()
        col = self.get_column()

        return (
            f"{file_prefix}{funcname_postfix}:{line}:{col}"
            f", {self.get_category()}: {self.get_description()}"
        )

    # Fields compared by get_diffs()/is_similar_to().
    KEY_FIELDS = ["check_name", "category", "description"]

    def is_similar_to(self, other: "AnalysisDiagnostic") -> bool:
        # We consider two diagnostics similar only if at least one
        # of the key fields is the same in both diagnostics.
        return len(self.get_diffs(other)) != len(self.KEY_FIELDS)

    def get_diffs(self, other: "AnalysisDiagnostic") -> "JSONDiff":
        """Return {field: (self value, other value)} for differing key fields."""
        return {
            field: (self._data[field], other._data[field])
            for field in self.KEY_FIELDS
            if self._data[field] != other._data[field]
        }

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def get_raw_data(self) -> "Plist":
        return self._data

    def __eq__(self, other: object) -> bool:
        return hash(self) == hash(other)

    def __ne__(self, other: object) -> bool:
        return hash(self) != hash(other)

    def __hash__(self) -> int:
        return hash(self.get_issue_identifier())
class AnalysisRun:
    """
    All reports (plist files) produced by one analyzer run, plus the clang
    version and raw statistics collected while reading them.

    NOTE(review): the "class AnalysisRun:" statement itself was elided in the
    source under review; the name is grounded by its uses in AnalysisReport
    and load_results_from_single_run.
    """

    def __init__(self, info: "SingleRunInfo"):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports: List[AnalysisReport] = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics: List[AnalysisDiagnostic] = []
        self.clang_version: Optional[str] = None
        self.raw_stats: List[JSON] = []

    def get_clang_version(self) -> Optional[str]:
        return self.clang_version

    def read_single_file(self, path: str, delete_empty: bool):
        """
        Parse one plist report at *path* and fold its diagnostics, HTML
        report paths, statistics and clang version into this run.  When
        *delete_empty* is true, plists with no files are removed from disk.
        """
        with open(path, "rb") as plist_file:
            data = plistlib.load(plist_file)

        if "statistics" in data:
            self.raw_stats.append(json.loads(data["statistics"]))
            data.pop("statistics")

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if "clang_version" in data:
            if self.clang_version is None:
                self.clang_version = data.pop("clang_version")
            else:
                data.pop("clang_version")

        # Ignore/delete empty reports.
        if not data["files"]:
            if delete_empty:
                os.remove(path)
            return

        # Extract the HTML reports, if they exist.
        htmlFiles = []
        for d in data["diagnostics"]:
            if "HTMLDiagnostics_files" in d:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d["HTMLDiagnostics_files"]) == 1
                htmlFiles.append(d.pop("HTMLDiagnostics_files")[0])
            else:
                htmlFiles.append(None)

        report = AnalysisReport(self, data.pop("files"))
        # Python 3.10 offers zip(..., strict=True). The following assertion
        # mimics it until then.
        assert len(data["diagnostics"]) == len(htmlFiles)
        diagnostics = [
            AnalysisDiagnostic(d, report, h)
            for d, h in zip(data.pop("diagnostics"), htmlFiles)
        ]

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
class AnalysisReport:
    """
    One plist report: the run it belongs to, the file table diagnostics index
    into, and the diagnostics parsed from it.
    """

    def __init__(self, run: "AnalysisRun", files: List[str]):
        # NOTE(review): the run/files assignments were elided in the source
        # under review; reconstructed from reads of report.run/report.files
        # in AnalysisDiagnostic.
        self.run = run
        self.files = files
        self.diagnostics: List[AnalysisDiagnostic] = []
def load_results(
    results: ResultsDirectory,
    delete_empty: bool = True,
    verbose_log: Optional[str] = None,
) -> AnalysisRun:
    """
    Backwards compatibility API.

    Wraps *results* and *verbose_log* in a SingleRunInfo and delegates to
    load_results_from_single_run.
    """
    return load_results_from_single_run(
        SingleRunInfo(results, verbose_log), delete_empty
    )
def load_results_from_single_run(
    info: SingleRunInfo, delete_empty: bool = True
) -> AnalysisRun:
    # Load results of the analyses from a given output folder.
    # - info is the SingleRunInfo object
    # - delete_empty specifies if the empty plist files should be deleted
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        # A single plist file was given directly.
        run.read_single_file(path, delete_empty)
    else:
        # Walk the output directory and read every plist file found.
        for dirpath, dirnames, filenames in os.walk(path):
            for f in filenames:
                if not f.endswith("plist"):
                    continue
                p = os.path.join(dirpath, f)
                run.read_single_file(p, delete_empty)

    return run
def cmp_analysis_diagnostic(d: AnalysisDiagnostic) -> str:
    # Sort/compare key for diagnostics: their stable issue identifier.
    return d.get_issue_identifier()
# An (old, new) pairing of matching diagnostics from the two compared runs.
AnalysisDiagnosticPair = Tuple[AnalysisDiagnostic, AnalysisDiagnostic]
class ComparisonResult:
    """
    Accumulates the outcome of comparing two runs: diagnostics present in
    both, only in the old run, only in the new run, and (old, new) pairs
    that changed between the runs.
    """

    def __init__(self):
        self.present_in_both: List[AnalysisDiagnostic] = []
        self.present_only_in_old: List[AnalysisDiagnostic] = []
        self.present_only_in_new: List[AnalysisDiagnostic] = []
        self.changed_between_new_and_old: List[AnalysisDiagnosticPair] = []

    def add_common(self, issue: "AnalysisDiagnostic"):
        self.present_in_both.append(issue)

    def add_removed(self, issue: "AnalysisDiagnostic"):
        self.present_only_in_old.append(issue)

    def add_added(self, issue: "AnalysisDiagnostic"):
        self.present_only_in_new.append(issue)

    def add_changed(
        self, old_issue: "AnalysisDiagnostic", new_issue: "AnalysisDiagnostic"
    ):
        self.changed_between_new_and_old.append((old_issue, new_issue))
# Diagnostics bucketed by their "file:line:col" location string
# (see AnalysisDiagnostic.get_location).
GroupedDiagnostics = DefaultDict[str, List[AnalysisDiagnostic]]
def get_grouped_diagnostics(
    diagnostics: "List[AnalysisDiagnostic]",
) -> "GroupedDiagnostics":
    """Group *diagnostics* by their 'file:line:col' location string."""
    result: GroupedDiagnostics = defaultdict(list)
    for diagnostic in diagnostics:
        result[diagnostic.get_location()].append(diagnostic)
    # The trailing return was elided in the source under review; it is
    # required by the annotated return type and by callers that use the map.
    return result
def compare_results(
    results_old: AnalysisRun,
    results_new: AnalysisRun,
    histogram: Optional[HistogramType] = None,
) -> ComparisonResult:
    """
    compare_results - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of pairs (a, b) where
    each element {a,b} is None or a matching element from the respective run
    """

    res = ComparisonResult()

    # Map size_before -> size_after
    path_difference_data: List[float] = []

    diags_old = get_grouped_diagnostics(results_old.diagnostics)
    diags_new = get_grouped_diagnostics(results_new.diagnostics)

    locations_old = set(diags_old.keys())
    locations_new = set(diags_new.keys())

    common_locations = locations_old & locations_new

    # NOTE(review): much of the loop structure below was elided in the source
    # under review and is reconstructed from the visible fragments — confirm
    # against history.
    for location in common_locations:
        old = diags_old[location]
        new = diags_new[location]

        # Quadratic algorithms in this part are fine because 'old' and 'new'
        # are most commonly of size 1.
        common: Set[AnalysisDiagnostic] = set()
        for a in old:
            for b in new:
                if a.get_issue_identifier() == b.get_issue_identifier():
                    a_path_len = a.get_path_length()
                    b_path_len = b.get_path_length()

                    if a_path_len != b_path_len:

                        if histogram == HistogramType.RELATIVE:
                            path_difference_data.append(float(a_path_len) / b_path_len)

                        elif histogram == HistogramType.LOG_RELATIVE:
                            path_difference_data.append(
                                log(float(a_path_len) / b_path_len)
                            )

                        elif histogram == HistogramType.ABSOLUTE:
                            path_difference_data.append(a_path_len - b_path_len)

                    res.add_common(b)
                    common.add(a)

        old = filter_issues(old, common)
        new = filter_issues(new, common)
        common = set()

        # Remaining diagnostics at the same location with matching key
        # fields are considered "changed" rather than added/removed.
        for a in old:
            for b in new:
                if a.is_similar_to(b):
                    res.add_changed(a, b)
                    common.add(a)
                    common.add(b)

        old = filter_issues(old, common)
        new = filter_issues(new, common)

        # Whatever is left in 'old' doesn't have a corresponding diagnostic
        # in 'new', so we need to mark it as 'removed'.
        for a in old:
            res.add_removed(a)

        # Whatever is left in 'new' doesn't have a corresponding diagnostic
        # in 'old', so we need to mark it as 'added'.
        for b in new:
            res.add_added(b)

    only_old_locations = locations_old - common_locations
    for location in only_old_locations:
        for a in diags_old[location]:
            # These locations have been found only in the old build, so we
            # need to mark all of them as 'removed'
            res.add_removed(a)

    only_new_locations = locations_new - common_locations
    for location in only_new_locations:
        for b in diags_new[location]:
            # These locations have been found only in the new build, so we
            # need to mark all of them as 'added'
            res.add_added(b)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    if histogram:
        from matplotlib import pyplot

        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res
def filter_issues(
    origin: "List[AnalysisDiagnostic]", to_remove: "Set[AnalysisDiagnostic]"
) -> "List[AnalysisDiagnostic]":
    """Return *origin* without the diagnostics contained in *to_remove*."""
    return [diag for diag in origin if diag not in to_remove]
def compute_percentile(values: Sequence[T], percentile: float) -> T:
    """
    Return computed percentile.

    Uses the nearest-rank method: the result is always an element of
    *values* (element round(percentile * n + 0.5) - 1 of the sorted
    sequence).  Assumes *values* is non-empty and 0 < percentile <= 1.
    """
    return sorted(values)[int(round(percentile * len(values) + 0.5)) - 1]
def derive_stats(results: "AnalysisRun") -> "Stats":
    """
    Aggregate the raw statistics of *results* (plus diagnostic path lengths)
    into summary metrics per statistic key.
    """
    # Assume all keys are the same in each statistics bucket.
    combined_data = defaultdict(list)

    # Collect data on paths length.
    for report in results.reports:
        for diagnostic in report.diagnostics:
            combined_data["PathsLength"].append(diagnostic.get_path_length())

    for stat in results.raw_stats:
        for key, value in stat.items():
            combined_data[str(key)].append(value)

    combined_stats: Stats = {}

    for key, values in combined_data.items():
        combined_stats[key] = {
            # NOTE(review): the "max"/"min" entries were elided in the source
            # under review and are reconstructed here — confirm.
            "max": max(values),
            "min": min(values),
            "mean": sum(values) / len(values),
            "90th %tile": compute_percentile(values, 0.9),
            "95th %tile": compute_percentile(values, 0.95),
            "median": sorted(values)[len(values) // 2],
            "total": sum(values),
        }

    return combined_stats
# TODO: compare_results decouples comparison from the output, we should
#       do the same here.
def compare_stats(
    results_old: "AnalysisRun", results_new: "AnalysisRun", out: TextIO = sys.stdout
):
    """
    Write a comparison of the derived statistics of two runs to *out*,
    highlighting significant changes when *out* is a TTY (non-Windows).
    """
    stats_old = derive_stats(results_old)
    stats_new = derive_stats(results_new)

    old_keys = set(stats_old.keys())
    new_keys = set(stats_new.keys())
    # Only compare statistics present in both runs.
    keys = sorted(old_keys & new_keys)

    for key in keys:
        out.write(f"{key}\n")

        nested_keys = sorted(set(stats_old[key]) & set(stats_new[key]))

        for nested_key in nested_keys:
            val_old = float(stats_old[key][nested_key])
            val_new = float(stats_new[key][nested_key])

            report = f"{val_old:.3f} -> {val_new:.3f}"

            # Only apply highlighting when writing to TTY and it's not Windows
            if out.isatty() and os.name != "nt":
                if val_new != 0:
                    # NOTE(review): the zero-guard and the +/-20% thresholds
                    # were elided in the source under review; reconstructed —
                    # confirm against history.
                    ratio = (val_new - val_old) / val_new
                    if ratio < -0.2:
                        report = Colors.GREEN + report + Colors.CLEAR
                    elif ratio > 0.2:
                        report = Colors.RED + report + Colors.CLEAR

            out.write(f"\t {nested_key} {report}\n")

    removed_keys = old_keys - new_keys
    if removed_keys:
        out.write(f"REMOVED statistics: {removed_keys}\n")

    added_keys = new_keys - old_keys
    if added_keys:
        out.write(f"ADDED statistics: {added_keys}\n")
def dump_scan_build_results_diff(
    dir_old: ResultsDirectory,
    dir_new: ResultsDirectory,
    delete_empty: bool = True,
    out: TextIO = sys.stdout,
    show_stats: bool = False,
    stats_only: bool = False,
    histogram: Optional[HistogramType] = None,
    verbose_log: Optional[str] = None,
):
    """
    Compare directories with analysis results and dump results.

    :param dir_old: directory with old analysis results.
    :param dir_new: directory with new analysis results.
    :param delete_empty: delete empty plist files
    :param out: buffer to dump comparison results to.
    :param show_stats: compare execution stats as well.
    :param stats_only: compare ONLY execution stats.
    :param histogram: optional histogram type to plot path differences.
    :param verbose_log: optional path to an additional log file.
    """
    results_old = load_results(dir_old, delete_empty, verbose_log)
    results_new = load_results(dir_new, delete_empty, verbose_log)

    if show_stats or stats_only:
        compare_stats(results_old, results_new)
    if stats_only:
        return None

    # Open the verbose log, if given.
    if verbose_log:
        aux_log: Optional[TextIO] = open(verbose_log, "w")
    else:
        aux_log = None

    diff = compare_results(results_old, results_new, histogram)
    found_diffs = 0
    total_added = 0
    total_removed = 0
    total_modified = 0

    # NOTE(review): parts of the bookkeeping below were elided in the source
    # under review and are reconstructed — confirm against history.
    for new in diff.present_only_in_new:
        out.write(f"ADDED: {new.get_readable_name()}\n\n")
        found_diffs += 1
        total_added += 1
        if aux_log:
            aux_log.write(
                f"('ADDED', {new.get_readable_name()}, " f"{new.get_html_report()})\n"
            )

    for old in diff.present_only_in_old:
        out.write(f"REMOVED: {old.get_readable_name()}\n\n")
        found_diffs += 1
        total_removed += 1
        if aux_log:
            aux_log.write(
                f"('REMOVED', {old.get_readable_name()}, " f"{old.get_html_report()})\n"
            )

    for old, new in diff.changed_between_new_and_old:
        out.write(f"MODIFIED: {old.get_readable_name()}\n")
        found_diffs += 1
        total_modified += 1
        diffs = old.get_diffs(new)
        str_diffs = [
            f" '{key}' changed: " f"'{old_value}' -> '{new_value}'"
            for key, (old_value, new_value) in diffs.items()
        ]
        out.write(",\n".join(str_diffs) + "\n\n")
        if aux_log:
            aux_log.write(
                f"('MODIFIED', {old.get_readable_name()}, "
                f"{old.get_html_report()})\n"
            )

    total_reports = len(results_new.diagnostics)
    out.write(f"TOTAL REPORTS: {total_reports}\n")
    out.write(f"TOTAL ADDED: {total_added}\n")
    out.write(f"TOTAL REMOVED: {total_removed}\n")
    out.write(f"TOTAL MODIFIED: {total_modified}\n")

    if aux_log:
        aux_log.write(f"('TOTAL NEW REPORTS', {total_reports})\n")
        aux_log.write(f"('TOTAL DIFFERENCES', {found_diffs})\n")
        # Close explicitly so the log is flushed even without a context
        # manager (the handle is conditional, so `with` is awkward here).
        aux_log.close()

    # TODO: change to NamedTuple
    return found_diffs, len(results_old.diagnostics), len(results_new.diagnostics)
# This module is a library consumed by SATest.py; guard against direct use.
if __name__ == "__main__":
    print("CmpRuns.py should not be used on its own.")
    print("Please use 'SATest.py compare' instead")