"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:

  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    resultsA = load_results_from_single_run(singleRunInfoA, delete_empty)
    resultsB = load_results_from_single_run(singleRunInfoB, delete_empty)

    # Compare the diagnostics from run A to those from run B, obtaining a
    # ComparisonResult that classifies each report as added, removed,
    # changed, or present in both runs.
    diff = compare_results(resultsA, resultsB)
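
    # The ComparisonResult can then be inspected directly, for example
    # (a minimal sketch; 'diff' comes from the call above):
    for added in diff.present_only_in_new:
        print(added.get_readable_name())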
"""
import json
import os
import plistlib
import re
import sys

from math import log
from collections import defaultdict
from enum import Enum
from typing import (Any, DefaultDict, Dict, List, NamedTuple, Optional,
                    Sequence, Set, TextIO, TypeVar, Tuple, Union)


Number = Union[int, float]
Stats = Dict[str, Dict[str, Number]]
Plist = Dict[str, Any]
JSON = Dict[str, Any]
# Diff in the form: field -> (before, after)
JSONDiff = Dict[str, Tuple[str, str]]

# Type variable for generic helpers (e.g. compute_percentile below).
T = TypeVar('T')

STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL)


class Colors:
    """
    Colors for terminal highlighting.
    """
    RED = '\x1b[2;30;41m'
    GREEN = '\x1b[6;30;42m'
    CLEAR = '\x1b[0m'


class HistogramType(str, Enum):
    RELATIVE = "relative"
    LOG_RELATIVE = "log-relative"
    ABSOLUTE = "absolute"


class ResultsDirectory(NamedTuple):
    path: str
    root: str = ""


class SingleRunInfo:
    """
    Information about an analysis run:
    path - the analysis output directory
    root - the name of the root directory, which will be disregarded when
           determining the source file name
    """
    def __init__(self, results: ResultsDirectory,
                 verbose_log: Optional[str] = None):
        self.path = results.path
        self.root = results.root.rstrip("/\\")
        self.verbose_log = verbose_log


class AnalysisDiagnostic:
    def __init__(self, data: Plist, report: "AnalysisReport",
                 html_report: Optional[str]):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._html_report = html_report
        self._report_size = len(self._data['path'])

    def get_file_name(self) -> str:
        root = self._report.run.root
        file_name = self._report.files[self._loc['file']]

        if file_name.startswith(root) and len(root) > 0:
            return file_name[len(root) + 1:]

        return file_name

    def get_root_file_name(self) -> str:
        path = self._data['path']

        if not path:
            return self.get_file_name()

        p = path[0]
        if 'location' in p:
            file_index = p['location']['file']
        else:  # control edge
            file_index = path[0]['edges'][0]['start'][0]['file']

        out = self._report.files[file_index]
        root = self._report.run.root

        if out.startswith(root):
            return out[len(root):]

        return out

    def get_line(self) -> int:
        return self._loc['line']

    def get_column(self) -> int:
        return self._loc['col']

    def get_path_length(self) -> int:
        return self._report_size

    def get_category(self) -> str:
        return self._data['category']

    def get_description(self) -> str:
        return self._data['description']

    def get_location(self) -> str:
        return f"{self.get_file_name()}:{self.get_line()}:{self.get_column()}"

    def get_issue_identifier(self) -> str:
        id = self.get_file_name() + "+"

        if "issue_context" in self._data:
            id += self._data["issue_context"] + "+"

        if "issue_hash_content_of_line_in_context" in self._data:
            id += str(self._data["issue_hash_content_of_line_in_context"])

        return id

    def get_html_report(self) -> str:
        if self._html_report is None:
            return " "

        return os.path.join(self._report.run.path, self._html_report)

    def get_readable_name(self) -> str:
        if "issue_context" in self._data:
            funcname_postfix = "#" + self._data["issue_context"]
        else:
            funcname_postfix = ""

        root_filename = self.get_root_file_name()
        file_name = self.get_file_name()

        if root_filename != file_name:
            file_prefix = f"[{root_filename}] {file_name}"
        else:
            file_prefix = root_filename

        line = self.get_line()
        col = self.get_column()
        return f"{file_prefix}{funcname_postfix}:{line}:{col}" \
            f", {self.get_category()}: {self.get_description()}"

    KEY_FIELDS = ["check_name", "category", "description"]

    def is_similar_to(self, other: "AnalysisDiagnostic") -> bool:
        # We consider two diagnostics similar only if at least one
        # of the key fields is the same in both diagnostics.
        return len(self.get_diffs(other)) != len(self.KEY_FIELDS)
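
    # Illustrative example (hypothetical diagnostics a and b):
    #   a: check_name="X", category="Memory", description="Leak"
    #   b: check_name="Y", category="Memory", description="Leak"
    # Only one key field differs, so a.is_similar_to(b) is True; if all
    # three key fields differed, it would be False.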

    def get_diffs(self, other: "AnalysisDiagnostic") -> JSONDiff:
        return {field: (self._data[field], other._data[field])
                for field in self.KEY_FIELDS
                if self._data[field] != other._data[field]}

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def get_raw_data(self) -> Plist:
        return self._data

    def __eq__(self, other: object) -> bool:
        return hash(self) == hash(other)

    def __ne__(self, other: object) -> bool:
        return hash(self) != hash(other)

    def __hash__(self) -> int:
        return hash(self.get_issue_identifier())


class AnalysisRun:
    def __init__(self, info: SingleRunInfo):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports: List[AnalysisReport] = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics: List[AnalysisDiagnostic] = []
        self.clang_version: Optional[str] = None
        self.raw_stats: List[JSON] = []

    def get_clang_version(self) -> Optional[str]:
        return self.clang_version

    def read_single_file(self, path: str, delete_empty: bool):
        with open(path, "rb") as plist_file:
            data = plistlib.load(plist_file)

        if 'statistics' in data:
            self.raw_stats.append(json.loads(data['statistics']))
            data.pop('statistics')

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            if self.clang_version is None:
                self.clang_version = data.pop('clang_version')
            else:
                data.pop('clang_version')

        # Ignore/delete empty reports.
        if not data['files']:
            if delete_empty:
                os.remove(path)
            return

        # Extract the HTML reports, if they exist.
        if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
            htmlFiles = []
            for d in data['diagnostics']:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(data['diagnostics'])

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)


class AnalysisReport:
    def __init__(self, run: AnalysisRun, files: List[str]):
        self.run = run
        self.files = files
        self.diagnostics: List[AnalysisDiagnostic] = []


def load_results(results: ResultsDirectory, delete_empty: bool = True,
                 verbose_log: Optional[str] = None) -> AnalysisRun:
    """
    Backwards compatibility API.
    """
    return load_results_from_single_run(SingleRunInfo(results, verbose_log),
                                        delete_empty)


def load_results_from_single_run(info: SingleRunInfo,
                                 delete_empty: bool = True) -> AnalysisRun:
    """
    Load the results of an analysis from the given output folder.

    - info is the SingleRunInfo object
    - delete_empty specifies if the empty plist files should be deleted
    """
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        run.read_single_file(path, delete_empty)
    else:
        for dirpath, dirnames, filenames in os.walk(path):
            for f in filenames:
                if not f.endswith('plist'):
                    continue

                p = os.path.join(dirpath, f)
                run.read_single_file(p, delete_empty)

    return run


def cmp_analysis_diagnostic(d):
    return d.get_issue_identifier()


AnalysisDiagnosticPair = Tuple[AnalysisDiagnostic, AnalysisDiagnostic]


class ComparisonResult:
    def __init__(self):
        self.present_in_both: List[AnalysisDiagnostic] = []
        self.present_only_in_old: List[AnalysisDiagnostic] = []
        self.present_only_in_new: List[AnalysisDiagnostic] = []
        self.changed_between_new_and_old: List[AnalysisDiagnosticPair] = []

    def add_common(self, issue: AnalysisDiagnostic):
        self.present_in_both.append(issue)

    def add_removed(self, issue: AnalysisDiagnostic):
        self.present_only_in_old.append(issue)

    def add_added(self, issue: AnalysisDiagnostic):
        self.present_only_in_new.append(issue)

    def add_changed(self, old_issue: AnalysisDiagnostic,
                    new_issue: AnalysisDiagnostic):
        self.changed_between_new_and_old.append((old_issue, new_issue))


GroupedDiagnostics = DefaultDict[str, List[AnalysisDiagnostic]]


def get_grouped_diagnostics(diagnostics: List[AnalysisDiagnostic]
                            ) -> GroupedDiagnostics:
    result: GroupedDiagnostics = defaultdict(list)
    for diagnostic in diagnostics:
        result[diagnostic.get_location()].append(diagnostic)
    return result
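

# The grouping key is the "file:line:col" string from get_location(), e.g.
# (hypothetical) "lib/foo.c:42:7"; all diagnostics reported at exactly that
# location land in the same bucket.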


def compare_results(results_old: AnalysisRun, results_new: AnalysisRun,
                    histogram: Optional[HistogramType] = None
                    ) -> ComparisonResult:
    """
    compare_results - Compare the diagnostics from two analysis runs.

    The result is a ComparisonResult classifying each diagnostic as present
    in both runs, present only in the old run, present only in the new run,
    or changed between the runs.
    """

    res = ComparisonResult()

    # Path length differences, collected for the optional histogram.
    path_difference_data: List[float] = []

    diags_old = get_grouped_diagnostics(results_old.diagnostics)
    diags_new = get_grouped_diagnostics(results_new.diagnostics)

    locations_old = set(diags_old.keys())
    locations_new = set(diags_new.keys())

    common_locations = locations_old & locations_new

    for location in common_locations:
        old = diags_old[location]
        new = diags_new[location]

        # Quadratic algorithms in this part are fine because 'old' and 'new'
        # are most commonly of size 1.
        common: Set[AnalysisDiagnostic] = set()
        for a in old:
            for b in new:
                if a.get_issue_identifier() == b.get_issue_identifier():
                    a_path_len = a.get_path_length()
                    b_path_len = b.get_path_length()

                    if a_path_len != b_path_len:

                        if histogram == HistogramType.RELATIVE:
                            path_difference_data.append(
                                float(a_path_len) / b_path_len)

                        elif histogram == HistogramType.LOG_RELATIVE:
                            path_difference_data.append(
                                log(float(a_path_len) / b_path_len))

                        elif histogram == HistogramType.ABSOLUTE:
                            path_difference_data.append(
                                a_path_len - b_path_len)

                    res.add_common(b)
                    common.add(a)

        # Equality and hashing are identifier-based, so adding 'a' to
        # 'common' filters the matching 'b' out of 'new' as well.
        old = filter_issues(old, common)
        new = filter_issues(new, common)
        common = set()

        for a in old:
            for b in new:
                if a.is_similar_to(b):
                    res.add_changed(a, b)
                    common.add(a)
                    common.add(b)

        old = filter_issues(old, common)
        new = filter_issues(new, common)

        # Whatever is left in 'old' doesn't have a corresponding diagnostic
        # in 'new', so we need to mark it as 'removed'.
        for a in old:
            res.add_removed(a)

        # Whatever is left in 'new' doesn't have a corresponding diagnostic
        # in 'old', so we need to mark it as 'added'.
        for b in new:
            res.add_added(b)

    only_old_locations = locations_old - common_locations
    for location in only_old_locations:
        for a in diags_old[location]:
            # These locations have been found only in the old build, so we
            # need to mark all of them as 'removed'.
            res.add_removed(a)

    only_new_locations = locations_new - common_locations
    for location in only_new_locations:
        for b in diags_new[location]:
            # These locations have been found only in the new build, so we
            # need to mark all of them as 'added'.
            res.add_added(b)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.
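    #
    # A minimal sketch of that idea (hypothetical, not wired in): render each
    # diagnostic in a location-free normalized form and diff the sorted
    # renderings of both runs, e.g.:
    #
    #   def normalize(d: AnalysisDiagnostic) -> str:  # hypothetical helper
    #       return f"{d.get_category()}: {d.get_description()}"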

    if histogram:
        # Lazy import: matplotlib is only needed when a histogram was
        # requested.
        from matplotlib import pyplot
        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res


def filter_issues(origin: List[AnalysisDiagnostic],
                  to_remove: Set[AnalysisDiagnostic]) \
        -> List[AnalysisDiagnostic]:
    return [diag for diag in origin if diag not in to_remove]


def compute_percentile(values: Sequence[T], percentile: float) -> T:
    """
    Return the computed percentile.
    """
    return sorted(values)[int(round(percentile * len(values) + 0.5)) - 1]
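

# Worked example: for values [10, 20, 30, 40] and percentile 0.9, the index
# is int(round(0.9 * 4 + 0.5)) - 1 == 3, so the 90th %tile is 40.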


def derive_stats(results: AnalysisRun) -> Stats:
    # Assume all keys are the same in each statistics bucket.
    combined_data = defaultdict(list)

    # Collect data on path lengths.
    for report in results.reports:
        for diagnostic in report.diagnostics:
            combined_data['PathsLength'].append(diagnostic.get_path_length())

    for stat in results.raw_stats:
        for key, value in stat.items():
            combined_data[str(key)].append(value)

    combined_stats: Stats = {}

    for key, values in combined_data.items():
        combined_stats[key] = {
            "max": max(values),
            "min": min(values),
            "mean": sum(values) / len(values),
            "90th %tile": compute_percentile(values, 0.9),
            "95th %tile": compute_percentile(values, 0.95),
            "median": sorted(values)[len(values) // 2],
            "total": sum(values)
        }

    return combined_stats


# TODO: compare_results decouples comparison from the output, we should
#       do it here as well.
def compare_stats(results_old: AnalysisRun, results_new: AnalysisRun,
                  out: TextIO = sys.stdout):
    stats_old = derive_stats(results_old)
    stats_new = derive_stats(results_new)

    old_keys = set(stats_old.keys())
    new_keys = set(stats_new.keys())
    keys = sorted(old_keys & new_keys)

    for key in keys:
        out.write(f"{key}\n")

        nested_keys = sorted(set(stats_old[key]) & set(stats_new[key]))

        for nested_key in nested_keys:
            val_old = float(stats_old[key][nested_key])
            val_new = float(stats_new[key][nested_key])

            report = f"{val_old:.3f} -> {val_new:.3f}"

            # Only apply highlighting when writing to TTY and it's not Windows
            if out.isatty() and os.name != 'nt':
                if val_new != 0:
                    ratio = (val_new - val_old) / val_new
                    if ratio < -0.2:
                        report = Colors.GREEN + report + Colors.CLEAR
                    elif ratio > 0.2:
                        report = Colors.RED + report + Colors.CLEAR

            out.write(f"\t {nested_key} {report}\n")

    removed_keys = old_keys - new_keys
    if removed_keys:
        out.write(f"REMOVED statistics: {removed_keys}\n")

    added_keys = new_keys - old_keys
    if added_keys:
        out.write(f"ADDED statistics: {added_keys}\n")


def dump_scan_build_results_diff(dir_old: ResultsDirectory,
                                 dir_new: ResultsDirectory,
                                 delete_empty: bool = True,
                                 out: TextIO = sys.stdout,
                                 show_stats: bool = False,
                                 stats_only: bool = False,
                                 histogram: Optional[HistogramType] = None,
                                 verbose_log: Optional[str] = None):
    """
    Compare directories with analysis results and dump the comparison.

    :param delete_empty: delete empty plist files
    :param out: buffer to dump comparison results to.
    :param show_stats: compare execution stats as well.
    :param stats_only: compare ONLY execution stats.
    :param histogram: optional histogram type to plot path differences.
    :param verbose_log: optional path to an additional log file.
    """
    results_old = load_results(dir_old, delete_empty, verbose_log)
    results_new = load_results(dir_new, delete_empty, verbose_log)

    if show_stats or stats_only:
        compare_stats(results_old, results_new)
    if stats_only:
        return None

    # Open the verbose log, if given.
    if verbose_log:
        aux_log: Optional[TextIO] = open(verbose_log, "w")
    else:
        aux_log = None

    diff = compare_results(results_old, results_new, histogram)

    found_diffs = 0
    total_added = 0
    total_removed = 0
    total_modified = 0

    for new in diff.present_only_in_new:
        out.write(f"ADDED: {new.get_readable_name()}\n\n")
        found_diffs += 1
        total_added += 1
        if aux_log:
            aux_log.write(f"('ADDED', {new.get_readable_name()}, "
                          f"{new.get_html_report()})\n")

    for old in diff.present_only_in_old:
        out.write(f"REMOVED: {old.get_readable_name()}\n\n")
        found_diffs += 1
        total_removed += 1
        if aux_log:
            aux_log.write(f"('REMOVED', {old.get_readable_name()}, "
                          f"{old.get_html_report()})\n")

    for old, new in diff.changed_between_new_and_old:
        out.write(f"MODIFIED: {old.get_readable_name()}\n")
        found_diffs += 1
        total_modified += 1
        diffs = old.get_diffs(new)
        str_diffs = [f"          '{key}' changed: "
                     f"'{old_value}' -> '{new_value}'"
                     for key, (old_value, new_value) in diffs.items()]
        out.write(",\n".join(str_diffs) + "\n\n")
        if aux_log:
            aux_log.write(f"('MODIFIED', {old.get_readable_name()}, "
                          f"{old.get_html_report()})\n")

    total_reports = len(results_new.diagnostics)
    out.write(f"TOTAL REPORTS: {total_reports}\n")
    out.write(f"TOTAL ADDED: {total_added}\n")
    out.write(f"TOTAL REMOVED: {total_removed}\n")
    out.write(f"TOTAL MODIFIED: {total_modified}\n")

    if aux_log:
        aux_log.write(f"('TOTAL NEW REPORTS', {total_reports})\n")
        aux_log.write(f"('TOTAL DIFFERENCES', {found_diffs})\n")
        aux_log.close()

    # TODO: change to NamedTuple
    return found_diffs, len(results_old.diagnostics), \
        len(results_new.diagnostics)
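

# Example (hypothetical paths): compare two scan-build output directories,
# writing the diff to stdout:
#
#   dump_scan_build_results_diff(ResultsDirectory("/path/to/old", ""),
#                                ResultsDirectory("/path/to/new", ""))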


if __name__ == "__main__":
    print("CmpRuns.py should not be used on its own.")
    print("Please use 'SATest.py compare' instead")