[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / utils / analyzer / SATestBuild.py
blobbc86ed8b64e0e9b1f7d5fcb886caa89f94a8502d
1 #!/usr/bin/env python
3 """
4 Static Analyzer qualification infrastructure.
6 The goal is to test the analyzer against different projects,
7 check for failures, compare results, and measure performance.
9 Repository Directory will contain sources of the projects as well as the
10 information on how to build them and the expected output.
11 Repository Directory structure:
12 - ProjectMap file
13 - Historical Performance Data
14 - Project Dir1
15 - ReferenceOutput
16 - Project Dir2
17 - ReferenceOutput
19 Note that the build tree must be inside the project dir.
21 To test the build of the analyzer one would:
22 - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
23 the build directory does not pollute the repository to min network
24 traffic).
25 - Build all projects, until error. Produce logs to report errors.
26 - Compare results.
28 The files which should be kept around for failure investigations:
29 RepositoryCopy/Project DirI/ScanBuildResults
30 RepositoryCopy/Project DirI/run_static_analyzer.log
32 Assumptions (TODO: shouldn't need to assume these.):
33 The script is being run from the Repository Directory.
The compiler used by scan-build, and scan-build itself, are in the PATH.
35 export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
37 For more logging, set the env variables:
38 zaks:TI zaks$ export CCC_ANALYZER_LOG=1
39 zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
41 The list of checkers tested are hardcoded in the Checkers variable.
42 For testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
43 variable. It should contain a comma separated list.
44 """
45 import CmpRuns
46 import SATestUtils as utils
47 from ProjectMap import DownloadType, ProjectInfo
49 import glob
50 import logging
51 import math
52 import multiprocessing
53 import os
54 import plistlib
55 import shutil
56 import sys
57 import threading
58 import time
59 import zipfile
61 from queue import Queue
63 # mypy has problems finding InvalidFileException in the module
# and this is how we can shush that false positive
65 from plistlib import InvalidFileException # type:ignore
66 from subprocess import CalledProcessError, check_call
67 from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
70 ###############################################################################
71 # Helper functions.
72 ###############################################################################
class StreamToLogger:
    """File-like adapter that forwards every write to a logging.Logger."""

    def __init__(self, logger: logging.Logger, log_level: int = logging.INFO):
        self.logger = logger
        self.log_level = log_level

    def write(self, message: str):
        """Emit *message* through the logger at the configured level.

        Trailing whitespace is stripped so the logger's own newline does
        not produce a blank line.
        """
        self.logger.log(self.log_level, message.rstrip())

    def flush(self):
        """No-op: log records are emitted eagerly, nothing is buffered."""
        pass

    def fileno(self) -> int:
        """Dummy descriptor for callers that probe for a real stream."""
        return 0
91 LOCAL = threading.local()
def init_logger(name: str):
    """Create the logger *name* and route this thread's stdout/stderr
    through it via the thread-local LOCAL adapters."""
    # TODO: use debug levels for VERBOSE messages
    log = logging.getLogger(name)
    log.setLevel(logging.DEBUG)
    LOCAL.stdout = StreamToLogger(log, logging.INFO)
    LOCAL.stderr = StreamToLogger(log, logging.ERROR)
102 init_logger("main")
def stderr(message: str):
    """Write *message* to the current thread's error-level logger stream."""
    LOCAL.stderr.write(message)
def stdout(message: str):
    """Write *message* to the current thread's info-level logger stream."""
    LOCAL.stdout.write(message)
113 logging.basicConfig(format="%(asctime)s:%(levelname)s:%(name)s: %(message)s")
116 ###############################################################################
117 # Configuration setup.
118 ###############################################################################
# Find Clang for static analysis.
# Prefer an explicitly specified compiler via the CC environment variable,
# otherwise fall back to whatever 'clang' is found on PATH.
if "CC" in os.environ:
    cc_candidate: Optional[str] = os.environ["CC"]
else:
    cc_candidate = utils.which("clang", os.environ["PATH"])
if not cc_candidate:
    stderr("Error: cannot find 'clang' in PATH")
    sys.exit(1)

# The analyzer binary used for the whole run.
CLANG = cc_candidate
# Number of parallel jobs: use roughly 3/4 of the available cores.
MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))

# Names of the project specific scripts.
# The script that downloads the project.
DOWNLOAD_SCRIPT = "download_project.sh"
# The script that needs to be executed before the build can start.
CLEANUP_SCRIPT = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BUILD_SCRIPT = "run_static_analyzer.cmd"

# A comment in a build script which disables wrapping.
NO_PREFIX_CMD = "#NOPREFIX"

# The log file name.
LOG_DIR_NAME = "Logs"
BUILD_LOG_NAME = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
NUM_OF_FAILURES_IN_SUMMARY = 10

# The scan-build result directory.
OUTPUT_DIR_NAME = "ScanBuildResults"
REF_PREFIX = "Ref"

# The name of the directory storing the cached project source. If this
# directory does not exist, the download script will be executed.
# That script should create the "CachedSource" directory and download the
# project source into it.
CACHED_SOURCE_DIR_NAME = "CachedSource"

# The name of the directory containing the source code that will be analyzed.
# Each time a project is analyzed, a fresh copy of its CachedSource directory
# will be copied to the PatchedSource directory and then the local patches
# in PATCHFILE_NAME will be applied (if PATCHFILE_NAME exists).
PATCHED_SOURCE_DIR_NAME = "PatchedSource"

# The name of the patchfile specifying any changes that should be applied
# to the CachedSource before analyzing.
PATCHFILE_NAME = "changes_for_analyzer.patch"

# The list of checkers used during analyzes.
# Currently, consists of all the non-experimental checkers, plus a few alpha
# checkers we don't want to regress on.
CHECKERS = ",".join(
    [
        "alpha.unix.SimpleStream",
        "alpha.security.taint",
        "cplusplus.NewDeleteLeaks",
        "core",
        "cplusplus",
        "deadcode",
        "security",
        "unix",
        "osx",
        "nullability",
    ]
)

# Verbosity level for this run (0 = quiet); bumped by the CLI driver.
VERBOSE = 0
194 ###############################################################################
195 # Test harness logic.
196 ###############################################################################
def run_cleanup_script(directory: str, build_log_file: IO):
    """
    Run the project's pre-processing (cleanup) script, if any.

    :param directory: project directory that contains the script.
    :param build_log_file: open file that the script's output is written to.
    """
    # The script runs from inside the patched source tree.
    cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
    script_path = os.path.join(directory, CLEANUP_SCRIPT)

    utils.run_script(
        script_path,
        build_log_file,
        cwd,
        out=LOCAL.stdout,
        err=LOCAL.stderr,
        verbose=VERBOSE,
    )
class TestInfo(NamedTuple):
    """
    Information about a project and settings for its analysis.
    """

    # The project under test; remaining fields tune how it is analyzed.
    project: ProjectInfo
    override_compiler: bool = False
    extra_analyzer_config: str = ""
    extra_checkers: str = ""
    is_reference_build: bool = False
    strictness: int = 0
# typing package doesn't have a separate type for Queue, but has a generic stub
# We still want to have a type-safe checked project queue, for this reason,
# we specify generic type for mypy.
#
# It is a common workaround for this situation:
# https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
if TYPE_CHECKING:
    TestQueue = Queue[TestInfo]  # this is only processed by mypy
else:
    TestQueue = Queue  # this will be executed at runtime
class RegressionTester:
    """
    A component aggregating all of the project testing.
    """

    def __init__(
        self,
        jobs: int,
        projects: List[ProjectInfo],
        override_compiler: bool,
        extra_analyzer_config: str,
        extra_checkers: str,
        regenerate: bool,
        strictness: bool,
    ):
        self.jobs = jobs
        self.projects = projects
        self.override_compiler = override_compiler
        self.extra_analyzer_config = extra_analyzer_config
        self.extra_checkers = extra_checkers
        self.regenerate = regenerate
        self.strictness = strictness

    def test_all(self) -> bool:
        """Test every project; return True when all of them pass."""
        projects_to_test: List[TestInfo] = []

        # Test the projects.
        for project in self.projects:
            projects_to_test.append(
                TestInfo(
                    project,
                    self.override_compiler,
                    self.extra_analyzer_config,
                    self.extra_checkers,
                    self.regenerate,
                    self.strictness,
                )
            )
        if self.jobs <= 1:
            return self._single_threaded_test_all(projects_to_test)
        else:
            return self._multi_threaded_test_all(projects_to_test)

    def _single_threaded_test_all(self, projects_to_test: List[TestInfo]) -> bool:
        """
        Run all projects.
        :return: whether tests have passed.
        """
        success = True
        for project_info in projects_to_test:
            tester = ProjectTester(project_info)
            success &= tester.test()
        return success

    def _multi_threaded_test_all(self, projects_to_test: List[TestInfo]) -> bool:
        """
        Run each project in a separate thread.

        This is OK despite GIL, as testing is blocked
        on launching external processes.

        :return: whether tests have passed.
        """
        tasks_queue = TestQueue()

        for project_info in projects_to_test:
            tasks_queue.put(project_info)

        results_differ = threading.Event()
        failure_flag = threading.Event()

        for _ in range(self.jobs):
            T = TestProjectThread(tasks_queue, results_differ, failure_flag)
            T.start()

        # Required to handle Ctrl-C gracefully.
        while tasks_queue.unfinished_tasks:
            time.sleep(0.1)  # Seconds.
            if failure_flag.is_set():
                stderr("Test runner crashed\n")
                sys.exit(1)
        return not results_differ.is_set()
class ProjectTester:
    """
    A component aggregating testing for one project.
    """

    def __init__(self, test_info: TestInfo, silent: bool = False):
        self.project = test_info.project
        self.override_compiler = test_info.override_compiler
        self.extra_analyzer_config = test_info.extra_analyzer_config
        self.extra_checkers = test_info.extra_checkers
        self.is_reference_build = test_info.is_reference_build
        self.strictness = test_info.strictness
        self.silent = silent

    def test(self) -> bool:
        """
        Test a given project.
        :return tests_passed: Whether tests have passed according
        to the :param strictness: criteria.
        """
        if not self.project.enabled:
            self.out(f" \n\n--- Skipping disabled project {self.project.name}\n")
            return True

        self.out(f" \n\n--- Building project {self.project.name}\n")

        start_time = time.time()

        project_dir = self.get_project_dir()
        self.vout(f" Build directory: {project_dir}.\n")

        # Set the build results directory.
        output_dir = self.get_output_dir()

        self.build(project_dir, output_dir)
        check_build(output_dir)

        if self.is_reference_build:
            cleanup_reference_results(output_dir)
            passed = True
        else:
            passed = run_cmp_results(project_dir, self.strictness)

        self.out(
            f"Completed tests for project {self.project.name} "
            f"(time: {time.time() - start_time:.2f}).\n"
        )

        return passed

    def get_project_dir(self) -> str:
        """Absolute path of this project's directory under the current dir."""
        return os.path.join(os.path.abspath(os.curdir), self.project.name)

    def get_output_dir(self) -> str:
        """Path of the scan-build results directory for this run."""
        if self.is_reference_build:
            dirname = REF_PREFIX + OUTPUT_DIR_NAME
        else:
            dirname = OUTPUT_DIR_NAME

        return os.path.join(self.get_project_dir(), dirname)

    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
        """Build and analyze the project; return (build_time, peak_memory)."""
        build_log_path = get_build_log_path(output_dir)

        self.out(f"Log file: {build_log_path}\n")
        self.out(f"Output directory: {output_dir}\n")

        remove_log_file(output_dir)

        # Clean up scan build results.
        if os.path.exists(output_dir):
            self.vout(f" Removing old results: {output_dir}\n")

            shutil.rmtree(output_dir)

        assert not os.path.exists(output_dir)
        os.makedirs(os.path.join(output_dir, LOG_DIR_NAME))

        # Build and analyze the project.
        with open(build_log_path, "w+") as build_log_file:
            if self.project.mode == 1:
                self._download_and_patch(directory, build_log_file)
                run_cleanup_script(directory, build_log_file)
                build_time, memory = self.scan_build(
                    directory, output_dir, build_log_file
                )
            else:
                build_time, memory = self.analyze_preprocessed(directory, output_dir)

            if self.is_reference_build:
                run_cleanup_script(directory, build_log_file)
                normalize_reference_results(directory, output_dir, self.project.mode)

        self.out(
            f"Build complete (time: {utils.time_to_str(build_time)}, "
            f"peak memory: {utils.memory_to_str(memory)}). "
            f"See the log for more details: {build_log_path}\n"
        )

        return build_time, memory

    def scan_build(
        self, directory: str, output_dir: str, build_log_file: IO
    ) -> Tuple[float, int]:
        """
        Build the project with scan-build by reading in the commands and
        prefixing them with the scan-build options.
        """
        build_script_path = os.path.join(directory, BUILD_SCRIPT)
        if not os.path.exists(build_script_path):
            stderr(f"Error: build script is not defined: " f"{build_script_path}\n")
            sys.exit(1)

        all_checkers = CHECKERS
        if "SA_ADDITIONAL_CHECKERS" in os.environ:
            all_checkers = all_checkers + "," + os.environ["SA_ADDITIONAL_CHECKERS"]
        if self.extra_checkers != "":
            all_checkers += "," + self.extra_checkers

        # Run scan-build from within the patched source directory.
        cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        options = f"--use-analyzer '{CLANG}' "
        options += f"-plist-html -o '{output_dir}' "
        options += f"-enable-checker {all_checkers} "
        options += "--keep-empty "
        options += f"-analyzer-config '{self.generate_config()}' "

        if self.override_compiler:
            options += "--override-compiler "

        extra_env: Dict[str, str] = {}

        execution_time = 0.0
        peak_memory = 0

        try:
            # NOTE: use a context manager so the script file is closed even
            # when a command in it fails (the original leaked the handle).
            with open(build_script_path, "r") as command_file:
                command_prefix = "scan-build " + options + " "

                for command in command_file:
                    command = command.strip()

                    if len(command) == 0:
                        continue

                    # Custom analyzer invocation specified by project.
                    # Communicate required information using environment variables
                    # instead.
                    if command == NO_PREFIX_CMD:
                        command_prefix = ""
                        extra_env["OUTPUT"] = output_dir
                        extra_env["CC"] = CLANG
                        extra_env["ANALYZER_CONFIG"] = self.generate_config()
                        continue

                    if command.startswith("#"):
                        continue

                    # If using 'make', auto imply a -jX argument
                    # to speed up analysis. xcodebuild will
                    # automatically use the maximum number of cores.
                    if (
                        command.startswith("make ") or command == "make"
                    ) and "-j" not in command:
                        command += f" -j{MAX_JOBS}"

                    command_to_run = command_prefix + command

                    self.vout(f" Executing: {command_to_run}\n")

                    time, mem = utils.check_and_measure_call(
                        command_to_run,
                        cwd=cwd,
                        stderr=build_log_file,
                        stdout=build_log_file,
                        env=dict(os.environ, **extra_env),
                        shell=True,
                    )

                    execution_time += time
                    peak_memory = max(peak_memory, mem)

        except CalledProcessError:
            stderr("Error: scan-build failed. Its output was: \n")
            build_log_file.seek(0)
            shutil.copyfileobj(build_log_file, LOCAL.stderr)
            sys.exit(1)

        return execution_time, peak_memory

    def analyze_preprocessed(
        self, directory: str, output_dir: str
    ) -> Tuple[float, int]:
        """
        Run analysis on a set of preprocessed files.
        """
        if os.path.exists(os.path.join(directory, BUILD_SCRIPT)):
            stderr(
                f"Error: The preprocessed files project "
                f"should not contain {BUILD_SCRIPT}\n"
            )
            raise Exception()

        prefix = CLANG + " --analyze "

        prefix += "--analyzer-output plist "
        prefix += " -Xclang -analyzer-checker=" + CHECKERS
        prefix += " -fcxx-exceptions -fblocks "
        prefix += " -Xclang -analyzer-config "
        prefix += f"-Xclang {self.generate_config()} "

        if self.project.mode == 2:
            prefix += "-std=c++11 "

        plist_path = os.path.join(directory, output_dir, "date")
        fail_path = os.path.join(plist_path, "failures")
        os.makedirs(fail_path)

        execution_time = 0.0
        peak_memory = 0

        for full_file_name in glob.glob(directory + "/*"):
            file_name = os.path.basename(full_file_name)
            failed = False

            # Only run the analyzes on supported files.
            if utils.has_no_extension(file_name):
                continue
            if not utils.is_valid_single_input_file(file_name):
                stderr(f"Error: Invalid single input file {full_file_name}.\n")
                raise Exception()

            # Build and call the analyzer command.
            plist_basename = os.path.join(plist_path, file_name)
            output_option = f"-o '{plist_basename}.plist' "
            command = f"{prefix}{output_option}'{file_name}'"

            log_path = os.path.join(fail_path, file_name + ".stderr.txt")
            with open(log_path, "w+") as log_file:
                try:
                    self.vout(f" Executing: {command}\n")

                    time, mem = utils.check_and_measure_call(
                        command,
                        cwd=directory,
                        stderr=log_file,
                        stdout=log_file,
                        shell=True,
                    )

                    execution_time += time
                    peak_memory = max(peak_memory, mem)

                except CalledProcessError as e:
                    stderr(
                        f"Error: Analyzes of {full_file_name} failed. "
                        f"See {log_file.name} for details. "
                        f"Error code {e.returncode}.\n"
                    )
                    failed = True

                # If command did not fail, erase the log file.
                if not failed:
                    os.remove(log_file.name)

        return execution_time, peak_memory

    def generate_config(self) -> str:
        """Build the -analyzer-config option value for this run."""
        out = "serialize-stats=true,stable-report-filename=true"

        if self.extra_analyzer_config:
            out += "," + self.extra_analyzer_config

        return out

    def _download_and_patch(self, directory: str, build_log_file: IO):
        """
        Download the project and apply the local patchfile if it exists.
        """
        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

        # If we don't already have the cached source, run the project's
        # download script to download it.
        if not os.path.exists(cached_source):
            self._download(directory, build_log_file)
            if not os.path.exists(cached_source):
                stderr(f"Error: '{cached_source}' not found after download.\n")
                exit(1)

        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        # Remove potentially stale patched source.
        if os.path.exists(patched_source):
            shutil.rmtree(patched_source)

        # Copy the cached source and apply any patches to the copy.
        shutil.copytree(cached_source, patched_source, symlinks=True)
        self._apply_patch(directory, build_log_file)

    def _download(self, directory: str, build_log_file: IO):
        """
        Run the script to download the project, if it exists.
        """
        if self.project.source == DownloadType.GIT:
            self._download_from_git(directory, build_log_file)
        elif self.project.source == DownloadType.ZIP:
            self._unpack_zip(directory, build_log_file)
        elif self.project.source == DownloadType.SCRIPT:
            self._run_download_script(directory, build_log_file)
        else:
            raise ValueError(
                f"Unknown source type '{self.project.source}' is found "
                f"for the '{self.project.name}' project"
            )

    def _download_from_git(self, directory: str, build_log_file: IO):
        """Clone the project repo and pin it to the configured commit."""
        repo = self.project.origin
        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

        check_call(
            f"git clone --recursive {repo} {cached_source}",
            cwd=directory,
            stderr=build_log_file,
            stdout=build_log_file,
            shell=True,
        )
        check_call(
            f"git checkout --quiet {self.project.commit}",
            cwd=cached_source,
            stderr=build_log_file,
            stdout=build_log_file,
            shell=True,
        )

    def _unpack_zip(self, directory: str, build_log_file: IO):
        """Extract the project's single zip archive into the cache dir."""
        zip_files = list(glob.glob(directory + "/*.zip"))

        if len(zip_files) == 0:
            raise ValueError(
                f"Couldn't find any zip files to unpack for the "
                f"'{self.project.name}' project"
            )

        if len(zip_files) > 1:
            raise ValueError(
                f"Couldn't decide which of the zip files ({zip_files}) "
                f"for the '{self.project.name}' project to unpack"
            )

        with zipfile.ZipFile(zip_files[0], "r") as zip_file:
            zip_file.extractall(os.path.join(directory, CACHED_SOURCE_DIR_NAME))

    @staticmethod
    def _run_download_script(directory: str, build_log_file: IO):
        """Run the project-provided download script."""
        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
        utils.run_script(
            script_path,
            build_log_file,
            directory,
            out=LOCAL.stdout,
            err=LOCAL.stderr,
            verbose=VERBOSE,
        )

    def _apply_patch(self, directory: str, build_log_file: IO):
        """Apply the project's local patchfile to the patched source copy."""
        patchfile_path = os.path.join(directory, PATCHFILE_NAME)
        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        if not os.path.exists(patchfile_path):
            self.out(" No local patches.\n")
            return

        self.out(" Applying patch.\n")
        try:
            check_call(
                f"patch -p1 < '{patchfile_path}'",
                cwd=patched_source,
                stderr=build_log_file,
                stdout=build_log_file,
                shell=True,
            )
        except CalledProcessError:
            stderr(f"Error: Patch failed. " f"See {build_log_file.name} for details.\n")
            sys.exit(1)

    def out(self, what: str):
        """Print to the per-thread stdout unless silent mode is on."""
        if not self.silent:
            stdout(what)

    def vout(self, what: str):
        """Print only when the module-level VERBOSE level is at least 1."""
        if VERBOSE >= 1:
            self.out(what)
class TestProjectThread(threading.Thread):
    def __init__(
        self,
        tasks_queue: TestQueue,
        results_differ: threading.Event,
        failure_flag: threading.Event,
    ):
        """
        :param results_differ: Used to signify that results differ from
        the canonical ones.
        :param failure_flag: Used to signify a failure during the run.
        """
        self.tasks_queue = tasks_queue
        self.results_differ = results_differ
        self.failure_flag = failure_flag
        super().__init__()

        # Needed to gracefully handle interrupts with Ctrl-C
        self.daemon = True

    def run(self):
        """Drain the task queue, testing one project at a time."""
        while not self.tasks_queue.empty():
            try:
                test_info = self.tasks_queue.get()
                init_logger(test_info.project.name)

                tester = ProjectTester(test_info)
                if not tester.test():
                    self.results_differ.set()

                self.tasks_queue.task_done()

            except BaseException:
                # Signal the main thread before propagating, so it can
                # abort the whole run instead of waiting forever.
                self.failure_flag.set()
                raise
758 ###############################################################################
759 # Utility functions.
760 ###############################################################################
def check_build(output_dir: str):
    """
    Given the scan-build output directory, checks if the build failed
    (by searching for the failures directories). If there are failures, it
    creates a summary file in the output directory.
    """
    # Check if there are failures.
    failures = glob.glob(output_dir + "/*/failures/*.stderr.txt")
    total_failed = len(failures)

    if total_failed == 0:
        clean_up_empty_plists(output_dir)
        clean_up_empty_folders(output_dir)

        plists = glob.glob(output_dir + "/*/*.plist")
        stdout(
            f"Number of bug reports "
            f"(non-empty plist files) produced: {len(plists)}\n"
        )
        return

    stderr("Error: analysis failed.\n")
    stderr(f"Total of {total_failed} failures discovered.\n")

    if total_failed > NUM_OF_FAILURES_IN_SUMMARY:
        stderr(f"See the first {NUM_OF_FAILURES_IN_SUMMARY} below.\n")

    for index, failed_log_path in enumerate(failures, start=1):
        # NOTE: '>' (not '>=') so that exactly NUM_OF_FAILURES_IN_SUMMARY
        # entries are printed; '>=' with start=1 stopped one short.
        if index > NUM_OF_FAILURES_IN_SUMMARY:
            break

        stderr(f"\n-- Error #{index} -----------\n")

        with open(failed_log_path, "r") as failed_log:
            shutil.copyfileobj(failed_log, LOCAL.stdout)

    if total_failed > NUM_OF_FAILURES_IN_SUMMARY:
        stderr("See the results folder for more.")

    sys.exit(1)
def cleanup_reference_results(output_dir: str):
    """
    Delete html, css, and js files from reference results. These can
    include multiple copies of the benchmark source and so get very large.
    """
    extensions = ["html", "css", "js"]

    for extension in extensions:
        for file_to_rm in glob.glob(f"{output_dir}/*/*.{extension}"):
            file_to_rm = os.path.join(output_dir, file_to_rm)
            os.remove(file_to_rm)

    # Remove the log file. It leaks absolute path names.
    remove_log_file(output_dir)
def run_cmp_results(directory: str, strictness: int = 0) -> bool:
    """
    Compare the warnings produced by scan-build.
    strictness defines the success criteria for the test:
    0 - success if there are no crashes or analyzer failure.
    1 - success if there are no difference in the number of reported bugs.
    2 - success if all the bug reports are identical.

    :return success: Whether tests pass according to the strictness
    criteria.
    """
    tests_passed = True
    start_time = time.time()

    ref_dir = os.path.join(directory, REF_PREFIX + OUTPUT_DIR_NAME)
    new_dir = os.path.join(directory, OUTPUT_DIR_NAME)

    # We have to go one level down the directory tree.
    ref_list = glob.glob(ref_dir + "/*")
    new_list = glob.glob(new_dir + "/*")

    # Log folders are also located in the results dir, so ignore them.
    ref_log_dir = os.path.join(ref_dir, LOG_DIR_NAME)
    if ref_log_dir in ref_list:
        ref_list.remove(ref_log_dir)
    new_list.remove(os.path.join(new_dir, LOG_DIR_NAME))

    if len(ref_list) != len(new_list):
        stderr(f"Mismatch in number of results folders: " f"{ref_list} vs {new_list}")
        sys.exit(1)

    # There might be more then one folder underneath - one per each scan-build
    # command (Ex: one for configure and one for make).
    if len(ref_list) > 1:
        # Assume that the corresponding folders have the same names.
        ref_list.sort()
        new_list.sort()

    # Iterate and find the differences.
    num_diffs = 0
    for ref_dir, new_dir in zip(ref_list, new_list):
        assert ref_dir != new_dir

        if VERBOSE >= 1:
            stdout(f" Comparing Results: {ref_dir} {new_dir}\n")

        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        ref_results = CmpRuns.ResultsDirectory(ref_dir)
        new_results = CmpRuns.ResultsDirectory(new_dir, patched_source)

        # Scan the results, delete empty plist files.
        (
            num_diffs,
            reports_in_ref,
            reports_in_new,
        ) = CmpRuns.dump_scan_build_results_diff(
            ref_results, new_results, delete_empty=False, out=LOCAL.stdout
        )

        if num_diffs > 0:
            stdout(f"Warning: {num_diffs} differences in diagnostics.\n")

        if strictness >= 2 and num_diffs > 0:
            stdout("Error: Diffs found in strict mode (2).\n")
            tests_passed = False

        elif strictness >= 1 and reports_in_ref != reports_in_new:
            stdout("Error: The number of results are different " " strict mode (1).\n")
            tests_passed = False

    stdout(
        f"Diagnostic comparison complete " f"(time: {time.time() - start_time:.2f}).\n"
    )

    return tests_passed
def normalize_reference_results(directory: str, output_dir: str, build_mode: int):
    """
    Make the absolute paths relative in the reference results.

    Also strips fields that change from run to run (HTML diagnostic file
    names, clang version) so reference results diff cleanly.
    """
    for dir_path, _, filenames in os.walk(output_dir):
        for filename in filenames:
            if not filename.endswith("plist"):
                continue

            plist = os.path.join(dir_path, filename)
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)
            path_prefix = directory

            # Mode 1 projects are analyzed from the patched source copy.
            if build_mode == 1:
                path_prefix = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

            paths = [
                source[len(path_prefix) + 1 :]
                if source.startswith(path_prefix)
                else source
                for source in data["files"]
            ]
            data["files"] = paths

            # Remove transient fields which change from run to run.
            for diagnostic in data["diagnostics"]:
                if "HTMLDiagnostics_files" in diagnostic:
                    diagnostic.pop("HTMLDiagnostics_files")

            if "clang_version" in data:
                data.pop("clang_version")

            with open(plist, "wb") as plist_file:
                plistlib.dump(data, plist_file)
def get_build_log_path(output_dir: str) -> str:
    """Return the path of the scan-build log file inside *output_dir*."""
    return os.path.join(output_dir, LOG_DIR_NAME, BUILD_LOG_NAME)
def remove_log_file(output_dir: str):
    """Delete the build log under *output_dir*, if one exists."""
    build_log_path = get_build_log_path(output_dir)

    # Nothing to clean up when no log was written.
    if not os.path.exists(build_log_path):
        return

    if VERBOSE >= 1:
        stdout(f" Removing log file: {build_log_path}\n")

    os.remove(build_log_path)
def clean_up_empty_plists(output_dir: str):
    """
    A plist file is created for each call to the analyzer (each source file).
    We are only interested in the ones that have bug reports,
    so delete the rest.
    """
    for plist in glob.glob(output_dir + "/*/*.plist"):
        plist = os.path.join(output_dir, plist)

        try:
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)
            # Delete empty reports.
            if not data["files"]:
                os.remove(plist)
                continue

        except InvalidFileException as e:
            # Warn but keep going: one corrupt plist shouldn't kill the run.
            stderr(f"Error parsing plist file {plist}: {str(e)}")
            continue
def clean_up_empty_folders(output_dir: str):
    """
    Remove empty folders from results, as git would not store them.
    """
    subdirs = glob.glob(output_dir + "/*")
    for subdir in subdirs:
        if not os.listdir(subdir):
            os.removedirs(subdir)
# This module is a library; the CLI entry point lives in SATest.py.
if __name__ == "__main__":
    print("SATestBuild.py should not be used on its own.")
    print("Please use 'SATest.py build' instead")
    sys.exit(1)