4 Static Analyzer qualification infrastructure.
6 The goal is to test the analyzer against different projects,
7 check for failures, compare results, and measure performance.
9 Repository Directory will contain sources of the projects as well as the
10 information on how to build them and the expected output.
11 Repository Directory structure:
13 - Historical Performance Data
19 Note that the build tree must be inside the project dir.
21 To test the build of the analyzer one would:
22 - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
23 the build directory does not pollute the repository to min network
25 - Build all projects, until error. Produce logs to report errors.
28 The files which should be kept around for failure investigations:
29 RepositoryCopy/Project DirI/ScanBuildResults
30 RepositoryCopy/Project DirI/run_static_analyzer.log
32 Assumptions (TODO: shouldn't need to assume these.):
33 The script is being run from the Repository Directory.
34 The compiler for scan-build and scan-build are in the PATH.
35 export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
37 For more logging, set the env variables:
38 zaks:TI zaks$ export CCC_ANALYZER_LOG=1
39 zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
41 The list of checkers tested are hardcoded in the Checkers variable.
42 For testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
43 variable. It should contain a comma separated list.
46 import SATestUtils
as utils
47 from ProjectMap
import DownloadType
, ProjectInfo
52 import multiprocessing
61 from queue
import Queue
62 # mypy has problems finding InvalidFileException in the module
63 # and this is how we can shush that false positive
64 from plistlib
import InvalidFileException
# type:ignore
65 from subprocess
import CalledProcessError
, check_call
66 from typing
import Dict
, IO
, List
, NamedTuple
, Optional
, TYPE_CHECKING
, Tuple
69 ###############################################################################
71 ###############################################################################
74 def __init__(self
, logger
: logging
.Logger
,
75 log_level
: int = logging
.INFO
):
77 self
.log_level
= log_level
79 def write(self
, message
: str):
80 # Rstrip in order not to write an extra newline.
81 self
.logger
.log(self
.log_level
, message
.rstrip())
86 def fileno(self
) -> int:
90 LOCAL
= threading
.local()
def init_logger(name: str):
    """Set up a per-project logger and bind thread-local stdout/stderr to it."""
    # TODO: use debug levels for VERBOSE messages
    project_logger = logging.getLogger(name)
    project_logger.setLevel(logging.DEBUG)
    # Route this thread's output streams through the project's logger.
    for stream_name, level in (("stdout", logging.INFO),
                               ("stderr", logging.ERROR)):
        setattr(LOCAL, stream_name, StreamToLogger(project_logger, level))
def stderr(message: str):
    """Write *message* to the current thread's error stream."""
    err_stream = LOCAL.stderr
    err_stream.write(message)
def stdout(message: str):
    """Write *message* to the current thread's output stream."""
    out_stream = LOCAL.stdout
    out_stream.write(message)
113 format
='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
116 ###############################################################################
117 # Configuration setup.
118 ###############################################################################
121 # Find Clang for static analysis.
122 if 'CC' in os
.environ
:
123 cc_candidate
: Optional
[str] = os
.environ
['CC']
125 cc_candidate
= utils
.which("clang", os
.environ
['PATH'])
127 stderr("Error: cannot find 'clang' in PATH")
133 MAX_JOBS
= int(math
.ceil(multiprocessing
.cpu_count() * 0.75))
135 # Names of the project specific scripts.
136 # The script that downloads the project.
137 DOWNLOAD_SCRIPT
= "download_project.sh"
138 # The script that needs to be executed before the build can start.
139 CLEANUP_SCRIPT
= "cleanup_run_static_analyzer.sh"
140 # This is a file containing commands for scan-build.
141 BUILD_SCRIPT
= "run_static_analyzer.cmd"
143 # A comment in a build script which disables wrapping.
144 NO_PREFIX_CMD
= "#NOPREFIX"
147 LOG_DIR_NAME
= "Logs"
148 BUILD_LOG_NAME
= "run_static_analyzer.log"
149 # Summary file - contains the summary of the failures. Ex: This info can be be
150 # displayed when buildbot detects a build failure.
151 NUM_OF_FAILURES_IN_SUMMARY
= 10
153 # The scan-build result directory.
154 OUTPUT_DIR_NAME
= "ScanBuildResults"
157 # The name of the directory storing the cached project source. If this
158 # directory does not exist, the download script will be executed.
159 # That script should create the "CachedSource" directory and download the
160 # project source into it.
161 CACHED_SOURCE_DIR_NAME
= "CachedSource"
163 # The name of the directory containing the source code that will be analyzed.
164 # Each time a project is analyzed, a fresh copy of its CachedSource directory
165 # will be copied to the PatchedSource directory and then the local patches
166 # in PATCHFILE_NAME will be applied (if PATCHFILE_NAME exists).
167 PATCHED_SOURCE_DIR_NAME
= "PatchedSource"
169 # The name of the patchfile specifying any changes that should be applied
170 # to the CachedSource before analyzing.
171 PATCHFILE_NAME
= "changes_for_analyzer.patch"
173 # The list of checkers used during analyses.
174 # Currently, consists of all the non-experimental checkers, plus a few alpha
175 # checkers we don't want to regress on.
176 CHECKERS
= ",".join([
177 "alpha.unix.SimpleStream",
178 "alpha.security.taint",
179 "cplusplus.NewDeleteLeaks",
192 ###############################################################################
193 # Test harness logic.
194 ###############################################################################
197 def run_cleanup_script(directory
: str, build_log_file
: IO
):
199 Run pre-processing script if any.
201 cwd
= os
.path
.join(directory
, PATCHED_SOURCE_DIR_NAME
)
202 script_path
= os
.path
.join(directory
, CLEANUP_SCRIPT
)
204 utils
.run_script(script_path
, build_log_file
, cwd
,
205 out
=LOCAL
.stdout
, err
=LOCAL
.stderr
,
209 class TestInfo(NamedTuple
):
211 Information about a project and settings for its analysis.
214 override_compiler
: bool = False
215 extra_analyzer_config
: str = ""
216 extra_checkers
: str = ""
217 is_reference_build
: bool = False
221 # typing package doesn't have a separate type for Queue, but has a generic stub
222 # We still want to have a type-safe checked project queue, for this reason,
223 # we specify generic type for mypy.
225 # It is a common workaround for this situation:
226 # https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
228 TestQueue
= Queue
[TestInfo
] # this is only processed by mypy
230 TestQueue
= Queue
# this will be executed at runtime
233 class RegressionTester
:
235 A component aggregating all of the project testing.
238 def __init__(self
, jobs
: int, projects
: List
[ProjectInfo
],
239 override_compiler
: bool, extra_analyzer_config
: str,
241 regenerate
: bool, strictness
: bool):
243 self
.projects
= projects
244 self
.override_compiler
= override_compiler
245 self
.extra_analyzer_config
= extra_analyzer_config
246 self
.extra_checkers
= extra_checkers
247 self
.regenerate
= regenerate
248 self
.strictness
= strictness
250 def test_all(self
) -> bool:
251 projects_to_test
: List
[TestInfo
] = []
254 for project
in self
.projects
:
255 projects_to_test
.append(
257 self
.override_compiler
,
258 self
.extra_analyzer_config
,
260 self
.regenerate
, self
.strictness
))
262 return self
._single
_threaded
_test
_all
(projects_to_test
)
264 return self
._multi
_threaded
_test
_all
(projects_to_test
)
266 def _single_threaded_test_all(self
,
267 projects_to_test
: List
[TestInfo
]) -> bool:
270 :return: whether tests have passed.
273 for project_info
in projects_to_test
:
274 tester
= ProjectTester(project_info
)
275 success
&= tester
.test()
278 def _multi_threaded_test_all(self
,
279 projects_to_test
: List
[TestInfo
]) -> bool:
281 Run each project in a separate thread.
283 This is OK despite GIL, as testing is blocked
284 on launching external processes.
286 :return: whether tests have passed.
288 tasks_queue
= TestQueue()
290 for project_info
in projects_to_test
:
291 tasks_queue
.put(project_info
)
293 results_differ
= threading
.Event()
294 failure_flag
= threading
.Event()
296 for _
in range(self
.jobs
):
297 T
= TestProjectThread(tasks_queue
, results_differ
, failure_flag
)
300 # Required to handle Ctrl-C gracefully.
301 while tasks_queue
.unfinished_tasks
:
302 time
.sleep(0.1) # Seconds.
303 if failure_flag
.is_set():
304 stderr("Test runner crashed\n")
306 return not results_differ
.is_set()
311 A component aggregating testing for one project.
314 def __init__(self
, test_info
: TestInfo
, silent
: bool = False):
315 self
.project
= test_info
.project
316 self
.override_compiler
= test_info
.override_compiler
317 self
.extra_analyzer_config
= test_info
.extra_analyzer_config
318 self
.extra_checkers
= test_info
.extra_checkers
319 self
.is_reference_build
= test_info
.is_reference_build
320 self
.strictness
= test_info
.strictness
323 def test(self
) -> bool:
325 Test a given project.
326 :return tests_passed: Whether tests have passed according
327 to the :param strictness: criteria.
329 if not self
.project
.enabled
:
331 f
" \n\n--- Skipping disabled project {self.project.name}\n")
334 self
.out(f
" \n\n--- Building project {self.project.name}\n")
336 start_time
= time
.time()
338 project_dir
= self
.get_project_dir()
339 self
.vout(f
" Build directory: {project_dir}.\n")
341 # Set the build results directory.
342 output_dir
= self
.get_output_dir()
344 self
.build(project_dir
, output_dir
)
345 check_build(output_dir
)
347 if self
.is_reference_build
:
348 cleanup_reference_results(output_dir
)
351 passed
= run_cmp_results(project_dir
, self
.strictness
)
353 self
.out(f
"Completed tests for project {self.project.name} "
354 f
"(time: {time.time() - start_time:.2f}).\n")
def get_project_dir(self) -> str:
    """Return the absolute path of this project's directory under the CWD."""
    current_root = os.path.abspath(os.curdir)
    return os.path.join(current_root, self.project.name)
def get_output_dir(self) -> str:
    """Return the scan-build results directory for this run.

    Reference builds get a REF_PREFIX-prefixed directory name so they
    don't collide with regular analysis results.
    """
    prefix = REF_PREFIX if self.is_reference_build else ""
    return os.path.join(self.get_project_dir(), prefix + OUTPUT_DIR_NAME)
369 def build(self
, directory
: str, output_dir
: str) -> Tuple
[float, int]:
370 build_log_path
= get_build_log_path(output_dir
)
372 self
.out(f
"Log file: {build_log_path}\n")
373 self
.out(f
"Output directory: {output_dir}\n")
375 remove_log_file(output_dir
)
377 # Clean up scan build results.
378 if os
.path
.exists(output_dir
):
379 self
.vout(f
" Removing old results: {output_dir}\n")
381 shutil
.rmtree(output_dir
)
383 assert(not os
.path
.exists(output_dir
))
384 os
.makedirs(os
.path
.join(output_dir
, LOG_DIR_NAME
))
386 # Build and analyze the project.
387 with
open(build_log_path
, "w+") as build_log_file
:
388 if self
.project
.mode
== 1:
389 self
._download
_and
_patch
(directory
, build_log_file
)
390 run_cleanup_script(directory
, build_log_file
)
391 build_time
, memory
= self
.scan_build(directory
, output_dir
,
394 build_time
, memory
= self
.analyze_preprocessed(directory
,
397 if self
.is_reference_build
:
398 run_cleanup_script(directory
, build_log_file
)
399 normalize_reference_results(directory
, output_dir
,
402 self
.out(f
"Build complete (time: {utils.time_to_str(build_time)}, "
403 f
"peak memory: {utils.memory_to_str(memory)}). "
404 f
"See the log for more details: {build_log_path}\n")
406 return build_time
, memory
408 def scan_build(self
, directory
: str, output_dir
: str,
409 build_log_file
: IO
) -> Tuple
[float, int]:
411 Build the project with scan-build by reading in the commands and
412 prefixing them with the scan-build options.
414 build_script_path
= os
.path
.join(directory
, BUILD_SCRIPT
)
415 if not os
.path
.exists(build_script_path
):
416 stderr(f
"Error: build script is not defined: "
417 f
"{build_script_path}\n")
420 all_checkers
= CHECKERS
421 if 'SA_ADDITIONAL_CHECKERS' in os
.environ
:
422 all_checkers
= (all_checkers
+ ',' +
423 os
.environ
['SA_ADDITIONAL_CHECKERS'])
424 if self
.extra_checkers
!= "":
425 all_checkers
+= "," + self
.extra_checkers
427 # Run scan-build from within the patched source directory.
428 cwd
= os
.path
.join(directory
, PATCHED_SOURCE_DIR_NAME
)
430 options
= f
"--use-analyzer '{CLANG}' "
431 options
+= f
"-plist-html -o '{output_dir}' "
432 options
+= f
"-enable-checker {all_checkers} "
433 options
+= "--keep-empty "
434 options
+= f
"-analyzer-config '{self.generate_config()}' "
436 if self
.override_compiler
:
437 options
+= "--override-compiler "
439 extra_env
: Dict
[str, str] = {}
445 command_file
= open(build_script_path
, "r")
446 command_prefix
= "scan-build " + options
+ " "
448 for command
in command_file
:
449 command
= command
.strip()
451 if len(command
) == 0:
454 # Custom analyzer invocation specified by project.
455 # Communicate required information using environment variables
457 if command
== NO_PREFIX_CMD
:
459 extra_env
['OUTPUT'] = output_dir
460 extra_env
['CC'] = CLANG
461 extra_env
['ANALYZER_CONFIG'] = self
.generate_config()
464 if command
.startswith("#"):
467 # If using 'make', auto imply a -jX argument
468 # to speed up analysis. xcodebuild will
469 # automatically use the maximum number of cores.
470 if (command
.startswith("make ") or command
== "make") and \
472 command
+= f
" -j{MAX_JOBS}"
474 command_to_run
= command_prefix
+ command
476 self
.vout(f
" Executing: {command_to_run}\n")
478 time
, mem
= utils
.check_and_measure_call(
479 command_to_run
, cwd
=cwd
,
480 stderr
=build_log_file
,
481 stdout
=build_log_file
,
482 env
=dict(os
.environ
, **extra_env
),
485 execution_time
+= time
486 peak_memory
= max(peak_memory
, mem
)
488 except CalledProcessError
:
489 stderr("Error: scan-build failed. Its output was: \n")
490 build_log_file
.seek(0)
491 shutil
.copyfileobj(build_log_file
, LOCAL
.stderr
)
494 return execution_time
, peak_memory
496 def analyze_preprocessed(self
, directory
: str,
497 output_dir
: str) -> Tuple
[float, int]:
499 Run analysis on a set of preprocessed files.
501 if os
.path
.exists(os
.path
.join(directory
, BUILD_SCRIPT
)):
502 stderr(f
"Error: The preprocessed files project "
503 f
"should not contain {BUILD_SCRIPT}\n")
506 prefix
= CLANG
+ " --analyze "
508 prefix
+= "--analyzer-output plist "
509 prefix
+= " -Xclang -analyzer-checker=" + CHECKERS
510 prefix
+= " -fcxx-exceptions -fblocks "
511 prefix
+= " -Xclang -analyzer-config "
512 prefix
+= f
"-Xclang {self.generate_config()} "
514 if self
.project
.mode
== 2:
515 prefix
+= "-std=c++11 "
517 plist_path
= os
.path
.join(directory
, output_dir
, "date")
518 fail_path
= os
.path
.join(plist_path
, "failures")
519 os
.makedirs(fail_path
)
524 for full_file_name
in glob
.glob(directory
+ "/*"):
525 file_name
= os
.path
.basename(full_file_name
)
528 # Only run the analyses on supported files.
529 if utils
.has_no_extension(file_name
):
531 if not utils
.is_valid_single_input_file(file_name
):
532 stderr(f
"Error: Invalid single input file {full_file_name}.\n")
535 # Build and call the analyzer command.
536 plist_basename
= os
.path
.join(plist_path
, file_name
)
537 output_option
= f
"-o '{plist_basename}.plist' "
538 command
= f
"{prefix}{output_option}'{file_name}'"
540 log_path
= os
.path
.join(fail_path
, file_name
+ ".stderr.txt")
541 with
open(log_path
, "w+") as log_file
:
543 self
.vout(f
" Executing: {command}\n")
545 time
, mem
= utils
.check_and_measure_call(
546 command
, cwd
=directory
, stderr
=log_file
,
547 stdout
=log_file
, shell
=True)
549 execution_time
+= time
550 peak_memory
= max(peak_memory
, mem
)
552 except CalledProcessError
as e
:
553 stderr(f
"Error: Analyzes of {full_file_name} failed. "
554 f
"See {log_file.name} for details. "
555 f
"Error code {e.returncode}.\n")
558 # If command did not fail, erase the log file.
560 os
.remove(log_file
.name
)
562 return execution_time
, peak_memory
def generate_config(self) -> str:
    """Assemble the -analyzer-config option string for this test run."""
    options = "serialize-stats=true,stable-report-filename=true"
    if self.extra_analyzer_config:
        options = f"{options},{self.extra_analyzer_config}"
    return options
572 def _download_and_patch(self
, directory
: str, build_log_file
: IO
):
574 Download the project and apply the local patchfile if it exists.
576 cached_source
= os
.path
.join(directory
, CACHED_SOURCE_DIR_NAME
)
578 # If the we don't already have the cached source, run the project's
579 # download script to download it.
580 if not os
.path
.exists(cached_source
):
581 self
._download
(directory
, build_log_file
)
582 if not os
.path
.exists(cached_source
):
583 stderr(f
"Error: '{cached_source}' not found after download.\n")
586 patched_source
= os
.path
.join(directory
, PATCHED_SOURCE_DIR_NAME
)
588 # Remove potentially stale patched source.
589 if os
.path
.exists(patched_source
):
590 shutil
.rmtree(patched_source
)
592 # Copy the cached source and apply any patches to the copy.
593 shutil
.copytree(cached_source
, patched_source
, symlinks
=True)
594 self
._apply
_patch
(directory
, build_log_file
)
596 def _download(self
, directory
: str, build_log_file
: IO
):
598 Run the script to download the project, if it exists.
600 if self
.project
.source
== DownloadType
.GIT
:
601 self
._download
_from
_git
(directory
, build_log_file
)
602 elif self
.project
.source
== DownloadType
.ZIP
:
603 self
._unpack
_zip
(directory
, build_log_file
)
604 elif self
.project
.source
== DownloadType
.SCRIPT
:
605 self
._run
_download
_script
(directory
, build_log_file
)
608 f
"Unknown source type '{self.project.source}' is found "
609 f
"for the '{self.project.name}' project")
def _download_from_git(self, directory: str, build_log_file: IO):
    """Clone the project's git origin into the cached-source directory.

    :param directory: the project directory to clone into.
    :param build_log_file: open log file receiving git's stdout/stderr.
    :raises CalledProcessError: if either git command fails.
    """
    repo = self.project.origin
    cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

    # Pass argv lists instead of shell strings: repository URLs, commit
    # ids, and local paths containing spaces or shell metacharacters
    # would otherwise be word-split (or interpreted) by the shell.
    check_call(["git", "clone", "--recursive", repo, cached_source],
               cwd=directory, stderr=build_log_file,
               stdout=build_log_file)
    check_call(["git", "checkout", "--quiet", self.project.commit],
               cwd=cached_source, stderr=build_log_file,
               stdout=build_log_file)
622 def _unpack_zip(self
, directory
: str, build_log_file
: IO
):
623 zip_files
= list(glob
.glob(directory
+ "/*.zip"))
625 if len(zip_files
) == 0:
627 f
"Couldn't find any zip files to unpack for the "
628 f
"'{self.project.name}' project")
630 if len(zip_files
) > 1:
632 f
"Couldn't decide which of the zip files ({zip_files}) "
633 f
"for the '{self.project.name}' project to unpack")
635 with zipfile
.ZipFile(zip_files
[0], "r") as zip_file
:
636 zip_file
.extractall(os
.path
.join(directory
,
637 CACHED_SOURCE_DIR_NAME
))
640 def _run_download_script(directory
: str, build_log_file
: IO
):
641 script_path
= os
.path
.join(directory
, DOWNLOAD_SCRIPT
)
642 utils
.run_script(script_path
, build_log_file
, directory
,
643 out
=LOCAL
.stdout
, err
=LOCAL
.stderr
,
646 def _apply_patch(self
, directory
: str, build_log_file
: IO
):
647 patchfile_path
= os
.path
.join(directory
, PATCHFILE_NAME
)
648 patched_source
= os
.path
.join(directory
, PATCHED_SOURCE_DIR_NAME
)
650 if not os
.path
.exists(patchfile_path
):
651 self
.out(" No local patches.\n")
654 self
.out(" Applying patch.\n")
656 check_call(f
"patch -p1 < '{patchfile_path}'",
658 stderr
=build_log_file
,
659 stdout
=build_log_file
,
662 except CalledProcessError
:
663 stderr(f
"Error: Patch failed. "
664 f
"See {build_log_file.name} for details.\n")
667 def out(self
, what
: str):
671 def vout(self
, what
: str):
676 class TestProjectThread(threading
.Thread
):
677 def __init__(self
, tasks_queue
: TestQueue
,
678 results_differ
: threading
.Event
,
679 failure_flag
: threading
.Event
):
681 :param results_differ: Used to signify that results differ from
683 :param failure_flag: Used to signify a failure during the run.
685 self
.tasks_queue
= tasks_queue
686 self
.results_differ
= results_differ
687 self
.failure_flag
= failure_flag
690 # Needed to gracefully handle interrupts with Ctrl-C
694 while not self
.tasks_queue
.empty():
696 test_info
= self
.tasks_queue
.get()
697 init_logger(test_info
.project
.name
)
699 tester
= ProjectTester(test_info
)
700 if not tester
.test():
701 self
.results_differ
.set()
703 self
.tasks_queue
.task_done()
705 except BaseException
:
706 self
.failure_flag
.set()
710 ###############################################################################
712 ###############################################################################
715 def check_build(output_dir
: str):
717 Given the scan-build output directory, checks if the build failed
718 (by searching for the failures directories). If there are failures, it
719 creates a summary file in the output directory.
722 # Check if there are failures.
723 failures
= glob
.glob(output_dir
+ "/*/failures/*.stderr.txt")
724 total_failed
= len(failures
)
726 if total_failed
== 0:
727 clean_up_empty_plists(output_dir
)
728 clean_up_empty_folders(output_dir
)
730 plists
= glob
.glob(output_dir
+ "/*/*.plist")
731 stdout(f
"Number of bug reports "
732 f
"(non-empty plist files) produced: {len(plists)}\n")
735 stderr("Error: analysis failed.\n")
736 stderr(f
"Total of {total_failed} failures discovered.\n")
738 if total_failed
> NUM_OF_FAILURES_IN_SUMMARY
:
739 stderr(f
"See the first {NUM_OF_FAILURES_IN_SUMMARY} below.\n")
741 for index
, failed_log_path
in enumerate(failures
, start
=1):
742 if index
>= NUM_OF_FAILURES_IN_SUMMARY
:
745 stderr(f
"\n-- Error #{index} -----------\n")
747 with
open(failed_log_path
, "r") as failed_log
:
748 shutil
.copyfileobj(failed_log
, LOCAL
.stdout
)
750 if total_failed
> NUM_OF_FAILURES_IN_SUMMARY
:
751 stderr("See the results folder for more.")
def cleanup_reference_results(output_dir: str):
    """
    Delete html, css, and js files from reference results. These can
    include multiple copies of the benchmark source and so get very large.
    """
    for extension in ("html", "css", "js"):
        for matched_file in glob.glob(f"{output_dir}/*/*.{extension}"):
            os.remove(os.path.join(output_dir, matched_file))

    # Remove the log file. It leaks absolute path names.
    remove_log_file(output_dir)
772 def run_cmp_results(directory
: str, strictness
: int = 0) -> bool:
774 Compare the warnings produced by scan-build.
775 strictness defines the success criteria for the test:
776 0 - success if there are no crashes or analyzer failure.
777 1 - success if there are no difference in the number of reported bugs.
778 2 - success if all the bug reports are identical.
780 :return success: Whether tests pass according to the strictness
784 start_time
= time
.time()
786 ref_dir
= os
.path
.join(directory
, REF_PREFIX
+ OUTPUT_DIR_NAME
)
787 new_dir
= os
.path
.join(directory
, OUTPUT_DIR_NAME
)
789 # We have to go one level down the directory tree.
790 ref_list
= glob
.glob(ref_dir
+ "/*")
791 new_list
= glob
.glob(new_dir
+ "/*")
793 # Log folders are also located in the results dir, so ignore them.
794 ref_log_dir
= os
.path
.join(ref_dir
, LOG_DIR_NAME
)
795 if ref_log_dir
in ref_list
:
796 ref_list
.remove(ref_log_dir
)
797 new_list
.remove(os
.path
.join(new_dir
, LOG_DIR_NAME
))
799 if len(ref_list
) != len(new_list
):
800 stderr(f
"Mismatch in number of results folders: "
801 f
"{ref_list} vs {new_list}")
804 # There might be more then one folder underneath - one per each scan-build
805 # command (Ex: one for configure and one for make).
806 if len(ref_list
) > 1:
807 # Assume that the corresponding folders have the same names.
811 # Iterate and find the differences.
813 for ref_dir
, new_dir
in zip(ref_list
, new_list
):
814 assert(ref_dir
!= new_dir
)
817 stdout(f
" Comparing Results: {ref_dir} {new_dir}\n")
819 patched_source
= os
.path
.join(directory
, PATCHED_SOURCE_DIR_NAME
)
821 ref_results
= CmpRuns
.ResultsDirectory(ref_dir
)
822 new_results
= CmpRuns
.ResultsDirectory(new_dir
, patched_source
)
824 # Scan the results, delete empty plist files.
825 num_diffs
, reports_in_ref
, reports_in_new
= \
826 CmpRuns
.dump_scan_build_results_diff(ref_results
, new_results
,
831 stdout(f
"Warning: {num_diffs} differences in diagnostics.\n")
833 if strictness
>= 2 and num_diffs
> 0:
834 stdout("Error: Diffs found in strict mode (2).\n")
837 elif strictness
>= 1 and reports_in_ref
!= reports_in_new
:
838 stdout("Error: The number of results are different "
839 " strict mode (1).\n")
842 stdout(f
"Diagnostic comparison complete "
843 f
"(time: {time.time() - start_time:.2f}).\n")
848 def normalize_reference_results(directory
: str, output_dir
: str,
851 Make the absolute paths relative in the reference results.
853 for dir_path
, _
, filenames
in os
.walk(output_dir
):
854 for filename
in filenames
:
855 if not filename
.endswith('plist'):
858 plist
= os
.path
.join(dir_path
, filename
)
859 data
= plistlib
.readPlist(plist
)
860 path_prefix
= directory
863 path_prefix
= os
.path
.join(directory
, PATCHED_SOURCE_DIR_NAME
)
865 paths
= [source
[len(path_prefix
) + 1:]
866 if source
.startswith(path_prefix
) else source
867 for source
in data
['files']]
868 data
['files'] = paths
870 # Remove transient fields which change from run to run.
871 for diagnostic
in data
['diagnostics']:
872 if 'HTMLDiagnostics_files' in diagnostic
:
873 diagnostic
.pop('HTMLDiagnostics_files')
875 if 'clang_version' in data
:
876 data
.pop('clang_version')
878 plistlib
.writePlist(data
, plist
)
def get_build_log_path(output_dir: str) -> str:
    """Return the full path of the build log inside *output_dir*."""
    log_directory = os.path.join(output_dir, LOG_DIR_NAME)
    return os.path.join(log_directory, BUILD_LOG_NAME)
885 def remove_log_file(output_dir
: str):
886 build_log_path
= get_build_log_path(output_dir
)
888 # Clean up the log file.
889 if os
.path
.exists(build_log_path
):
891 stdout(f
" Removing log file: {build_log_path}\n")
893 os
.remove(build_log_path
)
896 def clean_up_empty_plists(output_dir
: str):
898 A plist file is created for each call to the analyzer(each source file).
899 We are only interested in the ones that have bug reports,
902 for plist
in glob
.glob(output_dir
+ "/*/*.plist"):
903 plist
= os
.path
.join(output_dir
, plist
)
906 with
open(plist
, "rb") as plist_file
:
907 data
= plistlib
.load(plist_file
)
908 # Delete empty reports.
909 if not data
['files']:
913 except InvalidFileException
as e
:
914 stderr(f
"Error parsing plist file {plist}: {str(e)}")
def clean_up_empty_folders(output_dir: str):
    """
    Remove empty folders from results, as git would not store them.
    """
    for candidate in glob.glob(output_dir + "/*"):
        if not os.listdir(candidate):
            os.removedirs(candidate)
928 if __name__
== "__main__":
929 print("SATestBuild.py should not be used on its own.")
930 print("Please use 'SATest.py build' instead")