dev-support/checkcompatibility.py

   1 #!/usr/bin/env python
   2 #
   3 # Licensed to the Apache Software Foundation (ASF) under one
   4 # or more contributor license agreements.  See the NOTICE file
   5 # distributed with this work for additional information
   6 # regarding copyright ownership.  The ASF licenses this file
   7 # to you under the Apache License, Version 2.0 (the
   8 # "License"); you may not use this file except in compliance
   9 # with the License.  You may obtain a copy of the License at
  10 #
  11 #   http://www.apache.org/licenses/LICENSE-2.0
  12 #
  13 # Unless required by applicable law or agreed to in writing,
  14 # software distributed under the License is distributed on an
  15 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  16 # KIND, either express or implied.  See the License for the
  17 # specific language governing permissions and limitations
  18 # under the License.
  19
  20 # Script which checks Java API compatibility between two revisions of the
  21 # Java client.
  22 #
  23 # Originally sourced from Apache Kudu, which was based on the
  24 # compatibility checker from the Apache HBase project, but ported to
  25 # Python for better readability.
  26
  27 # The script can be invoked as follows:
  28 #   $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
  29 # or with some options:
  30 #   $ ./dev-support/checkcompatibility.py \
  31 #      --annotation org.apache.yetus.audience.InterfaceAudience.Public \
  32 #      --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
  33 #      --include-file "hbase-*" \
  34 #      --known_problems_path ~/known_problems.json \
  35 #      rel/1.3.0 branch-1.4
  36
  37 import json
  38 import logging
  39 import os
  40 import re
  41 import shutil
  42 import subprocess
  43 import sys
  44 import urllib2
  45 from collections import namedtuple
  46 try:
  47     import argparse
  48 except ImportError:
  49     logging.error(
  50         "Please install argparse, e.g. via `pip install argparse`.")
  51     sys.exit(2)
  52
# Various relative paths
# NOTE(review): this records the process working directory at import time
# (an absolute path, despite the heading above). No function visible in this
# file reads REPO_DIR -- they call get_repo_dir() instead -- so confirm
# whether anything outside this file still depends on it.
REPO_DIR = os.getcwd()
  55
  56
  57 def check_output(*popenargs, **kwargs):
  58     """ Run command with arguments and return its output as a byte string.
  59     Backported from Python 2.7 as it's implemented as pure python on stdlib.
  60     >>> check_output(['/usr/bin/python', '--version'])
  61     Python 2.6.2 """
  62     process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
  63     output, _ = process.communicate()
  64     retcode = process.poll()
  65     if retcode:
  66         cmd = kwargs.get("args")
  67         if cmd is None:
  68             cmd = popenargs[0]
  69         error = subprocess.CalledProcessError(retcode, cmd)
  70         error.output = output
  71         raise error
  72     return output
  73
  74
  75 def get_repo_dir():
  76     """ Return the path to the top of the repo. """
  77     dirname, _ = os.path.split(os.path.abspath(__file__))
  78     dirname = os.path.dirname(dirname)
  79     logging.debug("Repo dir is  %s", dirname)
  80     return dirname
  81
  82
  83 def get_scratch_dir():
  84     """ Return the path to the scratch dir that we build within. """
  85     scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
  86     if not os.path.exists(scratch_dir):
  87         os.makedirs(scratch_dir)
  88     return scratch_dir
  89
  90
  91 def get_java_acc_dir():
  92     """ Return the path where we check out the Java API Compliance Checker. """
  93     return os.path.join(get_repo_dir(), "target", "java-acc")
  94
  95
  96 def clean_scratch_dir(scratch_dir):
  97     """ Clean up and re-create the scratch directory. """
  98     if os.path.exists(scratch_dir):
  99         logging.info("Removing scratch dir %s ", scratch_dir)
 100         shutil.rmtree(scratch_dir)
 101     logging.info("Creating empty scratch dir %s ", scratch_dir)
 102     os.makedirs(scratch_dir)
 103
 104
 105 def checkout_java_tree(rev, path):
 106     """ Check out the Java source tree for the given revision into
 107     the given path. """
 108     logging.info("Checking out %s in %s", rev, path)
 109     os.makedirs(path)
 110     # Extract java source
 111     subprocess.check_call(["bash", '-o', 'pipefail', "-c",
 112                            ("git archive --format=tar %s | "
 113                             "tar -C '%s' -xf -") % (rev, path)],
 114                           cwd=get_repo_dir())
 115
 116
 117 def get_git_hash(revname):
 118     """ Convert 'revname' to its SHA-1 hash. """
 119     try:
 120         return check_output(["git", "rev-parse", revname],
 121                         cwd=get_repo_dir()).strip()
 122     except:
 123         revname = "origin/" + revname
 124         return check_output(["git", "rev-parse", revname],
 125                         cwd=get_repo_dir()).strip()
 126
 127
 128 def get_repo_name(remote_name="origin"):
 129     """ Get the name of the repo based on the git remote."""
 130     remote = check_output(["git", "config", "--get", "remote.{0}.url".format(remote_name)],
 131                            cwd=get_repo_dir()).strip()
 132     remote = remote.split("/")[-1]
 133     return remote[:-4] if remote.endswith(".git") else remote
 134
 135
 136 def build_tree(java_path, verbose):
 137     """ Run the Java build within 'path'. """
 138     logging.info("Building in %s ", java_path)
 139     mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
 140                "-Dmaven.javadoc.skip=true", "package"]
 141     if not verbose:
 142         mvn_cmd.insert(-1, "--quiet")
 143     subprocess.check_call(mvn_cmd, cwd=java_path)
 144
 145
 146 def checkout_java_acc(force):
 147     """ Check out the Java API Compliance Checker. If 'force' is true, will
 148     re-download even if the directory exists. """
 149     acc_dir = get_java_acc_dir()
 150     if os.path.exists(acc_dir):
 151         logging.info("Java ACC is already downloaded.")
 152         if not force:
 153             return
 154         logging.info("Forcing re-download.")
 155         shutil.rmtree(acc_dir)
 156
 157     logging.info("Downloading Java ACC...")
 158
 159     url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
 160     scratch_dir = get_scratch_dir()
 161     path = os.path.join(scratch_dir, os.path.basename(url))
 162     jacc = urllib2.urlopen(url)
 163     with open(path, 'wb') as w:
 164         w.write(jacc.read())
 165
 166     subprocess.check_call(["tar", "xzf", path],
 167                           cwd=scratch_dir)
 168
 169     shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
 170                 os.path.join(acc_dir))
 171
 172
 173 def find_jars(path):
 174     """ Return a list of jars within 'path' to be checked for compatibility. """
 175     all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
 176
 177     return [j for j in all_jars if (
 178         "-tests" not in j and
 179         "-sources" not in j and
 180         "-with-dependencies" not in j)]
 181
 182
 183 def write_xml_file(path, version, jars):
 184     """ Write the XML manifest file for JACC. """
 185     with open(path, "wt") as f:
 186         f.write("<version>%s</version>\n" % version)
 187         f.write("<archives>")
 188         for j in jars:
 189             f.write("%s\n" % j)
 190         f.write("</archives>")
 191
 192
 193 def ascii_encode_dict(data):
 194     """ Iterate through a dictionary of data and convert all unicode to ascii.
 195     This method was taken from
 196     stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
 197     ascii_encode = lambda x: x.encode('ascii') if isinstance(x, unicode) else x
 198     return dict(map(ascii_encode, pair) for pair in data.items())
 199
 200
 201 def process_json(path):
 202     """ Process the known problems json file. The program raises an uncaught exception
 203     if it can't find the file or if the json is invalid """
 204     path = os.path.abspath(os.path.expanduser(path))
 205     try:
 206         with open(path) as f:
 207             return json.load(f, object_hook=ascii_encode_dict)
 208     except ValueError as e:
 209         logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
 210         raise
 211     except IOError as io:
 212         logging.error("Provided json file path does not exist %s", str(path))
 213         raise
 214
 215
 216 def compare_results(tool_results, known_issues, compare_warnings):
 217     """ Compare the number of problems found with the allowed number. If
 218     compare_warnings is true then also compare the number of warnings found.
 219
 220     tool_results = results from the JACC tool - a dictionary
 221     known_issues = dictionary of expected issue count
 222     compare_warnings = boolean - if true also compare warnings as well as problems """
 223     logging.info("Results: %s", str(tool_results))
 224
 225     unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
 226                                                         'known_count', 'observed_count'])
 227     unexpected_issues = [unexpected_issue(check=check,  issue_type=issue_type,
 228                                       known_count=known_count,
 229                                       observed_count=tool_results[check][issue_type])
 230                      for check, known_issue_counts in known_issues.items()
 231                         for issue_type, known_count in known_issue_counts.items()
 232                             if tool_results[check][issue_type] > known_count]
 233
 234     if not compare_warnings:
 235         unexpected_issues = [tup for tup in unexpected_issues
 236                              if tup.issue_type != 'warnings']
 237
 238     for issue in unexpected_issues:
 239         logging.error('Found %s during  %s check (known issues: %d, observed issues: %d)',
 240                 issue.issue_type, issue.check, issue.known_count, issue.observed_count)
 241
 242     return bool(unexpected_issues)
 243
 244
 245 def process_java_acc_output(output):
 246     """ Process the output string to find the problems and warnings in both the
 247     binary and source compatibility. This is done in a way that is admittedly
 248     brittle; we are open to better implementations.
 249
 250     We expect a line containing the relevant information to look something like:
 251     "total binary compatibility problems: 123, warnings: 16" """
 252     return_value = {}
 253     output = output.split("\n")
 254     for line in output:
 255         # Line has relevant info
 256         if line.lower().startswith("total"):
 257             values = {}
 258             # Remove "total" keyword
 259             line = line[6:]
 260             # Seperate the two valuable parts
 261             line_list = line.split(",")
 262             for segment in line_list:
 263                 part = segment.split(":")
 264                 # Extract key and value
 265                 values[part[0][-8:]] = int(part[1])
 266             return_value[line[:6]] = values
 267     return return_value
 268
 269 def log_java_acc_version():
 270     java_acc_path = os.path.join(
 271         get_java_acc_dir(), "japi-compliance-checker.pl")
 272
 273     args = ["perl", java_acc_path, "-dumpversion"]
 274     logging.info("Java ACC version: " + check_output(args))
 275
 276 def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations, name):
 277     """ Run the compliance checker to compare 'src' and 'dst'. """
 278     logging.info("Will check compatibility between original jars:\n\t%s\n"
 279                  "and new jars:\n\t%s",
 280                  "\n\t".join(src_jars),
 281                  "\n\t".join(dst_jars))
 282
 283     java_acc_path = os.path.join(
 284         get_java_acc_dir(), "japi-compliance-checker.pl")
 285
 286     src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
 287     dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
 288     write_xml_file(src_xml_path, src_name, src_jars)
 289     write_xml_file(dst_xml_path, dst_name, dst_jars)
 290
 291     out_path = os.path.join(get_scratch_dir(), "report.html")
 292
 293     args = ["perl", java_acc_path,
 294             "-l", name,
 295             "-d1", src_xml_path,
 296             "-d2", dst_xml_path,
 297             "-report-path", out_path]
 298     if annotations is not None:
 299         logging.info("Annotations are: %s", annotations)
 300         annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
 301         logging.info("Annotations path: %s", annotations_path)
 302         with file(annotations_path, "w") as f:
 303             f.write('\n'.join(annotations))
 304         args.extend(["-annotations-list", annotations_path])
 305
 306     if skip_annotations is not None:
 307         skip_annotations_path = os.path.join(
 308             get_scratch_dir(), "skip_annotations.txt")
 309         with file(skip_annotations_path, "w") as f:
 310             f.write('\n'.join(skip_annotations))
 311         args.extend(["-skip-annotations-list", skip_annotations_path])
 312
 313     try:
 314         output = check_output(args)
 315     except subprocess.CalledProcessError as e:
 316         # The program returns a nonzero error code if issues are found. We
 317         # almost always expect some issues and want to process the results.
 318         output = e.output
 319     acc_processed = process_java_acc_output(output)
 320     return acc_processed
 321
 322
 323 def get_known_problems(json_path, src_rev, dst_rev):
 324     """ The json file should be in the following format: a dictionary with the
 325     keys in the format source_branch/destination_branch and the values
 326     dictionaries with binary and source problems and warnings
 327     Example:
 328     {'branch-1.3': {
 329       'rel/1.3.0': {'binary': {'problems': 123, 'warnings': 16},
 330                       'source': {'problems': 167, 'warnings': 1}},
 331       'branch-1.4': {'binary': {'problems': 0, 'warnings': 0},
 332                       'source': {'problems': 0, 'warnings': 0}}
 333       },
 334     'branch-1.4': {
 335       'rel/1.4.1': {'binary': {'problems': 13, 'warnings': 1},
 336                       'source': {'problems': 23, 'warnings': 0}}
 337       }
 338     } """
 339     # These are the default values for allowed problems and warnings
 340     known_problems = {"binary": {"problems": 0, "warnings": 0},
 341                            "source": {"problems": 0, "warnings": 0}}
 342     if src_rev.startswith("origin/"):
 343       src_rev = src_rev[7:]
 344     if dst_rev.startswith("origin/"):
 345       dst_rev = dst_rev[7:]
 346     if json_path is not None:
 347         known_problems = process_json(json_path)
 348         try:
 349             return known_problems[src_rev][dst_rev]
 350         except KeyError:
 351             logging.error(("Known Problems values for %s %s are not in "
 352                             "provided json file. If you are trying to run "
 353                             "the test with the default values, don't "
 354                             "provide the --known_problems_path argument")
 355                             % (src_rev, dst_rev))
 356             raise
 357     return known_problems
 358
 359
 360 def filter_jars(jars, include_filters, exclude_filters):
 361     """ Filter the list of JARs based on include and exclude filters. """
 362     filtered = []
 363     # Apply include filters
 364     for j in jars:
 365         basename = os.path.basename(j)
 366         for f in include_filters:
 367             if f.match(basename):
 368                 filtered += [j]
 369                 break
 370         else:
 371             logging.debug("Ignoring JAR %s", j)
 372     # Apply exclude filters
 373     exclude_filtered = []
 374     for j in filtered:
 375         basename = os.path.basename(j)
 376         for f in exclude_filters:
 377             if f.match(basename):
 378                 logging.debug("Ignoring JAR %s", j)
 379                 break
 380         else:
 381             exclude_filtered += [j]
 382
 383     return exclude_filtered
 384
 385
 386 def main():
 387     """ Main function. """
 388     logging.basicConfig(level=logging.INFO)
 389     parser = argparse.ArgumentParser(
 390         description="Run Java API Compliance Checker.")
 391     parser.add_argument("-f", "--force-download",
 392                         action="store_true",
 393                         help="Download dependencies (i.e. Java JAVA_ACC) "
 394                         "even if they are already present")
 395     parser.add_argument("-i", "--include-file",
 396                         action="append",
 397                         dest="include_files",
 398                         help="Regex filter for JAR files to be included. "
 399                         "Applied before the exclude filters. "
 400                         "Can be specified multiple times.")
 401     parser.add_argument("-e", "--exclude-file",
 402                         action="append",
 403                         dest="exclude_files",
 404                         help="Regex filter for JAR files to be excluded. "
 405                         "Applied after the include filters. "
 406                         "Can be specified multiple times.")
 407     parser.add_argument("-a", "--annotation",
 408                         action="append",
 409                         dest="annotations",
 410                         help="Fully-qualified Java annotation. "
 411                         "Java ACC will only check compatibility of "
 412                         "annotated classes. Can be specified multiple times.")
 413     parser.add_argument("--skip-annotation",
 414                         action="append",
 415                         dest="skip_annotations",
 416                         help="Fully-qualified Java annotation. "
 417                         "Java ACC will not check compatibility of "
 418                         "these annotated classes. Can be specified multiple "
 419                         "times.")
 420     parser.add_argument("-p", "--known_problems_path",
 421                         default=None, dest="known_problems_path",
 422                         help="Path to file with json 'known_problems "
 423                         "dictionary.' Path can be relative or absolute. An "
 424                         "examples file can be seen in the pydocs for the "
 425                         "get_known_problems method.")
 426     parser.add_argument("--skip-clean",
 427                         action="store_true",
 428                         help="Skip cleaning the scratch directory.")
 429     parser.add_argument("--compare-warnings", dest="compare_warnings",
 430                         action="store_true", default=False,
 431                         help="Compare warnings as well as problems.")
 432     parser.add_argument("--skip-build",
 433                         action="store_true",
 434                         help="Skip building the projects.")
 435     parser.add_argument("--verbose",
 436                         action="store_true",
 437                         help="more output")
 438     parser.add_argument("-r", "--remote", default="origin", dest="remote_name",
 439                         help="Name of remote to use. e.g. its repo name will be used as the name "
 440                         "we pass to Java ACC for the library.")
 441     parser.add_argument("src_rev", nargs=1, help="Source revision.")
 442     parser.add_argument("dst_rev", nargs="?", default="HEAD",
 443                         help="Destination revision. "
 444                         "If not specified, will use HEAD.")
 445
 446     args = parser.parse_args()
 447
 448     src_rev, dst_rev = args.src_rev[0], args.dst_rev
 449
 450     logging.info("Source revision: %s", src_rev)
 451     logging.info("Destination revision: %s", dst_rev)
 452
 453     # Configure the expected numbers
 454     known_problems = get_known_problems(
 455         args.known_problems_path, src_rev, dst_rev)
 456
 457     # Construct the JAR regex patterns for filtering.
 458     include_filters = []
 459     if args.include_files is not None:
 460         for f in args.include_files:
 461             logging.info("Applying JAR filename include filter: %s", f)
 462             include_filters += [re.compile(f)]
 463     else:
 464         include_filters = [re.compile(".*")]
 465
 466     exclude_filters = []
 467     if args.exclude_files is not None:
 468         for f in args.exclude_files:
 469             logging.info("Applying JAR filename exclude filter: %s", f)
 470             exclude_filters += [re.compile(f)]
 471
 472     # Construct the annotation list
 473     if args.annotations is not None:
 474         logging.info("Filtering classes using %d annotation(s):",
 475                      len(args.annotations))
 476         for a in args.annotations:
 477             logging.info("\t%s", a)
 478
 479     skip_annotations = args.skip_annotations
 480     if skip_annotations is not None:
 481         logging.info("Skipping classes with %d annotation(s):",
 482                      len(skip_annotations))
 483         for a in skip_annotations:
 484             logging.info("\t%s", a)
 485
 486     # Download deps.
 487     checkout_java_acc(args.force_download)
 488     log_java_acc_version()
 489
 490     # Set up the build.
 491     scratch_dir = get_scratch_dir()
 492     src_dir = os.path.join(scratch_dir, "src")
 493     dst_dir = os.path.join(scratch_dir, "dst")
 494
 495     if args.skip_clean:
 496         logging.info("Skipping cleaning the scratch directory")
 497     else:
 498         clean_scratch_dir(scratch_dir)
 499         # Check out the src and dst source trees.
 500         checkout_java_tree(get_git_hash(src_rev), src_dir)
 501         checkout_java_tree(get_git_hash(dst_rev), dst_dir)
 502
 503     # Run the build in each.
 504     if args.skip_build:
 505         logging.info("Skipping the build")
 506     else:
 507         build_tree(src_dir, args.verbose)
 508         build_tree(dst_dir, args.verbose)
 509
 510     # Find the JARs.
 511     src_jars = find_jars(src_dir)
 512     dst_jars = find_jars(dst_dir)
 513
 514     # Filter the JARs.
 515     src_jars = filter_jars(src_jars, include_filters, exclude_filters)
 516     dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
 517
 518     if not src_jars or not dst_jars:
 519         logging.error("No JARs found! Are your filters too strong?")
 520         sys.exit(1)
 521
 522     output = run_java_acc(src_rev, src_jars, dst_rev,
 523                             dst_jars, args.annotations, skip_annotations,
 524                             get_repo_name(args.remote_name))
 525     sys.exit(compare_results(output, known_problems,
 526                               args.compare_warnings))
 527
 528
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main()