HBASE-26921 Rewrite the counting cells part in TestMultiVersions (#4316)
[hbase.git] / dev-support / checkcompatibility.py
blobc6cc3be9ba25d115ee51c96cd7e0bea8ed44d806
1 #!/usr/bin/env python2
3 # Licensed to the Apache Software Foundation (ASF) under one
4 # or more contributor license agreements. See the NOTICE file
5 # distributed with this work for additional information
6 # regarding copyright ownership. The ASF licenses this file
7 # to you under the Apache License, Version 2.0 (the
8 # "License"); you may not use this file except in compliance
9 # with the License. You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing,
14 # software distributed under the License is distributed on an
15 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 # KIND, either express or implied. See the License for the
17 # specific language governing permissions and limitations
18 # under the License.
20 # Script which checks Java API compatibility between two revisions of the
21 # Java client.
23 # Originally sourced from Apache Kudu, which was based on the
24 # compatibility checker from the Apache HBase project, but ported to
25 # Python for better readability.
27 # The script can be invoked as follows:
28 # $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
29 # or with some options:
30 # $ ./dev-support/checkcompatibility.py \
31 # --annotation org.apache.yetus.audience.InterfaceAudience.Public \
32 # --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
33 # --include-file "hbase-*" \
34 # --known_problems_path ~/known_problems.json \
35 # rel/1.3.0 branch-1.4
37 import json
38 import logging
39 import os
40 import re
41 import shutil
42 import subprocess
43 import sys
44 import urllib2
45 from collections import namedtuple
46 try:
47 import argparse
48 except ImportError:
49 logging.error(
50 "Please install argparse, e.g. via `pip install argparse`.")
51 sys.exit(2)
# Various relative paths
# NOTE(review): captures the cwd of the invoking process at import time.
# It appears unused in this file (get_repo_dir() derives the repo root from
# __file__ instead) — confirm no external consumer before removing.
REPO_DIR = os.getcwd()
def check_output(*popenargs, **kwargs):
    """ Run command with arguments and return its output as a byte string.
    Backported from Python 2.7 as it's implemented as pure python on stdlib.
    >>> check_output(['/usr/bin/python', '--version'])
    Python 2.6.2 """
    proc = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    stdout_data, _ = proc.communicate()
    exit_code = proc.poll()
    # Success: hand back whatever the child wrote to stdout.
    if not exit_code:
        return stdout_data
    # Failure: mirror subprocess.check_output by raising CalledProcessError,
    # attaching the captured output for callers that want to inspect it.
    failed_cmd = kwargs.get("args")
    if failed_cmd is None:
        failed_cmd = popenargs[0]
    err = subprocess.CalledProcessError(exit_code, failed_cmd)
    err.output = stdout_data
    raise err
def get_repo_dir():
    """ Return the path to the top of the repo.

    This script lives in <repo>/dev-support, so the repo root is the parent
    of the directory containing this file. """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(script_dir)
    logging.debug("Repo dir is %s", repo_root)
    return repo_root
def get_scratch_dir():
    """ Return the path to the scratch dir that we build within.

    Creates the directory on first use. """
    path = os.path.join(get_repo_dir(), "target", "compat-check")
    if not os.path.exists(path):
        os.makedirs(path)
    return path
def get_java_acc_dir():
    """ Return the path where we check out the Java API Compliance Checker.

    Unlike get_scratch_dir(), this does not create the directory. """
    return os.path.join(get_repo_dir(), "target", "java-acc")
def clean_scratch_dir(scratch_dir):
    """ Wipe any previous contents of 'scratch_dir' and leave behind a
    freshly-created, empty directory. """
    if os.path.exists(scratch_dir):
        logging.info("Removing scratch dir %s ", scratch_dir)
        shutil.rmtree(scratch_dir)
    logging.info("Creating empty scratch dir %s ", scratch_dir)
    os.makedirs(scratch_dir)
def checkout_java_tree(rev, path):
    """ Check out the Java source tree for the given revision into
    the given path. """
    logging.info("Checking out %s in %s", rev, path)
    os.makedirs(path)
    # Stream a tar archive of the revision straight into the target directory.
    # 'pipefail' makes the git failure (not just tar's) fail the whole command.
    shell_cmd = ("git archive --format=tar %s | "
                 "tar -C '%s' -xf -") % (rev, path)
    subprocess.check_call(["bash", '-o', 'pipefail', "-c", shell_cmd],
                          cwd=get_repo_dir())
def get_git_hash(revname):
    """ Convert 'revname' to its SHA-1 hash.

    If the name does not resolve locally (e.g. a branch that only exists on
    the remote), retry with an "origin/" prefix; that second attempt is
    allowed to raise. """
    try:
        return check_output(["git", "rev-parse", revname],
                            cwd=get_repo_dir()).strip()
    except subprocess.CalledProcessError:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit; only a failed rev-parse should trigger the fallback.
        revname = "origin/" + revname
        return check_output(["git", "rev-parse", revname],
                            cwd=get_repo_dir()).strip()
def get_repo_name(remote_name="origin"):
    """ Get the name of the repo based on the git remote."""
    url = check_output(["git", "config", "--get", "remote.{0}.url".format(remote_name)],
                       cwd=get_repo_dir()).strip()
    # Last path component of the remote URL, minus any ".git" suffix.
    repo = url.split("/")[-1]
    if repo.endswith(".git"):
        repo = repo[:-4]
    return repo
def build_tree(java_path, verbose):
    """ Run the Java build within 'path'.

    When 'verbose' is false, maven runs with --quiet. """
    logging.info("Building in %s ", java_path)
    # special hack for comparing with rel/2.0.0, see HBASE-26063 for more details
    subprocess.check_call(["sed", "-i", "2148s/3.0.0/3.0.4/g", "pom.xml"], cwd=java_path)
    mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
               "-Dmaven.javadoc.skip=true"]
    if not verbose:
        mvn_cmd.append("--quiet")
    mvn_cmd.append("package")
    subprocess.check_call(mvn_cmd, cwd=java_path)
def checkout_java_acc(force):
    """ Check out the Java API Compliance Checker. If 'force' is true, will
    re-download even if the directory exists. """
    acc_dir = get_java_acc_dir()
    if os.path.exists(acc_dir):
        logging.info("Java ACC is already downloaded.")
        if not force:
            return
        logging.info("Forcing re-download.")
        shutil.rmtree(acc_dir)

    logging.info("Downloading Java ACC...")
    url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
    scratch_dir = get_scratch_dir()
    tarball = os.path.join(scratch_dir, os.path.basename(url))
    # Fetch the release tarball into the scratch dir, then unpack it.
    response = urllib2.urlopen(url)
    with open(tarball, 'wb') as out_file:
        out_file.write(response.read())

    subprocess.check_call(["tar", "xzf", tarball], cwd=scratch_dir)

    shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
                os.path.join(acc_dir))
def find_jars(path):
    """ Return a list of jars within 'path' to be checked for compatibility. """
    jar_paths = set(check_output(["find", path, "-type", "f", "-name", "*.jar"]).splitlines())
    # Skip test, source, and fat jars -- only the main artifacts are compared.
    unwanted_markers = ("-tests", "-sources", "-with-dependencies")
    return [jar for jar in jar_paths
            if not any(marker in jar for marker in unwanted_markers)]
def write_xml_file(path, version, jars):
    """ Write the XML manifest file for JACC: a <version> element followed by
    an <archives> element listing one jar path per line. """
    pieces = ["<version>%s</version>\n" % version, "<archives>"]
    pieces.extend("%s\n" % jar for jar in jars)
    pieces.append("</archives>")
    with open(path, "wt") as manifest:
        manifest.writelines(pieces)
def ascii_encode_dict(data):
    """ Iterate through a dictionary of data and convert all unicode to ascii.
    This method was taken from
    stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
    def _to_ascii(value):
        # Only unicode objects need re-encoding; anything else passes through.
        return value.encode('ascii') if isinstance(value, unicode) else value
    return {_to_ascii(key): _to_ascii(val) for key, val in data.items()}
def process_json(path):
    """ Process the known problems json file. The program raises an uncaught
    exception if it can't find the file or if the json is invalid; both
    cases are logged before re-raising. """
    path = os.path.abspath(os.path.expanduser(path))
    try:
        with open(path) as f:
            return json.load(f, object_hook=ascii_encode_dict)
    except ValueError as e:
        logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
        raise
    except IOError as e:
        # Previously bound the exception to an unused name and dropped its
        # detail; include the OS-level error in the log for diagnosis.
        logging.error("Provided json file path does not exist %s: %s",
                      str(path), str(e))
        raise
def compare_results(tool_results, known_issues, compare_warnings):
    """ Compare the number of problems found with the allowed number. If
    compare_warnings is true then also compare the number of warnings found.

    tool_results = results from the JACC tool - a dictionary
    known_issues = dictionary of expected issue count
    compare_warnings = boolean - if true also compare warnings as well as problems

    Returns True when at least one observed count exceeds its allowance. """
    logging.info("Results: %s", str(tool_results))

    unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
                                                       'known_count', 'observed_count'])
    # Collect every (check, issue_type) pair whose observed count exceeds
    # the allowed count.
    unexpected_issues = []
    for check, expected_counts in known_issues.items():
        for issue_type, known_count in expected_counts.items():
            if compare_tool_results_count(tool_results, check, issue_type, known_count):
                unexpected_issues.append(unexpected_issue(
                    check=check, issue_type=issue_type,
                    known_count=known_count,
                    observed_count=tool_results[check][issue_type]))

    # Unless requested, warning overruns are not treated as failures.
    if not compare_warnings:
        unexpected_issues = [issue for issue in unexpected_issues
                             if issue.issue_type != 'warnings']

    for issue in unexpected_issues:
        logging.error('Found %s during %s check (known issues: %d, observed issues: %d)',
                      issue.issue_type, issue.check, issue.known_count, issue.observed_count)

    return bool(unexpected_issues)


def compare_tool_results_count(tool_results, check, issue_type, known_count):
    """ Return True when the observed count for (check, issue_type) exceeds
    the known/allowed count.

    (This function exists just so can add in logging; previous was inlined
    one-liner but this made it hard debugging) """
    return tool_results[check][issue_type] > known_count
def process_java_acc_output(output):
    """ Process the output string to find the problems and warnings in both the
    binary and source compatibility. This is done in a way that is admittedly
    brittle; we are open to better implementations.

    We expect a line containing the relevant information to look something like:
    "total binary compatibility problems: 123, warnings: 16" """
    results = {}
    for line in output.split("\n"):
        # Only "total ..." summary lines carry the counts we care about.
        if not line.lower().startswith("total"):
            continue
        # Drop the leading "total " prefix.
        trimmed = line[6:]
        counts = {}
        for segment in trimmed.split(","):
            parts = segment.split(":")
            # Key is the trailing 8 chars of the label: "problems"/"warnings".
            counts[parts[0][-8:]] = int(parts[1])
        # First 6 chars of the trimmed line name the check: "binary"/"source".
        results[trimmed[:6]] = counts
    return results
def log_java_acc_version():
    """ Log the version string reported by the Java ACC tool. """
    checker_script = os.path.join(
        get_java_acc_dir(), "japi-compliance-checker.pl")
    version_cmd = ["perl", checker_script, "-dumpversion"]
    logging.info("Java ACC version: " + check_output(version_cmd))
def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations, name):
    """ Run the compliance checker to compare 'src' and 'dst'.

    src_name/dst_name label the two sides; src_jars/dst_jars are the jar
    lists to compare; annotations/skip_annotations (each may be None) are
    fully-qualified annotation names passed to JACC; name is the library
    name given to JACC. Returns the parsed problem/warning counts. """
    logging.info("Will check compatibility between original jars:\n\t%s\n"
                 "and new jars:\n\t%s",
                 "\n\t".join(src_jars),
                 "\n\t".join(dst_jars))

    java_acc_path = os.path.join(
        get_java_acc_dir(), "japi-compliance-checker.pl")

    src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
    dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
    write_xml_file(src_xml_path, src_name, src_jars)
    write_xml_file(dst_xml_path, dst_name, dst_jars)

    out_path = os.path.join(get_scratch_dir(), "report.html")

    args = ["perl", java_acc_path,
            "-l", name,
            "-d1", src_xml_path,
            "-d2", dst_xml_path,
            "-report-path", out_path]
    if annotations is not None:
        logging.info("Annotations are: %s", annotations)
        annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
        logging.info("Annotations path: %s", annotations_path)
        # open() replaces the deprecated file() builtin (removed in Python 3).
        with open(annotations_path, "w") as f:
            f.write('\n'.join(annotations))
        args.extend(["-annotations-list", annotations_path])

    if skip_annotations is not None:
        skip_annotations_path = os.path.join(
            get_scratch_dir(), "skip_annotations.txt")
        with open(skip_annotations_path, "w") as f:
            f.write('\n'.join(skip_annotations))
        args.extend(["-skip-annotations-list", skip_annotations_path])

    try:
        output = check_output(args)
    except subprocess.CalledProcessError as e:
        # The program returns a nonzero error code if issues are found. We
        # almost always expect some issues and want to process the results.
        output = e.output
    acc_processed = process_java_acc_output(output)
    return acc_processed
def get_known_problems(json_path, src_rev, dst_rev):
    """ The json file should be in the following format: a dictionary with the
    keys in the format source_branch/destination_branch and the values
    dictionaries with binary and source problems and warnings
    Example:
    {'branch-1.3': {
    'rel/1.3.0': {'binary': {'problems': 123, 'warnings': 16},
    'source': {'problems': 167, 'warnings': 1}},
    'branch-1.4': {'binary': {'problems': 0, 'warnings': 0},
    'source': {'problems': 0, 'warnings': 0}}
    'branch-1.4': {
    'rel/1.4.1': {'binary': {'problems': 13, 'warnings': 1},
    'source': {'problems': 23, 'warnings': 0}}
    } """
    def _strip_origin(rev):
        # Revisions in the json are keyed without the remote prefix.
        return rev[7:] if rev.startswith("origin/") else rev

    src_rev = _strip_origin(src_rev)
    dst_rev = _strip_origin(dst_rev)

    if json_path is None:
        # Default: no problems or warnings are tolerated at all.
        return {"binary": {"problems": 0, "warnings": 0},
                "source": {"problems": 0, "warnings": 0}}

    known_problems = process_json(json_path)
    try:
        return known_problems[src_rev][dst_rev]
    except KeyError:
        logging.error(("Known Problems values for %s %s are not in "
                       "provided json file. If you are trying to run "
                       "the test with the default values, don't "
                       "provide the --known_problems_path argument")
                      % (src_rev, dst_rev))
        raise
def filter_jars(jars, include_filters, exclude_filters):
    """ Filter the list of JARs based on include and exclude filters.

    A jar survives when at least one include pattern matches its basename
    and no exclude pattern does; includes are applied first. """
    # Apply include filters
    included = []
    for jar in jars:
        jar_name = os.path.basename(jar)
        if any(pattern.match(jar_name) for pattern in include_filters):
            included.append(jar)
        else:
            logging.debug("Ignoring JAR %s", jar)

    # Apply exclude filters
    survivors = []
    for jar in included:
        jar_name = os.path.basename(jar)
        if any(pattern.match(jar_name) for pattern in exclude_filters):
            logging.debug("Ignoring JAR %s", jar)
        else:
            survivors.append(jar)

    return survivors
def main():
    """ Main function: parse arguments, check out and build both revisions,
    run the Java API Compliance Checker over their jars, and exit non-zero
    if more problems than allowed are found. """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description="Run Java API Compliance Checker.")
    parser.add_argument("-f", "--force-download",
                        action="store_true",
                        help="Download dependencies (i.e. Java JAVA_ACC) "
                        "even if they are already present")
    parser.add_argument("-i", "--include-file",
                        action="append",
                        dest="include_files",
                        help="Regex filter for JAR files to be included. "
                        "Applied before the exclude filters. "
                        "Can be specified multiple times.")
    parser.add_argument("-e", "--exclude-file",
                        action="append",
                        dest="exclude_files",
                        help="Regex filter for JAR files to be excluded. "
                        "Applied after the include filters. "
                        "Can be specified multiple times.")
    parser.add_argument("-a", "--annotation",
                        action="append",
                        dest="annotations",
                        help="Fully-qualified Java annotation. "
                        "Java ACC will only check compatibility of "
                        "annotated classes. Can be specified multiple times.")
    parser.add_argument("--skip-annotation",
                        action="append",
                        dest="skip_annotations",
                        help="Fully-qualified Java annotation. "
                        "Java ACC will not check compatibility of "
                        "these annotated classes. Can be specified multiple "
                        "times.")
    parser.add_argument("-p", "--known_problems_path",
                        default=None, dest="known_problems_path",
                        help="Path to file with json 'known_problems "
                        "dictionary.' Path can be relative or absolute. An "
                        "examples file can be seen in the pydocs for the "
                        "get_known_problems method.")
    parser.add_argument("--skip-clean",
                        action="store_true",
                        help="Skip cleaning the scratch directory.")
    parser.add_argument("--compare-warnings", dest="compare_warnings",
                        action="store_true", default=False,
                        help="Compare warnings as well as problems.")
    parser.add_argument("--skip-build",
                        action="store_true",
                        help="Skip building the projects.")
    parser.add_argument("--verbose",
                        action="store_true",
                        help="more output")
    parser.add_argument("-r", "--remote", default="origin", dest="remote_name",
                        help="Name of remote to use. e.g. its repo name will be used as the name "
                        "we pass to Java ACC for the library.")
    parser.add_argument("src_rev", nargs=1, help="Source revision.")
    parser.add_argument("dst_rev", nargs="?", default="HEAD",
                        help="Destination revision. "
                        "If not specified, will use HEAD.")

    args = parser.parse_args()

    # nargs=1 yields a one-element list for src_rev; dst_rev is a plain string.
    src_rev, dst_rev = args.src_rev[0], args.dst_rev

    logging.info("Source revision: %s", src_rev)
    logging.info("Destination revision: %s", dst_rev)

    # Configure the expected numbers
    known_problems = get_known_problems(
        args.known_problems_path, src_rev, dst_rev)

    # Construct the JAR regex patterns for filtering.
    include_filters = []
    if args.include_files is not None:
        for f in args.include_files:
            logging.info("Applying JAR filename include filter: %s", f)
            include_filters += [re.compile(f)]
    else:
        # No include filters given: include every jar by default.
        include_filters = [re.compile(".*")]

    exclude_filters = []
    if args.exclude_files is not None:
        for f in args.exclude_files:
            logging.info("Applying JAR filename exclude filter: %s", f)
            exclude_filters += [re.compile(f)]

    # Construct the annotation list
    if args.annotations is not None:
        logging.info("Filtering classes using %d annotation(s):",
                     len(args.annotations))
        for a in args.annotations:
            logging.info("\t%s", a)

    skip_annotations = args.skip_annotations
    if skip_annotations is not None:
        logging.info("Skipping classes with %d annotation(s):",
                     len(skip_annotations))
        for a in skip_annotations:
            logging.info("\t%s", a)

    # Download deps.
    checkout_java_acc(args.force_download)
    log_java_acc_version()

    # Set up the build.
    scratch_dir = get_scratch_dir()
    src_dir = os.path.join(scratch_dir, "src")
    dst_dir = os.path.join(scratch_dir, "dst")

    if args.skip_clean:
        logging.info("Skipping cleaning the scratch directory")
    else:
        clean_scratch_dir(scratch_dir)
        # Check out the src and dst source trees.
        checkout_java_tree(get_git_hash(src_rev), src_dir)
        checkout_java_tree(get_git_hash(dst_rev), dst_dir)

    # Run the build in each.
    if args.skip_build:
        logging.info("Skipping the build")
    else:
        build_tree(src_dir, args.verbose)
        build_tree(dst_dir, args.verbose)

    # Find the JARs.
    src_jars = find_jars(src_dir)
    dst_jars = find_jars(dst_dir)

    # Filter the JARs.
    src_jars = filter_jars(src_jars, include_filters, exclude_filters)
    dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)

    if not src_jars or not dst_jars:
        logging.error("No JARs found! Are your filters too strong?")
        sys.exit(1)

    output = run_java_acc(src_rev, src_jars, dst_rev,
                          dst_jars, args.annotations, skip_annotations,
                          get_repo_name(args.remote_name))
    # Exit status: nonzero when unexpected issues were found, 0 otherwise.
    sys.exit(compare_results(output, known_problems,
                             args.compare_warnings))
# Script entry point: delegate to main() only when executed directly.
if __name__ == "__main__":
    main()