HBASE-26921 Rewrite the counting cells part in TestMultiVersions (#4316)
[hbase.git] / dev-support / checkcompatibility.py
blobc6cc3be9ba25d115ee51c96cd7e0bea8ed44d806
1 #!/usr/bin/env python2
3 # Licensed to the Apache Software Foundation (ASF) under one
4 # or more contributor license agreements. See the NOTICE file
5 # distributed with this work for additional information
6 # regarding copyright ownership. The ASF licenses this file
7 # to you under the Apache License, Version 2.0 (the
8 # "License"); you may not use this file except in compliance
9 # with the License. You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing,
14 # software distributed under the License is distributed on an
15 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 # KIND, either express or implied. See the License for the
17 # specific language governing permissions and limitations
18 # under the License.
20 # Script which checks Java API compatibility between two revisions of the
21 # Java client.
23 # Originally sourced from Apache Kudu, which was based on the
24 # compatibility checker from the Apache HBase project, but ported to
25 # Python for better readability.
27 # The script can be invoked as follows:
28 # $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
29 # or with some options:
30 # $ ./dev-support/checkcompatibility.py \
31 # --annotation org.apache.yetus.audience.InterfaceAudience.Public \
32 # --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
33 # --include-file "hbase-*" \
34 # --known_problems_path ~/known_problems.json \
35 # rel/1.3.0 branch-1.4
37 import json
38 import logging
39 import os
40 import re
41 import shutil
42 import subprocess
43 import sys
44 import urllib2
45 from collections import namedtuple
46 try:
47 import argparse
48 except ImportError:
49 logging.error(
50 "Please install argparse, e.g. via `pip install argparse`.")
51 sys.exit(2)
# Various relative paths
# NOTE(review): captures the cwd of the invoking process at import time.
# It appears unused in this file (get_repo_dir() derives the repo root from
# __file__ instead) — confirm no external consumer before removing.
REPO_DIR = os.getcwd()
def check_output(*popenargs, **kwargs):
    """ Run command with arguments and return its output as a byte string.
    Backported from Python 2.7 as it's implemented as pure python on stdlib.
    >>> check_output(['/usr/bin/python', '--version'])
    Python 2.6.2 """
    proc = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    stdout_data, _ = proc.communicate()
    exit_code = proc.poll()
    # Success: hand back whatever the child wrote to stdout.
    if not exit_code:
        return stdout_data
    # Failure: mirror subprocess.check_output by raising CalledProcessError,
    # attaching the captured output for callers that want to inspect it.
    failed_cmd = kwargs.get("args")
    if failed_cmd is None:
        failed_cmd = popenargs[0]
    err = subprocess.CalledProcessError(exit_code, failed_cmd)
    err.output = stdout_data
    raise err
def get_repo_dir():
    """ Return the path to the top of the repo.

    This script lives in <repo>/dev-support, so the repo root is the parent
    of the directory containing this file. """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(script_dir)
    logging.debug("Repo dir is %s", repo_root)
    return repo_root
def get_scratch_dir():
    """ Return the path to the scratch dir that we build within.

    Creates the directory on first use. """
    path = os.path.join(get_repo_dir(), "target", "compat-check")
    if not os.path.exists(path):
        os.makedirs(path)
    return path
def get_java_acc_dir():
    """ Return the path where we check out the Java API Compliance Checker.

    Unlike get_scratch_dir(), this does not create the directory. """
    return os.path.join(get_repo_dir(), "target", "java-acc")
def clean_scratch_dir(scratch_dir):
    """ Wipe any previous contents of 'scratch_dir' and leave behind a
    freshly-created, empty directory. """
    if os.path.exists(scratch_dir):
        logging.info("Removing scratch dir %s ", scratch_dir)
        shutil.rmtree(scratch_dir)
    logging.info("Creating empty scratch dir %s ", scratch_dir)
    os.makedirs(scratch_dir)
def checkout_java_tree(rev, path):
    """ Check out the Java source tree for the given revision into
    the given path. """
    logging.info("Checking out %s in %s", rev, path)
    os.makedirs(path)
    # Stream a tar archive of the revision straight into the target directory.
    # 'pipefail' makes the git failure (not just tar's) fail the whole command.
    shell_cmd = ("git archive --format=tar %s | "
                 "tar -C '%s' -xf -") % (rev, path)
    subprocess.check_call(["bash", '-o', 'pipefail', "-c", shell_cmd],
                          cwd=get_repo_dir())
def get_git_hash(revname):
    """ Convert 'revname' to its SHA-1 hash.

    If the name does not resolve locally (e.g. a branch that only exists on
    the remote), retry with an "origin/" prefix; that second attempt is
    allowed to raise. """
    try:
        return check_output(["git", "rev-parse", revname],
                            cwd=get_repo_dir()).strip()
    except subprocess.CalledProcessError:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit; only a failed rev-parse should trigger the fallback.
        revname = "origin/" + revname
        return check_output(["git", "rev-parse", revname],
                            cwd=get_repo_dir()).strip()
def get_repo_name(remote_name="origin"):
    """ Get the name of the repo based on the git remote."""
    url = check_output(["git", "config", "--get", "remote.{0}.url".format(remote_name)],
                       cwd=get_repo_dir()).strip()
    # Last path component of the remote URL, minus any ".git" suffix.
    repo = url.split("/")[-1]
    if repo.endswith(".git"):
        repo = repo[:-4]
    return repo
def build_tree(java_path, verbose):
    """ Run the Java build within 'path'.

    When 'verbose' is false, maven runs with --quiet. """
    logging.info("Building in %s ", java_path)
    # special hack for comparing with rel/2.0.0, see HBASE-26063 for more details
    subprocess.check_call(["sed", "-i", "2148s/3.0.0/3.0.4/g", "pom.xml"], cwd=java_path)
    mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
               "-Dmaven.javadoc.skip=true"]
    if not verbose:
        mvn_cmd.append("--quiet")
    mvn_cmd.append("package")
    subprocess.check_call(mvn_cmd, cwd=java_path)
def checkout_java_acc(force):
    """ Check out the Java API Compliance Checker. If 'force' is true, will
    re-download even if the directory exists. """
    acc_dir = get_java_acc_dir()
    if os.path.exists(acc_dir):
        logging.info("Java ACC is already downloaded.")
        if not force:
            return
        logging.info("Forcing re-download.")
        shutil.rmtree(acc_dir)

    logging.info("Downloading Java ACC...")
    url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
    scratch_dir = get_scratch_dir()
    tarball = os.path.join(scratch_dir, os.path.basename(url))
    # Fetch the release tarball into the scratch dir, then unpack it.
    response = urllib2.urlopen(url)
    with open(tarball, 'wb') as out_file:
        out_file.write(response.read())

    subprocess.check_call(["tar", "xzf", tarball], cwd=scratch_dir)

    shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
                os.path.join(acc_dir))
def find_jars(path):
    """ Return a list of jars within 'path' to be checked for compatibility. """
    jar_paths = set(check_output(["find", path, "-type", "f", "-name", "*.jar"]).splitlines())
    # Skip test, source, and fat jars -- only the main artifacts are compared.
    unwanted_markers = ("-tests", "-sources", "-with-dependencies")
    return [jar for jar in jar_paths
            if not any(marker in jar for marker in unwanted_markers)]
def write_xml_file(path, version, jars):
    """ Write the XML manifest file for JACC: a <version> element followed by
    an <archives> element listing one jar path per line. """
    pieces = ["<version>%s</version>\n" % version, "<archives>"]
    pieces.extend("%s\n" % jar for jar in jars)
    pieces.append("</archives>")
    with open(path, "wt") as manifest:
        manifest.writelines(pieces)
def ascii_encode_dict(data):
    """ Iterate through a dictionary of data and convert all unicode to ascii.
    This method was taken from
    stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
    def _to_ascii(value):
        # Only unicode objects need re-encoding; anything else passes through.
        return value.encode('ascii') if isinstance(value, unicode) else value
    return {_to_ascii(key): _to_ascii(val) for key, val in data.items()}
def process_json(path):
    """ Process the known problems json file. The program raises an uncaught
    exception if it can't find the file or if the json is invalid; both
    cases are logged before re-raising. """
    path = os.path.abspath(os.path.expanduser(path))
    try:
        with open(path) as f:
            return json.load(f, object_hook=ascii_encode_dict)
    except ValueError as e:
        logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
        raise
    except IOError as e:
        # Previously bound the exception to an unused name and dropped its
        # detail; include the OS-level error in the log for diagnosis.
        logging.error("Provided json file path does not exist %s: %s",
                      str(path), str(e))
        raise
def compare_results(tool_results, known_issues, compare_warnings):
    """ Compare the number of problems found with the allowed number. If
    compare_warnings is true then also compare the number of warnings found.

    tool_results = results from the JACC tool - a dictionary
    known_issues = dictionary of expected issue count
    compare_warnings = boolean - if true also compare warnings as well as problems

    Returns True when at least one observed count exceeds its allowance. """
    logging.info("Results: %s", str(tool_results))

    unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
                                                       'known_count', 'observed_count'])
    # Collect every (check, issue_type) pair whose observed count exceeds
    # the allowed count.
    unexpected_issues = []
    for check, expected_counts in known_issues.items():
        for issue_type, known_count in expected_counts.items():
            if compare_tool_results_count(tool_results, check, issue_type, known_count):
                unexpected_issues.append(unexpected_issue(
                    check=check, issue_type=issue_type,
                    known_count=known_count,
                    observed_count=tool_results[check][issue_type]))

    # Unless requested, warning overruns are not treated as failures.
    if not compare_warnings:
        unexpected_issues = [issue for issue in unexpected_issues
                             if issue.issue_type != 'warnings']

    for issue in unexpected_issues:
        logging.error('Found %s during %s check (known issues: %d, observed issues: %d)',
                      issue.issue_type, issue.check, issue.known_count, issue.observed_count)

    return bool(unexpected_issues)


def compare_tool_results_count(tool_results, check, issue_type, known_count):
    """ Return True when the observed count for (check, issue_type) exceeds
    the known/allowed count.

    (This function exists just so can add in logging; previous was inlined
    one-liner but this made it hard debugging) """
    return tool_results[check][issue_type] > known_count
def process_java_acc_output(output):
    """ Process the output string to find the problems and warnings in both the
    binary and source compatibility. This is done in a way that is admittedly
    brittle; we are open to better implementations.

    We expect a line containing the relevant information to look something like:
    "total binary compatibility problems: 123, warnings: 16" """
    results = {}
    for line in output.split("\n"):
        # Only "total ..." summary lines carry the counts we care about.
        if not line.lower().startswith("total"):
            continue
        # Drop the leading "total " prefix.
        trimmed = line[6:]
        counts = {}
        for segment in trimmed.split(","):
            parts = segment.split(":")
            # Key is the trailing 8 chars of the label: "problems"/"warnings".
            counts[parts[0][-8:]] = int(parts[1])
        # First 6 chars of the trimmed line name the check: "binary"/"source".
        results[trimmed[:6]] = counts
    return results
def log_java_acc_version():
    """ Log the version string reported by the Java ACC tool. """
    checker_script = os.path.join(
        get_java_acc_dir(), "japi-compliance-checker.pl")
    version_cmd = ["perl", checker_script, "-dumpversion"]
    logging.info("Java ACC version: " + check_output(version_cmd))
def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations, name):
    """ Run the compliance checker to compare 'src' and 'dst'.

    src_name/dst_name label the two sides; src_jars/dst_jars are the jar
    lists to compare; annotations/skip_annotations (each may be None) are
    fully-qualified annotation names passed to JACC; name is the library
    name given to JACC. Returns the parsed problem/warning counts. """
    logging.info("Will check compatibility between original jars:\n\t%s\n"
                 "and new jars:\n\t%s",
                 "\n\t".join(src_jars),
                 "\n\t".join(dst_jars))

    java_acc_path = os.path.join(
        get_java_acc_dir(), "japi-compliance-checker.pl")

    src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
    dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
    write_xml_file(src_xml_path, src_name, src_jars)
    write_xml_file(dst_xml_path, dst_name, dst_jars)

    out_path = os.path.join(get_scratch_dir(), "report.html")

    args = ["perl", java_acc_path,
            "-l", name,
            "-d1", src_xml_path,
            "-d2", dst_xml_path,
            "-report-path", out_path]
    if annotations is not None:
        logging.info("Annotations are: %s", annotations)
        annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
        logging.info("Annotations path: %s", annotations_path)
        # open() replaces the deprecated file() builtin (removed in Python 3).
        with open(annotations_path, "w") as f:
            f.write('\n'.join(annotations))
        args.extend(["-annotations-list", annotations_path])

    if skip_annotations is not None:
        skip_annotations_path = os.path.join(
            get_scratch_dir(), "skip_annotations.txt")
        with open(skip_annotations_path, "w") as f:
            f.write('\n'.join(skip_annotations))
        args.extend(["-skip-annotations-list", skip_annotations_path])

    try:
        output = check_output(args)
    except subprocess.CalledProcessError as e:
        # The program returns a nonzero error code if issues are found. We
        # almost always expect some issues and want to process the results.
        output = e.output
    acc_processed = process_java_acc_output(output)
    return acc_processed
def get_known_problems(json_path, src_rev, dst_rev):
    """ The json file should be in the following format: a dictionary with the
    keys in the format source_branch/destination_branch and the values
    dictionaries with binary and source problems and warnings
    Example:
    {'branch-1.3': {
    'rel/1.3.0': {'binary': {'problems': 123, 'warnings': 16},
    'source': {'problems': 167, 'warnings': 1}},
    'branch-1.4': {'binary': {'problems': 0, 'warnings': 0},
    'source': {'problems': 0, 'warnings': 0}}
    'branch-1.4': {
    'rel/1.4.1': {'binary': {'problems': 13, 'warnings': 1},
    'source': {'problems': 23, 'warnings': 0}}
    } """
    def _strip_origin(rev):
        # Revisions in the json are keyed without the remote prefix.
        return rev[7:] if rev.startswith("origin/") else rev

    src_rev = _strip_origin(src_rev)
    dst_rev = _strip_origin(dst_rev)

    if json_path is None:
        # Default: no problems or warnings are tolerated at all.
        return {"binary": {"problems": 0, "warnings": 0},
                "source": {"problems": 0, "warnings": 0}}

    known_problems = process_json(json_path)
    try:
        return known_problems[src_rev][dst_rev]
    except KeyError:
        logging.error(("Known Problems values for %s %s are not in "
                       "provided json file. If you are trying to run "
                       "the test with the default values, don't "
                       "provide the --known_problems_path argument")
                      % (src_rev, dst_rev))
        raise
def filter_jars(jars, include_filters, exclude_filters):
    """ Filter the list of JARs based on include and exclude filters.

    A jar survives when at least one include pattern matches its basename
    and no exclude pattern does; includes are applied first. """
    # Apply include filters
    included = []
    for jar in jars:
        jar_name = os.path.basename(jar)
        if any(pattern.match(jar_name) for pattern in include_filters):
            included.append(jar)
        else:
            logging.debug("Ignoring JAR %s", jar)

    # Apply exclude filters
    survivors = []
    for jar in included:
        jar_name = os.path.basename(jar)
        if any(pattern.match(jar_name) for pattern in exclude_filters):
            logging.debug("Ignoring JAR %s", jar)
        else:
            survivors.append(jar)

    return survivors
def main():
    """ Main function: parse arguments, check out and build both revisions,
    run the Java API Compliance Checker over their jars, and exit non-zero
    if more problems than allowed are found. """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description="Run Java API Compliance Checker.")
    parser.add_argument("-f", "--force-download",
                        action="store_true",
                        help="Download dependencies (i.e. Java JAVA_ACC) "
                        "even if they are already present")
    parser.add_argument("-i", "--include-file",
                        action="append",
                        dest="include_files",
                        help="Regex filter for JAR files to be included. "
                        "Applied before the exclude filters. "
                        "Can be specified multiple times.")
    parser.add_argument("-e", "--exclude-file",
                        action="append",
                        dest="exclude_files",
                        help="Regex filter for JAR files to be excluded. "
                        "Applied after the include filters. "
                        "Can be specified multiple times.")
    parser.add_argument("-a", "--annotation",
                        action="append",
                        dest="annotations",
                        help="Fully-qualified Java annotation. "
                        "Java ACC will only check compatibility of "
                        "annotated classes. Can be specified multiple times.")
    parser.add_argument("--skip-annotation",
                        action="append",
                        dest="skip_annotations",
                        help="Fully-qualified Java annotation. "
                        "Java ACC will not check compatibility of "
                        "these annotated classes. Can be specified multiple "
                        "times.")
    parser.add_argument("-p", "--known_problems_path",
                        default=None, dest="known_problems_path",
                        help="Path to file with json 'known_problems "
                        "dictionary.' Path can be relative or absolute. An "
                        "examples file can be seen in the pydocs for the "
                        "get_known_problems method.")
    parser.add_argument("--skip-clean",
                        action="store_true",
                        help="Skip cleaning the scratch directory.")
    parser.add_argument("--compare-warnings", dest="compare_warnings",
                        action="store_true", default=False,
                        help="Compare warnings as well as problems.")
    parser.add_argument("--skip-build",
                        action="store_true",
                        help="Skip building the projects.")
    parser.add_argument("--verbose",
                        action="store_true",
                        help="more output")
    parser.add_argument("-r", "--remote", default="origin", dest="remote_name",
                        help="Name of remote to use. e.g. its repo name will be used as the name "
                        "we pass to Java ACC for the library.")
    parser.add_argument("src_rev", nargs=1, help="Source revision.")
    parser.add_argument("dst_rev", nargs="?", default="HEAD",
                        help="Destination revision. "
                        "If not specified, will use HEAD.")

    args = parser.parse_args()

    # nargs=1 yields a one-element list for src_rev; dst_rev is a plain string.
    src_rev, dst_rev = args.src_rev[0], args.dst_rev

    logging.info("Source revision: %s", src_rev)
    logging.info("Destination revision: %s", dst_rev)

    # Configure the expected numbers
    known_problems = get_known_problems(
        args.known_problems_path, src_rev, dst_rev)

    # Construct the JAR regex patterns for filtering.
    include_filters = []
    if args.include_files is not None:
        for f in args.include_files:
            logging.info("Applying JAR filename include filter: %s", f)
            include_filters += [re.compile(f)]
    else:
        # No include filters given: include every jar by default.
        include_filters = [re.compile(".*")]

    exclude_filters = []
    if args.exclude_files is not None:
        for f in args.exclude_files:
            logging.info("Applying JAR filename exclude filter: %s", f)
            exclude_filters += [re.compile(f)]

    # Construct the annotation list
    if args.annotations is not None:
        logging.info("Filtering classes using %d annotation(s):",
                     len(args.annotations))
        for a in args.annotations:
            logging.info("\t%s", a)

    skip_annotations = args.skip_annotations
    if skip_annotations is not None:
        logging.info("Skipping classes with %d annotation(s):",
                     len(skip_annotations))
        for a in skip_annotations:
            logging.info("\t%s", a)

    # Download deps.
    checkout_java_acc(args.force_download)
    log_java_acc_version()

    # Set up the build.
    scratch_dir = get_scratch_dir()
    src_dir = os.path.join(scratch_dir, "src")
    dst_dir = os.path.join(scratch_dir, "dst")

    if args.skip_clean:
        logging.info("Skipping cleaning the scratch directory")
    else:
        clean_scratch_dir(scratch_dir)
        # Check out the src and dst source trees.
        checkout_java_tree(get_git_hash(src_rev), src_dir)
        checkout_java_tree(get_git_hash(dst_rev), dst_dir)

    # Run the build in each.
    if args.skip_build:
        logging.info("Skipping the build")
    else:
        build_tree(src_dir, args.verbose)
        build_tree(dst_dir, args.verbose)

    # Find the JARs.
    src_jars = find_jars(src_dir)
    dst_jars = find_jars(dst_dir)

    # Filter the JARs.
    src_jars = filter_jars(src_jars, include_filters, exclude_filters)
    dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)

    if not src_jars or not dst_jars:
        logging.error("No JARs found! Are your filters too strong?")
        sys.exit(1)

    output = run_java_acc(src_rev, src_jars, dst_rev,
                          dst_jars, args.annotations, skip_annotations,
                          get_repo_name(args.remote_name))
    # Exit status: nonzero when unexpected issues were found, 0 otherwise.
    sys.exit(compare_results(output, known_problems,
                             args.compare_warnings))
# Script entry point: delegate to main() only when executed directly.
if __name__ == "__main__":
    main()