3 # Licensed to the Apache Software Foundation (ASF) under one
4 # or more contributor license agreements. See the NOTICE file
5 # distributed with this work for additional information
6 # regarding copyright ownership. The ASF licenses this file
7 # to you under the Apache License, Version 2.0 (the
8 # "License"); you may not use this file except in compliance
9 # with the License. You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing,
14 # software distributed under the License is distributed on an
15 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 # KIND, either express or implied. See the License for the
17 # specific language governing permissions and limitations
20 # Script which checks Java API compatibility between two revisions of the
23 # Originally sourced from Apache Kudu, which was based on the
24 # compatibility checker from the Apache HBase project, but ported to
25 # Python for better readability.
27 # The script can be invoked as follows:
28 # $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
29 # or with some options:
30 # $ ./dev-support/checkcompatibility.py \
31 # --annotation org.apache.yetus.audience.InterfaceAudience.Public \
32 # --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
33 # --include-file "hbase-*" \
34 # --known_problems_path ~/known_problems.json \
35 # rel/1.0.0 branch-1.2
45 from collections
import namedtuple
50 "Please install argparse, e.g. via `pip install argparse`.")
53 # Various relative paths
54 REPO_DIR
= os
.getcwd()
def check_output(*popenargs, **kwargs):
    """ Run command with arguments and return its output as a byte string.

    Backported from Python 2.7 as it's implemented as pure python on stdlib.

    All positional and keyword arguments are forwarded to subprocess.Popen;
    stdout is always captured through a pipe, so 'stdout' must not be passed.

    Raises subprocess.CalledProcessError if the command exits with a non-zero
    status. The captured output is attached to the exception as its 'output'
    attribute so that callers can still inspect it.

    Example: check_output(['/usr/bin/python', '--version'])
    """
    process = subprocess.Popen(*popenargs, stdout=subprocess.PIPE, **kwargs)
    output, _ = process.communicate()
    retcode = process.poll()
    if retcode:
        # Report the command the same way the stdlib does: prefer an explicit
        # 'args' keyword, otherwise fall back to the first positional argument.
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        error = subprocess.CalledProcessError(retcode, cmd)
        error.output = output
        raise error
    return output
76 """ Return the path to the top of the repo. """
77 dirname
, _
= os
.path
.split(os
.path
.abspath(__file__
))
78 dirname
= os
.path
.dirname(dirname
)
79 logging
.debug("Repo dir is %s", dirname
)
def get_scratch_dir():
    """ Return the path to the scratch dir that we build within.

    The directory is "target/compat-check" underneath the repo dir and is
    created on first use if it does not exist yet.
    """
    scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
    if not os.path.exists(scratch_dir):
        os.makedirs(scratch_dir)
    # Callers join further path components onto the result, so the path has
    # to be handed back explicitly.
    return scratch_dir
def get_java_acc_dir():
    """ Return the directory that holds our checkout of the Java API
    Compliance Checker ("target/java-acc" underneath the repo dir). """
    repo_root = get_repo_dir()
    return os.path.join(repo_root, "target", "java-acc")
def clean_scratch_dir(scratch_dir):
    """ Remove 'scratch_dir' if it is present, then create it again, empty. """
    stale = os.path.exists(scratch_dir)
    if stale:
        logging.info("Removing scratch dir %s ", scratch_dir)
        shutil.rmtree(scratch_dir)

    # Always end up with a fresh, empty directory.
    logging.info("Creating empty scratch dir %s ", scratch_dir)
    os.makedirs(scratch_dir)
105 def checkout_java_tree(rev
, path
):
106 """ Check out the Java source tree for the given revision into
108 logging
.info("Checking out %s in %s", rev
, path
)
110 # Extract java source
111 subprocess
.check_call(["bash", '-o', 'pipefail', "-c",
112 ("git archive --format=tar %s | "
113 "tar -C '%s' -xf -") % (rev
, path
)],
def get_git_hash(revname):
    """ Convert 'revname' to its SHA-1 hash.

    'revname' is first resolved exactly as given. If git cannot resolve it,
    the lookup is retried with an "origin/" prefix.

    Raises subprocess.CalledProcessError if the retry fails as well.
    """
    repo_dir = get_repo_dir()
    try:
        return check_output(["git", "rev-parse", revname],
                            cwd=repo_dir).strip()
    except subprocess.CalledProcessError:
        # Fall back to the remote-tracking name of the same revision.
        revname = "origin/" + revname
        return check_output(["git", "rev-parse", revname],
                            cwd=repo_dir).strip()
def get_repo_name(remote_name="origin"):
    """ Derive the repo name from the URL configured for git remote
    'remote_name': the last "/"-separated component of the URL, without a
    trailing ".git" if there is one. """
    config_key = "remote.{0}.url".format(remote_name)
    url = check_output(["git", "config", "--get", config_key],
                       cwd=get_repo_dir()).strip()
    last_component = url.split("/")[-1]
    if last_component.endswith(".git"):
        return last_component[:-4]
    return last_component
def build_tree(java_path, verbose):
    """ Run the Java build within 'java_path'.

    Runs "mvn package" in batch mode with tests and javadoc skipped. Unless
    'verbose' is true, Maven is additionally run with --quiet.

    Raises subprocess.CalledProcessError if the build fails.
    """
    logging.info("Building in %s ", java_path)
    mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
               "-Dmaven.javadoc.skip=true", "package"]
    if not verbose:
        # Keep the goal ("package") as the last argument.
        mvn_cmd.insert(-1, "--quiet")
    subprocess.check_call(mvn_cmd, cwd=java_path)
def checkout_java_acc(force):
    """ Check out the Java API Compliance Checker. If 'force' is true, will
    re-download even if the directory exists.

    The release tarball is downloaded into the scratch dir, unpacked there
    and the unpacked tree is then moved to the Java ACC dir.
    """
    acc_dir = get_java_acc_dir()
    if os.path.exists(acc_dir):
        logging.info("Java ACC is already downloaded.")
        if not force:
            return
        logging.info("Forcing re-download.")
        shutil.rmtree(acc_dir)

    logging.info("Downloading Java ACC...")

    url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
    scratch_dir = get_scratch_dir()
    path = os.path.join(scratch_dir, os.path.basename(url))
    jacc = urllib2.urlopen(url)
    try:
        with open(path, 'wb') as w:
            w.write(jacc.read())
    finally:
        # Release the connection even if writing the tarball fails.
        jacc.close()

    # Unpack next to the tarball; the archive contains a single top-level
    # "japi-compliance-checker-2.4" directory which is moved into place below.
    subprocess.check_call(["tar", "xzf", path],
                          cwd=scratch_dir)

    shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
                acc_dir)
174 """ Return a list of jars within 'path' to be checked for compatibility. """
175 all_jars
= set(check_output(["find", path
, "-name", "*.jar"]).splitlines())
177 return [j
for j
in all_jars
if (
178 "-tests" not in j
and
179 "-sources" not in j
and
180 "-with-dependencies" not in j
)]
def write_xml_file(path, version, jars):
    """ Write the XML manifest file for JACC.

    path = file to write (overwritten if it exists)
    version = value of the <version> element
    jars = iterable of JAR paths, listed one per line inside <archives>
    """
    with open(path, "wt") as f:
        f.write("<version>%s</version>\n" % version)
        f.write("<archives>")
        for j in jars:
            f.write("%s\n" % j)
        f.write("</archives>")
def ascii_encode_dict(data):
    """ Return a copy of 'data' in which every unicode key and value has been
    encoded to an ascii byte string.

    Only the immediate keys and values of 'data' are converted; anything that
    is not a unicode string is passed through unchanged. On interpreters that
    have no separate 'unicode' type (Python 3) an unmodified copy is returned.

    Raises UnicodeEncodeError if a unicode string holds non-ascii characters.

    This method was adapted from
    stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
    try:
        text_type = unicode
    except NameError:
        # No 'unicode' builtin: str is already the native text type, and
        # encoding it would turn the keys into bytes.
        return dict(data)

    def ascii_encode(value):
        if isinstance(value, text_type):
            return value.encode('ascii')
        return value

    return dict((ascii_encode(key), ascii_encode(value))
                for key, value in data.items())
def process_json(path):
    """ Process the known problems json file and return its contents.

    'path' may be relative and may start with "~". The program raises an
    uncaught exception if it can't find the file or if the json is invalid:
    the problem is logged here and the original exception (IOError or
    ValueError respectively) is re-raised. """
    path = os.path.abspath(os.path.expanduser(path))
    try:
        with open(path) as f:
            return json.load(f, object_hook=ascii_encode_dict)
    except ValueError as e:
        logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
        raise
    except IOError:
        logging.error("Provided json file path does not exist %s", str(path))
        raise
def compare_results(tool_results, known_issues, compare_warnings):
    """ Decide whether the tool found more issues than are allowed.

    tool_results = results from the JACC tool - a dictionary
    known_issues = dictionary of expected issue count
    compare_warnings = boolean - if true, warnings are checked in addition
                       to problems

    Every count that exceeds its allowance is logged as an error. Returns
    True if there is at least one such count, False otherwise. """
    logging.info("Results: %s", str(tool_results))

    unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
                                                       'known_count', 'observed_count'])

    # First collect every count that is over its allowance ...
    exceeded = []
    for check, allowed_by_type in known_issues.items():
        for issue_type, known_count in allowed_by_type.items():
            observed_count = tool_results[check][issue_type]
            if observed_count > known_count:
                exceeded.append(unexpected_issue(check, issue_type,
                                                 known_count, observed_count))

    # ... then drop the warnings unless the caller asked for them.
    if compare_warnings:
        reportable = exceeded
    else:
        reportable = [entry for entry in exceeded
                      if entry.issue_type != 'warnings']

    for entry in reportable:
        logging.error('Found %s during %s check (known issues: %d, observed issues: %d)',
                      entry.issue_type, entry.check, entry.known_count, entry.observed_count)

    return len(reportable) > 0
def process_java_acc_output(output):
    """ Process the output string to find the problems and warnings in both the
    binary and source compatibility. This is done in a way that is admittedly
    brittle; we are open to better implementations.

    We expect a line containing the relevant information to look something like:
    "total binary compatibility problems: 123, warnings: 16"

    Returns a dictionary keyed by the first six characters after the "total"
    keyword ("binary" or "source" for the line above) whose values are
    dictionaries mapping "problems" and "warnings" to integer counts. Lines
    that do not start with "total" (in any letter case) are ignored. """
    return_value = {}
    for line in output.split("\n"):
        # Line has relevant info
        if line.lower().startswith("total"):
            values = {}
            # Remove "total" keyword
            line = line[6:]
            # Separate the two valuable parts
            for segment in line.split(","):
                part = segment.split(":")
                # Extract key and value; both keys are eight characters long
                values[part[0][-8:]] = int(part[1])
            return_value[line[:6]] = values
    return return_value
def log_java_acc_version():
    """ Log what the Java API Compliance Checker script prints when it is
    run with -dumpversion. """
    checker_script = os.path.join(get_java_acc_dir(),
                                  "japi-compliance-checker.pl")
    version_cmd = ["perl", checker_script, "-dumpversion"]
    reported = check_output(version_cmd)
    logging.info("Java ACC version: " + reported)
276 def run_java_acc(src_name
, src_jars
, dst_name
, dst_jars
, annotations
, skip_annotations
, name
):
277 """ Run the compliance checker to compare 'src' and 'dst'. """
278 logging
.info("Will check compatibility between original jars:\n\t%s\n"
279 "and new jars:\n\t%s",
280 "\n\t".join(src_jars
),
281 "\n\t".join(dst_jars
))
283 java_acc_path
= os
.path
.join(
284 get_java_acc_dir(), "japi-compliance-checker.pl")
286 src_xml_path
= os
.path
.join(get_scratch_dir(), "src.xml")
287 dst_xml_path
= os
.path
.join(get_scratch_dir(), "dst.xml")
288 write_xml_file(src_xml_path
, src_name
, src_jars
)
289 write_xml_file(dst_xml_path
, dst_name
, dst_jars
)
291 out_path
= os
.path
.join(get_scratch_dir(), "report.html")
293 args
= ["perl", java_acc_path
,
297 "-report-path", out_path
]
298 if annotations
is not None:
299 logging
.info("Annotations are: %s", annotations
)
300 annotations_path
= os
.path
.join(get_scratch_dir(), "annotations.txt")
301 logging
.info("Annotations path: %s", annotations_path
)
302 with
file(annotations_path
, "w") as f
:
303 f
.write('\n'.join(annotations
))
304 args
.extend(["-annotations-list", annotations_path
])
306 if skip_annotations
is not None:
307 skip_annotations_path
= os
.path
.join(
308 get_scratch_dir(), "skip_annotations.txt")
309 with
file(skip_annotations_path
, "w") as f
:
310 f
.write('\n'.join(skip_annotations
))
311 args
.extend(["-skip-annotations-list", skip_annotations_path
])
314 output
= check_output(args
)
315 except subprocess
.CalledProcessError
as e
:
316 # The program returns a nonzero error code if issues are found. We
317 # almost always expect some issues and want to process the results.
319 acc_processed
= process_java_acc_output(output
)
def get_known_problems(json_path, src_rev, dst_rev):
    """ Return the allowed number of problems and warnings for comparing
    'src_rev' with 'dst_rev'.

    If 'json_path' is None the defaults are returned: zero problems and zero
    warnings for both the binary and the source check.

    Otherwise the json file should be in the following format: a dictionary
    keyed by source revision whose values are dictionaries keyed by
    destination revision, with the values dictionaries with binary and source
    problems and warnings. For example (the outer keys are illustrative):

    {'branch-1.0': {
        'rel/1.0.0': {'binary': {'problems': 123, 'warnings': 16},
                      'source': {'problems': 167, 'warnings': 1}},
        'branch-1.2.0': {'binary': {'problems': 0, 'warnings': 0},
                         'source': {'problems': 0, 'warnings': 0}}
        },
     'branch-1.1': {
        'rel/1.2.1': {'binary': {'problems': 13, 'warnings': 1},
                      'source': {'problems': 23, 'warnings': 0}}
        }
    }

    A leading "origin/" is removed from both revisions before the lookup.

    Raises KeyError if a json file is given but has no entry for the pair of
    revisions. """
    # These are the default values for allowed problems and warnings
    known_problems = {"binary": {"problems": 0, "warnings": 0},
                      "source": {"problems": 0, "warnings": 0}}
    if src_rev.startswith("origin/"):
        src_rev = src_rev[7:]
    if dst_rev.startswith("origin/"):
        dst_rev = dst_rev[7:]
    if json_path is not None:
        known_problems = process_json(json_path)
        try:
            return known_problems[src_rev][dst_rev]
        except KeyError:
            logging.error(("Known Problems values for %s %s are not in "
                           "provided json file. If you are trying to run "
                           "the test with the default values, don't "
                           "provide the --known_problems_path argument")
                          % (src_rev, dst_rev))
            # Never hand the whole file back as if it were the allowance.
            raise
    return known_problems
def filter_jars(jars, include_filters, exclude_filters):
    """ Filter the list of JARs based on include and exclude filters.

    jars = iterable of JAR paths
    include_filters = compiled regexes; a JAR is kept only if its file name
                      matches at least one of them
    exclude_filters = compiled regexes; a kept JAR is dropped again if its
                      file name matches any of them

    The filters are matched against the base name of each JAR, not its full
    path. Returns the remaining paths as a list, in their original order. """
    # Apply include filters
    filtered = []
    for j in jars:
        basename = os.path.basename(j)
        if any(f.match(basename) for f in include_filters):
            filtered.append(j)
        else:
            logging.debug("Ignoring JAR %s", j)
    # Apply exclude filters
    exclude_filtered = []
    for j in filtered:
        basename = os.path.basename(j)
        if any(f.match(basename) for f in exclude_filters):
            logging.debug("Ignoring JAR %s", j)
        else:
            exclude_filtered.append(j)

    return exclude_filtered
387 """ Main function. """
388 logging
.basicConfig(level
=logging
.INFO
)
389 parser
= argparse
.ArgumentParser(
390 description
="Run Java API Compliance Checker.")
391 parser
.add_argument("-f", "--force-download",
393 help="Download dependencies (i.e. Java JAVA_ACC) "
394 "even if they are already present")
395 parser
.add_argument("-i", "--include-file",
397 dest
="include_files",
398 help="Regex filter for JAR files to be included. "
399 "Applied before the exclude filters. "
400 "Can be specified multiple times.")
401 parser
.add_argument("-e", "--exclude-file",
403 dest
="exclude_files",
404 help="Regex filter for JAR files to be excluded. "
405 "Applied after the include filters. "
406 "Can be specified multiple times.")
407 parser
.add_argument("-a", "--annotation",
410 help="Fully-qualified Java annotation. "
411 "Java ACC will only check compatibility of "
412 "annotated classes. Can be specified multiple times.")
413 parser
.add_argument("--skip-annotation",
415 dest
="skip_annotations",
416 help="Fully-qualified Java annotation. "
417 "Java ACC will not check compatibility of "
418 "these annotated classes. Can be specified multiple "
420 parser
.add_argument("-p", "--known_problems_path",
421 default
=None, dest
="known_problems_path",
422 help="Path to file with json 'known_problems "
423 "dictionary.' Path can be relative or absolute. An "
424 "examples file can be seen in the pydocs for the "
425 "get_known_problems method.")
426 parser
.add_argument("--skip-clean",
428 help="Skip cleaning the scratch directory.")
429 parser
.add_argument("--compare-warnings", dest
="compare_warnings",
430 action
="store_true", default
=False,
431 help="Compare warnings as well as problems.")
432 parser
.add_argument("--skip-build",
434 help="Skip building the projects.")
435 parser
.add_argument("--verbose",
438 parser
.add_argument("-r", "--remote", default
="origin", dest
="remote_name",
439 help="Name of remote to use. e.g. its repo name will be used as the name "
440 "we pass to Java ACC for the library.")
441 parser
.add_argument("src_rev", nargs
=1, help="Source revision.")
442 parser
.add_argument("dst_rev", nargs
="?", default
="HEAD",
443 help="Destination revision. "
444 "If not specified, will use HEAD.")
446 args
= parser
.parse_args()
448 src_rev
, dst_rev
= args
.src_rev
[0], args
.dst_rev
450 logging
.info("Source revision: %s", src_rev
)
451 logging
.info("Destination revision: %s", dst_rev
)
453 # Configure the expected numbers
454 known_problems
= get_known_problems(
455 args
.known_problems_path
, src_rev
, dst_rev
)
457 # Construct the JAR regex patterns for filtering.
459 if args
.include_files
is not None:
460 for f
in args
.include_files
:
461 logging
.info("Applying JAR filename include filter: %s", f
)
462 include_filters
+= [re
.compile(f
)]
464 include_filters
= [re
.compile(".*")]
467 if args
.exclude_files
is not None:
468 for f
in args
.exclude_files
:
469 logging
.info("Applying JAR filename exclude filter: %s", f
)
470 exclude_filters
+= [re
.compile(f
)]
472 # Construct the annotation list
473 if args
.annotations
is not None:
474 logging
.info("Filtering classes using %d annotation(s):",
475 len(args
.annotations
))
476 for a
in args
.annotations
:
477 logging
.info("\t%s", a
)
479 skip_annotations
= args
.skip_annotations
480 if skip_annotations
is not None:
481 logging
.info("Skipping classes with %d annotation(s):",
482 len(skip_annotations
))
483 for a
in skip_annotations
:
484 logging
.info("\t%s", a
)
487 checkout_java_acc(args
.force_download
)
488 log_java_acc_version()
491 scratch_dir
= get_scratch_dir()
492 src_dir
= os
.path
.join(scratch_dir
, "src")
493 dst_dir
= os
.path
.join(scratch_dir
, "dst")
496 logging
.info("Skipping cleaning the scratch directory")
498 clean_scratch_dir(scratch_dir
)
499 # Check out the src and dst source trees.
500 checkout_java_tree(get_git_hash(src_rev
), src_dir
)
501 checkout_java_tree(get_git_hash(dst_rev
), dst_dir
)
503 # Run the build in each.
505 logging
.info("Skipping the build")
507 build_tree(src_dir
, args
.verbose
)
508 build_tree(dst_dir
, args
.verbose
)
511 src_jars
= find_jars(src_dir
)
512 dst_jars
= find_jars(dst_dir
)
515 src_jars
= filter_jars(src_jars
, include_filters
, exclude_filters
)
516 dst_jars
= filter_jars(dst_jars
, include_filters
, exclude_filters
)
518 if not src_jars
or not dst_jars
:
519 logging
.error("No JARs found! Are your filters too strong?")
522 output
= run_java_acc(src_rev
, src_jars
, dst_rev
,
523 dst_jars
, args
.annotations
, skip_annotations
,
524 get_repo_name(args
.remote_name
))
525 sys
.exit(compare_results(output
, known_problems
,
526 args
.compare_warnings
))
529 if __name__
== "__main__":