r2/check-code

   1 #!/usr/bin/python
   2 # The contents of this file are subject to the Common Public Attribution
   3 # License Version 1.0. (the "License"); you may not use this file except in
   4 # compliance with the License. You may obtain a copy of the License at
   5 # http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
   6 # License Version 1.1, but Sections 14 and 15 have been added to cover use of
   7 # software over a computer network and provide for limited attribution for the
   8 # Original Developer. In addition, Exhibit A has been modified to be consistent
   9 # with Exhibit B.
  10 #
  11 # Software distributed under the License is distributed on an "AS IS" basis,
  12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
  13 # the specific language governing rights and limitations under the License.
  14 #
  15 # The Original Code is reddit.
  16 #
  17 # The Original Developer is the Initial Developer.  The Initial Developer of
  18 # the Original Code is reddit Inc.
  19 #
  20 # All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
  21 # Inc. All Rights Reserved.
  22 ###############################################################################
  23 """Check for new style guide violations in the current branch.
  24
  25 This script is meant to be used in a CI process to ensure that new changes
  26 do not violate PEP-8, PEP-257, or any of the validity checks of pyflakes.
  27
  28 """
  29 import argparse
  30 import collections
  31 import difflib
  32 import logging
  33 import lxml.etree as etree
  34 import os
  35 import re
  36 import subprocess
  37 import sys
  38
  39
  40 DEVNULL = open("/dev/null", "w")
  41 TOOLS = collections.OrderedDict((
  42     ('pep8', ["pep8", "--repeat"]),
  43     ('pep257', ["pep257"]),
  44     ('pyflakes', ["pyflakes"]),
  45 ))
  46
  47
  48 # Match *.py and *.pyx
  49 PYFILE = re.compile(r".*\.pyx?$")
  50
  51
  52 def assert_tools_available():
  53     """Check if the external binaries needed are available or exit."""
  54     for tool in TOOLS.values():
  55         binary = tool[0]
  56         try:
  57             subprocess.check_call(["which", binary], stdout=DEVNULL)
  58         except subprocess.CalledProcessError:
  59             logging.error("command %r not found. please install it!", binary)
  60             sys.exit(1)
  61
  62
  63 def assert_not_dirty():
  64     """Check if there are uncommitted changes in the repo and exit if so."""
  65     try:
  66         subprocess.check_call(["git", "diff",
  67                                "--no-ext-diff", "--quiet", "--exit-code"])
  68     except subprocess.CalledProcessError:
  69         logging.error("you have uncommitted changes. please commit them!")
  70         sys.exit(1)
  71
  72
  73 def _parse_ref(ref):
  74     """Return the result of git rev-parse on the given ref."""
  75     ref = subprocess.check_output(["git", "rev-parse", ref])
  76     return ref.strip()
  77
  78
  79 def get_current_ref():
  80     """Return the most descriptive name possible of the current HEAD."""
  81     try:
  82         ref = subprocess.check_output(["git", "symbolic-ref", "HEAD"]).strip()
  83         return ref[len("refs/heads/"):]
  84     except subprocess.CalledProcessError:
  85         return _parse_ref("HEAD")
  86
  87
  88 def get_upstream_ref():
  89     """Return the ref that this topic branch is based on."""
  90     return _parse_ref("master@{upstream}")
  91
  92
  93 def get_merge_base():
  94     upstream = get_upstream_ref()
  95     current = get_current_ref()
  96     output = subprocess.check_output(['git', 'merge-base', upstream, current])
  97     return output.strip()
  98
  99
 100 def get_root():
 101     """Return the root directory of this git project."""
 102     return os.path.dirname(_parse_ref("--git-dir"))
 103
 104
 105 def check_ref_out(ref):
 106     """Ask git to check out the specified ref."""
 107     try:
 108         subprocess.check_call(
 109             ["git", "checkout", ref],
 110             stdout=DEVNULL,
 111             stderr=DEVNULL,
 112         )
 113     except subprocess.CalledProcessError:
 114         logging.error("failed to check out %s", ref)
 115         sys.exit(1)
 116
 117
 118 def walk_workspace():
 119     root = get_root()
 120     files = subprocess.check_output(['git', 'ls-files', '--full-name',
 121                                      '--', root])
 122     for filename in files.splitlines():
 123         yield os.path.join(root, filename)
 124
 125
 126 def select_files(files):
 127     for f in files:
 128         if re.match(PYFILE, f):
 129             yield f
 130         else:
 131             try:
 132                 with open(f) as f_:
 133                     first = f_.readline()
 134                     if first.startswith('#!') and 'python' in first:
 135                         yield f
 136             except (IOError, OSError):
 137                 logging.exception("Unable to check-code against %s", f)
 138
 139
 140 def extract_errtype(violation, tool):
 141     """Based on a line of `tool`'s output, return the kind of infraction.
 142
 143     Mostly relevant for pep8, which has various kinds of infractions,
 144     such as E501 for line too long.
 145
 146     """
 147     if tool == 'pep8':
 148         # E501 line too long (91 characters)
 149         errtype, sep, message = violation.partition(" ")
 150         if not sep:
 151             errtype = 'PEP8'
 152     elif tool == 'pep257':
 153         errtype = 'PEP257'
 154     elif tool == 'pyflakes':
 155         errtype = 'pyflakes'
 156     return errtype
 157
 158
 159 def make_test_class(tool, filepath):
 160     no_ext, ext = os.path.splitext(filepath)
 161     test_class_suffix = no_ext.replace(os.path.sep, '_')
 162     return tool + '.' + test_class_suffix
 163
 164
 165 def extract_line_info(reportline, filepath, tool):
 166     if tool == 'pep257' and reportline.startswith('Note: checks'):
 167         return None
 168     file_info, sep, violation = reportline.partition(": ")
 169     if not sep:
 170         return None
 171     file_info = file_info.split(":")
 172     if len(file_info) < 2:
 173         logging.warn("I don't understand this report line: %r", reportline)
 174         line_num = ''
 175     else:
 176         line_num = file_info[1]
 177     report_entry = {
 178         'file': filepath,
 179         'test_class': make_test_class(tool, filepath),
 180         'line': line_num,
 181         'violation': violation,
 182         'errtype': extract_errtype(violation, tool),
 183         'tool': tool,
 184     }
 185     return report_entry
 186
 187
 188 def generate_report(toolname, files=None):
 189     if not files:
 190         files = walk_workspace()
 191
 192     report = []
 193     for filepath in select_files(files):
 194         command = TOOLS[toolname] + [filepath]
 195         logging.info(" ".join(command))
 196         process = subprocess.Popen(
 197             command,
 198             stdout=subprocess.PIPE,
 199             stderr=subprocess.STDOUT,
 200         )
 201
 202         lines = process.communicate()[0].splitlines()
 203         ws_root = get_root()
 204         ws_filepath = os.path.relpath(filepath, ws_root)
 205         for line in lines:
 206             line = extract_line_info(line, ws_filepath, toolname)
 207             if line:
 208                 report.append(line)
 209     return report
 210
 211
 212 def generate_all_reports(ref=None, files=None):
 213     """Run the tools on the specified files and return errors / warnings."""
 214     if ref:
 215         check_ref_out(ref)
 216
 217     report = collections.OrderedDict.fromkeys(TOOLS.keys())
 218     for tool in TOOLS:
 219         report[tool] = generate_report(tool, files)
 220
 221     return report
 222
 223
 224 def get_changed_files(old_ref, new_ref):
 225     """Return a list of files that have changed from one ref to another."""
 226     root = get_root()
 227     changed_files_text = subprocess.check_output(["git", "diff", "--name-only",
 228                                                   old_ref, new_ref])
 229     changed_files = changed_files_text.splitlines()
 230     return [os.path.join(root, x) for x in changed_files]
 231
 232
 233 def diffable(report):
 234     """Convert the report to a list of lines that are reasonably 'diffable'.
 235
 236     That is, standard diff tools should be able to identify new or fixed
 237     violations by comparing results of this function
 238
 239     """
 240     updated = []
 241     for toolname, violations in report.iteritems():
 242         updated.append(toolname)
 243         updated.extend('%(file)s %(violation)s' % v for v in violations)
 244         updated.append('')
 245     return updated
 246
 247
 248 def human(report):
 249     """Convert the report to a list of human useful lines."""
 250     updated = []
 251     for toolname, violations in report.iteritems():
 252         updated.append(toolname)
 253         updated.extend('%(file)s:%(line)s %(violation)s' % v
 254                        for v in violations)
 255         updated.append('')
 256     return updated
 257
 258
 259 def junitize(report):
 260     """Convert the report into JUnit style XML.
 261
 262     This allows the report to be consumed by tools that consume JUnit reports
 263
 264     The style used here is: each file is a <testsuite>; each violation
 265     will be a <testcase> (always failed) whose "classname" shall be the tool
 266     used (e.g., pep8) and "name" shall be the type of violation and line
 267     number. Any additional information shall be included as the <failure>
 268     message.
 269
 270     """
 271     by_file = {}
 272     for violations in report.itervalues():
 273         for violation in violations:
 274             file_errors = by_file.setdefault(violation['file'], [])
 275             file_errors.append(violation)
 276     violations = etree.Element("testsuites")
 277     for filename in by_file:
 278         file_errs = etree.SubElement(violations, "testsuite")
 279         for violation in by_file[filename]:
 280             entry = etree.SubElement(file_errs, "testcase")
 281             entry.attrib['classname'] = violation['test_class']
 282             entry.attrib['name'] = violation['line']
 283             error_info = etree.SubElement(entry, "failure")
 284             error_info.attrib['message'] = violation['violation']
 285             error_info.attrib['type'] = violation['errtype']
 286     return violations
 287
 288
 289 def make_errname(violation):
 290     """Create a unique "test name" for this violation."""
 291     name = '.'.join((violation['errtype'], violation['line']))
 292     return name
 293
 294
 295 def diff_report(options):
 296     if options.check_dirty:
 297         assert_not_dirty()
 298
 299     current_ref = get_current_ref()
 300     base_ref = get_merge_base()
 301     if options.files:
 302         files = options.files
 303     else:
 304         files = get_changed_files(base_ref, current_ref)
 305         logging.debug("files changed: %r", files)
 306
 307     try:
 308         new_report = diffable(generate_all_reports(current_ref, files))
 309         logging.debug("new report:\n%r", new_report)
 310         old_report = diffable(generate_all_reports(base_ref, files))
 311         logging.debug("old report:\n%r", old_report)
 312     finally:
 313         check_ref_out(current_ref)
 314
 315     return difflib.unified_diff(old_report, new_report)
 316
 317
 318 def regression_report(options):
 319     added, removed = 0, 0
 320     for line in diff_report(options):
 321         line = line.strip()
 322         if line == "+++" or line == "---":
 323             continue
 324         if line.startswith("+"):
 325             added += 1
 326         elif line.startswith("-"):
 327             removed += 1
 328
 329     if added:
 330         print >> options.out, "added %d issues" % added
 331     if removed:
 332         print >> options.out, "removed %d issues!" % removed
 333
 334     return 1 if added else 0
 335
 336
 337 def junit_report(options):
 338     report = generate_all_reports(files=options.files)
 339     junit = junitize(report)
 340     print >> options.out, etree.tostring(junit, pretty_print=True)
 341
 342
 343 def human_report(options):
 344     if options.full:
 345         report = human(generate_all_reports(files=options.files))
 346     else:
 347         files = (options.files or
 348                  get_changed_files(get_merge_base(), get_current_ref()))
 349         logging.debug("changed files: %r", files)
 350         report = human(generate_all_reports(files=files))
 351     for line in report:
 352         print >> options.out, line
 353
 354
 355 def parse_args(args):
 356     parser = argparse.ArgumentParser(description="Report on python problems")
 357     parser.add_argument('--dirty', dest='check_dirty', action='store_false',
 358                         help="Skip the dirty workspace check.")
 359     parser.add_argument('-O', dest='out', type=argparse.FileType('w'),
 360                         default=sys.stdout, help="Write the report to OUT"
 361                         " instead of stdout.")
 362     parser.add_argument('--verbose', '-v', action='count', dest='verbosity',
 363                         help="Show verbose reporting messages.",
 364                         default=0)
 365     parser.add_argument('--quiet', '-q', action='count', default=0,
 366                         help="Reduce verbosity")
 367     parser.add_argument('--full', action='store_true', help="When generating"
 368                         " a {report}, show all files, not just changed ones.")
 369     parser.add_argument('report', choices=('junit', 'regression', 'report'))
 370     parser.add_argument('files', nargs='*', metavar='FILE')
 371     options = parser.parse_args(args)
 372     set_up_logging(options.verbosity - options.quiet)
 373     logging.debug("Options: %r", options)
 374     return options
 375
 376
 377 def set_up_logging(verbosity):
 378     levels = {-2: logging.ERROR, -1: logging.WARN, 0: logging.INFO,
 379               1: logging.DEBUG}
 380     max_level = max(levels.keys())
 381     min_level = min(levels.keys())
 382     verbosity = min(verbosity, max_level)
 383     verbosity = max(verbosity, min_level)
 384     level = levels[verbosity]
 385     format_ = '%(levelname)s %(message)s'
 386     logging.basicConfig(level=level, format=format_)
 387
 388
 389 def main():
 390     options = parse_args(sys.argv[1:])
 391     if options.report == 'regression':
 392         command = regression_report
 393     elif options.report == 'junit':
 394         command = junit_report
 395     elif options.report == 'report':
 396         command = human_report
 397     assert_tools_available()
 398     sys.exit(command(options))
 399
 400 if __name__ == "__main__":
 401     main()