Update README for archival
[reddit.git] / r2 / check-code
blobe3fdfcfce00b03723bac33e6979189f66f79118b
1 #!/usr/bin/python
2 # The contents of this file are subject to the Common Public Attribution
3 # License Version 1.0. (the "License"); you may not use this file except in
4 # compliance with the License. You may obtain a copy of the License at
5 # http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
6 # License Version 1.1, but Sections 14 and 15 have been added to cover use of
7 # software over a computer network and provide for limited attribution for the
8 # Original Developer. In addition, Exhibit A has been modified to be consistent
9 # with Exhibit B.
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
13 # the specific language governing rights and limitations under the License.
15 # The Original Code is reddit.
17 # The Original Developer is the Initial Developer. The Initial Developer of
18 # the Original Code is reddit Inc.
20 # All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
21 # Inc. All Rights Reserved.
22 ###############################################################################
23 """Check for new style guide violations in the current branch.
25 This script is meant to be used in a CI process to ensure that new changes
26 do not violate PEP-8, PEP-257, or any of the validity checks of pyflakes.
28 """
29 import argparse
30 import collections
31 import difflib
32 import logging
33 import lxml.etree as etree
34 import os
35 import re
36 import subprocess
37 import sys
# Shared sink for the output of subprocesses that are only run for their
# exit status.  os.devnull avoids hard-coding the platform's null device.
DEVNULL = open(os.devnull, "w")

# Tool name -> base command line for that tool.  Ordered so that every
# report lists the tools in the same sequence.
TOOLS = collections.OrderedDict((
    ('pep8', ["pep8", "--repeat"]),
    ('pep257', ["pep257"]),
    ('pyflakes', ["pyflakes"]),
))

# Match *.py and *.pyx
PYFILE = re.compile(r".*\.pyx?$")
def assert_tools_available():
    """Exit with status 1 unless every configured tool can be found.

    The executable of each entry in TOOLS (the first word of its command
    line) is looked up with ``which``.  The first one that is missing is
    logged as an error and the process exits.
    """
    for command_line in TOOLS.values():
        executable = command_line[0]
        lookup = ["which", executable]
        try:
            subprocess.check_call(lookup, stdout=DEVNULL)
        except subprocess.CalledProcessError:
            logging.error("command %r not found. please install it!",
                          executable)
            sys.exit(1)
def assert_not_dirty():
    """Exit with status 1 if ``git diff`` reports uncommitted changes."""
    dirty_check = ["git", "diff", "--no-ext-diff", "--quiet", "--exit-code"]
    try:
        # With --exit-code git returns non-zero when there are differences.
        subprocess.check_call(dirty_check)
    except subprocess.CalledProcessError:
        logging.error("you have uncommitted changes. please commit them!")
        sys.exit(1)
def _parse_ref(ref):
    """Run ``git rev-parse`` on ``ref`` and return its stripped output."""
    return subprocess.check_output(["git", "rev-parse", ref]).strip()
def get_current_ref():
    """Return the branch HEAD points at, or HEAD's commit as a fallback.

    When ``git symbolic-ref HEAD`` succeeds, its output minus the leading
    ``refs/heads/`` is returned.  When it fails, the result of
    ``git rev-parse HEAD`` is returned instead.
    """
    branch_prefix = "refs/heads/"
    try:
        symbolic = subprocess.check_output(["git", "symbolic-ref", "HEAD"])
    except subprocess.CalledProcessError:
        return _parse_ref("HEAD")
    return symbolic.strip()[len(branch_prefix):]
def get_upstream_ref():
    """Return the commit that the upstream of ``master`` resolves to."""
    upstream_spec = "master@{upstream}"
    return _parse_ref(upstream_spec)
def get_merge_base():
    """Return ``git merge-base`` of the upstream ref and the current ref."""
    merge_base_cmd = [
        'git', 'merge-base',
        get_upstream_ref(),
        get_current_ref(),
    ]
    return subprocess.check_output(merge_base_cmd).strip()
def get_root():
    """Return the absolute path of the top-level directory of this project.

    ``git rev-parse --show-toplevel`` is used instead of taking the parent
    of ``--git-dir``.  From the top of the work tree ``--git-dir`` prints
    the relative path ``.git``, whose dirname is the empty string; callers
    pass this value to git as a pathspec and to ``os.path.join``, where an
    empty string is either rejected or only works by accident.
    """
    return _parse_ref("--show-toplevel")
def check_ref_out(ref):
    """Check out ``ref`` with git's output discarded; exit 1 on failure."""
    checkout = ["git", "checkout", ref]
    try:
        subprocess.check_call(checkout, stdout=DEVNULL, stderr=DEVNULL)
    except subprocess.CalledProcessError:
        logging.error("failed to check out %s", ref)
        sys.exit(1)
def walk_workspace():
    """Yield the path of every file ``git ls-files`` reports for the root.

    Each name printed by git is joined onto the project root before being
    yielded.
    """
    project_root = get_root()
    listing = subprocess.check_output(
        ['git', 'ls-files', '--full-name', '--', project_root])
    for tracked in listing.splitlines():
        yield os.path.join(project_root, tracked)
def select_files(files):
    """Yield the entries of ``files`` that look like Python source.

    A path is selected when it matches PYFILE or, failing that, when the
    first line of the file starts with ``#!`` and mentions ``python``.
    Files that cannot be opened or read are logged and skipped.
    """
    for path in files:
        if PYFILE.match(path):
            yield path
            continue
        try:
            with open(path) as handle:
                shebang = handle.readline()
                if shebang.startswith('#!') and 'python' in shebang:
                    yield path
        except (IOError, OSError):
            logging.exception("Unable to check-code against %s", path)
def extract_errtype(violation, tool):
    """Based on a line of `tool`'s output, return the kind of infraction.

    Mostly relevant for pep8, which has various kinds of infractions,
    such as E501 for line too long.  For pep8 the text before the first
    space in `violation` is returned, or 'PEP8' when there is no space.
    pep257 maps to 'PEP257' and pyflakes to 'pyflakes'.  Any other tool
    name is returned unchanged rather than raising, so an unrecognised
    tool cannot crash report generation.
    """
    if tool == 'pep8':
        # E501 line too long (91 characters)
        errtype, sep, _ = violation.partition(" ")
        return errtype if sep else 'PEP8'
    if tool == 'pep257':
        return 'PEP257'
    # pyflakes, and any tool this function does not know, is labelled with
    # the tool's own name.
    return tool
def make_test_class(tool, filepath):
    """Return ``tool`` and ``filepath`` combined into a dotted class name.

    The file extension is dropped and every path separator is replaced by
    an underscore, e.g. ``pep8`` and ``r2/lib/utils.py`` give
    ``pep8.r2_lib_utils`` where the separator is ``/``.
    """
    stem = os.path.splitext(filepath)[0]
    return '.'.join([tool, stem.replace(os.path.sep, '_')])
def extract_line_info(reportline, filepath, tool):
    """Parse one line of a tool's output into a report entry.

    Returns None for pep257's "Note: checks ..." line and for any line
    that does not contain ": ".  Otherwise returns a dict with the keys
    'file', 'test_class', 'line', 'violation', 'errtype' and 'tool'.

    The text before the first ": " is expected to look like
    ``path:line[:column]``.  When it has no line component a warning is
    logged and 'line' is set to the empty string.
    """
    if tool == 'pep257' and reportline.startswith('Note: checks'):
        return None

    location, sep, violation = reportline.partition(": ")
    if not sep:
        return None

    location_parts = location.split(":")
    if len(location_parts) < 2:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and behaves the same.
        logging.warning("I don't understand this report line: %r",
                        reportline)
        line_num = ''
    else:
        line_num = location_parts[1]

    return {
        'file': filepath,
        'test_class': make_test_class(tool, filepath),
        'line': line_num,
        'violation': violation,
        'errtype': extract_errtype(violation, tool),
        'tool': tool,
    }
def generate_report(toolname, files=None):
    """Run one tool over the selected files and return its violations.

    `toolname` must be a key of TOOLS.  `files` is an iterable of paths;
    when it is empty or None every file in the workspace is considered.
    Only files accepted by select_files() are checked.  The tool's stdout
    and stderr are captured together, and each output line that
    extract_line_info() understands becomes one entry in the returned
    list.  File names in the entries are relative to the project root.
    """
    if not files:
        files = walk_workspace()

    # The root does not depend on the file being checked, so ask git for it
    # once instead of spawning a subprocess for every file.
    ws_root = get_root()

    report = []
    for filepath in select_files(files):
        command = TOOLS[toolname] + [filepath]
        logging.info(" ".join(command))
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        output = process.communicate()[0]
        ws_filepath = os.path.relpath(filepath, ws_root)
        for raw_line in output.splitlines():
            entry = extract_line_info(raw_line, ws_filepath, toolname)
            if entry:
                report.append(entry)
    return report
def generate_all_reports(ref=None, files=None):
    """Run every tool in TOOLS and return the violations keyed by tool.

    When `ref` is given it is checked out first.  The result is an
    OrderedDict in TOOLS order, mapping each tool name to the list
    returned by generate_report() for `files`.
    """
    if ref:
        check_ref_out(ref)

    return collections.OrderedDict(
        (tool, generate_report(tool, files)) for tool in TOOLS
    )
def get_changed_files(old_ref, new_ref):
    """Return the files ``git diff --name-only`` lists between two refs.

    Each name is joined onto the project root.
    """
    project_root = get_root()
    diff_cmd = ["git", "diff", "--name-only", old_ref, new_ref]
    names = subprocess.check_output(diff_cmd).splitlines()
    return [os.path.join(project_root, name) for name in names]
def diffable(report):
    """Flatten the report into lines that standard diff tools can compare.

    For every tool the output holds the tool name, one ``file violation``
    line per entry, and an empty line.  Line numbers are left out, so a
    violation that merely moved within its file does not show up as a
    difference.
    """
    lines = []
    for toolname in report:
        lines.append(toolname)
        for entry in report[toolname]:
            lines.append('%(file)s %(violation)s' % entry)
        lines.append('')
    return lines
def human(report):
    """Flatten the report into lines meant for a person to read.

    For every tool the output holds the tool name, one
    ``file:line violation`` line per entry, and an empty line.
    """
    lines = []
    for toolname in report:
        lines.append(toolname)
        for entry in report[toolname]:
            lines.append('%(file)s:%(line)s %(violation)s' % entry)
        lines.append('')
    return lines
def junitize(report):
    """Convert the report into JUnit style XML.

    This allows the report to be consumed by tools that consume JUnit
    reports.

    The style used here is: each file is a <testsuite> named after the
    file; each violation is a <testcase> (always failed) whose "classname"
    is the entry's 'test_class' and whose "name" is the type of violation
    and line number, as built by make_errname().  The violation text and
    its type are attached to the <failure> child as "message" and "type".

    Returns the root <testsuites> element.
    """
    # Group by file, remembering the order in which files are first seen so
    # that the generated XML is stable from run to run.
    by_file = collections.OrderedDict()
    for violations in report.values():
        for violation in violations:
            by_file.setdefault(violation['file'], []).append(violation)

    testsuites = etree.Element("testsuites")
    for filename, file_violations in by_file.items():
        testsuite = etree.SubElement(testsuites, "testsuite")
        testsuite.attrib['name'] = filename
        for violation in file_violations:
            testcase = etree.SubElement(testsuite, "testcase")
            testcase.attrib['classname'] = violation['test_class']
            # The line number alone does not identify a violation; include
            # the violation type as the docstring above promises.
            testcase.attrib['name'] = make_errname(violation)
            failure = etree.SubElement(testcase, "failure")
            failure.attrib['message'] = violation['violation']
            failure.attrib['type'] = violation['errtype']
    return testsuites
def make_errname(violation):
    """Return ``<errtype>.<line>`` as a "test name" for this violation."""
    parts = [violation['errtype'], violation['line']]
    return '.'.join(parts)
def diff_report(options):
    """Return a unified diff of violations between merge base and HEAD.

    Unless `options.check_dirty` is false, the work tree must be clean.
    The tools are run on `options.files`, or on the files that changed
    since the merge base when none were given: first with the current ref
    checked out, then with the merge base checked out.  The current ref is
    checked out again afterwards, even if generating a report failed.
    """
    if options.check_dirty:
        assert_not_dirty()

    current_ref = get_current_ref()
    base_ref = get_merge_base()
    files = options.files or get_changed_files(base_ref, current_ref)
    logging.debug("files changed: %r", files)

    try:
        new_report = diffable(generate_all_reports(current_ref, files))
        logging.debug("new report:\n%r", new_report)
        old_report = diffable(generate_all_reports(base_ref, files))
        logging.debug("old report:\n%r", old_report)
    finally:
        # Always leave the work tree on the ref we started from.
        check_ref_out(current_ref)

    return difflib.unified_diff(old_report, new_report)
def regression_report(options):
    """Summarise added and removed violations; return 1 if any were added.

    Counts the ``+`` and ``-`` lines of diff_report(), ignoring the
    ``+++`` / ``---`` header lines, and writes the non-zero totals to
    `options.out`.
    """
    added = removed = 0
    for raw in diff_report(options):
        entry = raw.strip()
        if entry in ("+++", "---"):
            # File headers of the unified diff, not changes.
            continue
        if entry.startswith("+"):
            added += 1
        elif entry.startswith("-"):
            removed += 1

    if added:
        print >> options.out, "added %d issues" % added
    if removed:
        print >> options.out, "removed %d issues!" % removed

    if added:
        return 1
    return 0
def junit_report(options):
    """Write a JUnit XML report for `options.files` to `options.out`."""
    xml_root = junitize(generate_all_reports(files=options.files))
    serialized = etree.tostring(xml_root, pretty_print=True)
    print >> options.out, serialized
def human_report(options):
    """Write a human readable report to `options.out`.

    With `options.full` the tools run on `options.files` as given, which
    means the whole workspace when no files were named.  Otherwise they
    run on `options.files`, or on the files changed since the merge base
    when none were named.
    """
    files = options.files
    if not options.full:
        files = files or get_changed_files(get_merge_base(),
                                           get_current_ref())
        logging.debug("changed files: %r", files)

    for line in human(generate_all_reports(files=files)):
        print >> options.out, line
def parse_args(args):
    """Parse the command line `args`, set up logging, return the options.

    The log level is derived from the number of ``-v`` flags minus the
    number of ``-q`` flags.
    """
    parser = argparse.ArgumentParser(description="Report on python problems")
    parser.add_argument(
        '--dirty',
        dest='check_dirty',
        action='store_false',
        help="Skip the dirty workspace check.",
    )
    parser.add_argument(
        '-O',
        dest='out',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="Write the report to OUT instead of stdout.",
    )
    parser.add_argument(
        '--verbose', '-v',
        dest='verbosity',
        action='count',
        default=0,
        help="Show verbose reporting messages.",
    )
    parser.add_argument(
        '--quiet', '-q',
        action='count',
        default=0,
        help="Reduce verbosity",
    )
    parser.add_argument(
        '--full',
        action='store_true',
        help=("When generating a {report}, show all files, "
              "not just changed ones."),
    )
    parser.add_argument('report', choices=('junit', 'regression', 'report'))
    parser.add_argument('files', nargs='*', metavar='FILE')

    options = parser.parse_args(args)
    net_verbosity = options.verbosity - options.quiet
    set_up_logging(net_verbosity)
    logging.debug("Options: %r", options)
    return options
def set_up_logging(verbosity):
    """Configure the root logger from a verbosity number.

    -2 maps to ERROR, -1 to WARN, 0 to INFO and 1 to DEBUG; values outside
    that range are clamped to the nearest end.
    """
    levels = {
        -2: logging.ERROR,
        -1: logging.WARN,
        0: logging.INFO,
        1: logging.DEBUG,
    }
    clamped = max(min(levels), min(verbosity, max(levels)))
    logging.basicConfig(level=levels[clamped],
                        format='%(levelname)s %(message)s')
def main():
    """Parse the command line, run the chosen report, and exit.

    The process exit status is whatever the report function returns.
    """
    options = parse_args(sys.argv[1:])
    # The keys mirror the choices accepted for the 'report' argument.
    reports = {
        'regression': regression_report,
        'junit': junit_report,
        'report': human_report,
    }
    command = reports[options.report]
    assert_tools_available()
    sys.exit(command(options))


if __name__ == "__main__":
    main()