2 # The contents of this file are subject to the Common Public Attribution
3 # License Version 1.0. (the "License"); you may not use this file except in
4 # compliance with the License. You may obtain a copy of the License at
5 # http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
6 # License Version 1.1, but Sections 14 and 15 have been added to cover use of
7 # software over a computer network and provide for limited attribution for the
8 # Original Developer. In addition, Exhibit A has been modified to be consistent
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
13 # the specific language governing rights and limitations under the License.
15 # The Original Code is reddit.
17 # The Original Developer is the Initial Developer. The Initial Developer of
18 # the Original Code is reddit Inc.
20 # All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
21 # Inc. All Rights Reserved.
22 ###############################################################################
23 """Check for new style guide violations in the current branch.
25 This script is meant to be used in a CI process to ensure that new changes
26 do not violate PEP-8, PEP-257, or any of the validity checks of pyflakes.
33 import lxml
.etree
as etree
40 DEVNULL
= open("/dev/null", "w")
41 TOOLS
= collections
.OrderedDict((
42 ('pep8', ["pep8", "--repeat"]),
43 ('pep257', ["pep257"]),
44 ('pyflakes', ["pyflakes"]),
48 # Match *.py and *.pyx
49 PYFILE
= re
.compile(r
".*\.pyx?$")
52 def assert_tools_available():
53 """Check if the external binaries needed are available or exit."""
54 for tool
in TOOLS
.values():
57 subprocess
.check_call(["which", binary
], stdout
=DEVNULL
)
58 except subprocess
.CalledProcessError
:
59 logging
.error("command %r not found. please install it!", binary
)
63 def assert_not_dirty():
64 """Check if there are uncommitted changes in the repo and exit if so."""
66 subprocess
.check_call(["git", "diff",
67 "--no-ext-diff", "--quiet", "--exit-code"])
68 except subprocess
.CalledProcessError
:
69 logging
.error("you have uncommitted changes. please commit them!")
74 """Return the result of git rev-parse on the given ref."""
75 ref
= subprocess
.check_output(["git", "rev-parse", ref
])
def get_current_ref():
    """Name the current HEAD as descriptively as git allows.

    Runs ``git symbolic-ref HEAD`` and returns its stripped output with the
    first ``len("refs/heads/")`` characters sliced off.  When that command
    exits with a non-zero status, the value of ``_parse_ref("HEAD")`` is
    returned instead.
    """
    try:
        symbolic = subprocess.check_output(["git", "symbolic-ref", "HEAD"])
    except subprocess.CalledProcessError:
        # NOTE(review): presumably the detached-HEAD case -- confirm.
        return _parse_ref("HEAD")
    prefix_length = len("refs/heads/")
    return symbolic.strip()[prefix_length:]
def get_upstream_ref():
    """Resolve the ref that this topic branch is based on.

    The answer is whatever ``_parse_ref`` yields for ``master@{upstream}``.
    """
    upstream_spec = "master@{upstream}"
    return _parse_ref(upstream_spec)
94 upstream
= get_upstream_ref()
95 current
= get_current_ref()
96 output
= subprocess
.check_output(['git', 'merge-base', upstream
, current
])
101 """Return the root directory of this git project."""
102 return os
.path
.dirname(_parse_ref("--git-dir"))
105 def check_ref_out(ref
):
106 """Ask git to check out the specified ref."""
108 subprocess
.check_call(
109 ["git", "checkout", ref
],
113 except subprocess
.CalledProcessError
:
114 logging
.error("failed to check out %s", ref
)
118 def walk_workspace():
120 files
= subprocess
.check_output(['git', 'ls-files', '--full-name',
122 for filename
in files
.splitlines():
123 yield os
.path
.join(root
, filename
)
126 def select_files(files
):
128 if re
.match(PYFILE
, f
):
133 first
= f_
.readline()
134 if first
.startswith('#!') and 'python' in first
:
136 except (IOError, OSError):
137 logging
.exception("Unable to check-code against %s", f
)
140 def extract_errtype(violation
, tool
):
141 """Based on a line of `tool`'s output, return the kind of infraction.
143 Mostly relevant for pep8, which has various kinds of infractions,
144 such as E501 for line too long.
148 # E501 line too long (91 characters)
149 errtype
, sep
, message
= violation
.partition(" ")
152 elif tool
== 'pep257':
154 elif tool
== 'pyflakes':
def make_test_class(tool, filepath):
    """Build the dotted test-class name for `tool` applied to `filepath`.

    The file extension is dropped, every path separator in what remains is
    turned into an underscore, and the result is appended to the tool name
    after a single dot.
    """
    stem = os.path.splitext(filepath)[0]
    flattened = '_'.join(stem.split(os.path.sep))
    return '.'.join((tool, flattened))
165 def extract_line_info(reportline
, filepath
, tool
):
166 if tool
== 'pep257' and reportline
.startswith('Note: checks'):
168 file_info
, sep
, violation
= reportline
.partition(": ")
171 file_info
= file_info
.split(":")
172 if len(file_info
) < 2:
173 logging
.warn("I don't understand this report line: %r", reportline
)
176 line_num
= file_info
[1]
179 'test_class': make_test_class(tool
, filepath
),
181 'violation': violation
,
182 'errtype': extract_errtype(violation
, tool
),
188 def generate_report(toolname
, files
=None):
190 files
= walk_workspace()
193 for filepath
in select_files(files
):
194 command
= TOOLS
[toolname
] + [filepath
]
195 logging
.info(" ".join(command
))
196 process
= subprocess
.Popen(
198 stdout
=subprocess
.PIPE
,
199 stderr
=subprocess
.STDOUT
,
202 lines
= process
.communicate()[0].splitlines()
204 ws_filepath
= os
.path
.relpath(filepath
, ws_root
)
206 line
= extract_line_info(line
, ws_filepath
, toolname
)
212 def generate_all_reports(ref
=None, files
=None):
213 """Run the tools on the specified files and return errors / warnings."""
217 report
= collections
.OrderedDict
.fromkeys(TOOLS
.keys())
219 report
[tool
] = generate_report(tool
, files
)
224 def get_changed_files(old_ref
, new_ref
):
225 """Return a list of files that have changed from one ref to another."""
227 changed_files_text
= subprocess
.check_output(["git", "diff", "--name-only",
229 changed_files
= changed_files_text
.splitlines()
230 return [os
.path
.join(root
, x
) for x
in changed_files
]
233 def diffable(report
):
234 """Convert the report to a list of lines that are reasonably 'diffable'.
236 That is, standard diff tools should be able to identify new or fixed
237 violations by comparing results of this function
241 for toolname
, violations
in report
.iteritems():
242 updated
.append(toolname
)
243 updated
.extend('%(file)s %(violation)s' % v
for v
in violations
)
249 """Convert the report to a list of human useful lines."""
251 for toolname
, violations
in report
.iteritems():
252 updated
.append(toolname
)
253 updated
.extend('%(file)s:%(line)s %(violation)s' % v
259 def junitize(report
):
260 """Convert the report into JUnit style XML.
262 This allows the report to be consumed by tools that consume JUnit reports
264 The style used here is: each file is a <testsuite>; each violation
265 will be a <testcase> (always failed) whose "classname" shall be the tool
266 used (e.g., pep8) and "name" shall be the type of violation and line
267 number. Any additional information shall be included as the <failure>
272 for violations
in report
.itervalues():
273 for violation
in violations
:
274 file_errors
= by_file
.setdefault(violation
['file'], [])
275 file_errors
.append(violation
)
276 violations
= etree
.Element("testsuites")
277 for filename
in by_file
:
278 file_errs
= etree
.SubElement(violations
, "testsuite")
279 for violation
in by_file
[filename
]:
280 entry
= etree
.SubElement(file_errs
, "testcase")
281 entry
.attrib
['classname'] = violation
['test_class']
282 entry
.attrib
['name'] = violation
['line']
283 error_info
= etree
.SubElement(entry
, "failure")
284 error_info
.attrib
['message'] = violation
['violation']
285 error_info
.attrib
['type'] = violation
['errtype']
def make_errname(violation):
    """Create a unique "test name" for this violation.

    The name is the violation's ``errtype`` and ``line`` entries joined by
    a dot; both entries must already be strings.
    """
    parts = [violation['errtype'], violation['line']]
    return '.'.join(parts)
295 def diff_report(options
):
296 if options
.check_dirty
:
299 current_ref
= get_current_ref()
300 base_ref
= get_merge_base()
302 files
= options
.files
304 files
= get_changed_files(base_ref
, current_ref
)
305 logging
.debug("files changed: %r", files
)
308 new_report
= diffable(generate_all_reports(current_ref
, files
))
309 logging
.debug("new report:\n%r", new_report
)
310 old_report
= diffable(generate_all_reports(base_ref
, files
))
311 logging
.debug("old report:\n%r", old_report
)
313 check_ref_out(current_ref
)
315 return difflib
.unified_diff(old_report
, new_report
)
318 def regression_report(options
):
319 added
, removed
= 0, 0
320 for line
in diff_report(options
):
322 if line
== "+++" or line
== "---":
324 if line
.startswith("+"):
326 elif line
.startswith("-"):
330 print >> options
.out
, "added %d issues" % added
332 print >> options
.out
, "removed %d issues!" % removed
334 return 1 if added
else 0
def junit_report(options):
    """Print a JUnit-style XML report for ``options.files`` to ``options.out``.

    The reports from ``generate_all_reports`` are converted with
    ``junitize`` and the resulting tree is serialised pretty-printed.
    Nothing is returned.
    """
    xml_tree = junitize(generate_all_reports(files=options.files))
    rendered = etree.tostring(xml_tree, pretty_print=True)
    print >> options.out, rendered
343 def human_report(options
):
345 report
= human(generate_all_reports(files
=options
.files
))
347 files
= (options
.files
or
348 get_changed_files(get_merge_base(), get_current_ref()))
349 logging
.debug("changed files: %r", files
)
350 report
= human(generate_all_reports(files
=files
))
352 print >> options
.out
, line
355 def parse_args(args
):
356 parser
= argparse
.ArgumentParser(description
="Report on python problems")
357 parser
.add_argument('--dirty', dest
='check_dirty', action
='store_false',
358 help="Skip the dirty workspace check.")
359 parser
.add_argument('-O', dest
='out', type=argparse
.FileType('w'),
360 default
=sys
.stdout
, help="Write the report to OUT"
361 " instead of stdout.")
362 parser
.add_argument('--verbose', '-v', action
='count', dest
='verbosity',
363 help="Show verbose reporting messages.",
365 parser
.add_argument('--quiet', '-q', action
='count', default
=0,
366 help="Reduce verbosity")
367 parser
.add_argument('--full', action
='store_true', help="When generating"
368 " a {report}, show all files, not just changed ones.")
369 parser
.add_argument('report', choices
=('junit', 'regression', 'report'))
370 parser
.add_argument('files', nargs
='*', metavar
='FILE')
371 options
= parser
.parse_args(args
)
372 set_up_logging(options
.verbosity
- options
.quiet
)
373 logging
.debug("Options: %r", options
)
377 def set_up_logging(verbosity
):
378 levels
= {-2: logging
.ERROR
, -1: logging
.WARN
, 0: logging
.INFO
,
380 max_level
= max(levels
.keys())
381 min_level
= min(levels
.keys())
382 verbosity
= min(verbosity
, max_level
)
383 verbosity
= max(verbosity
, min_level
)
384 level
= levels
[verbosity
]
385 format_
= '%(levelname)s %(message)s'
386 logging
.basicConfig(level
=level
, format
=format_
)
390 options
= parse_args(sys
.argv
[1:])
391 if options
.report
== 'regression':
392 command
= regression_report
393 elif options
.report
== 'junit':
394 command
= junit_report
395 elif options
.report
== 'report':
396 command
= human_report
397 assert_tools_available()
398 sys
.exit(command(options
))
400 if __name__
== "__main__":