Change DtmfSenderHandler to handle events on the signaling thread.
[chromium-blink-merge.git] / android_webview / tools / copyright_scanner.py
blob073c100e866e4d460bde6654c4486190ebec745b
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Utilities for scanning source files to determine code authorship.
6 """
8 import itertools
def FindFiles(input_api, root_dir, start_paths_list, excluded_dirs_list):
  """Similar to UNIX utility find(1), searches for files in the directories.

  Keeps only source code files (selected by file extension) and skips
  third_party directories together with a built-in list of extra directories.

  Args:
    input_api: InputAPI, as in presubmit scripts.
    root_dir: The root directory, to which all other paths are relative.
      NOTE(review): the slicing below assumes it has no trailing separator.
    start_paths_list: The list of paths to start search from. Each path can
      be a file or a directory.
    excluded_dirs_list: The list of directories to skip.
  Returns:
    The list of source code files found, relative to |root_dir|.
  """
  # Build a new list, so the caller's list is never modified.
  excluded_dirs_list = [d for d in excluded_dirs_list if 'third_party' not in d]
  # Using a common pattern for third-parties makes the ignore list shorter.
  excluded_dirs_list.append('third_party')

  EXTRA_EXCLUDED_DIRS = [
    # VCS dirs
    '.git',
    '.svn',
    # Build output
    'out/Debug',
    'out/Release',
    # 'Copyright' appears in license agreements
    'chrome/app/resources',
    # Quickoffice js files from internal src used on buildbots.
    # crbug.com/350472.
    'chrome/browser/resources/chromeos/quickoffice',
    # This is a test output directory
    'chrome/tools/test/reference_build',
    # blink style copy right headers.
    'content/shell/renderer/test_runner',
    # blink style copy right headers.
    'content/shell/tools/plugin',
    # This is tests directory, doesn't exist in the snapshot
    'content/test/data',
    # This is a tests directory that doesn't exist in the shipped product.
    'gin/test',
    # This is a test output directory
    'data/dom_perf',
    # This is a tests directory that doesn't exist in the shipped product.
    'tools/perf/page_sets',
    'tools/perf/page_sets/tough_animation_cases',
    # Histogram tools, doesn't exist in the snapshot
    'tools/histograms',
    # Swarming tools, doesn't exist in the snapshot
    'tools/swarming_client',
    # ARM sysroot, doesn't exist in the snapshot
    'chrome/installer/linux/debian_wheezy_arm-sysroot',
    # Old location (TODO(sbc): Remove this once it no longer exists on any bots)
    'arm-sysroot',
    # Data is not part of open source chromium, but are included on some bots.
    'data',
    # This is not part of open source chromium, but are included on some bots.
    'skia/tools/clusterfuzz-data'
  ]
  excluded_dirs_list.extend(EXTRA_EXCLUDED_DIRS)

  # Every entry is wrapped in '/' on both sides, so the paths that are tested
  # against it must carry a leading '/' (and a trailing one for directories).
  dirs_blacklist = ['/' + d + '/' for d in excluded_dirs_list]

  def IsBlacklistedDir(d):
    return any(item in d for item in dirs_blacklist)

  files_whitelist_re = input_api.re.compile(
    r'\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)'
    '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?'
    '|tex|mli?)$')
  files = []

  base_path_len = len(root_dir)
  for path in start_paths_list:
    full_path = input_api.os_path.join(root_dir, path)
    if input_api.os_path.isfile(full_path):
      if files_whitelist_re.search(path) and \
          not IsBlacklistedDir(full_path[base_path_len:]):  # Keep '/' prefix.
        files.append(path)
    else:
      for dirpath, dirnames, filenames in input_api.os_walk(full_path):
        # Remove excluded subdirs in place, so the walk never descends into
        # them. The relative path keeps its '/' prefix and gets a '/' suffix;
        # without both, an entry such as '/third_party/' could never match
        # the directory 'third_party' itself.
        for item in dirnames[:]:
          rel_dir = input_api.os_path.join(dirpath, item)[base_path_len:]
          if IsBlacklistedDir(rel_dir + '/'):
            dirnames.remove(item)
        for filename in filenames:
          filepath = \
              input_api.os_path.join(dirpath, filename)[base_path_len + 1:]
          # Test with the '/' prefix restored, so that files located in a
          # top-level excluded directory are rejected as well.
          if files_whitelist_re.search(filepath) and \
              not IsBlacklistedDir('/' + filepath):
            files.append(filepath)
  return files
class _GeneratedFilesDetector(object):
  """Decides from a file header whether the file is automatically generated."""

  # Marker strings used in place of a copyright list for a file.
  GENERATED_FILE = 'GENERATED FILE'
  NO_COPYRIGHT = '*No copyright*'

  def __init__(self, input_api):
    """Compiles the regexps used by IsGeneratedFile.

    Args:
      input_api: InputAPI, as in presubmit scripts; only its |re| is used.
    """
    # Triple-quoted strings, either closed or running to the end of a line.
    self.python_multiline_string_double_re = \
      input_api.re.compile(r'"""[^"]*(?:"""|$)', flags=input_api.re.MULTILINE)
    self.python_multiline_string_single_re = \
      input_api.re.compile(r"'''[^']*(?:'''|$)", flags=input_api.re.MULTILINE)
    # All fragments are raw strings, so that '\W', '\s' and '\w' reach the
    # regexp engine without relying on unknown string escapes.
    self.automatically_generated_re = input_api.re.compile(
      r'(All changes made in this file will be lost'
      r'|DO NOT (EDIT|delete this file)'
      r'|Generated (at|automatically|data)'
      r'|Automatically generated'
      r'|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=input_api.re.IGNORECASE)

  def IsGeneratedFile(self, header):
    """Checks whether the header carries an "auto-generated" notice.

    Args:
      header: The first lines of the file, as a single string.
    Returns:
      True if a notice is found outside of triple-quoted strings, else False.
    """
    header = header.upper()
    # Text inside triple-quoted strings is dropped before the lookup.
    if '"""' in header:
      header = self.python_multiline_string_double_re.sub('', header)
    if "'''" in header:
      header = self.python_multiline_string_single_re.sub('', header)
    # First do simple strings lookup to save time.
    if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
      return True
    if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
        'GENERATED' in header:
      # Always hand back a bool, like the other branches do.
      return bool(self.automatically_generated_re.search(header))
    return False
class _CopyrightsScanner(object):
  """Extracts authorship strings from source lines, one line at a time.

  StaticInit must be called once before MatchLine is used, because the
  regexps are stored on the class.
  """

  @staticmethod
  def StaticInit(input_api):
    """Compiles the class-level regexps using |input_api.re|."""
    # Despite its name, this matches double-quoted string literals (with
    # backslash escapes); they are removed from a line before it is examined.
    _CopyrightsScanner._c_comment_re = \
      input_api.re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
    # '\xc2\xa9' is the UTF-8 byte sequence of the copyright sign.
    _CopyrightsScanner._copyright_indicator = \
      r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
    _CopyrightsScanner._full_copyright_indicator_re = input_api.re.compile(
      r'(?:\W|^)' + _CopyrightsScanner._copyright_indicator + \
      r'(?::\s*|\s+)(\w.*)$', input_api.re.IGNORECASE)
    # Words that follow an indicator when no author is being named,
    # e.g. "copyright notice".
    _CopyrightsScanner._copyright_disindicator_re = input_api.re.compile(
      r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', input_api.re.IGNORECASE)

  def __init__(self, input_api):
    self.max_line_numbers_proximity = 3
    # Line numbers of the last '(a)' and '(b)' seen in C++ comments. The
    # initial values are far apart, so they never count as "close".
    self.last_a_item_line_number = -200
    self.last_b_item_line_number = -100
    self.re = input_api.re

  def _CloseLineNumbers(self, a, b):
    """Returns True if line |a| is at or at most 3 lines after line |b|."""
    return 0 <= a - b <= self.max_line_numbers_proximity

  def MatchLine(self, line_number, line):
    """Looks for a copyright statement in a single line.

    Args:
      line_number: The number of the line within its file.
      line: The text of the line.
    Returns:
      The prettified authorship string, or None if the line has none.
    """
    if '"' in line:
      line = _CopyrightsScanner._c_comment_re.sub('', line)
    upcase_line = line.upper()
    # Record '(a)' and '(b)' last occurences in C++ comments.
    # This is to filter out '(c)' used as a list item inside C++ comments.
    # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
    cpp_comment_idx = upcase_line.find('//')
    if cpp_comment_idx != -1:
      if upcase_line.find('(A)') > cpp_comment_idx:
        self.last_a_item_line_number = line_number
      if upcase_line.find('(B)') > cpp_comment_idx:
        self.last_b_item_line_number = line_number
    # Fast bailout, uses the same patterns as _copyright_indicator regexp.
    if 'COPYRIGHT' not in upcase_line and 'COPR.' not in upcase_line \
        and '\xc2\xa9' not in upcase_line:
      c_item_index = upcase_line.find('(C)')
      if c_item_index == -1:
        return None
      if c_item_index > cpp_comment_idx and \
          self._CloseLineNumbers(line_number,
                                 self.last_b_item_line_number) and \
          self._CloseLineNumbers(self.last_b_item_line_number,
                                 self.last_a_item_line_number):
        return None
    copyr = None
    m = _CopyrightsScanner._full_copyright_indicator_re.search(line)
    if m and \
        not _CopyrightsScanner._copyright_disindicator_re.match(m.group(1)):
      copyr = m.group(0)
      # Prettify the authorship string.
      # Drop one trailing ',' or '.' and any trailing whitespace. The pattern
      # used to end with a stray '/' after '$', so it could never match.
      copyr = self.re.sub(r'[,.]?\s*$', '', copyr)
      copyr = self.re.sub(
        _CopyrightsScanner._copyright_indicator, '', copyr, \
        flags=self.re.IGNORECASE)
      copyr = self.re.sub(r'^\s+', '', copyr)
      copyr = self.re.sub(r'\s{2,}', ' ', copyr)
      copyr = self.re.sub(r'\\@', '@', copyr)
    return copyr
def FindCopyrights(input_api, root_dir, files_to_scan):
  """Determines code authorship, and finds generated files.

  Args:
    input_api: InputAPI, as in presubmit scripts.
    root_dir: The root directory, to which all other paths are relative.
    files_to_scan: The list of file names to scan.
  Returns:
    One list of copyrights per file given, in the same order. For a generated
    file the list holds the single marker _GeneratedFilesDetector.GENERATED_FILE.
    For a file without copyright info it holds the single marker
    _GeneratedFilesDetector.NO_COPYRIGHT.
  """
  detector = _GeneratedFilesDetector(input_api)
  _CopyrightsScanner.StaticInit(input_api)
  results = []
  for file_name in files_to_scan:
    # A fresh scanner per file: it keeps per-file line number state.
    scanner = _CopyrightsScanner(input_api)
    contents = input_api.ReadFile(
      input_api.os_path.join(root_dir, file_name), 'r')
    lines = contents.split('\n')
    found = []
    for number, text in enumerate(lines, 1):
      authorship = scanner.MatchLine(number, text)
      if authorship:
        found.append(authorship)
    # Only the first 25 lines are examined for a "generated file" notice.
    header = '\n'.join(lines[:25])
    if detector.IsGeneratedFile(header):
      results.append([_GeneratedFilesDetector.GENERATED_FILE])
    elif found:
      results.append(found)
    else:
      results.append([_GeneratedFilesDetector.NO_COPYRIGHT])
  return results
def FindCopyrightViolations(input_api, root_dir, files_to_scan):
  """Looks for files that do not belong exclusively to the Chromium Authors.

  Generated files and files without any copyright info are never reported.

  Args:
    input_api: InputAPI, as in presubmit scripts.
    root_dir: The root directory, to which all other paths are relative.
    files_to_scan: The list of file names to scan.
  Returns:
    The list of (normalized) file names that contain non-Chromium copyrights.
  """
  copyrights = FindCopyrights(input_api, root_dir, files_to_scan)
  offending_files = []
  allowed_copyrights_re = input_api.re.compile(
    r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
    r'All rights reserved.*)$')
  skipped_markers = (_GeneratedFilesDetector.GENERATED_FILE,
                     _GeneratedFilesDetector.NO_COPYRIGHT)
  # The builtin zip is used instead of itertools.izip, which does not exist
  # on Python 3; both pair every file with its list of copyrights.
  for f, cs in zip(files_to_scan, copyrights):
    if cs[0] in skipped_markers:
      continue
    for c in cs:
      # A single foreign copyright is enough to report the file.
      if not allowed_copyrights_re.match(c):
        offending_files.append(input_api.os_path.normpath(f))
        break
  return offending_files
def _GetWhitelistFileName(input_api):
  """Returns the relative path of the third-party files whitelist."""
  path_parts = ('android_webview', 'tools', 'third_party_files_whitelist.txt')
  return input_api.os_path.join(*path_parts)
def _ProcessWhitelistedFilesList(input_api, lines):
  """Extracts the file names from the lines of a whitelist file.

  The name is the leading run of characters up to the first whitespace or
  '#'. Lines that start with either of those yield nothing.
  """
  name_re = input_api.re.compile(r'([^#\s]+)')
  matches = (name_re.match(text) for text in lines)
  return [found.group(1) for found in matches if found]
def LoadWhitelistedFilesList(input_api):
  """Loads and parses the 3rd party code whitelist file.

  Args:
    input_api: InputAPI of presubmit scripts.
  Returns:
    The list of files.
  """
  repository_root = input_api.change.RepositoryRoot()
  whitelist_path = input_api.os_path.join(
    repository_root, _GetWhitelistFileName(input_api))
  whitelist_lines = input_api.ReadFile(whitelist_path, 'rb').splitlines()
  return _ProcessWhitelistedFilesList(input_api, whitelist_lines)
def AnalyzeScanResults(input_api, whitelisted_files, offending_files):
  """Compares whitelist contents with the results of file scanning.

  Args:
    input_api: InputAPI of presubmit scripts.
    whitelisted_files: Whitelisted files list.
    offending_files: Files that contain 3rd party code.
  Returns:
    A triplet of "unknown", "missing", and "stale" file lists.
    "Unknown" are files that contain 3rd party code but are not whitelisted.
    "Missing" are files that are whitelisted but are not existing files.
    "Stale" are files that are whitelisted unnecessarily.
  """
  whitelisted = set(whitelisted_files)
  offending = set(offending_files)
  # Built from the list, not the set, so the whitelist order is preserved.
  missing = []
  for name in whitelisted_files:
    if not input_api.os_path.isfile(name):
      missing.append(name)
  unknown = offending.difference(whitelisted)
  stale = whitelisted.difference(offending).difference(missing)
  return (list(unknown), missing, list(stale))
def _GetDeletedContents(affected_file):
  """Returns a list of all deleted lines.

  AffectedFile class from presubmit_support is lacking this functionality.
  """
  diff_lines = affected_file.GenerateScmDiff().splitlines()
  # Lines starting with a single '-' are taken (with the marker removed);
  # anything starting with '--', such as the '---' file header, is skipped.
  return [text[1:] for text in diff_lines
          if text.startswith('-') and not text.startswith('--')]
def _DoScanAtPresubmit(input_api, whitelisted_files, files_to_check):
  """Scans |files_to_check| and compares the findings with the whitelist.

  Returns:
    The triplet produced by AnalyzeScanResults.
  """
  change = input_api.change
  # The list of 'known third-party' dirs is passed empty. Since this is a
  # patch for the Chromium's src tree, it must contain properly licensed
  # Chromium code. Any third-party code must be put into a directory named
  # 'third_party', and such dirs are automatically excluded by FindFiles.
  no_extra_excluded_dirs = []
  files_to_scan = FindFiles(
    input_api, change.RepositoryRoot(), files_to_check,
    no_extra_excluded_dirs)
  violations = FindCopyrightViolations(
    input_api, change.RepositoryRoot(), files_to_scan)
  return AnalyzeScanResults(input_api, whitelisted_files, violations)
def ScanAtPresubmit(input_api, output_api):
  """Invoked at change presubmit time. Verifies that updated non third-party
  code doesn't contain external copyrighted code.

  Args:
    input_api: InputAPI of presubmit scripts.
    output_api: OutputAPI of presubmit scripts.
  Returns:
    The list of presubmit results (errors and warnings), possibly empty.
  """
  whitelist_name = _GetWhitelistFileName(input_api)
  files_to_check = set()
  deleted_files = set()
  whitelist_contents_changed = False
  for affected in input_api.AffectedFiles():
    local_path = affected.LocalPath()
    if local_path == whitelist_name:
      # The whitelist itself is never scanned; entries removed from it are
      # handled like deleted files.
      whitelist_contents_changed = True
      deleted_files.update(_ProcessWhitelistedFilesList(
        input_api, _GetDeletedContents(affected)))
    elif affected.Action() == 'D':
      deleted_files.add(local_path)
    else:
      files_to_check.add(local_path)

  whitelisted_files = set(LoadWhitelistedFilesList(input_api))
  if whitelist_contents_changed:
    # Need to re-check the entire contents of the whitelist file.
    # Also add files removed from the whitelist. If the file has indeed been
    # deleted, the scanner will not complain.
    files_to_check |= whitelisted_files | deleted_files
  else:
    # Only the whitelist entries touched by this change are of interest.
    whitelisted_files &= files_to_check | deleted_files

  unknown_files, missing_files, stale_files = _DoScanAtPresubmit(
    input_api, list(whitelisted_files), list(files_to_check))

  results = []
  if unknown_files:
    unknown_message = (
      'The following files contain a third-party license but are not in '
      'a listed third-party directory and are not whitelisted. You must '
      'add the following files to the whitelist file '
      '%s:' % whitelist_name)
    results.append(
      output_api.PresubmitError(unknown_message, sorted(unknown_files)))
  if missing_files:
    missing_message = (
      'The following files are whitelisted in %s, '
      'but do not exist or not files:' % whitelist_name)
    results.append(
      output_api.PresubmitPromptWarning(missing_message,
                                        sorted(missing_files)))
  if stale_files:
    stale_message = (
      'The following files are whitelisted unnecessarily. You must '
      'remove the following files from the whitelist file '
      '%s:' % whitelist_name)
    results.append(
      output_api.PresubmitPromptWarning(stale_message, sorted(stale_files)))
  return results