Supervised user whitelists: Cleanup
[chromium-blink-merge.git] / android_webview / tools / webview_licenses.py
blob05736f34bc7cd084f9bb92f2202706d84b13b5cd
1 #!/usr/bin/python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Checks third-party licenses for the purposes of the Android WebView build.
8 The Android tree includes a snapshot of Chromium in order to power the system
9 WebView. This tool checks that all code uses open-source licenses compatible
10 with Android, and that we meet the requirements of those licenses. It can also
11 be used to generate an Android NOTICE file for the third-party code.
13 It makes use of src/tools/licenses.py and the README.chromium files on which
14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
15 which whitelists individual files which contain third-party code but which
16 aren't in a third-party directory with a README.chromium file.
17 """
19 import glob
20 import imp
21 import json
22 import multiprocessing
23 import optparse
24 import os
25 import re
26 import sys
27 import textwrap
30 REPOSITORY_ROOT = os.path.abspath(os.path.join(
31 os.path.dirname(__file__), '..', '..'))
33 # Import third_party/PRESUBMIT.py via imp to avoid importing a random
34 # PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
35 sys.dont_write_bytecode = True
36 third_party = \
37 imp.load_source('PRESUBMIT', \
38 os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))
40 sys.path.append(os.path.join(REPOSITORY_ROOT, 'third_party'))
41 import jinja2
42 sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
43 import licenses
45 import copyright_scanner
46 import known_issues
class InputApi(object):
  """Stand-in for the presubmit InputApi object.

  Instances are handed to third_party/PRESUBMIT.py and copyright_scanner
  helpers in place of a real presubmit InputApi. Only the attributes assigned
  below are provided.
  """

  def __init__(self):
    # Repository information.
    self.change = InputApiChange()
    # File reading helper, emulating presubmit's InputApi.ReadFile.
    self.ReadFile = _ReadFile
    # Plain standard-library modules / functions.
    self.re = re
    self.os_walk = os.walk
    self.os_path = os.path
class InputApiChange(object):
  """Stand-in for the presubmit change object; it only knows the repository
  root."""

  def RepositoryRoot(self):
    """Returns REPOSITORY_ROOT."""
    return REPOSITORY_ROOT
def GetIncompatibleDirectories():
  """Collects the third-party directories whose licenses are incompatible
  with Android. This is used by the snapshot tool.

  A directory is reported when it is listed in known_issues.KNOWN_ISSUES, or
  when its metadata is not marked 'License Android Compatible: yes' and
  third_party.LicenseIsCompatibleWithAndroid rejects its license.

  Returns:
    A list of directories.
  Raises:
    licenses.LicenseError: re-raised, after printing the directory name, when
      licenses.ParseDir fails for a directory.
  """
  incompatible = []
  for directory in _FindThirdPartyDirs():
    if directory in known_issues.KNOWN_ISSUES:
      incompatible.append(directory)
      continue
    try:
      metadata = licenses.ParseDir(
          directory, REPOSITORY_ROOT,
          require_license_file=False,
          optional_keys=['License Android Compatible'])
    except licenses.LicenseError:
      print('Got LicenseError while scanning ' + directory)
      raise
    android_compatible = metadata.get('License Android Compatible', 'no')
    if android_compatible.upper() != 'YES':
      # Drop a trailing " license" / " Licenses" word before the check.
      license_name = re.split(' [Ll]icenses?$', metadata['License'])[0]
      compatible = third_party.LicenseIsCompatibleWithAndroid(
          InputApi(), license_name)
      if not compatible:
        incompatible.append(directory)
  return incompatible
def GetUnknownIncompatibleDirectories():
  """Gets the third-party directories which use licenses incompatible with
  Android and which are not covered by known_issues.KNOWN_INCOMPATIBLE.
  This is used by the AOSP bot.

  Each KNOWN_INCOMPATIBLE entry maps a path to a list of excludes. An exclude
  whose last component is a glob is reduced to its directory part; an exclude
  whose directory part itself contains a glob is reported and skipped.

  Returns:
    A frozenset of directories.
  """
  incompatible_directories = frozenset(GetIncompatibleDirectories())
  input_api = InputApi()
  known_incompatible = set()
  for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.items():
    path = copyright_scanner.ForwardSlashesToOsPathSeps(input_api, path)
    for exclude in exclude_list:
      exclude = copyright_scanner.ForwardSlashesToOsPathSeps(input_api, exclude)
      if glob.has_magic(exclude):
        exclude_dirname = os.path.dirname(exclude)
        if glob.has_magic(exclude_dirname):
          print('Exclude path %s contains an unexpected glob expression,'
                ' skipping.' % exclude)
          # Actually skip the entry, as the message promises; a path that
          # still contains a glob cannot name a real directory anyway.
          continue
        exclude = exclude_dirname
      known_incompatible.add(os.path.normpath(os.path.join(path, exclude)))
  return incompatible_directories.difference(known_incompatible)
class ScanResult(object):
  """Outcome codes returned by the scan functions and by main()."""
  Ok = 0
  Warnings = 1
  Errors = 2
# Must stay a top-level function: it is handed to multiprocessing.
def _FindCopyrightViolations(files_to_scan_as_string):
  """Runs copyright_scanner.FindCopyrightViolations on one batch of files.

  Args:
    files_to_scan_as_string: The batch of files, passed through unchanged.
  Returns:
    Whatever copyright_scanner.FindCopyrightViolations returns for the batch.
  """
  input_api = InputApi()
  return copyright_scanner.FindCopyrightViolations(
      input_api, REPOSITORY_ROOT, files_to_scan_as_string)
120 def _ShardList(l, shard_len):
121 return [l[i:i + shard_len] for i in range(0, len(l), shard_len)]
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.

  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    A (code, problem_paths) tuple. |code| is
      ScanResult.Ok if all files with non-standard license headers are
        whitelisted and the whitelist contains no stale or missing entries;
      ScanResult.Warnings if there are stale entries or whitelisted files
        that do not exist;
      ScanResult.Errors if new non-whitelisted entries found.
    |problem_paths| is the sorted list, without duplicates, of all unknown,
    missing and stale paths.
  """
  input_api = InputApi()
  files_to_scan = copyright_scanner.FindFiles(
      input_api, REPOSITORY_ROOT, ['.'], excluded_dirs_list)
  sharded_files_to_scan = _ShardList(files_to_scan, 2000)
  pool = multiprocessing.Pool()
  try:
    # NOTE(review): the very large timeout is presumably the usual workaround
    # that keeps the wait interruptible by Ctrl-C -- confirm.
    offending_files_chunks = pool.map_async(
        _FindCopyrightViolations, sharded_files_to_scan).get(999999)
  except BaseException:
    # Do not leave worker processes behind when the scan is aborted.
    pool.terminate()
    pool.join()
    raise
  pool.close()
  pool.join()
  # Flatten out the result
  offending_files = [
      item for sublist in offending_files_chunks for item in sublist]

  (unknown, missing, stale) = copyright_scanner.AnalyzeScanResults(
      input_api, whitelisted_files, offending_files)

  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n%s' %
          '\n'.join(sorted(unknown)))
  if missing:
    print('The following files are whitelisted, but do not exist.\n%s' %
          '\n'.join(sorted(missing)))
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          'remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  if unknown:
    code = ScanResult.Errors
  elif stale or missing:
    code = ScanResult.Warnings
  else:
    code = ScanResult.Ok

  problem_paths = sorted(set(unknown + missing + stale))
  return (code, problem_paths)
175 def _ReadFile(full_path, mode='rU'):
176 """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.
177 Args:
178 full_path: The path of the file to read.
179 Returns:
180 The contents of the file as a string.
183 with open(full_path, mode) as f:
184 return f.read()
def _ReadLocalFile(path, mode='rb'):
  """Returns the contents of a file inside the repository.

  Args:
    path: The path of the file to read, relative to the root of the repository.
    mode: The mode passed on to _ReadFile.
  Returns:
    The contents of the file as a string.
  """
  full_path = os.path.join(REPOSITORY_ROOT, path)
  return _ReadFile(full_path, mode)
def _FindThirdPartyDirs():
  """Looks up the third-party directories of the repository.

  Returns:
    The list of third-party directories, as produced by
    licenses.FindThirdPartyDirs and filtered by licenses.FilterDirsWithFiles.
  """
  join = os.path.join
  # Do not list paths here merely because their license files have problems:
  # pruned paths end up included in the Android WebView snapshot. Such paths
  # belong in known_issues.py instead.
  pruned = [
    # Temporary until we figure out how not to check out quickoffice on the
    # Android license check bot. Tracked in crbug.com/350472.
    join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    # Placeholder directory, no third-party code.
    join('third_party', 'adobe'),
    # Apache 2.0 license. See
    # https://code.google.com/p/chromium/issues/detail?id=140478.
    join('third_party', 'bidichecker'),
    # Isn't checked out on clients.
    join('third_party', 'gles2_conform'),
    # The llvm-build doesn't exist for non-clang builder.
    join('third_party', 'llvm-build'),
    # Binaries, doesn't apply to Android.
    join('third_party', 'widevine'),
    # third_party directories in this tree aren't actually third party, but
    # provide a way to shadow experimental buildfiles into those directories.
    join('build', 'secondary'),
    # Not shipped, Chromium code.
    join('tools', 'swarming_client'),
    # Not shipped, only relates to Chrome for Android, but not to WebView.
    'clank',
    # Bots only, is not a part of the build.
    'isolate_deps_dir',
  ]
  candidates = licenses.FindThirdPartyDirs(pruned, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(candidates, REPOSITORY_ROOT)
def _Scan():
  """Checks that license meta-data is present for all third-party code and
  that all non third-party code doesn't contain external copyrighted code.

  Returns:
    A (code, problem_paths) tuple. |code| is
      ScanResult.Ok if everything is in order;
      ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
        entries);
      ScanResult.Errors otherwise.
    |problem_paths| lists the third-party directories that failed license
    parsing followed by the paths reported by _CheckLicenseHeaders.
  """
  third_party_dirs = _FindThirdPartyDirs()

  problem_paths = []

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      # Directories listed in known_issues.KNOWN_ISSUES are allowed to fail.
      if path not in known_issues.KNOWN_ISSUES:
        print('Got LicenseError "%s" while scanning %s' % (e, path))
        problem_paths.append(path)
        all_licenses_valid = False

  # Second, check for non-standard license text.
  whitelisted_files = copyright_scanner.LoadWhitelistedFilesList(InputApi())
  licenses_check, more_problem_paths = _CheckLicenseHeaders(
      third_party_dirs, whitelisted_files)

  problem_paths.extend(more_problem_paths)

  # A directory-level license failure always turns the result into Errors.
  return (licenses_check if all_licenses_valid else ScanResult.Errors,
          problem_paths)
class TemplateEntryGenerator(object):
  """Turns license metadata dictionaries into entries for the NOTICE
  template, numbering them as it goes."""

  def __init__(self):
    self._toc_index = 0
    self._generate_licenses_file_list_only = False

  def SetGenerateLicensesFileListOnly(self, generate_licenses_file_list_only):
    """Selects list-only mode, in which license files are not read."""
    self._generate_licenses_file_list_only = generate_licenses_file_list_only

  def _ReadFileGuessEncoding(self, name):
    """Returns the decoded text of file |name|, or '' in list-only mode.

    Raises:
      UnicodeDecodeError: if the file is neither valid UTF-8 nor CP-1252.
    """
    if self._generate_licenses_file_list_only:
      return ''
    with open(name, 'rb') as input_file:
      raw = input_file.read()
    try:
      text = raw.decode('utf8')
    except UnicodeDecodeError:
      # If it's not UTF-8, it must be CP-1252. Fail otherwise.
      text = raw.decode('cp1252')
    return text

  def MetadataToTemplateEntry(self, metadata):
    """Builds the template entry for one metadata dictionary.

    Args:
      metadata: A dictionary with 'Name', 'URL' and 'License File' keys.
    Returns:
      A dictionary with 'name', 'url', 'license_file', 'license' and
      'toc_href' keys.
    """
    self._toc_index += 1
    entry = {}
    entry['name'] = metadata['Name']
    entry['url'] = metadata['URL']
    entry['license_file'] = metadata['License File']
    entry['license'] = self._ReadFileGuessEncoding(entry['license_file'])
    entry['toc_href'] = 'entry%d' % self._toc_index
    return entry
def GenerateNoticeFile(generate_licenses_file_list_only=False):
  """Generates the contents of an Android NOTICE file for the third-party code.
  This is used by the snapshot tool.

  Args:
    generate_licenses_file_list_only: When true, license files are not read
      and only their paths are returned.
  Returns:
    The contents of the NOTICE file, encoded as UTF-8; or, in list-only mode,
    the list of license file paths (one per entry).
  """
  generator = TemplateEntryGenerator()
  generator.SetGenerateLicensesFileListOnly(generate_licenses_file_list_only)

  # Chromium's own LICENSE file always comes first.
  chromium_entry = generator.MetadataToTemplateEntry({
    'Name': 'The Chromium Project',
    'URL': 'http://www.chromium.org',
    'License File': os.path.join(REPOSITORY_ROOT, 'LICENSE'),
  })
  entries = [chromium_entry]

  # Attribution is provided for all third-party directories.
  # TODO(mnaganov): Limit this to only code used by the WebView binary.
  for directory in sorted(_FindThirdPartyDirs()):
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False)
    except licenses.LicenseError:
      # This code is called during project files generation, which must not
      # be broken by a bad directory. Release WebView apks are assumed to be
      # built from checkouts that pass the 'webview_licenses.py scan' check,
      # so they don't contain projects with non-compatible licenses.
      continue
    license_file = metadata['License File']
    if not license_file or license_file == licenses.NOT_SHIPPED:
      continue
    entries.append(generator.MetadataToTemplateEntry(metadata))

  if generate_licenses_file_list_only:
    return [entry['license_file'] for entry in entries]

  env = jinja2.Environment(
      loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
      extensions=['jinja2.ext.autoescape'])
  template = env.get_template('licenses_notice.tmpl')
  rendered = template.render({ 'entries': entries })
  return rendered.encode('utf8')
def _ProcessIncompatibleResult(incompatible_directories):
  """Reports incompatibly licensed directories, if any.

  Args:
    incompatible_directories: A collection of directory paths.
  Returns:
    ScanResult.Errors (after printing the sorted paths) if the collection is
    not empty; ScanResult.Ok otherwise.
  """
  if not incompatible_directories:
    return ScanResult.Ok
  listing = "\n".join(sorted(incompatible_directories))
  print("Incompatibly licensed directories found:\n" + listing)
  return ScanResult.Errors
355 def main():
356 class FormatterWithNewLines(optparse.IndentedHelpFormatter):
357 def format_description(self, description):
358 paras = description.split('\n')
359 formatted_paras = [textwrap.fill(para, self.width) for para in paras]
360 return '\n'.join(formatted_paras) + '\n'
362 parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
363 usage='%prog [options]')
364 parser.add_option('--json', help='Path to JSON output file')
365 parser.description = (__doc__ +
366 '\nCommands:\n'
367 ' scan Check licenses.\n'
368 ' notice_deps Generate the list of dependencies for '
369 'Android NOTICE file.\n'
370 ' notice [file] Generate Android NOTICE file on '
371 'stdout or into |file|.\n'
372 ' incompatible_directories Scan for incompatibly'
373 ' licensed directories.\n'
374 ' all_incompatible_directories Scan for incompatibly'
375 ' licensed directories (even those in'
376 ' known_issues.py).\n'
377 ' display_copyrights Display autorship on the files'
378 ' using names provided via stdin.\n')
379 (options, args) = parser.parse_args()
380 if len(args) < 1:
381 parser.print_help()
382 return ScanResult.Errors
384 if args[0] == 'scan':
385 scan_result, problem_paths = _Scan()
386 if scan_result == ScanResult.Ok:
387 print 'OK!'
388 if options.json:
389 with open(options.json, 'w') as f:
390 json.dump(problem_paths, f)
391 return scan_result
392 elif args[0] == 'notice_deps':
393 # 'set' is used to eliminate duplicate references to the same license file.
394 print ' '.join(
395 sorted(set(GenerateNoticeFile(generate_licenses_file_list_only=True))))
396 return ScanResult.Ok
397 elif args[0] == 'notice':
398 notice_file_contents = GenerateNoticeFile()
399 if len(args) == 1:
400 print notice_file_contents
401 else:
402 with open(args[1], 'w') as output_file:
403 output_file.write(notice_file_contents)
404 return ScanResult.Ok
405 elif args[0] == 'incompatible_directories':
406 return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
407 elif args[0] == 'all_incompatible_directories':
408 return _ProcessIncompatibleResult(GetIncompatibleDirectories())
409 elif args[0] == 'display_copyrights':
410 files = sys.stdin.read().splitlines()
411 for f, c in \
412 zip(files, copyright_scanner.FindCopyrights(InputApi(), '.', files)):
413 print f, '\t', ' / '.join(sorted(c))
414 return ScanResult.Ok
415 parser.print_help()
416 return ScanResult.Errors
418 if __name__ == '__main__':
419 sys.exit(main())