Roll src/third_party/WebKit a452221:9ff6d11 (svn 202117:202119)
[chromium-blink-merge.git] / android_webview / tools / webview_licenses.py
blob87e55fc6419662c5f9cf4614379e8c2f95134419
1 #!/usr/bin/python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Checks third-party licenses for the purposes of the Android WebView build.
8 The Android tree includes a snapshot of Chromium in order to power the system
9 WebView. This tool checks that all code uses open-source licenses compatible
10 with Android, and that we meet the requirements of those licenses. It can also
11 be used to generate an Android NOTICE file for the third-party code.
13 It makes use of src/tools/licenses.py and the README.chromium files on which
14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
15 which whitelists individual files which contain third-party code but which
16 aren't in a third-party directory with a README.chromium file.
17 """
19 import imp
20 import json
21 import multiprocessing
22 import optparse
23 import os
24 import re
25 import sys
26 import textwrap
# Absolute path of the repository root: two directory levels above the
# directory that contains this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..'))

# Load third_party/PRESUBMIT.py by explicit path via imp so that an unrelated
# PRESUBMIT.py reachable from $PATH can never be imported by accident. Bytecode
# writing is turned off beforehand so that no .pyc file gets generated.
sys.dont_write_bytecode = True
third_party = imp.load_source(
    'PRESUBMIT',
    os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))

# third_party/ and tools/ are appended to sys.path immediately before the
# imports that presumably rely on them.
sys.path.append(os.path.join(REPOSITORY_ROOT, 'third_party'))
import jinja2
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
from copyright_scanner import copyright_scanner
import licenses
class InputApi(object):
  """Minimal stand-in for the presubmit InputApi object.

  Exposes the attributes this script hands to copyright_scanner: the os.path
  and re modules, os.walk, a file reader and a change object.
  """

  def __init__(self):
    self.change = InputApiChange()
    self.ReadFile = _ReadFile
    self.re = re
    self.os_walk = os.walk
    self.os_path = os.path
class InputApiChange(object):
  """Stand-in for the presubmit change object; provides RepositoryRoot()."""

  def __init__(self):
    # Stored as a plain callable on the instance. REPOSITORY_ROOT is looked up
    # when the callable is invoked, not when the instance is created.
    def _repository_root():
      return REPOSITORY_ROOT
    self.RepositoryRoot = _repository_root
class ScanResult(object):
  """Outcome codes of a scan; main() returns them as the exit status."""
  Ok = 0
  Warnings = 1
  Errors = 2
# Kept at module level because multiprocessing needs a top-level function.
def _FindCopyrightViolations(files_to_scan_as_string):
  """Runs the copyright scanner over one batch of files.

  Args:
    files_to_scan_as_string: The batch of files to scan; handed unchanged to
      copyright_scanner.FindCopyrightViolations.
  Returns:
    Whatever copyright_scanner.FindCopyrightViolations returns for the batch.
  """
  scanner_api = InputApi()
  return copyright_scanner.FindCopyrightViolations(
      scanner_api, REPOSITORY_ROOT, files_to_scan_as_string)
66 def _ShardList(l, shard_len):
67 return [l[i:i + shard_len] for i in range(0, len(l), shard_len)]
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that files with non-standard license headers are whitelisted.

  Scans the repository, skipping the excluded directories, for files flagged
  by the copyright scanner and compares the findings with the whitelist. A
  message is printed for each category of problem that is found.
  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    A (code, problem_paths) tuple. code is
    ScanResult.Errors if flagged files are missing from the whitelist;
    ScanResult.Warnings if the whitelist has stale or non-existent entries;
    ScanResult.Ok otherwise.
    problem_paths is the sorted, de-duplicated list of all reported paths.
  """
  api = InputApi()
  candidate_files = copyright_scanner.FindFiles(
      api, REPOSITORY_ROOT, ['.'], excluded_dirs_list)
  shards = _ShardList(candidate_files, 2000)

  # One scanner call per shard, spread over a process pool. The results are
  # collected through map_async().get() with a (very long) timeout.
  workers = multiprocessing.Pool()
  pending = workers.map_async(_FindCopyrightViolations, shards)
  violations_per_shard = pending.get(999999)
  workers.close()
  workers.join()

  # Merge the per-shard results into one flat list.
  violations = []
  for shard_violations in violations_per_shard:
    violations.extend(shard_violations)

  (unknown, missing, stale) = copyright_scanner.AnalyzeScanResults(
      api, whitelisted_files, violations)

  if unknown:
    unknown_listing = '\n'.join(sorted(unknown))
    print(
        'The following files contain a third-party license but are not in '
        'a listed third-party directory and are not whitelisted. You must '
        'add the following files to the whitelist.\n'
        '(Note that if the code you are adding does not actually contain '
        'any third-party code, it may contain the word "copyright", which '
        'should be masked out, e.g. by writing it as "copy-right")\n%s' %
        unknown_listing)
  if missing:
    missing_listing = '\n'.join(sorted(missing))
    print(
        'The following files are whitelisted, but do not exist.\n%s' %
        missing_listing)
  if stale:
    stale_listing = '\n'.join(sorted(stale))
    print(
        'The following files are whitelisted unnecessarily. You must '
        'remove the following files from the whitelist.\n%s' %
        stale_listing)

  # Unknown files are fatal; whitelist hygiene problems are only warnings.
  code = ScanResult.Ok
  if unknown:
    code = ScanResult.Errors
  elif missing or stale:
    code = ScanResult.Warnings

  return (code, sorted(set(unknown + missing + stale)))
124 def _ReadFile(full_path, mode='rU'):
125 """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.
126 Args:
127 full_path: The path of the file to read.
128 Returns:
129 The contents of the file as a string.
132 with open(full_path, mode) as f:
133 return f.read()
def _Scan():
  """Runs the full license scan over the repository.

  Checks that license meta-data is present for all third-party code and that
  all non third-party code doesn't contain external copyrighted code.
  Returns:
    A (result, problem_paths) tuple, where result is
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
    entries);
    ScanResult.Errors otherwise;
    and problem_paths is the list of paths that were reported.
  """
  third_party_dirs = licenses.FindThirdPartyDirsWithFiles(REPOSITORY_ROOT)

  # Step 1: validate the designated third-party directories with
  # src/tools/licenses.py, recording every directory it rejects.
  problem_paths = []
  metadata_ok = True
  for directory in sorted(third_party_dirs):
    try:
      licenses.ParseDir(directory, REPOSITORY_ROOT)
    except licenses.LicenseError as error:
      print('Got LicenseError "%s" while scanning %s' % (error, directory))
      problem_paths.append(directory)
      metadata_ok = False

  # Step 2: look for non-standard license text outside those directories.
  whitelist = copyright_scanner.LoadWhitelistedFilesList(InputApi())
  headers_result, header_problem_paths = _CheckLicenseHeaders(
      third_party_dirs, whitelist)
  problem_paths.extend(header_problem_paths)

  # A metadata failure is always an error, whatever the header check found.
  if not metadata_ok:
    return (ScanResult.Errors, problem_paths)
  return (headers_result, problem_paths)
class TemplateEntryGenerator(object):
  """Converts license metadata into entries for the NOTICE template.

  Each generated entry receives a sequential 'toc_href' identifier.
  """

  def __init__(self):
    self._toc_index = 0
    self._generate_licenses_file_list_only = False

  def SetGenerateLicensesFileListOnly(self, generate_licenses_file_list_only):
    """Selects list-only mode, in which license files are not read."""
    self._generate_licenses_file_list_only = generate_licenses_file_list_only

  def _ReadFileGuessEncoding(self, name):
    """Returns the decoded text of the file |name|.

    In list-only mode the file is not opened and '' is returned. Otherwise the
    bytes are decoded as UTF-8, falling back to CP-1252.
    Raises:
      UnicodeDecodeError: if the contents are valid in neither encoding.
    """
    if self._generate_licenses_file_list_only:
      return ''
    with open(name, 'rb') as license_file:
      raw = license_file.read()
    try:
      text = raw.decode('utf8')
    except UnicodeDecodeError:
      text = None
    if text is None:
      # If it's not UTF-8, it must be CP-1252. Fail otherwise.
      text = raw.decode('cp1252')
    return text

  def MetadataToTemplateEntry(self, metadata):
    """Builds the template entry for one piece of license metadata.

    Args:
      metadata: A mapping with the keys 'Name', 'URL' and 'License File'.
    Returns:
      A dict with the keys 'name', 'url', 'license_file', 'license' (the
      license text, or '' in list-only mode) and 'toc_href'.
    """
    self._toc_index += 1
    name = metadata['Name']
    url = metadata['URL']
    license_path = metadata['License File']
    return {
        'name': name,
        'url': url,
        'license_file': license_path,
        'license': self._ReadFileGuessEncoding(license_path),
        'toc_href': 'entry' + str(self._toc_index),
    }
def GenerateNoticeFile(generate_licenses_file_list_only=False):
  """Builds the Android NOTICE file contents for the third-party code.

  This is used by the snapshot tool.
  Args:
    generate_licenses_file_list_only: If true, license texts are not read and
      the list of license file paths is returned instead of the rendered
      NOTICE contents.
  Returns:
    The contents of the NOTICE file, encoded as UTF-8; or, in list-only mode,
    the list of license file paths, one per entry.
  """
  entry_generator = TemplateEntryGenerator()
  entry_generator.SetGenerateLicensesFileListOnly(
      generate_licenses_file_list_only)

  # Chromium's own LICENSE file always comes first.
  chromium_entry = entry_generator.MetadataToTemplateEntry({
      'Name': 'The Chromium Project',
      'URL': 'http://www.chromium.org',
      'License File': os.path.join(REPOSITORY_ROOT, 'LICENSE'),
  })
  entries = [chromium_entry]

  # We provide attribution for all third-party directories.
  # TODO(mnaganov): Limit this to only code used by the WebView binary.
  third_party_dirs = licenses.FindThirdPartyDirsWithFiles(REPOSITORY_ROOT)
  for directory in sorted(third_party_dirs):
    try:
      metadata = licenses.ParseDir(
          directory, REPOSITORY_ROOT, require_license_file=False)
    except licenses.LicenseError:
      # Since this code is called during project files generation, we don't
      # want to break it. But we assume that release WebView apks are built
      # using checkouts that pass the 'webview_licenses.py scan' check, thus
      # they don't contain projects with non-compatible licenses.
      continue
    license_file = metadata['License File']
    # Skip directories without a license file and those marked NOT_SHIPPED.
    if not license_file or license_file == licenses.NOT_SHIPPED:
      continue
    entries.append(entry_generator.MetadataToTemplateEntry(metadata))

  if generate_licenses_file_list_only:
    return [entry['license_file'] for entry in entries]

  # The template is loaded from the directory that contains this script.
  env = jinja2.Environment(
      loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
      extensions=['jinja2.ext.autoescape'])
  rendered = env.get_template('licenses_notice.tmpl').render(
      {'entries': entries})
  return rendered.encode('utf8')
def main():
  """Parses the command line and runs the requested command.

  Returns:
    A ScanResult value: the scan outcome for 'scan', ScanResult.Ok for the
    other commands, and ScanResult.Errors (after printing the help text) when
    the command is missing or not recognized.
  """
  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    """Help formatter that wraps every line of the description on its own."""
    def format_description(self, description):
      wrapped = []
      for paragraph in description.split('\n'):
        wrapped.append(textwrap.fill(paragraph, self.width))
      return '\n'.join(wrapped) + '\n'

  parser = optparse.OptionParser(
      formatter=FormatterWithNewLines(), usage='%prog [options]')
  parser.add_option('--json', help='Path to JSON output file')
  # The module docstring doubles as the first part of the help text.
  parser.description = (
      __doc__ +
      '\nCommands:\n'
      ' scan Check licenses.\n'
      ' notice_deps Generate the list of dependencies for '
      'Android NOTICE file.\n'
      ' notice [file] Generate Android NOTICE file on '
      'stdout or into |file|.\n'
      ' display_copyrights Display autorship on the files'
      ' using names provided via stdin.\n')
  (options, args) = parser.parse_args()
  if not args:
    parser.print_help()
    return ScanResult.Errors

  command = args[0]

  if command == 'scan':
    scan_result, problem_paths = _Scan()
    if scan_result == ScanResult.Ok:
      print('OK!')
    if options.json:
      with open(options.json, 'w') as json_file:
        json.dump(problem_paths, json_file)
    return scan_result

  if command == 'notice_deps':
    # 'set' is used to eliminate duplicate references to the same license file.
    license_files = GenerateNoticeFile(generate_licenses_file_list_only=True)
    print(' '.join(sorted(set(license_files))))
    return ScanResult.Ok

  if command == 'notice':
    notice_file_contents = GenerateNoticeFile()
    if len(args) > 1:
      with open(args[1], 'w') as output_file:
        output_file.write(notice_file_contents)
    else:
      print(notice_file_contents)
    return ScanResult.Ok

  if command == 'display_copyrights':
    files = sys.stdin.read().splitlines()
    copyrights = copyright_scanner.FindCopyrights(InputApi(), '.', files)
    for path, holders in zip(files, copyrights):
      # Layout: the path, one space, a tab, then the holders. The Python 2
      # print statement emits no separating space after an item that ends in
      # a tab, so the holders follow the tab directly.
      print('%s \t%s' % (path, ' / '.join(sorted(holders))))
    return ScanResult.Ok

  # Unrecognized command.
  parser.print_help()
  return ScanResult.Errors
# When run as a script, the ScanResult value returned by main() becomes the
# exit status of the process.
if __name__ == '__main__':
  sys.exit(main())