2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Checks third-party licenses for the purposes of the Android WebView build.
8 The Android tree includes a snapshot of Chromium in order to power the system
9 WebView. This tool checks that all code uses open-source licenses compatible
10 with Android, and that we meet the requirements of those licenses. It can also
11 be used to generate an Android NOTICE file for the third-party code.
13 It makes use of src/tools/licenses.py and the README.chromium files on which
14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists individual files which contain third-party code but which
16 aren't in a third-party directory with a README.chromium file.
21 import multiprocessing
# Absolute path of the checkout root: two directory levels above this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
32 # Import third_party/PRESUBMIT.py via imp to avoid importing a random
33 # PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
34 sys
.dont_write_bytecode
= True
36 imp
.load_source('PRESUBMIT', \
37 os
.path
.join(REPOSITORY_ROOT
, 'third_party', 'PRESUBMIT.py'))
39 sys
.path
.append(os
.path
.join(REPOSITORY_ROOT
, 'third_party'))
41 sys
.path
.append(os
.path
.join(REPOSITORY_ROOT
, 'tools'))
42 from copyright_scanner
import copyright_scanner
class InputApi(object):
  """Minimal stand-in for the presubmit InputApi given to copyright_scanner.

  Only the attributes set below are provided.
  """

  def __init__(self):
    # NOTE(review): confirm this covers every attribute that copyright_scanner
    # reads from its input_api argument.
    # Filesystem helpers, exposed under the names used on the presubmit API.
    self.os_path = os.path
    self.os_walk = os.walk
    # File reader emulating presubmit's InputApi.ReadFile.
    self.ReadFile = _ReadFile
    # Change object; it only knows how to report the repository root.
    self.change = InputApiChange()
class InputApiChange(object):
  """Stand-in for the presubmit change object; exposes RepositoryRoot() only."""

  def RepositoryRoot(self):
    """Returns the absolute path of the repository root."""
    return REPOSITORY_ROOT
class ScanResult(object):
  """Outcome codes of a license scan, ordered by increasing severity."""
  Ok = 0
  Warnings = 1
  Errors = 2
# Needs to be a top-level function for multiprocessing
def _FindCopyrightViolations(files_to_scan_as_string):
  """Runs the copyright scanner over one shard of files.

  Args:
    files_to_scan_as_string: One shard of the file list, as produced by
      _ShardList from the output of copyright_scanner.FindFiles.
  Returns:
    Whatever copyright_scanner.FindCopyrightViolations returns for the shard.
  """
  input_api = InputApi()
  return copyright_scanner.FindCopyrightViolations(
      input_api, REPOSITORY_ROOT, files_to_scan_as_string)
66 def _ShardList(l
, shard_len
):
67 return [l
[i
:i
+ shard_len
] for i
in range(0, len(l
), shard_len
)]
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.

  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    A (code, problem_paths) tuple, where code is
    ScanResult.Ok if all files with non-standard license headers are whitelisted
    and the whitelist contains no stale entries;
    ScanResult.Warnings if there are stale or missing entries;
    ScanResult.Errors if new non-whitelisted entries found;
    and problem_paths is the sorted, de-duplicated list of all unknown,
    missing and stale paths.
  """
  input_api = InputApi()
  files_to_scan = copyright_scanner.FindFiles(
      input_api, REPOSITORY_ROOT, ['.'], excluded_dirs_list)
  # Scan in shards of 2000 files so the work spreads over the worker pool.
  sharded_files_to_scan = _ShardList(files_to_scan, 2000)
  pool = multiprocessing.Pool()
  try:
    # NOTE(review): the very large explicit timeout is presumably there to
    # keep the parent process interruptible while it waits -- confirm.
    offending_files_chunks = pool.map_async(
        _FindCopyrightViolations, sharded_files_to_scan).get(999999)
  finally:
    # Always release the workers. On success every result has already been
    # collected; on failure we must not wait for the outstanding shards.
    pool.terminate()
    pool.join()

  # Flatten out the result
  offending_files = [
      item for sublist in offending_files_chunks for item in sublist]

  (unknown, missing, stale) = copyright_scanner.AnalyzeScanResults(
      input_api, whitelisted_files, offending_files)

  # print(...) with one parenthesized argument behaves the same under the
  # Python 2 print statement and the Python 3 print function.
  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n'
          '(Note that if the code you are adding does not actually contain '
          'any third-party code, it may contain the word "copyright", which '
          'should be masked out, e.g. by writing it as "copy-right")\n%s' %
          '\n'.join(sorted(unknown)))
  if missing:
    print('The following files are whitelisted, but do not exist.\n%s' %
          '\n'.join(sorted(missing)))
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          'remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  if unknown:
    code = ScanResult.Errors
  elif stale or missing:
    code = ScanResult.Warnings
  else:
    code = ScanResult.Ok

  problem_paths = sorted(set(unknown + missing + stale))
  return (code, problem_paths)
124 def _ReadFile(full_path
, mode
='rU'):
125 """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.
127 full_path: The path of the file to read.
129 The contents of the file as a string.
132 with
open(full_path
, mode
) as f
:
def _Scan():
  """Checks that license meta-data is present for all third-party code and
  that all non third-party code doesn't contain external copyrighted code.

  Returns:
    A (result, problem_paths) tuple, where result is
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries);
    ScanResult.Errors otherwise;
    and problem_paths lists the third-party directories that failed to parse
    followed by the paths reported by _CheckLicenseHeaders.
  """
  third_party_dirs = licenses.FindThirdPartyDirsWithFiles(REPOSITORY_ROOT)

  problem_paths = []

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      # Keep going so that every offending directory is reported in one run.
      print('Got LicenseError "%s" while scanning %s' % (e, path))
      problem_paths.append(path)
      all_licenses_valid = False

  # Second, check for non-standard license text.
  whitelisted_files = copyright_scanner.LoadWhitelistedFilesList(InputApi())
  licenses_check, more_problem_paths = _CheckLicenseHeaders(
      third_party_dirs, whitelisted_files)

  problem_paths.extend(more_problem_paths)

  # A metadata failure is always an error, whatever the header check found.
  return (licenses_check if all_licenses_valid else ScanResult.Errors,
          problem_paths)
class TemplateEntryGenerator(object):
  """Turns license metadata dicts into entries for the NOTICE template."""

  def __init__(self):
    self._generate_licenses_file_list_only = False
    # Counter used to build the 'toc_href' value of each entry.
    self._toc_index = 0

  def SetGenerateLicensesFileListOnly(self, generate_licenses_file_list_only):
    """Selects list-only mode, in which license files are not read."""
    self._generate_licenses_file_list_only = generate_licenses_file_list_only

  def _ReadFileGuessEncoding(self, name):
    """Returns the decoded text of the file |name|.

    The bytes are decoded as UTF-8, falling back to CP-1252. In list-only
    mode the file is not opened at all.
    """
    if self._generate_licenses_file_list_only:
      # NOTE(review): the value returned in list-only mode is assumed to be
      # an empty string -- confirm against the callers.
      return ''
    with open(name, 'rb') as input_file:
      raw_bytes = input_file.read()
    try:
      return raw_bytes.decode('utf8')
    except UnicodeDecodeError:
      pass
    # If it's not UTF-8, it must be CP-1252. Fail otherwise.
    return raw_bytes.decode('cp1252')

  def MetadataToTemplateEntry(self, metadata):
    """Builds one template entry from a metadata dict.

    Args:
      metadata: A dict with 'Name', 'URL' and 'License File' keys.
    Returns:
      A dict with 'name', 'url', 'license_file', 'license' and 'toc_href' keys.
    """
    # NOTE(review): assumes the index is advanced once per entry, before it
    # is used -- confirm.
    self._toc_index += 1
    license_path = metadata['License File']
    entry = {
        'name': metadata['Name'],
        'url': metadata['URL'],
        'license_file': license_path,
        'license': self._ReadFileGuessEncoding(license_path),
        'toc_href': 'entry' + str(self._toc_index),
    }
    return entry
def GenerateNoticeFile(generate_licenses_file_list_only=False):
  """Generates the contents of an Android NOTICE file for the third-party code.
  This is used by the snapshot tool.

  Args:
    generate_licenses_file_list_only: If true, skip rendering and only collect
      the license file paths.
  Returns:
    The contents of the NOTICE file, encoded as UTF-8; or, in list-only mode,
    the list of license file paths of all collected entries.
  """
  generator = TemplateEntryGenerator()
  generator.SetGenerateLicensesFileListOnly(generate_licenses_file_list_only)

  # Start from Chromium's LICENSE file
  chromium_metadata = {
      'Name': 'The Chromium Project',
      'URL': 'http://www.chromium.org',
      'License File': os.path.join(REPOSITORY_ROOT, 'LICENSE'),
  }
  entries = [generator.MetadataToTemplateEntry(chromium_metadata)]

  third_party_dirs = licenses.FindThirdPartyDirsWithFiles(REPOSITORY_ROOT)
  # We provide attribution for all third-party directories.
  # TODO(mnaganov): Limit this to only code used by the WebView binary.
  for directory in sorted(third_party_dirs):
    try:
      metadata = licenses.ParseDir(
          directory, REPOSITORY_ROOT, require_license_file=False)
    except licenses.LicenseError:
      # Since this code is called during project files generation,
      # we don't want to break it. But we assume that release
      # WebView apks are built using checkouts that pass
      # 'webview_licenses.py scan' check, thus they don't contain
      # projects with non-compatible licenses.
      continue
    license_file = metadata['License File']
    # Directories without a license file, or marked as not shipped, get no
    # attribution entry.
    if not license_file or license_file == licenses.NOT_SHIPPED:
      continue
    entries.append(generator.MetadataToTemplateEntry(metadata))

  if generate_licenses_file_list_only:
    return [entry['license_file'] for entry in entries]

  # The template lives next to this script.
  template_dir = os.path.dirname(__file__)
  env = jinja2.Environment(
      loader=jinja2.FileSystemLoader(template_dir),
      extensions=['jinja2.ext.autoescape'])
  template = env.get_template('licenses_notice.tmpl')
  rendered = template.render({ 'entries': entries })
  return rendered.encode('utf8')
class FormatterWithNewLines(optparse.IndentedHelpFormatter):
  """Help formatter that keeps the line breaks present in the description."""

  def format_description(self, description):
    """Wraps each newline-separated paragraph to self.width separately.

    Returns the wrapped paragraphs joined by newlines, with a trailing newline.
    """
    wrapped = []
    for paragraph in description.split('\n'):
      wrapped.append(textwrap.fill(paragraph, self.width))
    return '\n'.join(wrapped) + '\n'
254 parser
= optparse
.OptionParser(formatter
=FormatterWithNewLines(),
255 usage
='%prog [options]')
256 parser
.add_option('--json', help='Path to JSON output file')
257 parser
.description
= (__doc__
+
259 ' scan Check licenses.\n'
260 ' notice_deps Generate the list of dependencies for '
261 'Android NOTICE file.\n'
262 ' notice [file] Generate Android NOTICE file on '
263 'stdout or into |file|.\n'
264 ' display_copyrights Display autorship on the files'
265 ' using names provided via stdin.\n')
266 (options
, args
) = parser
.parse_args()
269 return ScanResult
.Errors
271 if args
[0] == 'scan':
272 scan_result
, problem_paths
= _Scan()
273 if scan_result
== ScanResult
.Ok
:
276 with
open(options
.json
, 'w') as f
:
277 json
.dump(problem_paths
, f
)
279 elif args
[0] == 'notice_deps':
280 # 'set' is used to eliminate duplicate references to the same license file.
282 sorted(set(GenerateNoticeFile(generate_licenses_file_list_only
=True))))
284 elif args
[0] == 'notice':
285 notice_file_contents
= GenerateNoticeFile()
287 print notice_file_contents
289 with
open(args
[1], 'w') as output_file
:
290 output_file
.write(notice_file_contents
)
292 elif args
[0] == 'display_copyrights':
293 files
= sys
.stdin
.read().splitlines()
295 zip(files
, copyright_scanner
.FindCopyrights(InputApi(), '.', files
)):
296 print f
, '\t', ' / '.join(sorted(c
))
299 return ScanResult
.Errors
301 if __name__
== '__main__':