2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Checks third-party licenses for the purposes of the Android WebView build.
8 The Android tree includes a snapshot of Chromium in order to power the system
9 WebView. This tool checks that all code uses open-source licenses compatible
10 with Android, and that we meet the requirements of those licenses. It can also
11 be used to generate an Android NOTICE file for the third-party code.
13 It makes use of src/tools/licenses.py and the README.chromium files on which
14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
15 which whitelists individual files which contain third-party code but which
16 aren't in a third-party directory with a README.chromium file.
22 import multiprocessing
# Absolute path of the repository root, i.e. the directory two levels above
# the one that contains this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
33 # Import third_party/PRESUBMIT.py via imp to avoid importing a random
34 # PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
35 sys
.dont_write_bytecode
= True
37 imp
.load_source('PRESUBMIT', \
38 os
.path
.join(REPOSITORY_ROOT
, 'third_party', 'PRESUBMIT.py'))
40 sys
.path
.append(os
.path
.join(REPOSITORY_ROOT
, 'third_party'))
42 sys
.path
.append(os
.path
.join(REPOSITORY_ROOT
, 'tools'))
45 import copyright_scanner
48 class InputApi(object):
50 self
.os_path
= os
.path
51 self
.os_walk
= os
.walk
53 self
.ReadFile
= _ReadFile
54 self
.change
= InputApiChange()
56 class InputApiChange(object):
58 self
.RepositoryRoot
= lambda: REPOSITORY_ROOT
61 def GetIncompatibleDirectories():
62 """Gets a list of third-party directories which use licenses incompatible
63 with Android. This is used by the snapshot tool.
65 A list of directories.
69 for directory
in _FindThirdPartyDirs():
70 if directory
in known_issues
.KNOWN_ISSUES
:
71 result
.append(directory
)
74 metadata
= licenses
.ParseDir(directory
, REPOSITORY_ROOT
,
75 require_license_file
=False,
76 optional_keys
=['License Android Compatible'])
77 except licenses
.LicenseError
as e
:
78 print 'Got LicenseError while scanning ' + directory
80 if metadata
.get('License Android Compatible', 'no').upper() == 'YES':
82 license
= re
.split(' [Ll]icenses?$', metadata
['License'])[0]
83 if not third_party
.LicenseIsCompatibleWithAndroid(InputApi(), license
):
84 result
.append(directory
)
def GetUnknownIncompatibleDirectories():
  """Gets the third-party directories which use licenses incompatible
  with Android which are not present in the known_issues.py file.
  This is used by the AOSP bot.

  Every exclude entry of known_issues.KNOWN_INCOMPATIBLE is joined onto its
  parent path. An entry that contains a glob expression stands for its
  containing directory; if that directory part itself still contains a glob
  expression, the entry is reported and skipped.

  Returns:
    A frozenset of directories.
  """
  incompatible_directories = frozenset(GetIncompatibleDirectories())
  known_incompatible = set()
  input_api = InputApi()
  # items() instead of iteritems(): same pairs, and valid on Python 2 and 3.
  for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.items():
    path = copyright_scanner.ForwardSlashesToOsPathSeps(input_api, path)
    for exclude in exclude_list:
      exclude = copyright_scanner.ForwardSlashesToOsPathSeps(input_api, exclude)
      if glob.has_magic(exclude):
        exclude_dirname = os.path.dirname(exclude)
        if glob.has_magic(exclude_dirname):
          print('Exclude path %s contains an unexpected glob expression,'
                ' skipping.' % exclude)
          # Actually skip the entry, as the message promises, instead of
          # recording a path that still contains glob characters.
          continue
        # Only the last component was a glob: the exclusion covers the
        # directory that contains the matched files.
        exclude = exclude_dirname
      known_incompatible.add(os.path.normpath(os.path.join(path, exclude)))
  return incompatible_directories.difference(known_incompatible)
class ScanResult(object):
  """Integer status codes returned by the scanning functions in this file."""
  Ok = 0
  Warnings = 1
  Errors = 2
# Has to live at module level (not nested or bound) so that multiprocessing
# can use it as a worker function.
def _FindCopyrightViolations(files_to_scan_as_string):
  """Runs copyright_scanner.FindCopyrightViolations on one batch of files.

  Args:
    files_to_scan_as_string: The files to scan; handed unchanged to
        copyright_scanner.FindCopyrightViolations.

  Returns:
    Whatever copyright_scanner.FindCopyrightViolations returns for the batch.
  """
  input_api = InputApi()
  return copyright_scanner.FindCopyrightViolations(
      input_api, REPOSITORY_ROOT, files_to_scan_as_string)
def _ShardList(l, shard_len):
  """Splits a sequence into consecutive slices of at most shard_len items.

  Args:
    l: The sequence to split; it must support len() and slicing.
    shard_len: The number of items per slice. The last slice may be shorter.

  Returns:
    A list of slices of l, in order. Empty if l is empty.
  """
  shards = []
  for start in range(0, len(l), shard_len):
    shards.append(l[start:start + shard_len])
  return shards
123 def _CheckLicenseHeaders(excluded_dirs_list
, whitelisted_files
):
124 """Checks that all files which are not in a listed third-party directory,
125 and which do not use the standard Chromium license, are whitelisted.
127 excluded_dirs_list: The list of directories to exclude from scanning.
128 whitelisted_files: The whitelist of files.
130 ScanResult.Ok if all files with non-standard license headers are whitelisted
131 and the whitelist contains no stale entries;
132 ScanResult.Warnings if there are stale entries;
133 ScanResult.Errors if new non-whitelisted entries found.
135 input_api
= InputApi()
136 files_to_scan
= copyright_scanner
.FindFiles(
137 input_api
, REPOSITORY_ROOT
, ['.'], excluded_dirs_list
)
138 sharded_files_to_scan
= _ShardList(files_to_scan
, 2000)
139 pool
= multiprocessing
.Pool()
140 offending_files_chunks
= pool
.map_async(
141 _FindCopyrightViolations
, sharded_files_to_scan
).get(999999)
144 # Flatten out the result
146 [item
for sublist
in offending_files_chunks
for item
in sublist
]
148 (unknown
, missing
, stale
) = copyright_scanner
.AnalyzeScanResults(
149 input_api
, whitelisted_files
, offending_files
)
152 print 'The following files contain a third-party license but are not in ' \
153 'a listed third-party directory and are not whitelisted. You must ' \
154 'add the following files to the whitelist.\n%s' % \
155 '\n'.join(sorted(unknown
))
157 print 'The following files are whitelisted, but do not exist.\n%s' % \
158 '\n'.join(sorted(missing
))
160 print 'The following files are whitelisted unnecessarily. You must ' \
161 'remove the following files from the whitelist.\n%s' % \
162 '\n'.join(sorted(stale
))
165 code
= ScanResult
.Errors
166 elif stale
or missing
:
167 code
= ScanResult
.Warnings
171 problem_paths
= sorted(set(unknown
+ missing
+ stale
))
172 return (code
, problem_paths
)
175 def _ReadFile(full_path
, mode
='rU'):
176 """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.
178 full_path: The path of the file to read.
180 The contents of the file as a string.
183 with
open(full_path
, mode
) as f
:
def _ReadLocalFile(path, mode='rb'):
  """Reads a file that is addressed relative to the repository root.

  Args:
    path: The path of the file to read, relative to the root of the repository.
    mode: The mode string forwarded to _ReadFile; defaults to 'rb'.

  Returns:
    The contents of the file as a string.
  """
  full_path = os.path.join(REPOSITORY_ROOT, path)
  return _ReadFile(full_path, mode)
def _FindThirdPartyDirs():
  """Gets the list of third-party directories.

  Returns:
    The result of licenses.FindThirdPartyDirs for the repository, called with
    the prune paths listed below, after filtering through
    licenses.FilterDirsWithFiles.
  """
  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  pruned_path_components = (
      # Temporary until we figure out how not to check out quickoffice on the
      # Android license check bot. Tracked in crbug.com/350472.
      ('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
      # Placeholder directory, no third-party code.
      ('third_party', 'adobe'),
      # Apache 2.0 license. See
      # https://code.google.com/p/chromium/issues/detail?id=140478.
      ('third_party', 'bidichecker'),
      # Isn't checked out on clients.
      ('third_party', 'gles2_conform'),
      # The llvm-build directory doesn't exist for non-clang builders.
      ('third_party', 'llvm-build'),
      # Binaries; not applicable to Android.
      ('third_party', 'widevine'),
      # third_party directories in this tree aren't actually third party, but
      # provide a way to shadow experimental buildfiles into those directories.
      ('build', 'secondary'),
      # Not shipped, Chromium code.
      ('tools', 'swarming_client'),
      # Not shipped, only relates to Chrome for Android, but not to WebView.
      ('clank',),
      # Bots only, is not a part of the build.
      ('isolate_deps_dir',),
  )
  prune_paths = [os.path.join(*parts) for parts in pruned_path_components]
  candidate_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(candidate_dirs, REPOSITORY_ROOT)
237 """Checks that license meta-data is present for all third-party code and
238 that all non third-party code doesn't contain external copyrighted code.
240 ScanResult.Ok if everything is in order;
241 ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
243 ScanResult.Errors otherwise.
246 third_party_dirs
= _FindThirdPartyDirs()
250 # First, check designated third-party directories using src/tools/licenses.py.
251 all_licenses_valid
= True
252 for path
in sorted(third_party_dirs
):
254 licenses
.ParseDir(path
, REPOSITORY_ROOT
)
255 except licenses
.LicenseError
, e
:
256 if not (path
in known_issues
.KNOWN_ISSUES
):
257 print 'Got LicenseError "%s" while scanning %s' % (e
, path
)
258 problem_paths
.append(path
)
259 all_licenses_valid
= False
261 # Second, check for non-standard license text.
262 whitelisted_files
= copyright_scanner
.LoadWhitelistedFilesList(InputApi())
263 licenses_check
, more_problem_paths
= _CheckLicenseHeaders(
264 third_party_dirs
, whitelisted_files
)
266 problem_paths
.extend(more_problem_paths
)
268 return (licenses_check
if all_licenses_valid
else ScanResult
.Errors
,
272 class TemplateEntryGenerator(object):
274 self
._generate
_licenses
_file
_list
_only
= False
277 def SetGenerateLicensesFileListOnly(self
, generate_licenses_file_list_only
):
278 self
._generate
_licenses
_file
_list
_only
= generate_licenses_file_list_only
280 def _ReadFileGuessEncoding(self
, name
):
281 if self
._generate
_licenses
_file
_list
_only
:
284 with
open(name
, 'rb') as input_file
:
285 contents
= input_file
.read()
287 return contents
.decode('utf8')
288 except UnicodeDecodeError:
290 # If it's not UTF-8, it must be CP-1252. Fail otherwise.
291 return contents
.decode('cp1252')
293 def MetadataToTemplateEntry(self
, metadata
):
296 'name': metadata
['Name'],
297 'url': metadata
['URL'],
298 'license_file': metadata
['License File'],
299 'license': self
._ReadFileGuessEncoding
(metadata
['License File']),
300 'toc_href': 'entry' + str(self
._toc
_index
),
304 def GenerateNoticeFile(generate_licenses_file_list_only
=False):
305 """Generates the contents of an Android NOTICE file for the third-party code.
306 This is used by the snapshot tool.
308 The contents of the NOTICE file.
311 generator
= TemplateEntryGenerator()
312 generator
.SetGenerateLicensesFileListOnly(generate_licenses_file_list_only
)
313 # Start from Chromium's LICENSE file
314 entries
= [generator
.MetadataToTemplateEntry({
315 'Name': 'The Chromium Project',
316 'URL': 'http://www.chromium.org',
317 'License File': os
.path
.join(REPOSITORY_ROOT
, 'LICENSE') })
320 third_party_dirs
= _FindThirdPartyDirs()
321 # We provide attribution for all third-party directories.
322 # TODO(mnaganov): Limit this to only code used by the WebView binary.
323 for directory
in sorted(third_party_dirs
):
325 metadata
= licenses
.ParseDir(directory
, REPOSITORY_ROOT
,
326 require_license_file
=False)
327 except licenses
.LicenseError
:
328 # Since this code is called during project files generation,
329 # we don't want to break it. But we assume that release
330 # WebView apks are built using checkouts that pass
331 # 'webview_licenses.py scan' check, thus they don't contain
332 # projects with non-compatible licenses.
334 license_file
= metadata
['License File']
335 if license_file
and license_file
!= licenses
.NOT_SHIPPED
:
336 entries
.append(generator
.MetadataToTemplateEntry(metadata
))
338 if generate_licenses_file_list_only
:
339 return [entry
['license_file'] for entry
in entries
]
341 env
= jinja2
.Environment(
342 loader
=jinja2
.FileSystemLoader(os
.path
.dirname(__file__
)),
343 extensions
=['jinja2.ext.autoescape'])
344 template
= env
.get_template('licenses_notice.tmpl')
345 return template
.render({ 'entries': entries
}).encode('utf8')
348 def _ProcessIncompatibleResult(incompatible_directories
):
349 if incompatible_directories
:
350 print ("Incompatibly licensed directories found:\n" +
351 "\n".join(sorted(incompatible_directories
)))
352 return ScanResult
.Errors
356 class FormatterWithNewLines(optparse
.IndentedHelpFormatter
):
357 def format_description(self
, description
):
358 paras
= description
.split('\n')
359 formatted_paras
= [textwrap
.fill(para
, self
.width
) for para
in paras
]
360 return '\n'.join(formatted_paras
) + '\n'
362 parser
= optparse
.OptionParser(formatter
=FormatterWithNewLines(),
363 usage
='%prog [options]')
364 parser
.add_option('--json', help='Path to JSON output file')
365 parser
.description
= (__doc__
+
367 ' scan Check licenses.\n'
368 ' notice_deps Generate the list of dependencies for '
369 'Android NOTICE file.\n'
370 ' notice [file] Generate Android NOTICE file on '
371 'stdout or into |file|.\n'
372 ' incompatible_directories Scan for incompatibly'
373 ' licensed directories.\n'
374 ' all_incompatible_directories Scan for incompatibly'
375 ' licensed directories (even those in'
376 ' known_issues.py).\n'
377 ' display_copyrights Display autorship on the files'
378 ' using names provided via stdin.\n')
379 (options
, args
) = parser
.parse_args()
382 return ScanResult
.Errors
384 if args
[0] == 'scan':
385 scan_result
, problem_paths
= _Scan()
386 if scan_result
== ScanResult
.Ok
:
389 with
open(options
.json
, 'w') as f
:
390 json
.dump(problem_paths
, f
)
392 elif args
[0] == 'notice_deps':
393 # 'set' is used to eliminate duplicate references to the same license file.
395 sorted(set(GenerateNoticeFile(generate_licenses_file_list_only
=True))))
397 elif args
[0] == 'notice':
398 notice_file_contents
= GenerateNoticeFile()
400 print notice_file_contents
402 with
open(args
[1], 'w') as output_file
:
403 output_file
.write(notice_file_contents
)
405 elif args
[0] == 'incompatible_directories':
406 return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
407 elif args
[0] == 'all_incompatible_directories':
408 return _ProcessIncompatibleResult(GetIncompatibleDirectories())
409 elif args
[0] == 'display_copyrights':
410 files
= sys
.stdin
.read().splitlines()
412 zip(files
, copyright_scanner
.FindCopyrights(InputApi(), '.', files
)):
413 print f
, '\t', ' / '.join(sorted(c
))
416 return ScanResult
.Errors
418 if __name__
== '__main__':