Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / tools / licenses.py
blob8790c8db2f43cea32c143b7f3d0a978acbaa39ef
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Utility for checking and processing licensing information in third_party
7 directories.
9 Usage: licenses.py <command>
11 Commands:
12 scan scan third_party directories, verifying that we have licensing info
13 credits generate about:credits on stdout
15 (You can also import this as a module.)
16 """
18 import cgi
19 import os
20 import sys
# Paths from the root of the tree to directories to skip.
PRUNE_PATHS = set([
    # Same module occurs in crypto/third_party/nss and net/third_party/nss, so
    # skip this one.
    os.path.join('third_party', 'nss'),

    # Placeholder directory only, not third-party code.
    os.path.join('third_party', 'adobe'),

    # Apache 2.0 license. See crbug.com/140478
    os.path.join('third_party', 'bidichecker'),

    # Build files only, not third-party code.
    os.path.join('third_party', 'widevine'),

    # Only binaries, used during development.
    os.path.join('third_party', 'valgrind'),

    # Used for development and test, not in the shipping product.
    os.path.join('build', 'secondary'),
    os.path.join('third_party', 'bison'),
    os.path.join('third_party', 'blanketjs'),
    os.path.join('third_party', 'cygwin'),
    os.path.join('third_party', 'gles2_conform'),
    os.path.join('third_party', 'gnu_binutils'),
    os.path.join('third_party', 'gold'),
    os.path.join('third_party', 'gperf'),
    os.path.join('third_party', 'lighttpd'),
    os.path.join('third_party', 'llvm'),
    os.path.join('third_party', 'llvm-build'),
    os.path.join('third_party', 'mingw-w64'),
    os.path.join('third_party', 'nacl_sdk_binaries'),
    os.path.join('third_party', 'pefile'),
    os.path.join('third_party', 'perl'),
    os.path.join('third_party', 'psyco_win32'),
    os.path.join('third_party', 'pylib'),
    os.path.join('third_party', 'pywebsocket'),
    os.path.join('third_party', 'qunit'),
    os.path.join('third_party', 'sinonjs'),
    os.path.join('third_party', 'syzygy'),
    os.path.join('tools', 'profile_chrome', 'third_party'),

    # Chromium code in third_party.
    os.path.join('third_party', 'fuzzymatch'),
    os.path.join('tools', 'swarming_client'),

    # Stuff pulled in from chrome-internal for official builds/tools.
    os.path.join('third_party', 'clear_cache'),
    os.path.join('third_party', 'gnu'),
    os.path.join('third_party', 'googlemac'),
    os.path.join('third_party', 'pcre'),
    os.path.join('third_party', 'psutils'),
    os.path.join('third_party', 'sawbuck'),
    # See crbug.com/350472
    os.path.join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    # Chrome for Android proprietary code.
    os.path.join('clank'),

    # Redistribution does not require attribution in documentation.
    os.path.join('third_party', 'directxsdk'),
    os.path.join('third_party', 'platformsdk_win2008_6_1'),
    os.path.join('third_party', 'platformsdk_win7'),

    # For testing only, presents on some bots.
    os.path.join('isolate_deps_dir'),
])
# Directories we don't scan through.
VCS_METADATA_DIRS = ('.svn', '.git')
PRUNE_DIRS = VCS_METADATA_DIRS + (
    'out', 'Debug', 'Release',  # build files
    'layout_tests',             # lots of subdirs
)
# Directories outside of any third_party/ parent that are still treated as
# third-party components by FindThirdPartyDirs.
ADDITIONAL_PATHS = (
    os.path.join('breakpad'),
    os.path.join('chrome', 'common', 'extensions', 'docs', 'examples'),
    os.path.join('chrome', 'test', 'chromeos', 'autotest'),
    os.path.join('chrome', 'test', 'data'),
    os.path.join('native_client'),
    os.path.join('net', 'tools', 'spdyshark'),
    os.path.join('sdch', 'open-vcdiff'),
    os.path.join('testing', 'gmock'),
    os.path.join('testing', 'gtest'),
    os.path.join('tools', 'grit'),
    os.path.join('tools', 'gyp'),
    os.path.join('tools', 'page_cycler', 'acid3'),
    os.path.join('url', 'third_party', 'mozilla'),
    os.path.join('v8'),
    # Fake directories to include the strongtalk and fdlibm licenses.
    os.path.join('v8', 'strongtalk'),
    os.path.join('v8', 'fdlibm'),
)
# Directories where we check out directly from upstream, and therefore
# can't provide a README.chromium.  Please prefer a README.chromium
# wherever possible.
#
# Each key is a component path relative to the source root; each value holds
# the same fields that would otherwise be read from README.chromium.
SPECIAL_CASES = {
    os.path.join('native_client'): {
        "Name": "native client",
        "URL": "http://code.google.com/p/nativeclient",
        "License": "BSD",
    },
    os.path.join('sdch', 'open-vcdiff'): {
        "Name": "open-vcdiff",
        "URL": "http://code.google.com/p/open-vcdiff",
        "License": "Apache 2.0, MIT, GPL v2 and custom licenses",
        "License Android Compatible": "yes",
    },
    os.path.join('testing', 'gmock'): {
        "Name": "gmock",
        "URL": "http://code.google.com/p/googlemock",
        "License": "BSD",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('testing', 'gtest'): {
        "Name": "gtest",
        "URL": "http://code.google.com/p/googletest",
        "License": "BSD",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('third_party', 'angle'): {
        "Name": "Almost Native Graphics Layer Engine",
        "URL": "http://code.google.com/p/angleproject/",
        "License": "BSD",
    },
    os.path.join('third_party', 'cros_system_api'): {
        "Name": "Chromium OS system API",
        "URL": "http://www.chromium.org/chromium-os",
        "License": "BSD",
        # Absolute path here is resolved as relative to the source root.
        "License File": "/LICENSE.chromium_os",
    },
    os.path.join('third_party', 'lss'): {
        "Name": "linux-syscall-support",
        "URL": "http://code.google.com/p/linux-syscall-support/",
        "License": "BSD",
        "License File": "/LICENSE",
    },
    os.path.join('third_party', 'ots'): {
        "Name": "OTS (OpenType Sanitizer)",
        "URL": "http://code.google.com/p/ots/",
        "License": "BSD",
    },
    os.path.join('third_party', 'pdfium'): {
        "Name": "PDFium",
        "URL": "http://code.google.com/p/pdfium/",
        "License": "BSD",
    },
    os.path.join('third_party', 'pdfsqueeze'): {
        "Name": "pdfsqueeze",
        "URL": "http://code.google.com/p/pdfsqueeze/",
        "License": "Apache 2.0",
        "License File": "COPYING",
    },
    os.path.join('third_party', 'ppapi'): {
        "Name": "ppapi",
        "URL": "http://code.google.com/p/ppapi/",
    },
    os.path.join('third_party', 'scons-2.0.1'): {
        "Name": "scons-2.0.1",
        "URL": "http://www.scons.org",
        "License": "MIT",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('third_party', 'trace-viewer'): {
        "Name": "trace-viewer",
        "URL": "http://code.google.com/p/trace-viewer",
        "License": "BSD",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('third_party', 'v8-i18n'): {
        "Name": "Internationalization Library for v8",
        "URL": "http://code.google.com/p/v8-i18n/",
        "License": "Apache 2.0",
    },
    os.path.join('third_party', 'WebKit'): {
        "Name": "WebKit",
        "URL": "http://webkit.org/",
        "License": "BSD and GPL v2",
        # Absolute path here is resolved as relative to the source root.
        "License File": "/third_party/WebKit/LICENSE_FOR_ABOUT_CREDITS",
    },
    os.path.join('third_party', 'webpagereplay'): {
        "Name": "webpagereplay",
        "URL": "http://code.google.com/p/web-page-replay",
        "License": "Apache 2.0",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('tools', 'grit'): {
        "Name": "grit",
        "URL": "http://code.google.com/p/grit-i18n",
        "License": "BSD",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('tools', 'gyp'): {
        "Name": "gyp",
        "URL": "http://code.google.com/p/gyp",
        "License": "BSD",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('v8'): {
        "Name": "V8 JavaScript Engine",
        "URL": "http://code.google.com/p/v8",
        "License": "BSD",
    },
    os.path.join('v8', 'strongtalk'): {
        "Name": "Strongtalk",
        "URL": "http://www.strongtalk.org/",
        "License": "BSD",
        # Absolute path here is resolved as relative to the source root.
        "License File": "/v8/LICENSE.strongtalk",
    },
    os.path.join('v8', 'fdlibm'): {
        "Name": "fdlibm",
        "URL": "http://www.netlib.org/fdlibm/",
        "License": "Freely Distributable",
        # Absolute path here is resolved as relative to the source root.
        "License File": "/v8/src/third_party/fdlibm/LICENSE",
        "License Android Compatible": "yes",
    },
    os.path.join('third_party', 'khronos_glcts'): {
        # These sources are not shipped, are not public, and it isn't
        # clear why they're tripping the license check.
        "Name": "khronos_glcts",
        "URL": "http://no-public-url",
        "License": "Khronos",
        "License File": "NOT_SHIPPED",
    },
    os.path.join('tools', 'telemetry', 'third_party', 'gsutil'): {
        "Name": "gsutil",
        "URL": "https://cloud.google.com/storage/docs/gsutil",
        "License": "Apache 2.0",
        "License File": "NOT_SHIPPED",
    },
}
# Sentinel for the 'License File' field: a component carrying this value has
# no license text included in about:credits.
NOT_SHIPPED = 'NOT_SHIPPED'
class LicenseError(Exception):
    """Raised when a directory's licensing information is incomplete."""
def AbsolutePath(path, filename, root):
    """Resolve a file reference from README.chromium against the source root.

    A |filename| that starts with '/' is interpreted relative to |root|;
    anything else is interpreted relative to |root|/|path|.

    Returns the joined path when something exists at that location,
    otherwise None.
    """
    if filename.startswith('/'):
        # Absolute-looking names skip the component directory entirely.
        parts = [root, filename[1:]]
    else:
        parts = [root, path, filename]
    candidate = os.path.join(*parts)
    if not os.path.exists(candidate):
        return None
    return candidate
def ParseDir(path, root, require_license_file=True, optional_keys=None):
    """Examine a third_party/foo component and extract its metadata.

    Metadata comes from SPECIAL_CASES when |path| is listed there, otherwise
    from the header of |root|/|path|/README.chromium (the lines before the
    first blank line).

    Args:
        path: component directory, relative to |root|.
        root: source root directory.
        require_license_file: when true, failing to locate a license file
            raises LicenseError.
        optional_keys: extra field names to pick up from README.chromium when
            present; unlike the default fields they are not required.

    Returns:
        A dict with "License File", "Name", "URL" and "License", plus any
        optional keys that were found.  "License File" is either NOT_SHIPPED
        or the located license path (None if none was found and
        |require_license_file| is false).

    Raises:
        LicenseError: README.chromium is missing, a required field is
            missing or empty, or a required license file cannot be found.
    """
    # Parse metadata fields out of README.chromium.
    # We examine "LICENSE" for the license file by default.
    metadata = {
        "License File": "LICENSE",  # Relative path to license text.
        "Name": None,               # Short name (for header on about:credits).
        "URL": None,                # Project home page.
        "License": None,            # Software license.
    }

    if optional_keys is None:
        optional_keys = []

    if path in SPECIAL_CASES:
        metadata.update(SPECIAL_CASES[path])
    else:
        # Try to find README.chromium.
        readme_path = os.path.join(root, path, 'README.chromium')
        if not os.path.exists(readme_path):
            raise LicenseError("missing README.chromium or licenses.py "
                               "SPECIAL_CASES entry")

        # Built once up front; list() keeps the concatenation valid where
        # dict.keys() returns a view rather than a list.
        wanted_keys = list(metadata.keys()) + list(optional_keys)
        with open(readme_path) as readme:
            for line in readme:
                line = line.strip()
                if not line:
                    # Only the header before the first blank line is parsed.
                    break
                for key in wanted_keys:
                    field = key + ": "
                    if line.startswith(field):
                        metadata[key] = line[len(field):]

    # Check that all expected metadata is present.
    for key, value in metadata.items():
        if not value:
            raise LicenseError("couldn't find '" + key + "' line "
                               "in README.chromium or licenses.py "
                               "SPECIAL_CASES")

    # Special-case modules that aren't in the shipping product, so don't need
    # their license in about:credits.
    if metadata["License File"] != NOT_SHIPPED:
        # Check that the license file exists, falling back to "COPYING".
        for filename in (metadata["License File"], "COPYING"):
            license_path = AbsolutePath(path, filename, root)
            if license_path is not None:
                break

        if require_license_file and not license_path:
            raise LicenseError("License file not found. "
                               "Either add a file named LICENSE, "
                               "import upstream's COPYING if available, "
                               "or add a 'License File:' line to "
                               "README.chromium with the appropriate path.")
        metadata["License File"] = license_path

    return metadata
def ContainsFiles(path, root):
    """Report whether |root|/|path| holds at least one file at any depth.

    Directories named in VCS_METADATA_DIRS are not descended into.
    """
    for _, subdirs, filenames in os.walk(os.path.join(root, path)):
        if filenames:
            return True
        # Rewrite the list in place so os.walk skips the dropped entries.
        subdirs[:] = [d for d in subdirs if d not in VCS_METADATA_DIRS]
    return False
def FilterDirsWithFiles(dirs_list, root):
    """Return, as a list, the entries of |dirs_list| that contain files.

    If a directory contains no files, assume it's a DEPS directory for a
    project not used by our current configuration and skip it.
    """
    non_empty = []
    for candidate in dirs_list:
        if ContainsFiles(candidate, root):
            non_empty.append(candidate)
    return non_empty
def FindThirdPartyDirs(prune_paths, root):
    """Find all third_party directories underneath the source root.

    Args:
        prune_paths: collection of root-relative paths to leave out, both
            from the walk and from the result.
        root: directory to walk.

    Returns:
        A set of root-relative paths: every non-pruned child of a directory
        named 'third_party', plus every entry of ADDITIONAL_PATHS that is not
        in |prune_paths|.
    """
    third_party_dirs = set()
    for path, dirs, _files in os.walk(root):
        # Pretty up the path: make it relative to root.  relpath stays
        # correct when root ends with a path separator, where slicing off
        # len(root)+1 characters would eat the first character of the name.
        path = os.path.relpath(path, root)
        if path == os.curdir:
            path = ''

        if path in prune_paths:
            dirs[:] = []
            continue

        # Prune out directories we want to skip.
        # (Note that we loop over PRUNE_DIRS so we're not iterating over a
        # list that we're simultaneously mutating.)
        for skip in PRUNE_DIRS:
            if skip in dirs:
                dirs.remove(skip)

        if os.path.basename(path) == 'third_party':
            # Add all subdirectories that are not marked for skipping.
            for subdir in dirs:
                dirpath = os.path.join(path, subdir)
                if dirpath not in prune_paths:
                    third_party_dirs.add(dirpath)

            # Don't recurse into any subdirs from here.
            dirs[:] = []
            continue

        # Don't recurse into paths in ADDITIONAL_PATHS, like we do with regular
        # third_party/foo paths.
        if path in ADDITIONAL_PATHS:
            dirs[:] = []

    for extra in ADDITIONAL_PATHS:
        if extra not in prune_paths:
            third_party_dirs.add(extra)

    return third_party_dirs
def FindThirdPartyDirsWithFiles(root):
    """Return the third-party directories under |root| that contain files.

    Directories are discovered with PRUNE_PATHS excluded, then those without
    any files are dropped.
    """
    return FilterDirsWithFiles(FindThirdPartyDirs(PRUNE_PATHS, root), root)
def ScanThirdPartyDirs(root=None):
    """Scan a list of directories and report on any problems we find.

    Args:
        root: source root to scan; defaults to the current working directory.

    Returns:
        True when every directory parsed cleanly, False otherwise.  Each
        failure is printed to stdout as "<path>: <message>".
    """
    if root is None:
        root = os.getcwd()
    third_party_dirs = FindThirdPartyDirsWithFiles(root)

    errors = []
    for path in sorted(third_party_dirs):
        try:
            # Only the validation matters here; the metadata is discarded.
            ParseDir(path, root)
        except LicenseError as e:
            errors.append((path, e.args[0]))

    # A single parenthesised argument prints the same on Python 2 and 3.
    for path, error in sorted(errors):
        print(path + ": " + error)

    return len(errors) == 0
def GenerateCredits():
    """Generate about:credits.

    The page is written to the file named by sys.argv[2] when given,
    otherwise it is printed to stdout.  Components whose metadata fails to
    parse, or whose license is NOT_SHIPPED, are left out.

    Returns:
        True on success; False (after printing usage) when sys.argv does not
        have 2 or 3 elements.
    """
    if len(sys.argv) not in (2, 3):
        print('usage: licenses.py credits [output_file]')
        return False

    def EvaluateTemplate(template, env, escape=True):
        """Expand a template with variables like {{foo}} using a
        dictionary of expansions."""
        for key, val in env.items():
            if escape:
                # NOTE(review): cgi.escape no longer exists on Python 3.8+;
                # html.escape(val, quote=False) is the replacement there.
                val = cgi.escape(val)
            template = template.replace('{{%s}}' % key, val)
        return template

    def ReadFile(file_path):
        """Return the raw contents of |file_path|, closing the handle."""
        with open(file_path, 'rb') as input_file:
            return input_file.read()

    root = os.path.join(os.path.dirname(__file__), '..')
    third_party_dirs = FindThirdPartyDirs(PRUNE_PATHS, root)
    resources_dir = os.path.join(root, 'chrome', 'browser', 'resources')

    entry_template = ReadFile(
        os.path.join(resources_dir, 'about_credits_entry.tmpl'))
    entries = []
    for path in sorted(third_party_dirs):
        try:
            metadata = ParseDir(path, root)
        except LicenseError:
            # TODO(phajdan.jr): Convert to fatal error (http://crbug.com/39240).
            continue
        if metadata['License File'] == NOT_SHIPPED:
            continue
        env = {
            'name': metadata['Name'],
            'url': metadata['URL'],
            'license': ReadFile(metadata['License File']),
        }
        entries.append(EvaluateTemplate(entry_template, env))

    file_template = ReadFile(os.path.join(resources_dir, 'about_credits.tmpl'))
    template_contents = "<!-- Generated by licenses.py; do not edit. -->"
    # Entries are already escaped individually, so the outer expansion must
    # not escape them a second time.
    template_contents += EvaluateTemplate(file_template,
                                          {'entries': '\n'.join(entries)},
                                          escape=False)

    if len(sys.argv) == 3:
        with open(sys.argv[2], 'w') as output_file:
            output_file.write(template_contents)
    else:
        print(template_contents)

    return True
def main():
    """Run the command named by sys.argv[1].

    'scan' runs ScanThirdPartyDirs and 'credits' runs GenerateCredits; any
    other command (or none) prints the module docstring.

    Returns:
        0 when the command succeeds; 1 when it fails or when the usage text
        is printed.
    """
    command = 'help'
    if len(sys.argv) > 1:
        command = sys.argv[1]

    if command == 'scan':
        if not ScanThirdPartyDirs():
            return 1
    elif command == 'credits':
        if not GenerateCredits():
            return 1
    else:
        print(__doc__)
        return 1
    # Explicit success status; sys.exit treats it the same as None.
    return 0


if __name__ == '__main__':
    sys.exit(main())