3 # ***** BEGIN LICENSE BLOCK *****
4 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 # The contents of this file are subject to the Mozilla Public License Version
7 # 1.1 (the "License"); you may not use this file except in compliance with
8 # the License. You may obtain a copy of the License at
9 # http://www.mozilla.org/MPL/
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 # for the specific language governing rights and limitations under the
16 # The Original Code is the relic relicensing tool.
18 # The Initial Developer of the Original Code is
19 # Trent Mick <TrentM@ActiveState.com>.
20 # Portions created by the Initial Developer are Copyright (C) 2003-2005
21 # the Initial Developer. All Rights Reserved.
24 # Gervase Markham <gerv@gerv.net>
25 # Patrick Fey <bugzilla@nachtarbeiter.net>
27 # Alternatively, the contents of this file may be used under the terms of
28 # either the GNU General Public License Version 2 or later (the "GPL"), or
29 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 # in which case the provisions of the GPL or the LGPL are applicable instead
31 # of those above. If you wish to allow use of your version of this file only
32 # under the terms of either the GPL or the LGPL, and not to allow others to
33 # use your version of this file under the terms of the MPL, indicate your
34 # decision by deleting the provisions above and replace them with the notice
35 # and other provisions required by the GPL or the LGPL. If you do not delete
36 # the provisions above, a recipient may use your version of this file under
37 # the terms of any one of the MPL, the GPL or the LGPL.
39 # ***** END LICENSE BLOCK *****
41 # Adapted from the 'lick' and 'ripl' Python scripts. (See:
42 # <http://bugzilla.mozilla.org/show_bug.cgi?id=98089>)
45 relic - RE-LICense a given file, set of files, or directory of files
46 from the Mozilla source tree
49 relic [options...] [files...]
50 relic [options...] < files...
52 Options to Select Mode (use one):
53 <none> List the licenses in each file.
54 -s, --statistics Show a summary table of licenses in each file.
55 The -x, --extended option may be added to show
56 some additional detail to the stats.
57 -r, --relicense Modify the given files to include the
58 appropriate Mozilla license, where
59 "appropriate" is either the NPL/GPL/LGPL
60 tri-license if it was already under the NPL or
61 the MPL/LGPL/GPL license in all other cases.
63 Relicenses files (as -r|--relicense), but
64 does NOT skip files that already appear to
65 have a complete license.
66 -A, --add Add a license to files that do not appear to
68 -I, --initial-developers
69 Display initial developer for each file.
72 -h, --help dump this help and exit
73 -V, --version dump this script's version and exit
74 -v, --verbose verbose output
75 -d, --debug more verbose output
77 -f, --force Continue processing after an error. (Errors
78 are summarized at end.)
79 -q, --quick Quick scanning. Use only basic license checks
80 (only use in report mode).
81 -M, --MPL Replace NPL licenses with MPL ones.
82 -a, --all Check all files (only skip CVS directories).
83 --dry-run Go through motions but don't actually change
85 --backup Make backups of changed files when
86 relicensing. Backup filenames are the
87 original filename suffixed with a ~# where
88 "#" is the lowest number to avoid a file
90 -o <orig_code_is> Provide fallback value for the "Original
92 -D <orig_code_date> Provide fallback value for the date
93 that is part of the "Original Code is" block.
94 -i <initial_dev> Provide fallback value for the "Initial
95 Developer of the Original Code is" block.
96 -y <year> Provide fallback value for "Initial
97 Developer" copyright year.
98 --defaults Use the following default fallback values:
99 original_code_is: "mozilla.org Code"
100 initial_copyright_date: "2001"
101 initial_developer: "Netscape Communications
103 Note: the "Original Code" date is generally
104 not required, so a default is not included
108 # List license in files under mozilla/js/src.
109 relic mozilla/js/src # list licenses in files
110 relic -s mozilla/js/src # show summary stats on licenses
111 relic -r mozilla/js/src # re-license files
123 class RelicError(Exception):
130 # This package will be std in Python 2.3, but many Python 2.2
131 # installations will not have it.
133 logging
.basicConfig()
135 # Local fallback logging module.
137 import _logging
as logging
139 sys
.stderr
.write("Your Python installation does not have the logging "
140 "package, nor could the fallback _logging module be "
141 "found. One of the two is required to run this "
145 log
= logging
.getLogger("relic")
149 _version_
= (0, 7, 2)
151 # When processing files, 'relic' skips files and directories according
152 # to these settings. Note: files identified in .cvsignore files are also
154 _g_skip_exts
= [".mdp", ".order", ".dsp", ".dsw", ".uf"]
155 _g_skip_file_basenames
= [
156 # Used by CVS (and this script)
159 # GPL with autoconf exception
163 # Auto-generated from other files
166 # license and readme files
174 # TODO: update with MPL block - or CVS remove (check history)
175 "tools/wizards/templates/licenses/MPL/lic.mak",
176 "tools/wizards/templates/licenses/MPL/lic.pl",
178 ###########################################################################
179 # Everything in _g_skip_files below this line needs no further work.
180 ###########################################################################
182 # Files containing copies of licence text which confuses the script
185 "security/svrcore/LICENSE",
186 "extensions/xmlterm/doc/MPL",
187 "gfx/cairo/cairo/COPYING-LGPL-2.1",
188 "gfx/cairo/cairo/COPYING-MPL-1.1",
190 # Files containing global licensing information
191 "toolkit/content/license.html",
193 # Ben Bucksch - files are tri-licensed with an extra clause.
194 "netwerk/streamconv/converters/mozTXTToHTMLConv.cpp",
195 "netwerk/streamconv/converters/mozTXTToHTMLConv.h",
196 "netwerk/streamconv/public/mozITXTToHTMLConv.idl",
199 "config/preprocessor.pl",
200 "intl/uconv/tools/parse-mozilla-encoding-table.pl",
201 "intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl",
204 # Files which the script doesn't handle well. All have been relicensed
206 "xpinstall/wizard/windows/builder/readme.txt",
207 "xpfe/bootstrap/icons/windows/readme.txt",
208 "embedding/qa/testembed/README.TXT",
209 "security/nss/lib/freebl/ecl/README.FP",
210 "nsprpub/pkg/linux/sun-nspr.spec",
211 "security/nss/pkg/linux/sun-nss.spec",
212 "security/jss/pkg/linux/sun-jss.spec",
213 "security/nss/lib/freebl/mpi/utils/README",
214 "security/nss/lib/freebl/ecl/README",
215 "security/nss/lib/freebl/mpi/README",
216 "lib/mac/UserInterface/Tables/TableClasses.doc",
217 "parser/htmlparser/tests/html/bug23680.html",
218 "security/nss/lib/freebl/mpi/montmulfv9.s",
219 "tools/performance/pageload/base/lxr.mozilla.org/index.html",
220 "testing/performance/win32/page_load_test/" +\
221 "base/lxr.mozilla.org/index.html",
222 "testing/performance/win32/page_load_test/" +\
223 "base/lxr.mozilla.org/20001028.html.orig",
225 # Not sure what to do with this...
226 "gfx/cairo/stdint.diff",
228 # GPL with autoconf exception (same license as files distributed with)
229 "build/autoconf/codeset.m4",
230 "toolkit/airbag/airbag/autotools/depcomp",
231 "toolkit/airbag/airbag/autotools/missing",
232 "toolkit/airbag/airbag/autotools/ltmain.sh",
233 "js/tamarin/pcre/ltmain.sh",
234 "security/svrcore/compile",
235 "security/svrcore/ltmain.sh",
236 "security/svrcore/missing",
237 "security/svrcore/depcomp",
238 "security/svrcore/aclocal.m4",
240 # Public domain or equivalent
241 "nsprpub/config/nspr.m4",
242 "toolkit/airbag/airbag/aclocal.m4",
243 "security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s",
245 # GSSAPI has BSD-like licence requiring some attribution
246 "extensions/auth/gssapi.h",
251 _g_skip_dir_basenames
= [
255 _g_skip_dir_basenames_cvs_only
= [
259 # Complete path from mozilla dir to a dir to skip.
261 # Test files for this script, which cause it to crash!
264 # License template files (TODO: this directory may disappear)
265 "tools/wizards/templates/licenses",
267 # As per the "New Original Source Files" section of:
268 # http://www.mozilla.org/MPL/license-policy.html
269 # with obsolete or now-relicensed directories removed
270 "apache", # Obsolete mod_gzip code
271 "cck", # mkaply's baby; not core code anyway.
273 "js/rhino", # Currently MPL/GPL - may end up BSD
274 "webtools", # Various MPLed webtools
276 # These could be done, but no-one's clamouring for it, and it's a hassle
277 # sorting it all out, so let sleeping dogs lie.
282 # These have their own BSD-like license
284 "modules/libimg/mng",
286 # The following are not supposed to be relicensed, but they do have a
287 # few files in we care about (like makefiles)
288 "modules/libimg/png",
296 # Other directories we want to exclude
297 "embedding/tests", # Agreed as BSD
298 "calendar/libical", # LGPL/MPL
299 "gfx/cairo/cairo/src", # LGPL/MPL
302 _g_basename_to_comment_info
= {
303 "configure": (["dnl"], ),
305 "Makefile": (["#"], ),
306 "makefile": (["#"], ),
308 "typemap": (["#"], ),
309 "xmplflt.conf": (["#"], ),
310 "ldapfriendly": (["#"], ),
311 "ldaptemplates.conf": (["#"], ),
312 "ldapsearchprefs.conf": (["#"], ),
313 "ldapfilter.conf": (["#"], ),
314 "README.configure": (["#"], ),
315 "Options.txt": (["#"], ),
316 "fdsetsize.txt": (["#"], ),
317 "prototype": (["#"], ),
318 "prototype_i386": (["#"], ),
319 "prototype3_i386": (["#"], ),
320 "prototype_com": (["#"], ),
321 "prototype3_com": (["#"], ),
322 "prototype_sparc": (["#"], ),
323 "prototype3_sparc": (["#"], ),
324 "nglayout.mac": (["#"], ),
325 "pkgdepend": (["#"], ),
326 "Maketests": (["#"], ),
328 "csh-aliases": (["#"], ),
329 "csh-env": (["#"], ),
331 "MANIFEST": (["#"], ),
332 "mozconfig": (["#"], ),
333 "makecommon": (["#"], ),
334 "bld_awk_pkginfo": (["#"], ),
335 "prototype_i86pc": (["#"], ),
336 "pkgdepend_5_6": (["#"], ),
337 "awk_pkginfo-i386": (["#"], ),
338 "awk_pkginfo-sparc": (["#"], ),
339 "pkgdepend_64bit": (["#"], ),
341 "Makefile.linux": (["#"], ),
343 "README": ([""], ["#"]),
344 "copyright": ([""], ),
346 "xptcstubs_asm_ppc_darwin.s.m4": (["/*", " *", "*/"], ),
347 "xptcstubs_asm_mips.s.m4": (["/*", " *", "*/"], ),
349 "nsIDocCharsetTest.txt": (["<!--", " -", "-->"], ),
350 "nsIFontListTest.txt": (["<!--", " -", "-->"], ),
351 "ComponentListTest.txt": (["<!--", " -", "-->"], ),
352 "nsIWebBrowserPersistTest1.txt": (["<!--", " -", "-->"], ),
353 "nsIWebBrowserPersistTest2.txt": (["<!--", " -", "-->"], ),
354 "nsIWebBrowserPersistTest3.txt": (["<!--", " -", "-->"], ),
355 "plugins.txt": (["<!--", " -", "-->"], ),
356 "NsISHistoryTestCase1.txt": (["<!--", " -", "-->"], ),
357 "EmbedSmokeTest.txt": (["<!--", " -", "-->"], ),
359 "lineterm_LICENSE": (["/*", " *", "*/"], ),
360 "XMLterm_LICENSE": (["/*", " *", "*/"], ),
361 "BrowserView.cpp.mod": (["/*", " *", "*/"], ),
362 "header_template": (["/*", " *", "*/"], ),
363 "cpp_template": (["/*", " *", "*/"], ),
365 "abcFormat470.txt": (["//"], ),
366 "opcodes.tbl": (["//"], ),
369 _g_ext_to_comment_info
= {
370 ".txt": (["##", "#", ], ["#"]),
371 ".TXT": (["##", "#", ]),
380 ".CPP": (["/*", " *", "*/"], ),
381 ".cpp": (["/*", " *", "*/"], ),
382 ".H": (["/*", " *", "*/"], ),
383 ".h": (["/*", " *", "*/"], ),
384 ".hxx": (["/*", " *", "*/"], ),
385 ".c": (["/*", " *", "*/"], ),
386 ".css": (["/*", " *", "*/"], ['#']),
387 ".js": (["/*", " *", "*/"], ['#']),
388 ".idl": (["/*", " *", "*/"], ),
389 ".ut": (["/*", " *", "*/"], ),
390 ".rc": (["/*", " *", "*/"], ),
391 ".rc2": (["/*", " *", "*/"], ),
392 ".RC": (["/*", " *", "*/"], ),
393 ".Prefix": (["/*", " *", "*/"], ),
394 ".prefix": (["/*", " *", "*/"], ),
395 ".cfg": (["/*", " *", "*/"], ["#"]),
396 ".cp": (["/*", " *", "*/"], ),
397 ".cs": (["/*", " *", "*/"], ),
398 ".java": (["/*", " *", "*/"], ),
399 ".jst": (["/*", " *", "*/"], ),
400 ".tbl": (["/*", " *", "*/"], ),
401 ".tab": (["/*", " *", "*/"], ),
402 ".cc": (["/*", " *", "*/"], ),
403 ".msg": (["/*", " *", "*/"], ),
404 ".y": (["/*", " *", "*/"], ),
405 ".r": (["/*", " *", "*/"], ),
406 ".mm": (["/*", " *", "*/"], ),
407 ".x-ccmap":(["/*", " *", "*/"], ),
408 ".ccmap": (["/*", " *", "*/"], ),
409 ".sql": (["/*", " *", "*/"], ),
410 ".pch++": (["/*", " *", "*/"], ),
411 ".xpm": (["/*", " *", "*/"], ),
412 ".uih": (["/*", " *", "*/"], ),
413 ".uil": (["/*", " *", "*/"], ),
414 ".ccmap": (["/*", " *", "*/"], ),
415 ".map": (["/*", " *", "*/"], ),
416 ".win98": (["/*", " *", "*/"], ),
417 ".php": (["/*", " *", "*/"], ),
418 ".m": (["/*", " *", "*/"], ),
419 ".jnot": (["/*", " *", "*/"], ),
420 ".l": (["/*", " *", "*/"], ),
421 ".htp": (["/*", " *", "*/"], ),
422 ".xs": (["/*", " *", "*/"], ),
423 ".as": (["/*", " *", "*/"], ),
424 ".api": (["/*", " *", "*/"], ['#']),
426 ".html": (["<!--", " -", "-->"], ["#"]),
427 ".xml": (["<!--", " -", "-->"], ["#"]),
428 ".xbl": (["<!--", " -", "-->"], ["#"]),
429 ".xsl": (["<!--", " -", "-->"], ),
430 ".xul": (["<!--", " -", "-->"], ["#"]),
431 ".dtd": (["<!--", " -", "-->"], ["#"]),
432 ".rdf": (["<!--", " -", "-->"], ["#"]),
433 ".htm": (["<!--", " -", "-->"], ),
434 ".out": (["<!--", " -", "-->"], ),
435 ".resx": (["<!--", " -", "-->"], ),
436 ".bl": (["<!--", " -", "-->"], ),
437 ".xif": (["<!--", " -", "-->"], ),
438 ".xhtml":(["<!--", " -", "-->"], ["#"]),
440 ".inc": (["<!--", " -", "-->"],
445 ".properties": (["#"], ),
468 ".client": (["#"], ),
469 ".ref": (["#"], ), # all of them "Makefile.ref"
481 ".Solaris": (["#"], ),
484 ".server": (["#"], ),
487 ".gnuplot": (["#"], ),
495 ".template": (["#"], ),
501 ".def": ([";+#"], [";"]),
502 ".DEF": ([";+#"], [";"]),
505 ".lisp": ([";;;"], ),
507 ".cmd": (["rem"], ["REM"]),
508 ".bat": (["rem"], ["REM"]),
520 ".script": (["(*", " *", "*)"], ),
525 ".s": (["#"], ["//"], ["/*", " *", "*/"], ["!"], [";"], ["/"]),
527 _g_shebang_pattern_to_comment_info
= [
528 (re
.compile(ur
'\A#!.*/bin/(ba)?sh.*$', re
.IGNORECASE
), (["#"], )),
529 (re
.compile(ur
'\A#!.*perl.*$', re
.IGNORECASE
), (["#"], )),
530 (re
.compile(ur
'\A#!.*php.*$', re
.IGNORECASE
), (["#"], )),
531 (re
.compile(ur
'\A#!.*python.*$', re
.IGNORECASE
), (["#"], )),
532 (re
.compile(ur
'\A#!.*ruby.*$', re
.IGNORECASE
), (["#"], )),
533 (re
.compile(ur
'\A#!.*tclsh.*$', re
.IGNORECASE
), (["#"], )),
534 (re
.compile(ur
'\A#!.*wish.*$', re
.IGNORECASE
), (["#"], )),
535 (re
.compile(ur
'\A#!.*expect.*$', re
.IGNORECASE
), (["#"], )),
539 _g_trilicense_parts
= {
541 ***** BEGIN LICENSE BLOCK *****
542 Version: MPL 1.1/GPL 2.0/LGPL 2.1
544 The contents of this file are subject to the Mozilla Public License Version
545 1.1 (the "License"); you may not use this file except in compliance with
546 the License. You may obtain a copy of the License at
547 http://www.mozilla.org/MPL/
549 Software distributed under the License is distributed on an "AS IS" basis,
550 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
551 for the specific language governing rights and limitations under the
556 ***** BEGIN LICENSE BLOCK *****
557 Version: NPL 1.1/GPL 2.0/LGPL 2.1
559 The contents of this file are subject to the Netscape Public License
560 Version 1.1 (the "License"); you may not use this file except in
561 compliance with the License. You may obtain a copy of the License at
562 http://www.mozilla.org/NPL/
564 Software distributed under the License is distributed on an "AS IS" basis,
565 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
566 for the specific language governing rights and limitations under the
570 "original_code_is": """\
571 The Original Code is %(original_code_is)s.
574 "original_code_is_with_date": """\
575 The Original Code is %(original_code_is)s, released
576 %(original_code_date)s.
579 "initial_developer": """\
580 The Initial Developer of the Original Code is
581 %(initial_developer)s.
582 Portions created by the Initial Developer are Copyright (C) %(initial_copyright_date)s
583 the Initial Developer. All Rights Reserved.
591 Alternatively, the contents of this file may be used under the terms of
592 the GNU General Public License Version 2 or later (the "GPL"), in which
593 case the provisions of the GPL are applicable instead of those above. If
594 you wish to allow use of your version of this file only under the terms of
595 the GPL and not to allow others to use your version of this file under the
596 MPL, indicate your decision by deleting the provisions above and replacing
597 them with the notice and other provisions required by the GPL. If you do
598 not delete the provisions above, a recipient may use your version of this
599 file under either the MPL or the GPL.
601 ***** END LICENSE BLOCK *****""",
603 Alternatively, the contents of this file may be used under the terms of
604 the GNU General Public License Version 2 or later (the "GPL"), in which
605 case the provisions of the GPL are applicable instead of those above. If
606 you wish to allow use of your version of this file only under the terms of
607 the GPL and not to allow others to use your version of this file under the
608 NPL, indicate your decision by deleting the provisions above and replacing
609 them with the notice and other provisions required by the GPL. If you do
610 not delete the provisions above, a recipient may use your version of this
611 file under either the NPL or the GPL.
613 ***** END LICENSE BLOCK *****""",
614 "gpl/lgpl for mpl": """\
615 Alternatively, the contents of this file may be used under the terms of
616 either the GNU General Public License Version 2 or later (the "GPL"), or
617 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
618 in which case the provisions of the GPL or the LGPL are applicable instead
619 of those above. If you wish to allow use of your version of this file only
620 under the terms of either the GPL or the LGPL, and not to allow others to
621 use your version of this file under the terms of the MPL, indicate your
622 decision by deleting the provisions above and replace them with the notice
623 and other provisions required by the GPL or the LGPL. If you do not delete
624 the provisions above, a recipient may use your version of this file under
625 the terms of any one of the MPL, the GPL or the LGPL.
627 ***** END LICENSE BLOCK *****""",
628 "gpl/lgpl for npl": """\
629 Alternatively, the contents of this file may be used under the terms of
630 either the GNU General Public License Version 2 or later (the "GPL"), or
631 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
632 in which case the provisions of the GPL or the LGPL are applicable instead
633 of those above. If you wish to allow use of your version of this file only
634 under the terms of either the GPL or the LGPL, and not to allow others to
635 use your version of this file under the terms of the NPL, indicate your
636 decision by deleting the provisions above and replace them with the notice
637 and other provisions required by the GPL or the LGPL. If you do not delete
638 the provisions above, a recipient may use your version of this file under
639 the terms of any one of the NPL, the GPL or the LGPL.
641 ***** END LICENSE BLOCK *****""",
644 _g_dry_run
= 0 # iff true, don't modify any files
649 #---- internal support routines
651 def _is_binary(filename
):
652 """Return true iff the given filename is binary.
654 Raises an EnvironmentError if the file does not exist or cannot be
657 fin
= open(filename
, 'rb')
661 chunk
= fin
.read(CHUNKSIZE
)
662 if '\0' in chunk
: # found null byte
664 if len(chunk
) < CHUNKSIZE
:
672 _g_cvsignore_cache
= {} # optimization: keep a cache of .cvsignore content
673 def _should_skip_according_to_cvsignore(path
):
674 dirname
, basename
= os
.path
.split(path
)
675 cvsignore
= os
.path
.join(dirname
, ".cvsignore")
676 if not os
.path
.exists(cvsignore
):
678 elif cvsignore
not in _g_cvsignore_cache
:
679 fin
= open(cvsignore
, 'r')
683 if f
[-1] == "\n": f
= f
[:-1] # chomp
684 if not f
: continue # skip empty lines
688 _g_cvsignore_cache
[cvsignore
] = to_ignore
690 # At this point .cvsignore exists and its contents are in the cache.
691 to_ignore
= _g_cvsignore_cache
[cvsignore
]
692 if basename
in to_ignore
:
697 _g_backup_pattern
= re
.compile("~\d+$")
698 def _should_skip_file(path
):
699 log
.debug("_should_skip_file(path='%s')", path
)
702 ext
= os
.path
.splitext(path
)[1]
703 if ext
in _g_skip_exts
:
704 log
.info("Skipping '%s' (according to '_g_skip_exts').", path
)
706 xpath
= '/'.join(path
.split(os
.sep
)) # use same sep as in _g_skip_files
707 for sf
in _g_skip_files
:
708 if xpath
.endswith(sf
):
709 log
.info("Skipping '%s' (according to '_g_skip_files').", path
)
711 if os
.path
.basename(path
) in _g_skip_file_basenames
:
712 log
.info("Skipping '%s' (according to '_g_skip_file_basenames').", path
)
714 if _should_skip_according_to_cvsignore(path
):
715 log
.info("Skipping '%s' (according to .cvsignore).", path
)
717 if _g_backup_pattern
.search(path
):
718 log
.info("Skipping '%s' (looks like backup file).", path
)
722 def _should_skip_dir(path
):
723 log
.debug("_should_skip_dir(path='%s')", path
)
725 if os
.path
.basename(path
) in _g_skip_dir_basenames_cvs_only
:
728 if os
.path
.basename(path
) in _g_skip_dir_basenames
:
729 log
.info("Skipping '%s' (according to _g_skip_dir_basenames).", path
)
731 xpath
= '/'.join(path
.split(os
.sep
)) # use same sep as in _g_skip_dirs
732 # These could do with being a proper path canonicalisation function...
733 if xpath
[-1] == '/': xpath
= xpath
[:-1] # treat "calendar/" the same as "calendar"
734 if xpath
[0:2] == './': xpath
= xpath
[2:] # treat "./calendar" the same as "calendar"
735 for sd
in _g_skip_dirs
:
736 # Changed by gerv to make skip_dirs require whole path
738 log
.info("Skipping '%s' (according to _g_skip_dirs).", path
)
740 if _should_skip_according_to_cvsignore(path
):
741 log
.info("Skipping '%s' (according to .cvsignore).", path
)
746 def _get_license_info(filename
, show_initial
=0, quick
=0):
747 """Return license block information for the given file.
749 "filename" is the path to the file to scan.
750 "show_initial" is a boolean that indicates if initial developer info
752 "quick" is a boolean that can be set for a quick scan. In this
753 case, only the "parts" field of the return dictionary will
756 Returns a dictionary adequately describing the license block in the
757 given file for the purpose of determining whether to patch the
758 license block and how. Returns a dictionary of the following form:
759 {"parts": <list of zero or more of "mpl", "npl", "gpl", "lgpl",
760 "unknown", "block_begin", "block_end" in the
761 order in which they were found>,
762 # if necessary, the following keys are included as well
763 "begin_line": <(0-based) index at which license block starts>,
764 "end_line": <(0-based) index at which license block ends>,
765 "first_prefix": <prefix to use for new license block first line>,
766 "subsequent_prefix": <prefix to use for subsequent lines>,
767 "last_suffix": <suffix to use for last line>,
768 # The following fields correspond to the file-specific
769 # portions of the license template as described here:
770 # http://www.mozilla.org/MPL/relicensing-faq.html#new-license
771 # If the associated block is not found, then the value is None.
772 "original_code_is": ...,
773 "original_code_date": ...,
774 "initial_developer": ...,
775 "initial_copyright_date": ...,
779 precondition: should not be called on binary files
785 fin
= open(filename
, 'r')
791 # Help me find filena
792 log
.info("Next file is: %s", filename
)
794 # do quick search to see if any of the desired licenses is in here
795 # - if it looks like all the parts are there, good, done
796 # - if some but not all parts, continue
797 parts_pattern
= re
.compile("""(
798 (?P<block_begin>\*\*\*\*\*\ BEGIN\ LICENSE\ BLOCK\ \*\*\*\*\*)
799 | (?P<mpl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla)
800 | (?P<npl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape)
801 | (?P<gpl>GNU\ (General\ )?Public\ License)
802 | (?P<lgpl>(Library|Lesser)\ General\ Public\ License)
803 | (?P<block_end>\*\*\*\*\*\ END\ LICENSE\ BLOCK\ \*\*\*\*\*)
806 parts
= [] # found license parts in this file
810 match
= parts_pattern
.search(content
, start
)
812 # Skip this block, if the last license block is more than 10 lines
813 # away (file is probably used for autogeneration of files then).
814 if blocks
== 1 and (match
.start()-start
) > 10:
817 parts
= match
.groupdict()
820 lic_info
["parts"].append(part
)
821 log
.info("%s license/delimeter found", part
)
823 if part
== "block_end":
829 raise RelicError("unexpected license part: %r" % parts
)
833 # no license block at all
835 # - if not, check to see if License or Copyright shows up in the
836 # file; if so, then error out; if not, skip out
837 any_lic_pattern
= re
.compile("(Copyright|Licen[sc]e)", re
.IGNORECASE
)
838 match
= any_lic_pattern
.search(content
)
840 lic_info
["parts"].append("unknown")
841 log
.info("unknown license found: %r",
842 content
[max(match
.start()-20,0):match
.end()+20])
844 log
.info("no license found")
847 # license block with non-tri-license version headers
848 elif lic_info
["parts"] == ["block_begin", "block_end"]:
849 lic_info
["parts"].append("unknown")
850 log
.info("unknown license found (license block with non-tri-license)")
853 # license block with tri-license version headers
854 elif (lic_info
["parts"] == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
855 lic_info
["parts"] == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
856 log
.info("license looks good, no changes necessary")
860 # Otherwise, the license needs to be fixed, so gather more detailed
861 # information. Here is the algorithm we will use:
862 # - find first license line
863 # - find the end of this comment block (assumption: from the first
864 # license line to the end of the comment block is the full
866 # This is a bad assumption in two cases and steps have been taken
867 # to try to deal with those cases:
868 # - There could be a trailing part bit of comment that is
869 # NOT part of the license but is part of the same comment
870 # block. A common example are the:
871 # This Original Code has been modified by IBM...
872 # files (about 130 of them in the moz tree).
873 # (c.f. test_relicense_ibm_copyright_suffix.c)
874 # - Some files have split up the license paragraphs into
875 # multiple comment blocks, e.g.
876 # "mozilla/build/unix/abs2rel.pl":
877 # # The contents of this file are subject to the
879 # # the License at http://www.mozilla.org/MPL/
881 # # The Initial Developer of the Original Code
884 # (c.f. test_relicense_separated_license_comment_blocks.pl)
885 # - these are the lines to replace
886 # - gather embedded lic data
887 # - use second line to determine line prefix
888 # ? Should we only allow processing of unknown-delimiter-files with
891 # Get comment delimiter info for this file.
892 comment_delim_sets
= _get_comment_delim_sets(filename
)
894 # - find first license line (and determine which set of comment
895 # delimiters are in use)
896 lines
= content
.splitlines()
897 for comment_delims
in comment_delim_sets
:
898 if len(comment_delims
) == 3:
899 prefix_pattern
= "%s|%s|" % (re
.escape(comment_delims
[0]),
900 re
.escape(comment_delims
[1]))
901 suffix_pattern
= "%s" % re
.escape(comment_delims
[2])
902 elif len(comment_delims
) == 2:
903 prefix_pattern
= "%s|" % re
.escape(comment_delims
[0])
904 suffix_pattern
= "%s" % re
.escape(comment_delims
[1])
905 elif len(comment_delims
) == 1:
906 prefix_pattern
= re
.escape(comment_delims
[-1])
908 else: # len(comment_delims) == 0
912 lic_begin_pattern
= re
.compile("""
915 (\*+\ BEGIN\ LICENSE\ BLOCK\ \*+
916 |\-+\ BEGIN\ LICENSE\ BLOCK\ \-+
917 | Version:\ MPL\ \d+\.\d+/GPL\ \d+\.\d+/LGPL\ \d+\.\d+
918 | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla[\w ]*
919 | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape[\w ]*
920 | Alternatively,\ the\ contents\ of\ this\ file\ may\ be\ used\ under\ the[\w ]*)
922 """ % (prefix_pattern
, suffix_pattern
), re
.VERBOSE
)
924 for i
in range(len(lines
)):
925 match
= lic_begin_pattern
.search(lines
[i
])
930 "prefix": match
.group("prefix"),
931 "space": match
.group("space"),
932 "suffix": match
.group("suffix")
934 # Optimization: If the line before the "beginline" is simply
935 # a block comment open, then include that line in the parsed-out
936 # license block. E.g.,
938 # - ***** BEGIN LICENSE BLOCK *****
940 if (len(comment_delims
) > 1 # only for block comments
941 and beginline
["prefix"] != comment_delims
[0]
943 and lines
[i
-1].strip() == comment_delims
[0]):
944 beginline
["linenum"] -= 1
945 beginline
["prefix"] = comment_delims
[0]
949 raise RelicError("couldn't find start line with this pattern (even "
950 "though it looks like there is a license block in "
951 "%s): %s" % (filename
, lic_begin_pattern
.pattern
))
952 log
.info("comment delimiters: %s", comment_delims
)
953 log
.debug("beginline dict: %s", beginline
)
954 lic_info
["comment_delims"] = comment_delims
955 lic_info
["begin_line"] = beginline
["linenum"]
956 lic_info
["first_prefix"] = beginline
["prefix"]
957 log
.info("prefix for first line: '%s'", beginline
["prefix"])
959 # - get second license line
960 lic_middle_pattern
= re
.compile("""
965 """ % (prefix_pattern
, suffix_pattern
),
967 # skip empty lines which might result in bogus scanning later, e.g.:
968 # mozilla/layout/html/tests/table/marvin/x_thead_align_center.xml
969 second_linenum
= beginline
["linenum"]+1
970 while second_linenum
< len(lines
):
971 if lines
[second_linenum
].strip():
973 log
.debug("skip blank 'second' line: %d", second_linenum
)
976 raise RelicError("all lines after the first license block line (%d) "
977 "were empty" % (beginline
["linenum"]+1))
978 match
= lic_middle_pattern
.search(lines
[second_linenum
])
981 "content": lines
[second_linenum
],
982 "linenum": second_linenum
,
983 "prefix": match
.group("prefix"),
984 "space": match
.group("space"),
985 "suffix": match
.group("suffix")
988 raise RelicError("didn't find second line with pattern: %s"
989 % lic_middle_pattern
.pattern
)
990 log
.debug("secondline dict: %s", secondline
)
991 lic_info
["subsequent_prefix"] = secondline
["prefix"]
992 log
.info("prefix for subsequent lines: '%s'", secondline
["prefix"])
994 # - find block comment end
995 orig_code_modified_pattern
= re
.compile("This Original Code has been "
997 non_lic_content_in_same_comment_block
= 0
998 if len(comment_delims
) == 1:
999 # line-style comments: The comment block "end" is defined as the
1000 # last line before a line NOT using the block comment delimiter.
1001 #XXX:BUG: This is not good enough for:
1002 # test/inputs/separated_license_comment_blocks.pl
1003 if comment_delims
[0] == "":
1005 "Don't know how to find the end of a line-style comment "
1006 "block when the delimiter is the empty string. (Basically "
1007 "this script cannot handle this type of file.)")
1008 for i
in range(beginline
["linenum"]+1, len(lines
)):
1009 if not lines
[i
].startswith(comment_delims
[0]):
1012 elif lines
[i
].find("END LICENSE BLOCK") != -1:
1015 # As per "test_relicense_trailing_orig_code_modified.pl", a
1016 # paragraph starting with:
1017 # This Original Code has been modified
1018 # is deemed to be OUTside the license block, i.e. it is not
1019 # replaced for relicensing.
1020 if orig_code_modified_pattern
.search(lines
[i
]):
1021 non_lic_content_in_same_comment_block
= 1
1022 # The endline is the first non-blank line before this one.
1025 line
= lines
[endlinenum
]
1026 match
= lic_middle_pattern
.search(line
)
1028 raise RelicError("Line did not match lic_middle_pattern "
1029 "unexpectedly: %r" % line
)
1030 if match
.group("content").strip(): # non-empty line
1035 raise RelicError("Could not find license comment block end "
1036 "line in '%s'." % filename
)
1037 elif len(comment_delims
) >= 2: # block-style comments
1038 for i
in range(beginline
["linenum"]+1, len(lines
)):
1039 if lines
[i
].find(comment_delims
[-1]) != -1:
1042 elif lines
[i
].find("END LICENSE BLOCK") != -1:
1044 non_lic_content_in_same_comment_block
= 1
1046 # As per "test_relicense_ibm_copyright_suffix.c", a
1047 # paragraph starting with:
1048 # This Original Code has been modified
1049 # is deemed to be OUTside the license block, i.e. it is not
1050 # replaced for relicensing.
1051 if orig_code_modified_pattern
.search(lines
[i
]):
1052 non_lic_content_in_same_comment_block
= 1
1053 # The endline is the first non-blank line before this one.
1056 line
= lines
[endlinenum
]
1057 match
= lic_middle_pattern
.search(line
)
1059 raise RelicError("Line did not match lic_middle_pattern "
1060 "unexpectedly: %r" % line
)
1061 if match
.group("content").strip(): # non-empty line
1066 raise RelicError("Could not find license comment block end "
1067 "line in '%s'." % filename
)
1068 if not non_lic_content_in_same_comment_block\
1069 and not lines
[endlinenum
].strip().endswith(comment_delims
[-1]):
1071 "There is text AFTER the license block comment end "
1072 "delimiter, but on the SAME LINE. This is unexpected. "
1073 "Bailing.\n%s:%s:%r"
1074 % (filename
, endlinenum
, lines
[endlinenum
]))
1075 else: # len(comment_delims) == 0
1076 # For files without a comment character to help out, we ONLY
1077 # successfully break on the full correct "END LICENSE BLOCK"
1079 for i
in range(beginline
["linenum"]+1, len(lines
)):
1080 if lines
[i
].find("END LICENSE BLOCK") != -1:
1083 elif i
> beginline
["linenum"]+1+50:
1084 raise RelicError("Haven't found 'END LICENSE BLOCK' marker "
1085 "within 50 lines of the start of the "
1086 "license block on line %d. Aborting."
1087 % (beginline
["linenum"]+1))
1088 # As per "test_relicense_trailing_orig_code_modified.pl", a
1089 # paragraph starting with:
1090 # This Original Code has been modified
1091 # is deemed to be OUTside the license block, i.e. it is not
1092 # replaced for relicensing.
1093 if orig_code_modified_pattern
.search(lines
[i
]):
1094 non_lic_content_in_same_comment_block
= 1
1095 # The endline is the first non-blank line before this one.
1098 line
= lines
[endlinenum
]
1099 match
= lic_middle_pattern
.search(line
)
1101 raise RelicError("Line did not match lic_middle_pattern "
1102 "unexpectedly: %r" % line
)
1103 if match
.group("content").strip(): # non-empty line
1108 raise RelicError("Could not find license comment block end "
1109 "line in '%s'." % filename
)
1111 # Test case: test_relicense_separated_license_comment_blocks.pl
1112 # It is possible that a separate comment block immediately following
1113 # the license block we just parsed should be included in the license
1115 if (not non_lic_content_in_same_comment_block
1116 and len(comment_delims
) == 1): # only do this for line-style comments
1118 re
.compile("^The content of this file are subject to", re
.I
),
1119 re
.compile("^Software distributed under the License", re
.I
),
1120 re
.compile("^The Original Code is", re
.I
),
1121 re
.compile("^The Initial Developer", re
.I
),
1122 re
.compile("^Contributor", re
.I
),
1123 re
.compile("^Alternatively, the content of this file", re
.I
),
1125 comment_line_pattern
= re
.compile("""
1129 (?P<suffix>%s|)\s*?$
1130 """ % (prefix_pattern
, suffix_pattern
),
1133 while i
+1 < len(lines
):
1134 i
+= 1; line
= lines
[i
]
1135 comment_index
= line
.find(comment_delims
[0])
1136 if comment_index
!= -1:
1137 content
= line
[:comment_index
].strip()
1138 comment
= line
[comment_index
+len(comment_delims
[0]):].strip()
1140 content
= line
.strip()
1142 if content
: # if non-comment content, then skip out
1146 for indicator
in lic_indicators
:
1147 if indicator
.search(comment
):
1148 # include this paragraph in the lic block
1149 while i
< len(lines
):
1150 i
+= 1; line
= lines
[i
]
1151 if not line
.strip().startswith(comment_delims
[0]):
1153 if not line
.strip()[len(comment_delims
[0]):]:
1158 break # this is a non-lic-related comment
1160 # Get the end-line data.
1161 if non_lic_content_in_same_comment_block
:
1162 lic_end_pattern
= re
.compile(
1163 "^(?P<prefix>%s)(?P<space>\s*).*?\s*?$"
1166 lic_end_pattern
= re
.compile(
1167 "^(?P<prefix>%s)(?P<space>\s*).*?(?P<suffix>%s)\s*?$"
1168 % (prefix_pattern
, suffix_pattern
))
1169 match
= lic_end_pattern
.match(lines
[endlinenum
])
1172 "content": lines
[endlinenum
],
1173 "linenum": endlinenum
,
1174 "prefix": match
.group("prefix"),
1175 "space": match
.group("space"),
1176 "suffix": match
.groupdict().get("suffix", ""),
1179 raise RelicError("license block end line did not match: line='%s', "
1181 % (lines
[endlinenum
], lic_end_pattern
.pattern
))
1182 log
.debug("endline dict: %s", endline
)
1183 lic_info
["last_suffix"] = endline
["suffix"]
1184 log
.info("suffix for last line: '%s'", endline
["suffix"])
1185 lic_info
["end_line"] = endline
["linenum"]
1186 log
.info("license lines: %d-%d", beginline
["linenum"], endline
["linenum"])
1188 # So at this point we have the beginline, secondline, and endline
1189 # dicts describing and bounding the license block.
1191 # - gather embedded lic data
1192 # As described here:
1193 # http://www.mozilla.org/MPL/relicensing-faq.html#new-license
1194 # we have to parse out the following possible fields:
1196 # original_code_date
1198 # initial_copyright_date
1200 lic_line_pattern
= re
.compile( # regex to parse out the line _body_
1201 "^(?P<prefix>%s)(?P<space>\s*)(?P<body>.*?)(?P<suffix>%s|)\s*?$"
1202 % (prefix_pattern
, suffix_pattern
))
1204 original_code_is
= None
1205 original_code_date
= None
1206 # Parse out the "The Original Code is ..." paragraph _content_.
1209 for i
in range(beginline
["linenum"], endline
["linenum"]+1):
1210 body
= lic_line_pattern
.match(lines
[i
]).group("body")
1211 if (not in_paragraph
and body
.startswith("The Original Code is")):
1214 if not body
.strip(): # i.e. a blank line, end of paragraph
1216 # ensure one space btwn lines
1217 if paragraph
: paragraph
= paragraph
.rstrip() + " "
1220 pattern1
= re
.compile('^The Original Code is (.*), released (.*)\.')
1221 match
= pattern1
.search(paragraph
)
1223 original_code_is
= match
.group(1)
1224 original_code_date
= match
.group(2)
1226 pattern2
= re
.compile('^The Original Code is (.*?)\.?$')
1227 match
= pattern2
.search(paragraph
)
1229 original_code_is
= match
.group(1)
1232 "%s: 'The Original Code is' paragraph did not match the "
1233 "expected patterns. paragraph=\n\t%r\n"
1234 "pattern1=\n\t%r\npattern2=\n\t%r"
1235 % (filename
, paragraph
, pattern1
.pattern
, pattern2
.pattern
))
1236 lic_info
["original_code_is"] = original_code_is
1237 lic_info
["original_code_date"] = original_code_date
1238 log
.info("original code is: %s", original_code_is
)
1239 log
.info("original_code_date: %s", original_code_date
)
1241 initial_developer
= None
1242 initial_copyright_date
= None
1243 # Parse out the "The Initial Developer..." paragraph _content_.
1246 for i
in range(beginline
["linenum"], endline
["linenum"]+1):
1247 body
= lic_line_pattern
.match(lines
[i
]).group("body")
1248 if (not in_paragraph
and
1249 (body
.startswith("The Initial Developer of") or
1250 body
.startswith("The Initial Developers of"))):
1253 if not body
.strip(): # i.e. a blank line, end of paragraph
1254 # Catch the possible case where there is an empty line
1255 # but the paragraph picks up on the next line with
1256 # "Portions created by"
1257 # (test_relicense_no_period_after_origcodeis.cpp).
1259 nextlinebody
= lic_line_pattern
.match(lines
[i
+1]).group("body")
1262 if not nextlinebody
.startswith("Portions created by"):
1264 # ensure one space btwn lines
1265 if paragraph
: paragraph
= paragraph
.rstrip() + " "
1268 pattern
= re
.compile("""^
1269 The\ Initial\ Developers?\ of\
1270 (the\ Original\ Code\ (is\ |are\ |is\.)|this\ code\ under\ the\ [MN]PL\ (is|are)\ )
1272 \.? # maybe a trailing period
1274 \s+Portions\ created\ by\ .*?
1275 are\ Copyright\ \(C\)\[?\ (?P<date>[\d-]+)
1276 .*? # maybe a trailing period
1277 (\s+All\ Rights\ Reserved\.)?
1280 match
= pattern
.search(paragraph
)
1283 "%s: 'This Initial Developer' paragraph did not match the "
1284 "expected pattern. paragraph=\n\t%r\npattern=\n\t%s"
1285 % (filename
, paragraph
, pattern
.pattern
))
1286 initial_developer
= match
.group("developer")
1287 initial_copyright_date
= match
.group("date")
1288 lic_info
["initial_developer"] = initial_developer
1289 lic_info
["initial_copyright_date"] = initial_copyright_date
1290 log
.info("initial developer paragraph: %r", paragraph
)
1291 log
.info("initial developer: %r", initial_developer
)
1292 log
.info("initial copyright date: %r", initial_copyright_date
)
1295 normal_leading_space
= None
1296 in_contributors_block
= 0
1297 contrib_end
= endline
["linenum"]
1298 # If line-style comment, include the last line in the block in the
1299 # range we examine; if block-style comment, we only allow it if the
1300 # comment-block doesn't end on the endline. On top of these
1301 # conditions we don't search the last line if it includes the
1302 # special end-of-license marker.
1303 if len(comment_delims
) == 1 or not endline
["suffix"]:
1304 if endline
["content"].find("END LICENSE BLOCK") == -1:
1306 for i
in range(beginline
["linenum"], contrib_end
):
1307 match
= lic_line_pattern
.match(lines
[i
])
1308 body
= match
.group("body")
1309 space
= match
.group("space").replace('\t', ' '*8)
1310 if not in_contributors_block \
1311 and body
.startswith("Contributor"):
1312 in_contributors_block
= 1
1313 normal_leading_space
= space
1314 # Try to pickup "foo@bar.org" as a contributor for a
1315 # possible line like this:
1316 # Contributor(s): foo@bar.org
1317 pivot
= body
.find(':')
1319 remainder
= body
[pivot
+1:].strip()
1321 contributors
.append(remainder
)
1322 elif in_contributors_block
:
1323 if not body
.strip():
1324 # i.e. a blank line, end of paragraph
1325 #XXX:BUG This condition causes the latter two
1326 # contributor lines to be lost from, e.g.,
1327 # test/x_thead_align_center.xml.
1329 if len(space
) <= len(normal_leading_space
):
1330 # A line in the "Contributor(s) paragraph is not
1331 # indented. This is considered an error. Likely this is
1332 # a (not indented) contributor, but it might also be the
1333 # start of another paragraph (i.e. no blank line
1334 # terminating the "Contributor(s):" paragraph). We could
1335 # just error out here, but this is very common in the
1336 # Moz tree (~500) so lets try to deal with it.
1337 # - Heuristic #1: if the line contains what looks like
1338 # an email address then it is a contributor.
1339 # - Heuristic #2 (to accommodate js/rhino): if the line
1340 # looks like just a person's name.
1341 # Otherwise, error out.
1342 words
= body
.split()
1344 lic_info
["unindented_contributor_lines"] = 1
1345 elif (2 <= len(words
) <= 3 and
1346 words
== [word
[0].upper()+word
[1:] for word
in words
]):
1347 # Try to accept the following names:
1351 lic_info
["unindented_contributor_lines"] = 1
1353 raise RelicError("This line is part of the "
1354 "'Contributor(s):' paragraph but (1) is not indented "
1355 "and (2) does not look like it contains an email "
1356 "address: %s:%s: %r" % (filename
, i
, lines
[i
]))
1357 contributors
.append(body
.strip())
1358 log
.info("contributors: %s", contributors
)
1359 lic_info
["contributors"] = contributors
1361 ## Optimization: The only content in the remaining license block lines
1362 ## (i.e. after the contributors block) should really be the GPL/LGPL
1363 ## or nothing. Trapping this will avoid losing the latter two
1364 ## contributor lines in test/x_thead_align_center.xml.
1365 #gpl_lgpl_lines = _g_trilicense_parts["gpl/lgpl"].splitlines(0)
1366 #gpl_lgpl = " ".join(gpl_lgpl_lines)
1367 #for i in range(i, endline["linenum"]):
1368 # match = lic_line_pattern.match(lines[i])
1369 # body = match.group("body")
1370 # space = match.group("space").replace('\t', ' '*8)
1371 # if not body.strip():
1373 # #XXX This test is not robust enough to use.
1374 # if (gpl_lgpl.find(body) == -1 and
1375 # body.find(gpl_lgpl) == -1):
1376 # print "QQQ: bogus following text: %r" % body
1381 def _report_on_file(path
, (results
, switch_to_mpl
, show_initial
, quick
, _errors
)):
1382 log
.debug("_report_on_file(path='%s', results)", path
)
1383 output
= path
+ "\n"
1386 if _is_binary(path
):
1387 output
+= "... binary, skipping this file\n"
1390 lic_info
= _get_license_info(path
, show_initial
, quick
)
1391 except RelicError
, ex
:
1392 return _relicensing_error(ex
, path
, _errors
)
1394 if log
.isEnabledFor(logging
.DEBUG
):
1395 pprint
.pprint(lic_info
)
1396 parts
= lic_info
["parts"]
1398 output
+= "... no license found\n"
1399 elif "unknown" in parts
:
1400 output
+= "... unknown license (possibly) found\n"
1401 elif ((parts
== ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
1402 parts
== ["block_begin", "npl", "gpl", "lgpl", "block_end"]) and
1403 not lic_info
.get("unindented_contributor_lines")):
1404 if (switch_to_mpl
and
1405 parts
== ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
1406 output
+= "... %s found (looks complete, but is not MPL)"\
1407 % "/".join(parts
) + "\n"
1409 output
+= "... %s found (looks complete)"\
1410 % "/".join(parts
) + "\n"
1412 output
+= "... %s found" % "/".join(parts
) + "\n"
1415 if "begin_line" in lic_info
and "end_line" in lic_info
:
1416 output
+= "... license block lines: %(begin_line)d-%(end_line)d"\
1418 if "original_code_is" in lic_info
:
1419 output
+= "... original code is: %(original_code_is)s"\
1421 if "original_code_date" in lic_info
:
1422 output
+= "... original code date: %(original_code_date)s"\
1424 if "initial_developer" in lic_info
:
1425 output
+= "... initial developer: %(initial_developer)s"\
1427 if "initial_copyright_date" in lic_info
:
1428 output
+= "... initial copyright date: %(initial_copyright_date)s"\
1430 if "contributors" in lic_info
:
1431 output
+= "... contributors: %s"\
1432 % ", ".join(lic_info
["contributors"]) + "\n"
1433 if lic_info
.get("unindented_contributor_lines"):
1434 output
+= "... one or more contributor lines were not indented properly"\
1438 if "initial_developer" in lic_info
:
1439 print lic_info
["initial_developer"]
1443 def _gather_info_on_file(path
, (results
, _errors
)):
1444 log
.debug("_gather_info_on_file(path='%s', results)", path
)
1445 # Skip binary files.
1447 if _is_binary(path
):
1448 log
.debug("Skipping binary file '%s'.", path
)
1450 except Exception, ex
:
1451 return _relicensing_error(
1452 "error determining if file is binary: %s" % ex
,
1456 results
[path
] = _get_license_info(path
)
1457 except RelicError
, ex
:
1458 return _relicensing_error(ex
, path
, _errors
, 1)
def _make_backup_path(path):
    """Return the first unused backup path of the form "<path>~<n>".

        "path" is the path of the file that is about to be backed up.

    The candidates "<path>~0" through "<path>~99" are probed in order
    and the first one that does not yet exist on disk is returned. A
    RelicError is raised if all 100 candidates are already taken.
    """
    attempt = 0
    while attempt < 100:
        candidate = "%s~%d" % (path, attempt)
        if not os.path.exists(candidate):
            return candidate
        attempt += 1
    raise RelicError("Could not find an unused backup path for '%s'." % path)
1469 def _relicensing_error(err
, path
, cache
=None, quiet
=0):
1470 """Handle an error during relicensing.
1472 "err" may be an error string or an exception instance.
1473 "path" is the path of the file on which this error occurred.
1474 "cache" is a mapping of path to errors on which errors may be
1475 stored for later reporting.
1476 "quiet" optionally allows one to silence the stdout output when
1479 If the --force option is in-effect then errors may be remembered and
1480 processing continues, rather than halting the whole process.
1485 if cache
is not None:
1487 elif isinstance(err
, Exception):
1490 raise RelicError("%s: %s" % (path
, err
))
def _get_comment_delim_sets(filename):
    """Return the comment delimiter info registered for the given file.

        "filename" is the path of the file to examine. The file itself
            is only opened (to read its first line) when neither its
            basename nor its extension is registered.

    The lookup is attempted in this order, stopping at the first hit:
        1. the basename, in '_g_basename_to_comment_info'
        2. the extension, in '_g_ext_to_comment_info'
        3. the shebang ("#!") line, matched against the patterns in
           '_g_shebang_pattern_to_comment_info'
    A trailing ".in" extension is dropped before steps 1 and 2.

    Raises RelicError if none of the lookups yields delimiter info.
    """
    if os.path.splitext(filename)[1] == ".in":
        # "<foo>.in" is generally a precursor for a filetype
        # identifiable without the ".in". Drop it.
        xfilename = os.path.splitext(filename)[0]
    else:
        xfilename = filename
    basename = os.path.basename(xfilename)
    # Computed up front because the error message below reports it.
    ext = os.path.splitext(xfilename)[1]

    comment_delims = None
    # special cases for some basenames
    try:
        comment_delims = _g_basename_to_comment_info[basename]
    except KeyError:
        pass
    if not comment_delims: # use the file extension
        try:
            comment_delims = _g_ext_to_comment_info[ext]
        except KeyError:
            pass
    if not comment_delims: # try to use the shebang line, if any
        fin = open(filename, 'r')
        try:
            firstline = fin.readline()
        finally:
            # Always release the handle, even if the read fails.
            fin.close()
        if firstline.startswith("#!"):
            # NOTE(review): the first matching pattern wins; confirm this
            # is the intended precedence for overlapping patterns.
            for pattern, cds in _g_shebang_pattern_to_comment_info:
                if pattern.match(firstline):
                    comment_delims = cds
                    break
    if not comment_delims:
        raise RelicError("%s: couldn't determine file type (and "
                         "comment delimiter info) from basename '%s' or "
                         "extension '%s'): you may need to add to "
                         "'_g_basename_to_comment_info', "
                         "'_g_ext_to_comment_info', "
                         "'_g_shebang_pattern_to_comment_info' "
                         "or one of the '_g_skip_*' globals"
                         % (filename, basename, ext))
    return comment_delims
1534 def _relicense_file(original_path
,
1535 (fallback_initial_copyright_date
,
1536 fallback_initial_developer
,
1537 fallback_original_code_is
,
1538 fallback_original_code_date
,
1544 """Relicense the given file.
1546 "original_path" is the file to relicense
1547 "fallback_initial_copyright_date"
1548 "fallback_initial_developer"
1549 "fallback_original_code_is"
1550 "fallback_original_code_date"
1551 User-specified fallback values to use for the new license
1552 block if they cannot be found in the original.
1553 "switch_to_mpl" is a boolean indicating if an NPL-based license
1554 should be converted to MPL.
1555 "backup" (optional, default false) is a boolean indicating if
1556 backups should be made
1557 "results" is a dictionary in which to store statistics and errors.
1558 See relicense() for schema.
1559 "force_relicensing" is a boolean indicating if relicensing
1560 should be done even if the license block looks complete.
1561 "_errors" is a dictionary on which errors are reported
1562 (keyed by file path) when the force option is in effect.
1564 The function does not return anything.
1566 log
.debug("_relicense_file(original_path='%s')", original_path
)
1569 # Ensure can access file.
1570 if not os
.access(original_path
, os
.R_OK|os
.W_OK
):
1571 return _relicensing_error("cannot access", original_path
, _errors
)
1573 log
.info("have read/write access")
1575 # Skip binary files.
1577 if _is_binary(original_path
):
1578 print "... binary, skipping this file"
1579 results
["binary"] += 1
1581 except Exception, ex
:
1582 return _relicensing_error(
1583 "error determining if file is binary: %s" % ex
,
1584 original_path
, _errors
)
1587 lic_info
= _get_license_info(original_path
, 0)
1588 except RelicError
, ex
:
1589 return _relicensing_error(ex
, original_path
, _errors
)
1591 # Load fallback info if necessary.
1592 if not lic_info
.get("initial_copyright_date"):
1593 lic_info
["initial_copyright_date"] = fallback_initial_copyright_date
1594 if not lic_info
.get("initial_developer"):
1595 lic_info
["initial_developer"] = fallback_initial_developer
1596 if not lic_info
.get("original_code_is"):
1597 lic_info
["original_code_is"] = fallback_original_code_is
1598 if not lic_info
.get("original_code_date"):
1599 lic_info
["original_code_date"] = fallback_original_code_date
1601 # Return/abort if cannot or do not need to re-license.
1602 parts
= lic_info
["parts"]
1604 results
["no license"] += 1
1605 print "... no license found, skipping this file"
1607 elif "unknown" in parts
:
1608 return _relicensing_error("unknown license (possibly) found",
1609 original_path
, _errors
)
1610 elif parts
.count("block_begin") > 1: # sanity check
1611 return _relicensing_error(
1612 "'BEGIN LICENSE BLOCK' delimiter found more than once",
1613 original_path
, _errors
)
1614 elif parts
.count("block_end") > 1: # sanity check
1615 return _relicensing_error(
1616 "'END LICENSE BLOCK' delimiter found more than once",
1617 original_path
, _errors
)
1618 elif not lic_info
["initial_developer"]:
1619 return _relicensing_error(
1620 "no 'Initial Developer' section was found -- use "
1621 "the -i option to specify your own",
1622 original_path
, _errors
)
1623 elif not lic_info
["initial_copyright_date"]:
1624 return _relicensing_error(
1625 "no initial copyright year was found -- use "
1626 "the -y option to specify your own",
1627 original_path
, _errors
)
1628 elif not lic_info
["original_code_is"]:
1629 return _relicensing_error(
1630 "no 'Original Code is' section was found -- use "
1631 "the -o option to specify your own",
1632 original_path
, _errors
)
1633 elif ((parts
== ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
1634 parts
== ["block_begin", "npl", "gpl", "lgpl", "block_end"]) and
1635 not lic_info
.get("unindented_contributor_lines")):
1636 #XXX Should add an option to relicense anyway because matching
1637 # is not super-strict. E.g. nsWidgetFactory.cpp.
1638 if (switch_to_mpl
and
1639 parts
== ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
1640 print "... %s found (looks complete, but is not MPL)"\
1642 elif force_relicensing
:
1643 print "... %s found (looks complete, but forcing relicensing)"\
1646 results
["good"] += 1
1647 print "... %s found (looks complete), nothing to do"\
1651 # We need to re-license this file.
1652 print "... %s found, need to relicense" % "/".join(parts
)
1653 if lic_info
["original_code_is"]:
1654 print "... original code is: %(original_code_is)s" % lic_info
1655 if lic_info
["original_code_date"]:
1656 print "... original code date: %(original_code_date)s" % lic_info
1657 if lic_info
["initial_developer"]:
1658 print "... initial developer: %(initial_developer)s" % lic_info
1659 if lic_info
["initial_copyright_date"]:
1660 print "... initial copyright date: %(initial_copyright_date)s" % lic_info
1661 if lic_info
["contributors"]:
1662 print "... contributors: %s" % ", ".join(lic_info
["contributors"])
1664 # Put the license block together.
1665 # - build up the license block from the appropriate parts
1667 if (not switch_to_mpl
) and ( "npl" in parts
):
1668 trilicense_name
= "NPL/GPL/LGPL"
1669 trilicense
+= _g_trilicense_parts
["npl"]
1671 trilicense_name
= "MPL/GPL/LGPL"
1672 trilicense
+= _g_trilicense_parts
["mpl"]
1673 print "... replacing lines %d-%d with %s tri-license"\
1674 % (lic_info
["begin_line"], lic_info
["end_line"], trilicense_name
)
1675 if lic_info
["original_code_is"] is not None:
1676 if lic_info
["original_code_date"] is not None:
1677 trilicense
+= _g_trilicense_parts
["original_code_is_with_date"] % lic_info
1679 trilicense
+= _g_trilicense_parts
["original_code_is"] % lic_info
1681 # raise RelicError("Gerv, how should the new license block handle no "
1682 # "'Original Code is...' information? --TM")
1683 if (lic_info
["initial_developer"] is not None
1684 and lic_info
["initial_copyright_date"] is not None):
1685 trilicense
+= _g_trilicense_parts
["initial_developer"] % lic_info
1687 # raise RelicError("Gerv, how should the new license block handle no "
1688 # "'Initial Developer is...' information? --TM")
1689 if lic_info
["contributors"]:
1690 contributors
= " " + "\n ".join(lic_info
["contributors"]) + "\n"
1693 trilicense
+= _g_trilicense_parts
["contributors"] % contributors
1694 if trilicense_name
== "NPL/GPL/LGPL":
1695 trilicense
+= _g_trilicense_parts
["gpl/lgpl for npl"]
1696 else: # trilicense_name == "MPL/GPL/LGPL"
1697 trilicense
+= _g_trilicense_parts
["gpl/lgpl for mpl"]
1699 # get fallback comment subsequent prefix
1700 fallback_prefix
= _get_comment_delim_sets(original_path
)
1702 # - add the comment delimiters
1703 lines
= trilicense
.splitlines()
1704 for i
in range(len(lines
)):
1706 prefix
= lic_info
["first_prefix"]
1708 if lic_info
["subsequent_prefix"]:
1709 prefix
= lic_info
["subsequent_prefix"]
1711 prefix
= fallback_prefix
[0][1]
1713 if len(lic_info
["comment_delims"]) == 0:
1714 lines
[i
] = prefix
+ lines
[i
]
1716 lines
[i
] = prefix
+ ' ' + lines
[i
]
1717 else: # don't add trailing whitespace
1719 if lic_info
["last_suffix"]: # don't add that ' ' if there is no suffix
1720 lines
[-1] += ' ' + lic_info
["last_suffix"]
1721 for i
in range(len(lines
)): lines
[i
] += '\n'
1722 trilicense_lines
= lines
1724 ##### uncomment to debug license block
1725 # pprint.pprint(lines)
1728 # Skip out now if doing a dry-run.
1730 results
["relicensed"] += 1
1735 backup_path
= _make_backup_path(original_path
)
1736 print "... backing up to '%s'" % backup_path
1738 shutil
.copy(original_path
, backup_path
)
1739 except EnvironmentError, ex
:
1740 return _relicensing_error(ex
, original_path
, _errors
)
1742 # Re-license the file.
1744 fin
= open(original_path
, "r")
1746 lines
= fin
.readlines()
1750 lines
[lic_info
["begin_line"]:lic_info
["end_line"]+1] = trilicense_lines
1752 fout
= open(original_path
, "w")
1754 fout
.write(''.join(lines
))
1757 results
["relicensed"] += 1
1758 print "... done relicensing '%s'" % original_path
1761 print "... error relicensing, restoring original"
1762 if os
.path
.exists(original_path
):
1763 os
.remove(original_path
)
1764 os
.rename(backup_path
, original_path
)
1766 print "... error relicensing, file may be corrupted"
1767 # fallback to type_ for string exceptions
1768 type_
, value
, tb
= sys
.exc_info()
1769 return _relicensing_error(value
or type_
,
1770 original_path
, _errors
)
1773 def _add_license_to_file(original_path
,
1774 (initial_copyright_date
,
1781 """Add a license block to the given file.
1783 "original_path" is the file to add a license to
1784 "initial_copyright_date"
1787 "original_code_date"
1788 User-specified values to use for the new license. All but
1789 "original_code_date" are required.
1790 "backup" (optional, default false) is a boolean indicating if
1791 backups should be made
1792 "results" is a dictionary in which to store statistics and errors.
1793 See relicense() for schema.
1794 "_errors" is a dictionary on which errors are reported
1795 (keyed by file path) when the force option is in effect.
1797 The function does not return anything.
1799 log
.debug("_add_license_to_file(original_path='%s')", original_path
)
1802 # Ensure can access file.
1803 if not os
.access(original_path
, os
.R_OK|os
.W_OK
):
1804 return _relicensing_error("cannot access", original_path
, _errors
)
1806 log
.info("have read/write access")
1808 # Skip binary files.
1810 if _is_binary(original_path
):
1811 print "... binary, skipping this file"
1812 results
["binary"] += 1
1814 except Exception, ex
:
1815 return _relicensing_error(
1816 "error determining if file is binary: %s" % ex
,
1817 original_path
, _errors
)
1820 lic_info
= _get_license_info(original_path
, 0)
1821 except RelicError
, ex
:
1822 return _relicensing_error(ex
, original_path
, _errors
)
1824 # Return/abort if cannot or do not need to re-license.
1825 parts
= lic_info
["parts"]
1826 if lic_info
["parts"]: # has a license
1827 results
["license"] += 1
1828 print "... license found, skipping this file"
1830 #... else we need to add a license to this file.
1831 print "... no license found, need to add one"
1833 # Load license info.
1834 lic_info
["initial_developer"] = initial_developer
1835 print "... initial developer: %(initial_developer)s" % lic_info
1836 lic_info
["initial_copyright_date"] = initial_copyright_date
1837 print "... initial copyright date: %(initial_copyright_date)s" % lic_info
1838 lic_info
["original_code_is"] = original_code_is
1839 print "... original code is: %(original_code_is)s" % lic_info
1840 if original_code_date
:
1841 lic_info
["original_code_date"] = original_code_date
1842 print "... original code date: %(original_code_date)s" % lic_info
1844 lic_info
["original_code_date"] = None
1846 # Determine what line we can start putting the license block on.
1847 # Typically this would be line 0, but for the following exceptions:
1848 # - Shebang (#!) lines
1849 # - Emacs local variables line:
1850 # /* -*- Mode: C++; ... -*- */
1851 # This line does not HAVE to be first, but that seems to be a
1852 # trend, so might as well honour it.
1853 # - XML magic "number": <?xml version="2.0" ... ?>
1854 # where "..." might include newlines
1857 comment_delim_sets
= _get_comment_delim_sets(original_path
)
1858 except RelicError
, ex
:
1859 return _relicensing_error(ex
, original_path
, _errors
, 1)
1860 fin
= open(original_path
, 'r')
1862 lines
= fin
.readlines()
1865 # If this is an XML file, advance past the magic number tag.
1866 if lines
and lines
[0].find("<?xml") != -1:
1868 if (line
.find('encoding="utf-8"') != -1
1869 and line
.startswith("\xef\xbb\xbf")):
1871 # Note: this is hardly robust Unicode XML handling :)
1873 if line
.startswith("<?xml"):
1874 end_index
= lines
[startline
].find("?>")
1875 while startline
< len(lines
):
1877 if end_index
!= -1: # found end of tag
1879 # Note: this does not catch something like this:
1880 # <?xml version="2.0"?> <?stylesheet ...
1882 # but that is just crazy.
1883 # else, advance past a possible shebang line.
1885 for comment_delims
in comment_delim_sets
:
1886 if (len(comment_delims
) == 1 and comment_delims
[0] == "#"
1887 and lines
[0].startswith("#!")):
1889 # Advance past an Emacs local variable line.
1890 comment_delims
= None
1891 if lines
[startline
].find("-*-") != -1:
1892 for comment_delims
in comment_delim_sets
:
1893 if lines
[startline
].find(comment_delims
[0]) != -1:
1896 # We were hoping to be able to determine which of the set of
1897 # possible commenting styles was in use by finding the
1898 # comment start token on the same line as the -*-
1899 # Emacs-modeline signifier, but could not. This likely means
1900 # that this file uses a block-style comment but the block
1901 # doesn't start on the same line. Fallback to the
1902 # block-style comment delimiter set, if there is one.
1903 for comment_delims
in comment_delim_sets
:
1904 if len(comment_delims
) == 3:
1907 comment_delims
= comment_delim_sets
[0]
1909 if len(comment_delims
) == 1: # line-style comments
1911 else: # block-style comments
1913 while startline
< len(lines
):
1914 line
= lines
[startline
]
1916 while linepos
< len(line
):
1918 i
= line
.find(comment_delims
[0], linepos
)
1925 i
= line
.find(comment_delims
[-1], linepos
)
1935 # Put the license block together.
1936 # - build up the license block from the appropriate parts
1937 trilicense_name
= "MPL/GPL/LGPL"
1938 print "... adding %s tri-license starting at line %s (zero-based)"\
1939 % (trilicense_name
, startline
)
1940 trilicense
= _g_trilicense_parts
["mpl"]
1941 if lic_info
["original_code_date"] is not None:
1942 trilicense
+= _g_trilicense_parts
["original_code_is_with_date"] % lic_info
1944 trilicense
+= _g_trilicense_parts
["original_code_is"] % lic_info
1945 trilicense
+= _g_trilicense_parts
["initial_developer"] % lic_info
1946 if lic_info
.get("contributors"):
1947 contributors
= " " + "\n ".join(lic_info
["contributors"]) + "\n"
1950 trilicense
+= _g_trilicense_parts
["contributors"] % contributors
1951 trilicense
+= _g_trilicense_parts
["gpl/lgpl for mpl"]
1952 # - add the comment delimiters
1953 if comment_delims
is None:
1954 for comment_delims
in comment_delim_sets
:
1955 if lines
[startline
].find(comment_delims
[0]) != -1:
1957 elif len(comment_delims
) == 3 and lines
[startline
].find(comment_delims
[1]) != -1:
1960 # We were hoping to be able to determine which of the set of
1961 # possible commenting styles was in use by finding the
1962 # comment start token on the same line as the -*-
1963 # Emacs-modeline signifier, but could not. This likely means
1964 # that this file uses a block-style comment but the block
1965 # doesn't start on the same line. Fallback to the
1966 # block-style comment delimiter set, if there is one.
1967 for comment_delims
in comment_delim_sets
:
1968 if len(comment_delims
) == 3:
1971 comment_delims
= comment_delim_sets
[0]
1972 print "comment delims were none: %r" % comment_delims
1973 t_lines
= trilicense
.splitlines()
1974 if len(comment_delims
) == 1: # line-style comments
1975 for i
in range(len(t_lines
)):
1977 t_lines
[i
] = comment_delims
[0] + ' ' + t_lines
[i
]
1978 else: # don't add trailing whitespace
1979 t_lines
[i
] = comment_delims
[0]
1980 else: # block-style comments
1982 t_lines
[0] = comment_delims
[0] + ' ' + t_lines
[0]
1983 else: # don't add trailing whitespace
1984 t_lines
[0] = comment_delims
[0]
1985 for i
in range(1, len(t_lines
)):
1987 t_lines
[i
] = comment_delims
[1] + ' ' + t_lines
[i
]
1988 else: # don't add trailing whitespace
1989 t_lines
[i
] = comment_delims
[1]
1990 t_lines
[-1] += ' ' + comment_delims
[-1]
1991 for i
in range(len(t_lines
)): t_lines
[i
] += '\n'
1992 t_lines
[-1] += '\n' # add a blank line at end of lic block
1993 trilicense_lines
= t_lines
1994 #pprint.pprint(t_lines)
1996 # Skip out now if doing a dry-run.
1998 results
["added"] += 1
2003 backup_path
= _make_backup_path(original_path
)
2004 print "... backing up to '%s'" % backup_path
2006 shutil
.copy(original_path
, backup_path
)
2007 except EnvironmentError, ex
:
2008 return _relicensing_error(ex
, original_path
, _errors
)
2010 # Add the license to the file.
2012 lines
[startline
:startline
] = trilicense_lines
2014 fout
= open(original_path
, "w")
2016 fout
.write(''.join(lines
))
2019 results
["added"] += 1
2020 print "... done adding license to '%s'" % original_path
2023 print "... error adding license, restoring original"
2024 if os
.path
.exists(original_path
):
2025 os
.remove(original_path
)
2026 os
.rename(backup_path
, original_path
)
2028 print "... error adding license, file may be corrupted"
2029 # fallback to type_ for string exceptions
2030 type_
, value
, tb
= sys
.exc_info()
2031 return _relicensing_error(value
or type_
,
2032 original_path
, _errors
)
# _traverse_dir: visitor handed to os.path.walk (see _traverse). It is called
# with the (file_handler, results) pair that was passed as walk's "arg", the
# directory being visited and the list of entry names in that directory.
# Each entry that is a regular file is offered to file_handler(path, results)
# when a handler was supplied.
# NOTE: the tuple parameter in the signature is Python 2-only syntax.
# NOTE(review): the statements guarded by the two _should_skip_* tests are
# not visible in this extract; going by the in-code comment they presumably
# drop the entry from "names" and move on to the next one -- confirm against
# the complete file.
2035 def _traverse_dir((file_handler
, results
), dirname
, names
):
2036 """os.path.walk target to traverse the give dir"""
2037 log
.debug("_traverse_dir((file_handler, results), dirname='%s', "
2038 "names=%s)", dirname
, names
)
2039 # Iterate over names backwards because may modify it in-place.
2040 # Modifying it in-place ensures that removed entries are not
2041 # traversed by os.path.walk.
# Counting the index down from len(names)-1 to 0 keeps the indices that are
# still to be visited valid even if the current entry is removed.
2042 for i
in range(len(names
)-1, -1, -1):
2043 path
= os
.path
.join(dirname
, names
[i
])
2044 if os
.path
.isdir(path
):
2045 if _should_skip_dir(path
):
2048 if os
.path
.isfile(path
):
2049 if _should_skip_file(path
):
# A handler of None is tolerated: the file is then visited without any
# callback being made.
2052 if file_handler
is not None:
2053 file_handler(path
, results
)
def _traverse(paths, file_handler, arg):
    """Traverse the given path(s) and call the given callback for each.

        "paths" is either a list of files or directories, or it is an
            input stream with a path on each line.
        "file_handler" is a callable to be called on each file traversed.
            It is called with the following signature:
                file_handler(path, arg)
            It may be None, in which case files are visited but no
            callback is made.
        "arg" is some object passed to each callback.

    This method takes care of skipping files and directories that should
    be skipped according to .cvsignore files and the configured skip
    paths. Paths that do not exist are logged with a warning and skipped.
    This method does not return anything. RelicError is raised for a
    path that exists but is neither a regular file nor a directory.
    """
    log.debug("_traverse(paths=%s, file_handler=%s, arg=%s)",
              paths, file_handler, arg)

    for path in paths:
        # Chomp the trailing newline that is present when 'paths' is a
        # stream. endswith() is used instead of indexing path[-1] so that
        # an empty string falls through to the "does not exist" warning
        # below rather than raising IndexError.
        if path.endswith("\n"):
            path = path[:-1]
        if not os.path.exists(path):
            log.warn("'%s' does not exist, skipping", path)
        elif os.path.isfile(path):
            if _should_skip_file(path):
                continue
            if file_handler is not None:
                file_handler(path, arg)
        elif os.path.isdir(path):
            if _should_skip_dir(path):
                continue
            # _traverse_dir receives (file_handler, arg) as its first
            # argument and applies the same skip rules to every entry
            # below this directory.
            os.path.walk(path, _traverse_dir, (file_handler, arg))
        else:
            raise RelicError("unexpected path type '%s'" % path)
2091 #---- public routines
# relicense: public entry point for the "relicense" mode. It walks "paths"
# with _traverse(), using _relicense_file as the per-file handler, and then
# prints a summary built from the counters held in "results".
# NOTE(review): this extract does not show every original line -- the
# remaining keyword parameters described in the docstring, the creation of
# "results" and the tail of the _traverse() argument tuple are missing.
# Confirm the details against the complete file.
2093 def relicense(paths
,
2094 fallback_initial_copyright_date
=None,
2095 fallback_initial_developer
=None,
2096 fallback_original_code_is
=None,
2097 fallback_original_code_date
=None,
2100 force_relicensing
=0,
2102 """Relicense the given file(s) (or files in the given dir).
2104 "paths" is either a list of files or directories, or it is an
2105 input stream with a path on each line.
2106 "fallback_initial_copyright_date"
2107 "fallback_initial_developer"
2108 "fallback_original_code_is"
2109 "fallback_original_code_date"
2110 User-specified fallback values to use for the new license
2111 block if they cannot be found in the original.
2112 "switch_to_mpl" (optional, default false) is a boolean
2113 indicating if an NPL-based license should be converted to
2115 "backup" (optional, default false)is a boolean indicating if
2116 backups should be made
2117 "force_relicensing" (option, default false) is a boolean
2118 indicating if relicensing should happen even if the license
2119 block looks complete
2120 "_errors" (optional) is a dictionary on which errors are reported
2121 (keyed by file path) when the force option is in effect.
2123 This method does not return anything. It will raise RelicError if
2124 there is a problem. Note that OSError/IOError may also be raised.
2126 log
.debug("relicense(paths=%s, backup=%r)", paths
, backup
)
# Every traversed file is handed to _relicense_file together with one shared
# argument tuple; the four fallback values are the visible part of it.
2133 _traverse(paths
, _relicense_file
,
2134 (fallback_initial_copyright_date
,
2135 fallback_initial_developer
,
2136 fallback_original_code_is
,
2137 fallback_original_code_date
,
# Summary of the per-category counters in "results". The two
# "Files re-licensed" lines are alternatives for a dry run and a real run;
# the condition choosing between them is not visible here (TODO confirm).
2144 print "--------------------- Summary of Results ------------------------"
2145 print "Files skipped b/c they are binary:", results
["binary"]
2146 print "Files skipped b/c they already had proper license:", results
["good"]
2147 print "Files skipped b/c they had no license:", results
["no license"]
2149 print "Files re-licensed: %d (dry-run)" % results
["relicensed"]
2151 print "Files re-licensed:", results
["relicensed"]
2152 print "-----------------------------------------------------------------"
# addlicense: public entry point for the "add" mode. It checks that the
# required license values were supplied, walks "paths" with _traverse() using
# _add_license_to_file as the per-file handler, and prints a summary built
# from the counters held in "results".
# NOTE(review): this extract does not show every original line -- some
# parameters (initial_developer, original_code_is, backup, _errors are used
# or documented below but their declarations are missing), the creation of
# "results" and the tail of the _traverse() argument tuple. Confirm against
# the complete file.
2155 def addlicense(paths
,
2156 initial_copyright_date
,
2159 original_code_date
=None,
2162 """Add a license to those of the given file(s) that do not appear to
2165 "paths" is either a list of files or directories, or it is an
2166 input stream with a path on each line.
2167 "initial_copyright_date"
2170 "original_code_date"
2171 User-specified values to use for the new license block. All
2172 but "original_code_date" are required.
2173 "backup" (optional, default false) is a boolean indicating if
2174 backups should be made
2175 "_errors" (optional) is a dictionary on which errors are reported
2176 (keyed by file path) when the force option is in effect.
2178 This method does not return anything. It will raise RelicError if
2179 there is a problem. Note that OSError/IOError may also be raised.
2181 log
.debug("addlicense(paths=%s, backup=%r)", paths
, backup
)
# Fail fast, before any file is touched, when one of the required values is
# missing or empty; each message names the command-line option that sets it.
2182 if not initial_copyright_date
:
2183 raise RelicError("no Initial Developer copyright year was "
2184 "specified -- use the -y option")
2185 if not initial_developer
:
2186 raise RelicError("no 'Initial Developer' section was specified "
2187 "-- use the -i option")
2188 if not original_code_is
:
2189 raise RelicError("no 'Original Code is' section was specified "
2190 "-- use the -o option")
# Every traversed file is handed to _add_license_to_file together with one
# shared argument tuple that starts with initial_copyright_date.
2197 _traverse(paths
, _add_license_to_file
,
2198 (initial_copyright_date
,
# Summary of the per-category counters in "results". The two "license was
# added" lines are alternatives for a dry run and a real run; the condition
# choosing between them is not visible here (TODO confirm).
2206 print "--------------------- Summary of Results ------------------------"
2207 print "Files skipped b/c they are binary:", results
["binary"]
2208 print "Files skipped b/c they already had a license:", results
["license"]
2210 print "Files to which a license was added: %d (dry-run)" % results
["added"]
2212 print "Files to which a license was added: %d" % results
["added"]
2213 print "-----------------------------------------------------------------"
# report: public entry point for the "report" mode. After logging its
# arguments it passes (results, switch_to_mpl, show_initial, quick, _errors)
# as the argument tuple of a call whose head is not visible in this extract.
# NOTE(review): presumably that call is _traverse(paths, <per-file report
# handler>, ...) like the other public routines, and "results" is created
# just before it -- confirm against the complete file.
2216 def report(paths
, switch_to_mpl
=0, show_initial
=1, quick
=0, _errors
=None):
2217 """Report on the existing licenses in the given file(s).
2219 "paths" is either a list of files or directories, or it is an
2220 input stream with a path on each line.
2221 "switch_to_mpl" (optional, default false) is a boolean
2222 indicating if an NPL-based license should be converted to
2224 "show_initial" (optional, default true) is a boolean indicating
2225 if the initial developer should be displayed for each file.
2226 "quick" (optional, default false) is a boolean indicating if only
2227 basic license checking should be applied.
2228 "_errors" (optional) is a dictionary on which errors are reported
2229 (keyed by file path) when the force option is in effect.
2231 This method does not return anything. It will raise RelicError if
2234 log
.debug("report(paths=%s)", paths
)
2238 (results
, switch_to_mpl
, show_initial
, quick
, _errors
))
# statistics: public entry point for the "statistics" mode. It gathers
# per-file license info with _traverse()/_gather_info_on_file into "results"
# (file path -> info dictionary), groups the files by a license-category
# name, prints a table of hit counts and percentages, and then prints counts
# of files whose license info is incomplete or badly formatted.
# NOTE(review): this extract does not show every original line (for example
# the creation of "results" and "stats", the hit-count increment, and the
# statements inside several "if"/"for" bodies). Confirm against the complete
# file.
2241 def statistics(paths
, extended
=0, quick
=0, _errors
=None):
2242 """Show a summary table of licenses in files in the given path(s).
2244 "paths" is either a list of files or directories, or it is an
2245 input stream with a path on each line.
2246 "extended" (optional) is a boolean indicating if extended
2247 statistics should be shown
2248 "quick" (optional) is a boolean indicating if quick scan mode should
2250 "_errors" (optional) is a dictionary on which errors are reported
2251 (keyed by file path) when the force option is in effect.
2253 This method does not return anything.
2255 #XXX Info gathering returns a lot more info now. We might be able to
2256 # output more interesting stats.
2257 log
.debug("statistics(paths=%s, extended=%s)",
# _gather_info_on_file receives (results, _errors) for every traversed file;
# the loops below read "results" as a mapping of file path -> info dict.
2260 _traverse(paths
, _gather_info_on_file
, (results
, _errors
))
2262 # Process results and print out some stats.
2264 # <lic type>: [<number of hits>, [<files>...]]
# Work out one category name per file. The block_begin/block_end markers are
# filtered out of "parts" first because they are not license types.
2266 for file, info
in results
.items():
2267 lic_types
= [p
for p
in info
["parts"]
2268 if p
not in ["block_begin", "block_end"]]
2270 name
= "<none found>"
2271 elif "unknown" in lic_types
:
2272 name
= "<unknown license>"
2273 # Distinguish between complete mpl/gpl/lgpl (i.e. with the block
2274 # begin and end tokens) and incomplete mpl/gpl/lgpl. Likewise
2276 elif (info
["parts"] == ["block_begin", "mpl", "gpl", "lgpl", "block_end"]):
2277 name
= "mpl/gpl/lgpl (complete)"
2278 elif (info
["parts"] == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
2279 name
= "npl/gpl/lgpl (complete)"
2281 name
= "/".join(lic_types
)
# First file seen in a category creates its [hit count, file list] entry.
2282 if name
not in stats
: stats
[name
] = [0, []]
2284 stats
[name
][1].append(file)
# Flatten to (hits, name, files) tuples: with "hits" first, the plain
# list.sort() below orders the categories by hit count.
2286 statslist
= [(hits
, name
, files
) for name
, (hits
, files
) in stats
.items()]
2287 statslist
.sort() # sort by number of hits
2288 statslist
.reverse() # most common first
2289 print "Summary of Licenses in Files"
2290 print "============================"
2291 print " Number Percent License"
2292 print "------- -------- -----------"
2293 # 115 55.55% npl/gpl
# The percentage is relative to the total number of files in "results".
2294 for hits
, name
, files
in statslist
:
2295 print "%7d %7.2f%% %s"\
2296 % (hits
, (hits
*100.0/len(results
)), name
)
2297 #XXX Removed for now because I am not clear if this is at all
2301 # for file in files:
2302 # ext = os.path.splitext(file)[1]
2303 # if ext not in hits_per_ext: hits_per_ext[ext] = 0
2304 # hits_per_ext[ext] += 1
2305 # hits_per_ext_list = [(h, e) for e, h in hits_per_ext.items()]
2306 # hits_per_ext_list.sort()
2307 # hits_per_ext_list.reverse()
2308 # for ext_hits, ext in hits_per_ext_list:
2309 # if not ext: ext = "<no extension>"
2310 # print " %7d %s" % (ext_hits, ext)
2311 print "----------------------------"
2312 print "%7d files processed" % len(results
)
2314 # Print some other interesting statistics.
2315 no_original_code_is
= []
2316 no_initial_developer
= []
2317 unindented_contributor_lines
= []
# A file is listed only when the key is present in its info AND the value is
# None; files whose info lacks the key altogether are not counted.
2318 for file, info
in results
.items():
2319 if "original_code_is" in info
and info
["original_code_is"] is None:
2320 no_original_code_is
.append(file)
2321 if "initial_developer" in info
and info
["initial_developer"] is None:
2322 no_initial_developer
.append(file)
2323 if info
.get("unindented_contributor_lines"):
2324 unindented_contributor_lines
.append(file)
# NOTE(review): the bodies of the three "for f in ..." loops below, and
# whatever guards them, are not visible in this extract; presumably they list
# the individual files -- confirm.
2326 print "Licensed files with no 'Initial Developer...' info: %d" % len(no_initial_developer
)
2328 for f
in no_initial_developer
:
2330 print "Licensed files with no 'Original Code is...' info: %d" % len(no_original_code_is
)
2332 for f
in no_original_code_is
:
2334 print "Licensed files with improperly indented 'Contributor(s):' line(s): %d" % len(unindented_contributor_lines
)
2336 for f
in unindented_contributor_lines
:
# Per-category file listing. files[:] makes a copy, so the list stored in
# "stats" is left untouched by anything done to sortedFiles.
2339 for hits
, name
, files
in statslist
:
2340 print "Files in license category '%s'" % name
2341 sortedFiles
= files
[:]
2343 for file in sortedFiles
:
# Body of the command-line driver that the bottom of the file invokes as
# main(sys.argv): it parses the options in "argv" with getopt, records the
# requested mode and settings, dispatches to statistics()/report() or the
# relicensing routines, and finally prints any errors collected in "_errors".
# NOTE(review): the "def" line, the docstring, and the bodies of most option
# branches and exception handlers are not visible in this extract. Comments
# below describe only what can be read here -- confirm against the complete
# file.
2353 opts
, args
= getopt
.getopt(argv
[1:], "VvadhqfML:sxry:i:o:D:ARI",
2354 ["version", "verbose", "all", "help", "debug",
2355 "dry-run", "force", "MPL", "license=",
2356 "statistics", "relicense", "backup", "add", "defaults",
2357 "force-relicense", "initial-developers", "quick"])
2358 except getopt
.GetoptError
, ex
:
2360 log
.error("Try `%s --help'.", argv
[0])
# Defaults for the settings that the option loop below may override.
2368 force_relicensing
= 0
2369 fallback_initial_copyright_date
= None
2370 fallback_initial_developer
= None
2371 fallback_original_code_is
= None
2372 fallback_original_code_date
= None
# One branch per recognised option.
2375 for opt
, optarg
in opts
:
2376 if opt
in ("-h", "--help"):
2377 sys
.stdout
.write(__doc__
)
2379 elif opt
in ("-V", "--version"):
2380 ver
= '.'.join([str(part
) for part
in _version_
])
2381 print "relic %s" % ver
2383 elif opt
in ("-v", "--verbose"):
2384 log
.setLevel(logging
.INFO
)
2385 elif opt
in ("-a", "--all"):
2388 elif opt
in ("-M", "--MPL"):
2390 elif opt
in ("-d", "--debug"):
2391 log
.setLevel(logging
.DEBUG
)
2393 elif opt
in ("--dry-run",):
2396 elif opt
in ("-f", "--force"):
2399 elif opt
in ("-s", "--statistics"):
2401 elif opt
in ("-x",):
2403 elif opt
in ("-r", "--relicense"):
2405 elif opt
in ("-R", "--force-relicense"):
2407 force_relicensing
= 1
2408 elif opt
in ("-A", "--add"):
2410 elif opt
== "--backup":
# The option tests for the next four assignments are not visible here; the
# getopt string above declares y:, i:, o: and D: as options taking a value
# (TODO confirm which option feeds which fallback).
2413 fallback_initial_copyright_date
= optarg
2415 fallback_initial_developer
= optarg
2417 fallback_original_code_is
= optarg
2419 fallback_original_code_date
= optarg
2420 elif opt
in ("-I", "--initial-developers"):
# --defaults fills in three of the fallback values with fixed strings.
2422 elif opt
== "--defaults":
2423 fallback_original_code_is
= "mozilla.org Code"
2424 fallback_initial_copyright_date
= "2001"
2425 fallback_initial_developer
= "Netscape Communications Corporation"
2426 elif opt
in ("-q", "--quick"):
2430 # Prepare the input.
2432 log
.debug("no given files, trying stdin")
2437 # Invoke the requested action.
2439 if mode
== "relicense":
2441 fallback_initial_copyright_date
,
2442 fallback_initial_developer
,
2443 fallback_original_code_is
,
2444 fallback_original_code_date
,
2449 elif mode
== "statistics":
2450 statistics(paths
, extended
, quick
, _errors
=_errors
)
2451 elif mode
== "report":
2452 report(paths
, switch_to_mpl
, show_initial
, quick
, _errors
=_errors
)
2455 fallback_initial_copyright_date
,
2456 fallback_initial_developer
,
2457 fallback_original_code_is
,
2458 fallback_original_code_date
,
2462 raise RelicError("unexpected mode: '%s'" % mode
)
2464 # Report any delayed errors.
# "_errors" maps a file path to the error recorded for it.
2467 print "=================== Summary of Errors ==========================="
2468 print "Files with processing errors:", len(_errors
)
2469 print "================================================================="
2470 for file, error
in _errors
.items():
2471 print "%s: %s" % (file, error
)
2473 print "================================================================="
2474 except RelicError
, ex
:
2476 " (the --force option can be used to skip problematic "
2477 "files and continue processing rather than aborting)")
2481 traceback
.print_exception(*sys
.exc_info())
2483 except KeyboardInterrupt:
# Script entry point: run the command-line driver with the process arguments
# and use its return value as the process exit status.
if __name__ == "__main__":
    # Raising SystemExit directly is what sys.exit(status) does.
    raise SystemExit(main(sys.argv))